Commit 9fd8b7d6 authored by Robert Haas

Factor some code out of create_grouping_paths.

This is preparatory refactoring to pave the way for partition-wise
aggregate, which will reuse the new subroutines for child grouping
rels.  It also does not seem like a bad idea on general principle,
as the function was getting pretty long.

Jeevan Chalke.  The larger patch series of which this patch is a part
was reviewed and tested by Antonin Houska, Rajkumar Raghuwanshi,
Ashutosh Bapat, David Rowley, Dilip Kumar, Konstantin Knizhnik,
Pascal Legrand, and me.  Some cosmetic changes by me.

Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
parent 4971d2a3
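In outline, this patch reduces the tail of create_grouping_paths() to two helper calls plus a new up-front eligibility test. The sketch below is a condensed paraphrase of the code in the diff that follows (argument lists abbreviated with /* ... */ comments, error reporting elided), not the verbatim source:

	/* All the parallel-aggregation eligibility checks now live in one place. */
	try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
												agg_costs);

	if (try_parallel_aggregation)
	{
		/* ... build partial_grouping_target and partial/final agg costs ... */
		add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel,
										  /* targets, costs, flags ... */);
	}

	/* Build final grouping paths (sorted, hashed, Gather/Gather Merge). */
	add_paths_to_grouping_rel(root, input_rel, grouped_rel,
							  /* targets, costs, flags ... */);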
@@ -185,6 +185,26 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
 					  bool *have_postponed_srfs);
 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
 					  List *targets, List *targets_contain_srfs);
+static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
+					  RelOptInfo *grouped_rel, PathTarget *target,
+					  PathTarget *partial_grouping_target,
+					  const AggClauseCosts *agg_costs,
+					  const AggClauseCosts *agg_final_costs,
+					  grouping_sets_data *gd, bool can_sort, bool can_hash,
+					  double dNumGroups, List *havingQual);
+static void add_partial_paths_to_grouping_rel(PlannerInfo *root,
+					  RelOptInfo *input_rel,
+					  RelOptInfo *grouped_rel,
+					  PathTarget *target,
+					  PathTarget *partial_grouping_target,
+					  AggClauseCosts *agg_partial_costs,
+					  AggClauseCosts *agg_final_costs,
+					  grouping_sets_data *gd,
+					  bool can_sort,
+					  bool can_hash,
+					  List *havingQual);
+static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
+					  RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
 
 /*****************************************************************************
@@ -3610,15 +3630,11 @@ create_grouping_paths(PlannerInfo *root,
 	PathTarget *partial_grouping_target = NULL;
 	AggClauseCosts agg_partial_costs;	/* parallel only */
 	AggClauseCosts agg_final_costs;		/* parallel only */
-	Size		hashaggtablesize;
 	double		dNumGroups;
-	double		dNumPartialGroups = 0;
 	bool		can_hash;
 	bool		can_sort;
 	bool		try_parallel_aggregation;
-	ListCell   *lc;
 
 	/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
 	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
@@ -3754,44 +3770,11 @@ create_grouping_paths(PlannerInfo *root,
 				 (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
 
 	/*
-	 * If grouped_rel->consider_parallel is true, then paths that we generate
-	 * for this grouping relation could be run inside of a worker, but that
-	 * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
-	 * execution strategy.  Figure that out.
+	 * Figure out whether a PartialAggregate/Finalize Aggregate execution
+	 * strategy is viable.
 	 */
-	if (!grouped_rel->consider_parallel)
-	{
-		/* Not even parallel-safe. */
-		try_parallel_aggregation = false;
-	}
-	else if (input_rel->partial_pathlist == NIL)
-	{
-		/* Nothing to use as input for partial aggregate. */
-		try_parallel_aggregation = false;
-	}
-	else if (!parse->hasAggs && parse->groupClause == NIL)
-	{
-		/*
-		 * We don't know how to do parallel aggregation unless we have either
-		 * some aggregates or a grouping clause.
-		 */
-		try_parallel_aggregation = false;
-	}
-	else if (parse->groupingSets)
-	{
-		/* We don't know how to do grouping sets in parallel. */
-		try_parallel_aggregation = false;
-	}
-	else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
-	{
-		/* Insufficient support for partial mode. */
-		try_parallel_aggregation = false;
-	}
-	else
-	{
-		/* Everything looks good. */
-		try_parallel_aggregation = true;
-	}
+	try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
+												agg_costs);
 
 	/*
 	 * Before generating paths for grouped_rel, we first generate any possible
@@ -3803,8 +3786,6 @@ create_grouping_paths(PlannerInfo *root,
 	 */
 	if (try_parallel_aggregation)
 	{
-		Path	   *cheapest_partial_path = linitial(input_rel->partial_pathlist);
-
 		/*
 		 * Build target list for partial aggregate paths.  These paths cannot
 		 * just emit the same tlist as regular aggregate paths, because (1) we
@@ -3814,11 +3795,6 @@ create_grouping_paths(PlannerInfo *root,
 		 */
 		partial_grouping_target = make_partial_grouping_target(root, target);
 
-		/* Estimate number of partial groups. */
-		dNumPartialGroups = get_number_of_groups(root,
-												 cheapest_partial_path->rows,
-												 gd);
-
 		/*
 		 * Collect statistics about aggregates for estimating costs of
 		 * performing aggregation in parallel.
@@ -3841,732 +3817,393 @@ create_grouping_paths(PlannerInfo *root,
 								 &agg_final_costs);
 		}
 
-		if (can_sort)
-		{
-			/* This was checked before setting try_parallel_aggregation */
-			Assert(parse->hasAggs || parse->groupClause);
-
-			/*
-			 * Use any available suitably-sorted path as input, and also
-			 * consider sorting the cheapest partial path.
-			 */
-			foreach(lc, input_rel->partial_pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-
-				is_sorted = pathkeys_contained_in(root->group_pathkeys,
-												  path->pathkeys);
-				if (path == cheapest_partial_path || is_sorted)
-				{
-					/* Sort the cheapest partial path, if it isn't already */
-					if (!is_sorted)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-
-					if (parse->hasAggs)
-						add_partial_path(grouped_rel, (Path *)
-										 create_agg_path(root,
-														 grouped_rel,
-														 path,
-														 partial_grouping_target,
-														 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-														 AGGSPLIT_INITIAL_SERIAL,
-														 parse->groupClause,
-														 NIL,
-														 &agg_partial_costs,
-														 dNumPartialGroups));
-					else
-						add_partial_path(grouped_rel, (Path *)
-										 create_group_path(root,
-														   grouped_rel,
-														   path,
-														   partial_grouping_target,
-														   parse->groupClause,
-														   NIL,
-														   dNumPartialGroups));
-				}
-			}
-		}
-
-		if (can_hash)
-		{
-			/* Checked above */
-			Assert(parse->hasAggs || parse->groupClause);
-
-			hashaggtablesize =
-				estimate_hashagg_tablesize(cheapest_partial_path,
-										   &agg_partial_costs,
-										   dNumPartialGroups);
-
-			/*
-			 * Tentatively produce a partial HashAgg Path, depending on if it
-			 * looks as if the hash table will fit in work_mem.
-			 */
-			if (hashaggtablesize < work_mem * 1024L)
-			{
-				add_partial_path(grouped_rel, (Path *)
-								 create_agg_path(root,
-												 grouped_rel,
-												 cheapest_partial_path,
-												 partial_grouping_target,
-												 AGG_HASHED,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 &agg_partial_costs,
-												 dNumPartialGroups));
-			}
-		}
+		add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
+										  partial_grouping_target,
+										  &agg_partial_costs, &agg_final_costs,
+										  gd, can_sort, can_hash,
+										  (List *) parse->havingQual);
 	}
 
 	/* Build final grouping paths */
-	if (can_sort)
-	{
-		/*
-		 * Use any available suitably-sorted path as input, and also consider
-		 * sorting the cheapest-total path.
-		 */
-		foreach(lc, input_rel->pathlist)
-		{
-			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_path || is_sorted)
-			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-
-				/* Now decide what to stick atop it */
-				if (parse->groupingSets)
-				{
-					consider_groupingsets_paths(root, grouped_rel,
-												path, true, can_hash, target,
-												gd, agg_costs, dNumGroups);
-				}
-				else if (parse->hasAggs)
-				{
-					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
-					 */
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 target,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
-											 (List *) parse->havingQual,
-											 agg_costs,
-											 dNumGroups));
-				}
-				else if (parse->groupClause)
-				{
-					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
-					 */
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   target,
-											   parse->groupClause,
-											   (List *) parse->havingQual,
-											   dNumGroups));
-				}
-				else
-				{
-					/* Other cases should have been handled above */
-					Assert(false);
-				}
-			}
-		}
-
-		/*
-		 * Now generate a complete GroupAgg Path atop of the cheapest partial
-		 * path.  We can do this using either Gather or Gather Merge.
-		 */
-		if (grouped_rel->partial_pathlist)
-		{
-			Path	   *path = (Path *) linitial(grouped_rel->partial_pathlist);
-			double		total_groups = path->rows * path->parallel_workers;
-
-			path = (Path *) create_gather_path(root,
-											   grouped_rel,
-											   path,
-											   partial_grouping_target,
-											   NULL,
-											   &total_groups);
-
-			/*
-			 * Since Gather's output is always unsorted, we'll need to sort,
-			 * unless there's no GROUP BY clause or a degenerate (constant)
-			 * one, in which case there will only be a single group.
-			 */
-			if (root->group_pathkeys)
-				path = (Path *) create_sort_path(root,
-												 grouped_rel,
-												 path,
-												 root->group_pathkeys,
-												 -1.0);
-
-			if (parse->hasAggs)
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 target,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_FINAL_DESERIAL,
-										 parse->groupClause,
-										 (List *) parse->havingQual,
-										 &agg_final_costs,
-										 dNumGroups));
-			else
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   target,
-										   parse->groupClause,
-										   (List *) parse->havingQual,
-										   dNumGroups));
-
-			/*
-			 * The point of using Gather Merge rather than Gather is that it
-			 * can preserve the ordering of the input path, so there's no
-			 * reason to try it unless (1) it's possible to produce more than
-			 * one output row and (2) we want the output path to be ordered.
-			 */
-			if (parse->groupClause != NIL && root->group_pathkeys != NIL)
-			{
-				foreach(lc, grouped_rel->partial_pathlist)
-				{
-					Path	   *subpath = (Path *) lfirst(lc);
-					Path	   *gmpath;
-					double		total_groups;
-
-					/*
-					 * It's useful to consider paths that are already properly
-					 * ordered for Gather Merge, because those don't need a
-					 * sort.  It's also useful to consider the cheapest path,
-					 * because sorting it in parallel and then doing Gather
-					 * Merge may be better than doing an unordered Gather
-					 * followed by a sort.  But there's no point in
-					 * considering non-cheapest paths that aren't already
-					 * sorted correctly.
-					 */
-					if (path != subpath &&
-						!pathkeys_contained_in(root->group_pathkeys,
-											   subpath->pathkeys))
-						continue;
-
-					total_groups = subpath->rows * subpath->parallel_workers;
-
-					gmpath = (Path *)
-						create_gather_merge_path(root,
-												 grouped_rel,
-												 subpath,
-												 partial_grouping_target,
-												 root->group_pathkeys,
-												 NULL,
-												 &total_groups);
-
-					if (parse->hasAggs)
-						add_path(grouped_rel, (Path *)
-								 create_agg_path(root,
-												 grouped_rel,
-												 gmpath,
-												 target,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_FINAL_DESERIAL,
-												 parse->groupClause,
-												 (List *) parse->havingQual,
-												 &agg_final_costs,
-												 dNumGroups));
-					else
-						add_path(grouped_rel, (Path *)
-								 create_group_path(root,
-												   grouped_rel,
-												   gmpath,
-												   target,
-												   parse->groupClause,
-												   (List *) parse->havingQual,
-												   dNumGroups));
-				}
-			}
-		}
-	}
-
-	if (can_hash)
-	{
-		if (parse->groupingSets)
-		{
-			/*
-			 * Try for a hash-only groupingsets path over unsorted input.
-			 */
-			consider_groupingsets_paths(root, grouped_rel,
-										cheapest_path, false, true, target,
-										gd, agg_costs, dNumGroups);
-		}
-		else
-		{
-			hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
-														  agg_costs,
-														  dNumGroups);
-
-			/*
-			 * Provided that the estimated size of the hashtable does not
-			 * exceed work_mem, we'll generate a HashAgg Path, although if we
-			 * were unable to sort above, then we'd better generate a Path, so
-			 * that we at least have one.
-			 */
-			if (hashaggtablesize < work_mem * 1024L ||
-				grouped_rel->pathlist == NIL)
-			{
-				/*
-				 * We just need an Agg over the cheapest-total input path,
-				 * since input order won't matter.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root, grouped_rel,
-										 cheapest_path,
-										 target,
-										 AGG_HASHED,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 (List *) parse->havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-		}
-
-		/*
-		 * Generate a HashAgg Path atop of the cheapest partial path. Once
-		 * again, we'll only do this if it looks as though the hash table
-		 * won't exceed work_mem.
-		 */
-		if (grouped_rel->partial_pathlist)
-		{
-			Path	   *path = (Path *) linitial(grouped_rel->partial_pathlist);
-
-			hashaggtablesize = estimate_hashagg_tablesize(path,
-														  &agg_final_costs,
-														  dNumGroups);
-
-			if (hashaggtablesize < work_mem * 1024L)
-			{
-				double		total_groups = path->rows * path->parallel_workers;
-
-				path = (Path *) create_gather_path(root,
-												   grouped_rel,
-												   path,
-												   partial_grouping_target,
-												   NULL,
-												   &total_groups);
-
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 target,
-										 AGG_HASHED,
-										 AGGSPLIT_FINAL_DESERIAL,
-										 parse->groupClause,
-										 (List *) parse->havingQual,
-										 &agg_final_costs,
-										 dNumGroups));
-			}
-		}
-	}
+	add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
+							  partial_grouping_target, agg_costs,
+							  &agg_final_costs, gd, can_sort, can_hash,
+							  dNumGroups, (List *) parse->havingQual);
 
 	/* Give a helpful error if we failed to find any implementation */
 	if (grouped_rel->pathlist == NIL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("could not implement GROUP BY"),
 				 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
 
 	/*
 	 * If there is an FDW that's responsible for all baserels of the query,
 	 * let it consider adding ForeignPaths.
 	 */
 	if (grouped_rel->fdwroutine &&
 		grouped_rel->fdwroutine->GetForeignUpperPaths)
 		grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
 													  input_rel, grouped_rel);
 
 	/* Let extensions possibly add some more paths */
 	if (create_upper_paths_hook)
 		(*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
 									input_rel, grouped_rel);
 
 	/* Now choose the best path(s) */
 	set_cheapest(grouped_rel);
 
 	/*
 	 * We've been using the partial pathlist for the grouped relation to hold
 	 * partially aggregated paths, but that's actually a little bit bogus
 	 * because it's unsafe for later planning stages -- like ordered_rel ---
 	 * to get the idea that they can use these partial paths as if they didn't
 	 * need a FinalizeAggregate step.  Zap the partial pathlist at this stage
 	 * so we don't get confused.
 	 */
 	grouped_rel->partial_pathlist = NIL;
 
 	return grouped_rel;
 }
 
 /*
  * For a given input path, consider the possible ways of doing grouping sets on
  * it, by combinations of hashing and sorting.  This can be called multiple
  * times, so it's important that it not scribble on input.  No result is
  * returned, but any generated paths are added to grouped_rel.
  */
 static void
 consider_groupingsets_paths(PlannerInfo *root,
 							RelOptInfo *grouped_rel,
 							Path *path,
 							bool is_sorted,
 							bool can_hash,
 							PathTarget *target,
 							grouping_sets_data *gd,
 							const AggClauseCosts *agg_costs,
 							double dNumGroups)
 {
 	Query	   *parse = root->parse;
 
 	/*
 	 * If we're not being offered sorted input, then only consider plans that
 	 * can be done entirely by hashing.
 	 *
 	 * We can hash everything if it looks like it'll fit in work_mem. But if
 	 * the input is actually sorted despite not being advertised as such, we
 	 * prefer to make use of that in order to use less memory.
 	 *
 	 * If none of the grouping sets are sortable, then ignore the work_mem
 	 * limit and generate a path anyway, since otherwise we'll just fail.
 	 */
 	if (!is_sorted)
 	{
 		List	   *new_rollups = NIL;
 		RollupData *unhashed_rollup = NULL;
 		List	   *sets_data;
 		List	   *empty_sets_data = NIL;
 		List	   *empty_sets = NIL;
 		ListCell   *lc;
 		ListCell   *l_start = list_head(gd->rollups);
 		AggStrategy strat = AGG_HASHED;
 		Size		hashsize;
 		double		exclude_groups = 0.0;
 
 		Assert(can_hash);
 
 		if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
 		{
 			unhashed_rollup = lfirst_node(RollupData, l_start);
 			exclude_groups = unhashed_rollup->numGroups;
 			l_start = lnext(l_start);
 		}
 
 		hashsize = estimate_hashagg_tablesize(path,
 											  agg_costs,
 											  dNumGroups - exclude_groups);
 
 		/*
 		 * gd->rollups is empty if we have only unsortable columns to work
 		 * with.  Override work_mem in that case; otherwise, we'll rely on the
 		 * sorted-input case to generate usable mixed paths.
 		 */
 		if (hashsize > work_mem * 1024L && gd->rollups)
 			return;				/* nope, won't fit */
 
 		/*
 		 * We need to burst the existing rollups list into individual grouping
 		 * sets and recompute a groupClause for each set.
 		 */
 		sets_data = list_copy(gd->unsortable_sets);
 
 		for_each_cell(lc, l_start)
 		{
 			RollupData *rollup = lfirst_node(RollupData, lc);
 
 			/*
 			 * If we find an unhashable rollup that's not been skipped by the
 			 * "actually sorted" check above, we can't cope; we'd need sorted
 			 * input (with a different sort order) but we can't get that here.
 			 * So bail out; we'll get a valid path from the is_sorted case
 			 * instead.
 			 *
 			 * The mere presence of empty grouping sets doesn't make a rollup
 			 * unhashable (see preprocess_grouping_sets), we handle those
 			 * specially below.
 			 */
 			if (!rollup->hashable)
 				return;
 			else
 				sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
 		}
 		foreach(lc, sets_data)
 		{
 			GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
 			List	   *gset = gs->set;
 			RollupData *rollup;
 
 			if (gset == NIL)
 			{
 				/* Empty grouping sets can't be hashed. */
 				empty_sets_data = lappend(empty_sets_data, gs);
 				empty_sets = lappend(empty_sets, NIL);
 			}
 			else
 			{
 				rollup = makeNode(RollupData);
 
 				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
 														 gd->tleref_to_colnum_map);
 				rollup->numGroups = gs->numGroups;
 				rollup->hashable = true;
 				rollup->is_hashed = true;
 				new_rollups = lappend(new_rollups, rollup);
 			}
 		}
 
 		/*
 		 * If we didn't find anything nonempty to hash, then bail.  We'll
 		 * generate a path from the is_sorted case.
 		 */
 		if (new_rollups == NIL)
 			return;
 
 		/*
 		 * If there were empty grouping sets they should have been in the
 		 * first rollup.
 		 */
 		Assert(!unhashed_rollup || !empty_sets);
 
 		if (unhashed_rollup)
 		{
 			new_rollups = lappend(new_rollups, unhashed_rollup);
 			strat = AGG_MIXED;
 		}
 		else if (empty_sets)
 		{
 			RollupData *rollup = makeNode(RollupData);
 
 			rollup->groupClause = NIL;
 			rollup->gsets_data = empty_sets_data;
 			rollup->gsets = empty_sets;
 			rollup->numGroups = list_length(empty_sets);
 			rollup->hashable = false;
 			rollup->is_hashed = false;
 			new_rollups = lappend(new_rollups, rollup);
 			strat = AGG_MIXED;
 		}
 
 		add_path(grouped_rel, (Path *)
 				 create_groupingsets_path(root,
 										  grouped_rel,
 										  path,
 										  target,
 										  (List *) parse->havingQual,
 										  strat,
 										  new_rollups,
 										  agg_costs,
 										  dNumGroups));
 		return;
 	}
 
 	/*
 	 * If we have sorted input but nothing we can do with it, bail.
 	 */
 	if (list_length(gd->rollups) == 0)
 		return;
 
 	/*
 	 * Given sorted input, we try and make two paths: one sorted and one mixed
 	 * sort/hash. (We need to try both because hashagg might be disabled, or
 	 * some columns might not be sortable.)
 	 *
 	 * can_hash is passed in as false if some obstacle elsewhere (such as
 	 * ordered aggs) means that we shouldn't consider hashing at all.
 	 */
 	if (can_hash && gd->any_hashable)
 	{
 		List	   *rollups = NIL;
 		List	   *hash_sets = list_copy(gd->unsortable_sets);
 		double		availspace = (work_mem * 1024.0);
 		ListCell   *lc;
 
 		/*
 		 * Account first for space needed for groups we can't sort at all.
 		 */
 		availspace -= (double) estimate_hashagg_tablesize(path,
 														  agg_costs,
 														  gd->dNumHashGroups);
 
 		if (availspace > 0 && list_length(gd->rollups) > 1)
 		{
 			double		scale;
 			int			num_rollups = list_length(gd->rollups);
 			int			k_capacity;
 			int		   *k_weights = palloc(num_rollups * sizeof(int));
 			Bitmapset  *hash_items = NULL;
 			int			i;
 
 			/*
 			 * We treat this as a knapsack problem: the knapsack capacity
 			 * represents work_mem, the item weights are the estimated memory
 			 * usage of the hashtables needed to implement a single rollup,
 			 * and we really ought to use the cost saving as the item value;
 			 * however, currently the costs assigned to sort nodes don't
 			 * reflect the comparison costs well, and so we treat all items as
 			 * of equal value (each rollup we hash instead saves us one sort).
 			 *
 			 * To use the discrete knapsack, we need to scale the values to a
 			 * reasonably small bounded range.  We choose to allow a 5% error
 			 * margin; we have no more than 4096 rollups in the worst possible
 			 * case, which with a 5% error margin will require a bit over 42MB
 			 * of workspace. (Anyone wanting to plan queries that complex had
 			 * better have the memory for it.  In more reasonable cases, with
 			 * no more than a couple of dozen rollups, the memory usage will
 			 * be negligible.)
 			 *
 			 * k_capacity is naturally bounded, but we clamp the values for
 			 * scale and weight (below) to avoid overflows or underflows (or
 			 * uselessly trying to use a scale factor less than 1 byte).
 			 */
 			scale = Max(availspace / (20.0 * num_rollups), 1.0);
 			k_capacity = (int) floor(availspace / scale);
 
 			/*
 			 * We leave the first rollup out of consideration since it's the
 			 * one that matches the input sort order.  We assign indexes "i"
 			 * to only those entries considered for hashing; the second loop,
 			 * below, must use the same condition.
 			 */
 			i = 0;
 			for_each_cell(lc, lnext(list_head(gd->rollups)))
 			{
 				RollupData *rollup = lfirst_node(RollupData, lc);
 
 				if (rollup->hashable)
 				{
 					double		sz = estimate_hashagg_tablesize(path,
 																agg_costs,
 																rollup->numGroups);
 
 					/*
 					 * If sz is enormous, but work_mem (and hence scale) is
 					 * small, avoid integer overflow here.
 					 */
 					k_weights[i] = (int) Min(floor(sz / scale),
 											 k_capacity + 1.0);
 					++i;
 				}
 			}
 
 			/*
 			 * Apply knapsack algorithm; compute the set of items which
 			 * maximizes the value stored (in this case the number of sorts
 			 * saved) while keeping the total size (approximately) within
 			 * capacity.
 			 */
 			if (i > 0)
 				hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
 
 			if (!bms_is_empty(hash_items))
 			{
 				rollups = list_make1(linitial(gd->rollups));
 
 				i = 0;
 				for_each_cell(lc, lnext(list_head(gd->rollups)))
 				{
 					RollupData *rollup = lfirst_node(RollupData, lc);
 
 					if (rollup->hashable)
 					{
 						if (bms_is_member(i, hash_items))
 							hash_sets = list_concat(hash_sets,
 													list_copy(rollup->gsets_data));
 						else
 							rollups = lappend(rollups, rollup);
 						++i;
 					}
 					else
 						rollups = lappend(rollups, rollup);
 				}
 			}
 		}
 
 		if (!rollups && hash_sets)
 			rollups = list_copy(gd->rollups);
 
 		foreach(lc, hash_sets)
 		{
 			GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
 			RollupData *rollup = makeNode(RollupData);
 
 			Assert(gs->set != NIL);
 
 			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
 													 gd->tleref_to_colnum_map);
 			rollup->numGroups = gs->numGroups;
 			rollup->hashable = true;
 			rollup->is_hashed = true;
 			rollups = lcons(rollup, rollups);
 		}
 
 		if (rollups)
 		{
 			add_path(grouped_rel, (Path *)
 					 create_groupingsets_path(root,
 											  grouped_rel,
 											  path,
 											  target,
 											  (List *) parse->havingQual,
 											  AGG_MIXED,
 											  rollups,
 											  agg_costs,
 											  dNumGroups));
 		}
 	}
 
 	/*
 	 * Now try the simple sorted case.
 	 */
 	if (!gd->unsortable_sets)
 		add_path(grouped_rel, (Path *)
 				 create_groupingsets_path(root,
 										  grouped_rel,
 										  path,
 										  target,
 										  (List *) parse->havingQual,
 										  AGG_SORTED,
 										  gd->rollups,
 										  agg_costs,
 										  dNumGroups));
 }
 
 /*
  * create_window_paths
@@ -6101,116 +5738,563 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 	/* Set up RTE/RelOptInfo arrays */
 	setup_simple_rel_arrays(root);
 
 	/* Build RelOptInfo */
 	rel = build_simple_rel(root, 1, NULL);
 
 	/* Locate IndexOptInfo for the target index */
 	indexInfo = NULL;
 	foreach(lc, rel->indexlist)
 	{
 		indexInfo = lfirst_node(IndexOptInfo, lc);
 		if (indexInfo->indexoid == indexOid)
 			break;
 	}
 
 	/*
 	 * It's possible that get_relation_info did not generate an IndexOptInfo
 	 * for the desired index; this could happen if it's not yet reached its
 	 * indcheckxmin usability horizon, or if it's a system index and we're
 	 * ignoring system indexes.  In such cases we should tell CLUSTER to not
 	 * trust the index contents but use seqscan-and-sort.
 	 */
 	if (lc == NULL)				/* not in the list? */
 		return true;			/* use sort */
 
 	/*
 	 * Rather than doing all the pushups that would be needed to use
 	 * set_baserel_size_estimates, just do a quick hack for rows and width.
 	 */
 	rel->rows = rel->tuples;
 	rel->reltarget->width = get_relation_data_width(tableOid, NULL);
 
 	root->total_table_pages = rel->pages;
 
 	/*
 	 * Determine eval cost of the index expressions, if any.  We need to
 	 * charge twice that amount for each tuple comparison that happens during
 	 * the sort, since tuplesort.c will have to re-evaluate the index
 	 * expressions each time.  (XXX that's pretty inefficient...)
 	 */
 	cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
 	comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
 
 	/* Estimate the cost of seq scan + sort */
 	seqScanPath = create_seqscan_path(root, rel, NULL, 0);
 	cost_sort(&seqScanAndSortPath, root, NIL,
 			  seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
 			  comparisonCost, maintenance_work_mem, -1.0);
 
 	/* Estimate the cost of index scan */
 	indexScanPath = create_index_path(root, indexInfo,
 									  NIL, NIL, NIL, NIL, NIL,
 									  ForwardScanDirection, false,
 									  NULL, 1.0, false);
 
 	return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
 }
 
 /*
  * get_partitioned_child_rels
  *		Returns a list of the RT indexes of the partitioned child relations
  *		with rti as the root parent RT index.  Also sets
  *		*part_cols_updated to true if any of the root rte's updated
  *		columns is used in the partition key either of the relation whose RTI
  *		is specified or of any child relation.
  *
  * Note: This function might get called even for range table entries that
  * are not partitioned tables; in such a case, it will simply return NIL.
  */
 List *
 get_partitioned_child_rels(PlannerInfo *root, Index rti,
 						   bool *part_cols_updated)
 {
 	List	   *result = NIL;
 	ListCell   *l;
 
 	if (part_cols_updated)
 		*part_cols_updated = false;
 
 	foreach(l, root->pcinfo_list)
 	{
 		PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);
 
 		if (pc->parent_relid == rti)
 		{
 			result = pc->child_rels;
 			if (part_cols_updated)
 				*part_cols_updated = pc->part_cols_updated;
 			break;
 		}
 	}
 
 	return result;
 }
 
 /*
  * get_partitioned_child_rels_for_join
  *		Build and return a list containing the RTI of every partitioned
  *		relation which is a child of some rel included in the join.
  */
 List *
 get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
 {
 	List	   *result = NIL;
 	ListCell   *l;
 
 	foreach(l, root->pcinfo_list)
 	{
 		PartitionedChildRelInfo *pc = lfirst(l);
 
 		if (bms_is_member(pc->parent_relid, join_relids))
 			result = list_concat(result, list_copy(pc->child_rels));
 	}
 
 	return result;
 }
+
+/*
+ * add_paths_to_grouping_rel
+ *
+ * Add non-partial paths to grouping relation.
+ */
+static void
+add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
+						  RelOptInfo *grouped_rel, PathTarget *target,
+						  PathTarget *partial_grouping_target,
+						  const AggClauseCosts *agg_costs,
+						  const AggClauseCosts *agg_final_costs,
+						  grouping_sets_data *gd, bool can_sort, bool can_hash,
+						  double dNumGroups, List *havingQual)
+{
+	Query	   *parse = root->parse;
+	Path	   *cheapest_path = input_rel->cheapest_total_path;
+	ListCell   *lc;
+
+	if (can_sort)
+	{
+		/*
+		 * Use any available suitably-sorted path as input, and also consider
+		 * sorting the cheapest-total path.
+		 */
+		foreach(lc, input_rel->pathlist)
+		{
+			Path	   *path = (Path *) lfirst(lc);
+			bool		is_sorted;
+
+			is_sorted = pathkeys_contained_in(root->group_pathkeys,
+											  path->pathkeys);
+			if (path == cheapest_path || is_sorted)
+			{
+				/* Sort the cheapest-total path if it isn't already sorted */
+				if (!is_sorted)
+					path = (Path *) create_sort_path(root,
+													 grouped_rel,
+													 path,
+													 root->group_pathkeys,
+													 -1.0);
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash, target,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
+							 create_agg_path(root,
+											 grouped_rel,
+											 path,
+											 target,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_SIMPLE,
+											 parse->groupClause,
+											 havingQual,
+											 agg_costs,
+											 dNumGroups));
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
+							 create_group_path(root,
+											   grouped_rel,
+											   path,
+											   target,
+											   parse->groupClause,
+											   havingQual,
+											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
+
+		/*
+		 * Now generate a complete GroupAgg Path atop of the cheapest partial
+		 * path.  We can do this using either Gather or Gather Merge.
+		 */
+		if (grouped_rel->partial_pathlist)
+		{
+			Path	   *path = (Path *) linitial(grouped_rel->partial_pathlist);
+			double		total_groups = path->rows * path->parallel_workers;
+
+			path = (Path *) create_gather_path(root,
+											   grouped_rel,
+											   path,
+											   partial_grouping_target,
+											   NULL,
+											   &total_groups);
+
+			/*
+			 * Since Gather's output is always unsorted, we'll need to sort,
+			 * unless there's no GROUP BY clause or a degenerate (constant)
+			 * one, in which case there will only be a single group.
+			 */
+			if (root->group_pathkeys)
+				path = (Path *) create_sort_path(root,
+												 grouped_rel,
+												 path,
+												 root->group_pathkeys,
+												 -1.0);
+
+			if (parse->hasAggs)
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root,
+										 grouped_rel,
+										 path,
+										 target,
+										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+										 AGGSPLIT_FINAL_DESERIAL,
+										 parse->groupClause,
+										 havingQual,
+										 agg_final_costs,
+										 dNumGroups));
+			else
+				add_path(grouped_rel, (Path *)
+						 create_group_path(root,
+										   grouped_rel,
+										   path,
+										   target,
+										   parse->groupClause,
+										   havingQual,
+										   dNumGroups));
+
+			/*
+			 * The point of using Gather Merge rather than Gather is that it
+			 * can preserve the ordering of the input path, so there's no
+			 * reason to try it unless (1) it's possible to produce more than
+			 * one output row and (2) we want the output path to be ordered.
+			 */
+			if (parse->groupClause != NIL && root->group_pathkeys != NIL)
+			{
+				foreach(lc, grouped_rel->partial_pathlist)
+				{
+					Path	   *subpath = (Path *) lfirst(lc);
+					Path	   *gmpath;
+					double		total_groups;
+
+					/*
+					 * It's useful to consider paths that are already properly
+					 * ordered for Gather Merge, because those don't need a
+					 * sort.  It's also useful to consider the cheapest path,
+					 * because sorting it in parallel and then doing Gather
+					 * Merge may be better than doing an unordered Gather
+					 * followed by a sort.  But there's no point in considering
+					 * non-cheapest paths that aren't already sorted
+					 * correctly.
+					 */
+					if (path != subpath &&
+						!pathkeys_contained_in(root->group_pathkeys,
+											   subpath->pathkeys))
+						continue;
+
+					total_groups = subpath->rows * subpath->parallel_workers;
+
+					gmpath = (Path *)
+						create_gather_merge_path(root,
+												 grouped_rel,
+												 subpath,
+												 partial_grouping_target,
+												 root->group_pathkeys,
+												 NULL,
+												 &total_groups);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 gmpath,
+												 target,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 parse->groupClause,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   gmpath,
+												   target,
+												   parse->groupClause,
+												   havingQual,
+												   dNumGroups));
+				}
+			}
+		}
+	}
+
+	if (can_hash)
+	{
+		Size		hashaggtablesize;
+
+		if (parse->groupingSets)
+		{
+			/*
+			 * Try for a hash-only groupingsets path over unsorted input.
+			 */
+			consider_groupingsets_paths(root, grouped_rel,
+										cheapest_path, false, true, target,
+										gd, agg_costs, dNumGroups);
+		}
+		else
+		{
+			hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+														  agg_costs,
+														  dNumGroups);
+
+			/*
+			 * Provided that the estimated size of the hashtable does not
+			 * exceed work_mem, we'll generate a HashAgg Path, although if we
+			 * were unable to sort above, then we'd better generate a Path, so
+			 * that we at least have one.
+			 */
+			if (hashaggtablesize < work_mem * 1024L ||
+				grouped_rel->pathlist == NIL)
+			{
+				/*
+				 * We just need an Agg over the cheapest-total input path,
+				 * since input order won't matter.
+				 */
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root, grouped_rel,
+										 cheapest_path,
+										 target,
+										 AGG_HASHED,
+										 AGGSPLIT_SIMPLE,
+										 parse->groupClause,
+										 havingQual,
+										 agg_costs,
+										 dNumGroups));
+			}
+		}
+
+		/*
+		 * Generate a HashAgg Path atop of the cheapest partial path. Once
+		 * again, we'll only do this if it looks as though the hash table
+		 * won't exceed work_mem.
+		 */
+		if (grouped_rel->partial_pathlist)
+		{
+			Path	   *path = (Path *) linitial(grouped_rel->partial_pathlist);
+
+			hashaggtablesize = estimate_hashagg_tablesize(path,
+														  agg_final_costs,
+														  dNumGroups);
+
+			if (hashaggtablesize < work_mem * 1024L)
+			{
+				double		total_groups = path->rows * path->parallel_workers;
+
+				path = (Path *) create_gather_path(root,
+												   grouped_rel,
+												   path,
+												   partial_grouping_target,
+												   NULL,
+												   &total_groups);
+
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root,
+										 grouped_rel,
+										 path,
+										 target,
+										 AGG_HASHED,
+										 AGGSPLIT_FINAL_DESERIAL,
+										 parse->groupClause,
+										 havingQual,
+										 agg_final_costs,
+										 dNumGroups));
+			}
+		}
+	}
+}
+
+/*
+ * add_partial_paths_to_grouping_rel
+ *
+ * Add partial paths to grouping relation.  These paths are not fully
+ * aggregated; a FinalizeAggregate step is still required.
+ */
+static void
+add_partial_paths_to_grouping_rel(PlannerInfo *root,
+								  RelOptInfo *input_rel,
+								  RelOptInfo *grouped_rel,
+								  PathTarget *target,
+								  PathTarget *partial_grouping_target,
+								  AggClauseCosts *agg_partial_costs,
+								  AggClauseCosts *agg_final_costs,
+								  grouping_sets_data *gd,
+								  bool can_sort,
+								  bool can_hash,
+								  List *havingQual)
+{
+	Query	   *parse = root->parse;
+	Path	   *cheapest_partial_path = linitial(input_rel->partial_pathlist);
+	Size		hashaggtablesize;
+	double		dNumPartialGroups = 0;
+	ListCell   *lc;
+
+	/* Estimate number of partial groups. */
+	dNumPartialGroups = get_number_of_groups(root,
+											 cheapest_partial_path->rows,
+											 gd);
+
+	if (can_sort)
+	{
+		/* This should have been checked previously */
+		Assert(parse->hasAggs || parse->groupClause);
+
+		/*
+		 * Use any available suitably-sorted path as input, and also consider
+		 * sorting the cheapest partial path.
+		 */
+		foreach(lc, input_rel->partial_pathlist)
+		{
+			Path	   *path = (Path *) lfirst(lc);
+			bool		is_sorted;
+
+			is_sorted = pathkeys_contained_in(root->group_pathkeys,
+											  path->pathkeys);
+			if (path == cheapest_partial_path || is_sorted)
+			{
+				/* Sort the cheapest partial path, if it isn't already */
+				if (!is_sorted)
+					path = (Path *) create_sort_path(root,
+													 grouped_rel,
+													 path,
+													 root->group_pathkeys,
+													 -1.0);
+
+				if (parse->hasAggs)
+					add_partial_path(grouped_rel, (Path *)
+									 create_agg_path(root,
+													 grouped_rel,
+													 path,
+													 partial_grouping_target,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 parse->groupClause,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialGroups));
+				else
+					add_partial_path(grouped_rel, (Path *)
+									 create_group_path(root,
+													   grouped_rel,
+													   path,
+													   partial_grouping_target,
+													   parse->groupClause,
+													   NIL,
+													   dNumPartialGroups));
+			}
+		}
+	}
+
+	if (can_hash)
+	{
+		/* Checked above */
+		Assert(parse->hasAggs || parse->groupClause);
+
+		hashaggtablesize =
+			estimate_hashagg_tablesize(cheapest_partial_path,
+									   agg_partial_costs,
+									   dNumPartialGroups);
+
+		/*
+		 * Tentatively produce a partial HashAgg Path, depending on if it
+		 * looks as if the hash table will fit in work_mem.
+		 */
+		if (hashaggtablesize < work_mem * 1024L)
+		{
+			add_partial_path(grouped_rel, (Path *)
+							 create_agg_path(root,
+											 grouped_rel,
+											 cheapest_partial_path,
+											 partial_grouping_target,
+											 AGG_HASHED,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 parse->groupClause,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+		}
+	}
+}
+
+/*
+ * can_parallel_agg
+ *
+ * Determines whether or not parallel grouping and/or aggregation is possible.
+ * Returns true when possible, false otherwise.
+ */
+static bool
+can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
+				 RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs)
+{
+	Query	   *parse = root->parse;
+
+	if (!grouped_rel->consider_parallel)
+	{
+		/* Not even parallel-safe. */
+		return false;
+	}
+	else if (input_rel->partial_pathlist == NIL)
+	{
+		/* Nothing to use as input for partial aggregate. */
+		return false;
+	}
+	else if (!parse->hasAggs && parse->groupClause == NIL)
+	{
+		/*
+		 * We don't know how to do parallel aggregation unless we have either
+		 * some aggregates or a grouping clause.
+		 */
+		return false;
+	}
+	else if (parse->groupingSets)
+	{
+		/* We don't know how to do grouping sets in parallel. */
+		return false;
+	}
+	else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
+	{
+		/* Insufficient support for partial mode. */
+		return false;
+	}
+
+	/* Everything looks good. */
+	return true;
+}
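The commit message notes that partition-wise aggregate will reuse these subroutines for child grouping rels. Purely as a hypothetical illustration (this loop is NOT part of this commit; nparts, child_input_rels, and child_grouped_rels are invented placeholders, not real planner fields), a follow-on patch could drive the new helpers once per child relation along these lines:

	/* Hypothetical follow-on usage, not in this commit. */
	for (cnt = 0; cnt < nparts; cnt++)
	{
		RelOptInfo *child_input = child_input_rels[cnt];	/* assumed */
		RelOptInfo *child_grouped = child_grouped_rels[cnt];	/* assumed */

		/* Non-partial grouping paths for this child rel. */
		add_paths_to_grouping_rel(root, child_input, child_grouped, target,
								  partial_grouping_target, agg_costs,
								  &agg_final_costs, gd, can_sort, can_hash,
								  dNumGroups, (List *) parse->havingQual);

		/* Partially aggregated paths, if parallel aggregation is viable. */
		if (can_parallel_agg(root, child_input, child_grouped, agg_costs))
			add_partial_paths_to_grouping_rel(root, child_input, child_grouped,
											  target, partial_grouping_target,
											  &agg_partial_costs,
											  &agg_final_costs, gd,
											  can_sort, can_hash,
											  (List *) parse->havingQual);
	}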