Commit 9fd8b7d6 authored by Robert Haas

Factor some code out of create_grouping_paths.

This is preparatory refactoring to pave the way for partition-wise
aggregate, which will reuse the new subroutines for child grouping
rels.  It also does not seem like a bad idea on general principle,
as the function was getting pretty long.

Jeevan Chalke.  The larger patch series of which this patch is a part
was reviewed and tested by Antonin Houska, Rajkumar Raghuwanshi,
Ashutosh Bapat, David Rowley, Dilip Kumar, Konstantin Knizhnik,
Pascal Legrand, and me.  Some cosmetic changes by me.

Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
parent 4971d2a3
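
For orientation, here is a condensed sketch (not part of the commit) of how
create_grouping_paths drives the extracted helpers after this refactoring,
pieced together from the hunks below; the surrounding cost collection,
error reporting, and FDW/hook handling are elided.

	/* Decide whether parallel (partial + finalize) aggregation is usable. */
	try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
	                                            agg_costs);

	if (try_parallel_aggregation)
	{
	    /* Build the target for partial aggregation ... */
	    partial_grouping_target = make_partial_grouping_target(root, target);
	    /* ... and fill agg_partial_costs / agg_final_costs, then: */
	    add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
	                                      partial_grouping_target,
	                                      &agg_partial_costs, &agg_final_costs,
	                                      gd, can_sort, can_hash,
	                                      (List *) parse->havingQual);
	}

	/* Build final grouping paths. */
	add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
	                          partial_grouping_target, agg_costs,
	                          &agg_final_costs, gd, can_sort, can_hash,
	                          dNumGroups, (List *) parse->havingQual);
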
@@ -185,6 +185,26 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
bool *have_postponed_srfs);
static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
List *targets, List *targets_contain_srfs);
static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual);
static void add_partial_paths_to_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
List *havingQual);
static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
/*****************************************************************************
@@ -3610,15 +3630,11 @@ create_grouping_paths(PlannerInfo *root,
PathTarget *partial_grouping_target = NULL;
AggClauseCosts agg_partial_costs; /* parallel only */
AggClauseCosts agg_final_costs; /* parallel only */
Size hashaggtablesize;
double dNumGroups;
double dNumPartialGroups = 0;
bool can_hash;
bool can_sort;
bool try_parallel_aggregation;
ListCell *lc;
/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
@@ -3754,44 +3770,11 @@ create_grouping_paths(PlannerInfo *root,
(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/*
* If grouped_rel->consider_parallel is true, then paths that we generate
* for this grouping relation could be run inside of a worker, but that
* doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
* execution strategy. Figure that out.
*/
if (!grouped_rel->consider_parallel)
{
/* Not even parallel-safe. */
try_parallel_aggregation = false;
}
else if (input_rel->partial_pathlist == NIL)
{
/* Nothing to use as input for partial aggregate. */
try_parallel_aggregation = false;
}
else if (!parse->hasAggs && parse->groupClause == NIL)
{
/*
* We don't know how to do parallel aggregation unless we have either
* some aggregates or a grouping clause.
* Figure out whether a PartialAggregate/Finalize Aggregate execution
* strategy is viable.
*/
try_parallel_aggregation = false;
}
else if (parse->groupingSets)
{
/* We don't know how to do grouping sets in parallel. */
try_parallel_aggregation = false;
}
else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
{
/* Insufficient support for partial mode. */
try_parallel_aggregation = false;
}
else
{
/* Everything looks good. */
try_parallel_aggregation = true;
}
try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
agg_costs);
/*
* Before generating paths for grouped_rel, we first generate any possible
@@ -3803,8 +3786,6 @@ create_grouping_paths(PlannerInfo *root,
*/
if (try_parallel_aggregation)
{
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
/*
* Build target list for partial aggregate paths. These paths cannot
* just emit the same tlist as regular aggregate paths, because (1) we
@@ -3814,11 +3795,6 @@ create_grouping_paths(PlannerInfo *root,
*/
partial_grouping_target = make_partial_grouping_target(root, target);
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
gd);
/*
* Collect statistics about aggregates for estimating costs of
* performing aggregation in parallel.
@@ -3841,503 +3817,353 @@ create_grouping_paths(PlannerInfo *root,
&agg_final_costs);
}
if (can_sort)
{
/* This was checked before setting try_parallel_aggregation */
Assert(parse->hasAggs || parse->groupClause);
add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
partial_grouping_target,
&agg_partial_costs, &agg_final_costs,
gd, can_sort, can_hash,
(List *) parse->havingQual);
}
/* Build final grouping paths */
add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
partial_grouping_target, agg_costs,
&agg_final_costs, gd, can_sort, can_hash,
dNumGroups, (List *) parse->havingQual);
/* Give a helpful error if we failed to find any implementation */
if (grouped_rel->pathlist == NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
/*
* Use any available suitably-sorted path as input, and also
* consider sorting the cheapest partial path.
* If there is an FDW that's responsible for all baserels of the query,
* let it consider adding ForeignPaths.
*/
foreach(lc, input_rel->partial_pathlist)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
if (grouped_rel->fdwroutine &&
grouped_rel->fdwroutine->GetForeignUpperPaths)
grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* Let extensions possibly add some more paths */
if (create_upper_paths_hook)
(*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
if (parse->hasAggs)
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
&agg_partial_costs,
dNumPartialGroups));
else
add_partial_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause,
NIL,
dNumPartialGroups));
}
}
}
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
if (can_hash)
{
/* Checked above */
Assert(parse->hasAggs || parse->groupClause);
/*
* We've been using the partial pathlist for the grouped relation to hold
* partially aggregated paths, but that's actually a little bit bogus
* because it's unsafe for later planning stages -- like ordered_rel ---
* to get the idea that they can use these partial paths as if they didn't
* need a FinalizeAggregate step. Zap the partial pathlist at this stage
* so we don't get confused.
*/
grouped_rel->partial_pathlist = NIL;
hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path,
&agg_partial_costs,
dNumPartialGroups);
return grouped_rel;
}
/*
* Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem.
/*
* For a given input path, consider the possible ways of doing grouping sets on
* it, by combinations of hashing and sorting. This can be called multiple
* times, so it's important that it not scribble on input. No result is
* returned, but any generated paths are added to grouped_rel.
*/
if (hashaggtablesize < work_mem * 1024L)
{
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
cheapest_partial_path,
partial_grouping_target,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
&agg_partial_costs,
dNumPartialGroups));
}
}
}
static void
consider_groupingsets_paths(PlannerInfo *root,
RelOptInfo *grouped_rel,
Path *path,
bool is_sorted,
bool can_hash,
PathTarget *target,
grouping_sets_data *gd,
const AggClauseCosts *agg_costs,
double dNumGroups)
{
Query *parse = root->parse;
/* Build final grouping paths */
if (can_sort)
{
/*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest-total path.
* If we're not being offered sorted input, then only consider plans that
* can be done entirely by hashing.
*
* We can hash everything if it looks like it'll fit in work_mem. But if
* the input is actually sorted despite not being advertised as such, we
* prefer to make use of that in order to use less memory.
*
* If none of the grouping sets are sortable, then ignore the work_mem
* limit and generate a path anyway, since otherwise we'll just fail.
*/
foreach(lc, input_rel->pathlist)
if (!is_sorted)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
List *new_rollups = NIL;
RollupData *unhashed_rollup = NULL;
List *sets_data;
List *empty_sets_data = NIL;
List *empty_sets = NIL;
ListCell *lc;
ListCell *l_start = list_head(gd->rollups);
AggStrategy strat = AGG_HASHED;
Size hashsize;
double exclude_groups = 0.0;
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
Assert(can_hash);
/* Now decide what to stick atop it */
if (parse->groupingSets)
if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash, target,
gd, agg_costs, dNumGroups);
unhashed_rollup = lfirst_node(RollupData, l_start);
exclude_groups = unhashed_rollup->numGroups;
l_start = lnext(l_start);
}
else if (parse->hasAggs)
{
hashsize = estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups - exclude_groups);
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
* gd->rollups is empty if we have only unsortable columns to work
* with. Override work_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
(List *) parse->havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
* We need to burst the existing rollups list into individual grouping
* sets and recompute a groupClause for each set.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
(List *) parse->havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
}
sets_data = list_copy(gd->unsortable_sets);
/*
* Now generate a complete GroupAgg Path atop of the cheapest partial
* path. We can do this using either Gather or Gather Merge.
*/
if (grouped_rel->partial_pathlist)
for_each_cell(lc, l_start)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
RollupData *rollup = lfirst_node(RollupData, lc);
/*
* Since Gather's output is always unsorted, we'll need to sort,
* unless there's no GROUP BY clause or a degenerate (constant)
* one, in which case there will only be a single group.
* If we find an unhashable rollup that's not been skipped by the
* "actually sorted" check above, we can't cope; we'd need sorted
* input (with a different sort order) but we can't get that here.
* So bail out; we'll get a valid path from the is_sorted case
* instead.
*
* The mere presence of empty grouping sets doesn't make a rollup
* unhashable (see preprocess_grouping_sets), we handle those
* specially below.
*/
if (root->group_pathkeys)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (!rollup->hashable)
return;
else
sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
}
foreach(lc, sets_data)
{
GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
List *gset = gs->set;
RollupData *rollup;
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
if (gset == NIL)
{
/* Empty grouping sets can't be hashed. */
empty_sets_data = lappend(empty_sets_data, gs);
empty_sets = lappend(empty_sets, NIL);
}
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
(List *) parse->havingQual,
dNumGroups));
{
rollup = makeNode(RollupData);
rollup->groupClause = preprocess_groupclause(root, gset);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
gd->tleref_to_colnum_map);
rollup->numGroups = gs->numGroups;
rollup->hashable = true;
rollup->is_hashed = true;
new_rollups = lappend(new_rollups, rollup);
}
}
/*
* The point of using Gather Merge rather than Gather is that it
* can preserve the ordering of the input path, so there's no
* reason to try it unless (1) it's possible to produce more than
* one output row and (2) we want the output path to be ordered.
* If we didn't find anything nonempty to hash, then bail. We'll
* generate a path from the is_sorted case.
*/
if (parse->groupClause != NIL && root->group_pathkeys != NIL)
{
foreach(lc, grouped_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *gmpath;
double total_groups;
if (new_rollups == NIL)
return;
/*
* It's useful to consider paths that are already properly
* ordered for Gather Merge, because those don't need a
* sort. It's also useful to consider the cheapest path,
* because sorting it in parallel and then doing Gather
* Merge may be better than doing an unordered Gather
* followed by a sort. But there's no point in
* considering non-cheapest paths that aren't already
* sorted correctly.
* If there were empty grouping sets they should have been in the
* first rollup.
*/
if (path != subpath &&
!pathkeys_contained_in(root->group_pathkeys,
subpath->pathkeys))
continue;
Assert(!unhashed_rollup || !empty_sets);
total_groups = subpath->rows * subpath->parallel_workers;
if (unhashed_rollup)
{
new_rollups = lappend(new_rollups, unhashed_rollup);
strat = AGG_MIXED;
}
else if (empty_sets)
{
RollupData *rollup = makeNode(RollupData);
gmpath = (Path *)
create_gather_merge_path(root,
grouped_rel,
subpath,
partial_grouping_target,
root->group_pathkeys,
NULL,
&total_groups);
rollup->groupClause = NIL;
rollup->gsets_data = empty_sets_data;
rollup->gsets = empty_sets;
rollup->numGroups = list_length(empty_sets);
rollup->hashable = false;
rollup->is_hashed = false;
new_rollups = lappend(new_rollups, rollup);
strat = AGG_MIXED;
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
create_groupingsets_path(root,
grouped_rel,
gmpath,
path,
target,
parse->groupClause,
(List *) parse->havingQual,
strat,
new_rollups,
agg_costs,
dNumGroups));
}
}
}
return;
}
if (can_hash)
{
if (parse->groupingSets)
{
/*
* Try for a hash-only groupingsets path over unsorted input.
* If we have sorted input but nothing we can do with it, bail.
*/
consider_groupingsets_paths(root, grouped_rel,
cheapest_path, false, true, target,
gd, agg_costs, dNumGroups);
}
else
{
hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
agg_costs,
dNumGroups);
if (list_length(gd->rollups) == 0)
return;
/*
* Provided that the estimated size of the hashtable does not
* exceed work_mem, we'll generate a HashAgg Path, although if we
* were unable to sort above, then we'd better generate a Path, so
* that we at least have one.
* Given sorted input, we try and make two paths: one sorted and one mixed
* sort/hash. (We need to try both because hashagg might be disabled, or
* some columns might not be sortable.)
*
* can_hash is passed in as false if some obstacle elsewhere (such as
* ordered aggs) means that we shouldn't consider hashing at all.
*/
if (hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
if (can_hash && gd->any_hashable)
{
List *rollups = NIL;
List *hash_sets = list_copy(gd->unsortable_sets);
double availspace = (work_mem * 1024.0);
ListCell *lc;
/*
* We just need an Agg over the cheapest-total input path,
* since input order won't matter.
* Account first for space needed for groups we can't sort at all.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root, grouped_rel,
cheapest_path,
target,
AGG_HASHED,
AGGSPLIT_SIMPLE,
parse->groupClause,
(List *) parse->havingQual,
availspace -= (double) estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups));
}
}
gd->dNumHashGroups);
if (availspace > 0 && list_length(gd->rollups) > 1)
{
double scale;
int num_rollups = list_length(gd->rollups);
int k_capacity;
int *k_weights = palloc(num_rollups * sizeof(int));
Bitmapset *hash_items = NULL;
int i;
/*
* Generate a HashAgg Path atop of the cheapest partial path. Once
* again, we'll only do this if it looks as though the hash table
* won't exceed work_mem.
* We treat this as a knapsack problem: the knapsack capacity
* represents work_mem, the item weights are the estimated memory
* usage of the hashtables needed to implement a single rollup,
* and we really ought to use the cost saving as the item value;
* however, currently the costs assigned to sort nodes don't
* reflect the comparison costs well, and so we treat all items as
* of equal value (each rollup we hash instead saves us one sort).
*
* To use the discrete knapsack, we need to scale the values to a
* reasonably small bounded range. We choose to allow a 5% error
* margin; we have no more than 4096 rollups in the worst possible
* case, which with a 5% error margin will require a bit over 42MB
* of workspace. (Anyone wanting to plan queries that complex had
* better have the memory for it. In more reasonable cases, with
* no more than a couple of dozen rollups, the memory usage will
* be negligible.)
*
* k_capacity is naturally bounded, but we clamp the values for
* scale and weight (below) to avoid overflows or underflows (or
* uselessly trying to use a scale factor less than 1 byte).
*/
if (grouped_rel->partial_pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
hashaggtablesize = estimate_hashagg_tablesize(path,
&agg_final_costs,
dNumGroups);
if (hashaggtablesize < work_mem * 1024L)
{
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
AGG_HASHED,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
}
}
}
/* Give a helpful error if we failed to find any implementation */
if (grouped_rel->pathlist == NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
/*
* If there is an FDW that's responsible for all baserels of the query,
* let it consider adding ForeignPaths.
*/
if (grouped_rel->fdwroutine &&
grouped_rel->fdwroutine->GetForeignUpperPaths)
grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
/* Let extensions possibly add some more paths */
if (create_upper_paths_hook)
(*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
/*
* We've been using the partial pathlist for the grouped relation to hold
* partially aggregated paths, but that's actually a little bit bogus
* because it's unsafe for later planning stages -- like ordered_rel ---
* to get the idea that they can use these partial paths as if they didn't
* need a FinalizeAggregate step. Zap the partial pathlist at this stage
* so we don't get confused.
*/
grouped_rel->partial_pathlist = NIL;
return grouped_rel;
}
/*
* For a given input path, consider the possible ways of doing grouping sets on
* it, by combinations of hashing and sorting. This can be called multiple
* times, so it's important that it not scribble on input. No result is
* returned, but any generated paths are added to grouped_rel.
*/
static void
consider_groupingsets_paths(PlannerInfo *root,
RelOptInfo *grouped_rel,
Path *path,
bool is_sorted,
bool can_hash,
PathTarget *target,
grouping_sets_data *gd,
const AggClauseCosts *agg_costs,
double dNumGroups)
{
Query *parse = root->parse;
scale = Max(availspace / (20.0 * num_rollups), 1.0);
k_capacity = (int) floor(availspace / scale);
/*
* If we're not being offered sorted input, then only consider plans that
* can be done entirely by hashing.
*
* We can hash everything if it looks like it'll fit in work_mem. But if
* the input is actually sorted despite not being advertised as such, we
* prefer to make use of that in order to use less memory.
*
* If none of the grouping sets are sortable, then ignore the work_mem
* limit and generate a path anyway, since otherwise we'll just fail.
* We leave the first rollup out of consideration since it's the
* one that matches the input sort order. We assign indexes "i"
* to only those entries considered for hashing; the second loop,
* below, must use the same condition.
*/
if (!is_sorted)
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
List *new_rollups = NIL;
RollupData *unhashed_rollup = NULL;
List *sets_data;
List *empty_sets_data = NIL;
List *empty_sets = NIL;
ListCell *lc;
ListCell *l_start = list_head(gd->rollups);
AggStrategy strat = AGG_HASHED;
Size hashsize;
double exclude_groups = 0.0;
Assert(can_hash);
RollupData *rollup = lfirst_node(RollupData, lc);
if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
if (rollup->hashable)
{
unhashed_rollup = lfirst_node(RollupData, l_start);
exclude_groups = unhashed_rollup->numGroups;
l_start = lnext(l_start);
}
hashsize = estimate_hashagg_tablesize(path,
double sz = estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups - exclude_groups);
rollup->numGroups);
/*
* gd->rollups is empty if we have only unsortable columns to work
* with. Override work_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
* If sz is enormous, but work_mem (and hence scale) is
* small, avoid integer overflow here.
*/
if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
k_weights[i] = (int) Min(floor(sz / scale),
k_capacity + 1.0);
++i;
}
}
/*
* We need to burst the existing rollups list into individual grouping
* sets and recompute a groupClause for each set.
* Apply knapsack algorithm; compute the set of items which
* maximizes the value stored (in this case the number of sorts
* saved) while keeping the total size (approximately) within
* capacity.
*/
sets_data = list_copy(gd->unsortable_sets);
if (i > 0)
hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
for_each_cell(lc, l_start)
if (!bms_is_empty(hash_items))
{
RollupData *rollup = lfirst_node(RollupData, lc);
rollups = list_make1(linitial(gd->rollups));
/*
* If we find an unhashable rollup that's not been skipped by the
* "actually sorted" check above, we can't cope; we'd need sorted
* input (with a different sort order) but we can't get that here.
* So bail out; we'll get a valid path from the is_sorted case
* instead.
*
* The mere presence of empty grouping sets doesn't make a rollup
* unhashable (see preprocess_grouping_sets), we handle those
* specially below.
*/
if (!rollup->hashable)
return;
else
sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
}
foreach(lc, sets_data)
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
List *gset = gs->set;
RollupData *rollup;
RollupData *rollup = lfirst_node(RollupData, lc);
if (gset == NIL)
if (rollup->hashable)
{
/* Empty grouping sets can't be hashed. */
empty_sets_data = lappend(empty_sets_data, gs);
empty_sets = lappend(empty_sets, NIL);
if (bms_is_member(i, hash_items))
hash_sets = list_concat(hash_sets,
list_copy(rollup->gsets_data));
else
rollups = lappend(rollups, rollup);
++i;
}
else
rollups = lappend(rollups, rollup);
}
}
}
if (!rollups && hash_sets)
rollups = list_copy(gd->rollups);
foreach(lc, hash_sets)
{
rollup = makeNode(RollupData);
GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
RollupData *rollup = makeNode(RollupData);
rollup->groupClause = preprocess_groupclause(root, gset);
Assert(gs->set != NIL);
rollup->groupClause = preprocess_groupclause(root, gs->set);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
@@ -4345,228 +4171,39 @@ consider_groupingsets_paths(PlannerInfo *root,
rollup->numGroups = gs->numGroups;
rollup->hashable = true;
rollup->is_hashed = true;
new_rollups = lappend(new_rollups, rollup);
}
rollups = lcons(rollup, rollups);
}
/*
* If we didn't find anything nonempty to hash, then bail. We'll
* generate a path from the is_sorted case.
*/
if (new_rollups == NIL)
return;
/*
* If there were empty grouping sets they should have been in the
* first rollup.
*/
Assert(!unhashed_rollup || !empty_sets);
if (unhashed_rollup)
{
new_rollups = lappend(new_rollups, unhashed_rollup);
strat = AGG_MIXED;
}
else if (empty_sets)
if (rollups)
{
RollupData *rollup = makeNode(RollupData);
rollup->groupClause = NIL;
rollup->gsets_data = empty_sets_data;
rollup->gsets = empty_sets;
rollup->numGroups = list_length(empty_sets);
rollup->hashable = false;
rollup->is_hashed = false;
new_rollups = lappend(new_rollups, rollup);
strat = AGG_MIXED;
}
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
strat,
new_rollups,
AGG_MIXED,
rollups,
agg_costs,
dNumGroups));
return;
}
}
/*
* If we have sorted input but nothing we can do with it, bail.
* Now try the simple sorted case.
*/
if (list_length(gd->rollups) == 0)
return;
/*
* Given sorted input, we try and make two paths: one sorted and one mixed
* sort/hash. (We need to try both because hashagg might be disabled, or
* some columns might not be sortable.)
*
* can_hash is passed in as false if some obstacle elsewhere (such as
* ordered aggs) means that we shouldn't consider hashing at all.
*/
if (can_hash && gd->any_hashable)
{
List *rollups = NIL;
List *hash_sets = list_copy(gd->unsortable_sets);
double availspace = (work_mem * 1024.0);
ListCell *lc;
/*
* Account first for space needed for groups we can't sort at all.
*/
availspace -= (double) estimate_hashagg_tablesize(path,
agg_costs,
gd->dNumHashGroups);
if (availspace > 0 && list_length(gd->rollups) > 1)
{
double scale;
int num_rollups = list_length(gd->rollups);
int k_capacity;
int *k_weights = palloc(num_rollups * sizeof(int));
Bitmapset *hash_items = NULL;
int i;
/*
* We treat this as a knapsack problem: the knapsack capacity
* represents work_mem, the item weights are the estimated memory
* usage of the hashtables needed to implement a single rollup,
* and we really ought to use the cost saving as the item value;
* however, currently the costs assigned to sort nodes don't
* reflect the comparison costs well, and so we treat all items as
* of equal value (each rollup we hash instead saves us one sort).
*
* To use the discrete knapsack, we need to scale the values to a
* reasonably small bounded range. We choose to allow a 5% error
* margin; we have no more than 4096 rollups in the worst possible
* case, which with a 5% error margin will require a bit over 42MB
* of workspace. (Anyone wanting to plan queries that complex had
* better have the memory for it. In more reasonable cases, with
* no more than a couple of dozen rollups, the memory usage will
* be negligible.)
*
* k_capacity is naturally bounded, but we clamp the values for
* scale and weight (below) to avoid overflows or underflows (or
* uselessly trying to use a scale factor less than 1 byte).
*/
scale = Max(availspace / (20.0 * num_rollups), 1.0);
k_capacity = (int) floor(availspace / scale);
/*
* We leave the first rollup out of consideration since it's the
* one that matches the input sort order. We assign indexes "i"
* to only those entries considered for hashing; the second loop,
* below, must use the same condition.
*/
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
RollupData *rollup = lfirst_node(RollupData, lc);
if (rollup->hashable)
{
double sz = estimate_hashagg_tablesize(path,
agg_costs,
rollup->numGroups);
/*
* If sz is enormous, but work_mem (and hence scale) is
* small, avoid integer overflow here.
*/
k_weights[i] = (int) Min(floor(sz / scale),
k_capacity + 1.0);
++i;
}
}
/*
* Apply knapsack algorithm; compute the set of items which
* maximizes the value stored (in this case the number of sorts
* saved) while keeping the total size (approximately) within
* capacity.
*/
if (i > 0)
hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
if (!bms_is_empty(hash_items))
{
rollups = list_make1(linitial(gd->rollups));
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
RollupData *rollup = lfirst_node(RollupData, lc);
if (rollup->hashable)
{
if (bms_is_member(i, hash_items))
hash_sets = list_concat(hash_sets,
list_copy(rollup->gsets_data));
else
rollups = lappend(rollups, rollup);
++i;
}
else
rollups = lappend(rollups, rollup);
}
}
}
if (!rollups && hash_sets)
rollups = list_copy(gd->rollups);
foreach(lc, hash_sets)
{
GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
RollupData *rollup = makeNode(RollupData);
Assert(gs->set != NIL);
rollup->groupClause = preprocess_groupclause(root, gs->set);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
gd->tleref_to_colnum_map);
rollup->numGroups = gs->numGroups;
rollup->hashable = true;
rollup->is_hashed = true;
rollups = lcons(rollup, rollups);
}
if (rollups)
{
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
AGG_MIXED,
rollups,
agg_costs,
dNumGroups));
}
}
/*
* Now try the simple sorted case.
*/
if (!gd->unsortable_sets)
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
AGG_SORTED,
gd->rollups,
agg_costs,
dNumGroups));
}
if (!gd->unsortable_sets)
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
AGG_SORTED,
gd->rollups,
agg_costs,
dNumGroups));
}
/*
* create_window_paths
@@ -6101,116 +5738,563 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
/* Set up RTE/RelOptInfo arrays */
setup_simple_rel_arrays(root);
/* Build RelOptInfo */
rel = build_simple_rel(root, 1, NULL);
/* Locate IndexOptInfo for the target index */
indexInfo = NULL;
foreach(lc, rel->indexlist)
{
indexInfo = lfirst_node(IndexOptInfo, lc);
if (indexInfo->indexoid == indexOid)
break;
}
/*
* It's possible that get_relation_info did not generate an IndexOptInfo
* for the desired index; this could happen if it's not yet reached its
* indcheckxmin usability horizon, or if it's a system index and we're
* ignoring system indexes. In such cases we should tell CLUSTER to not
* trust the index contents but use seqscan-and-sort.
*/
if (lc == NULL) /* not in the list? */
return true; /* use sort */
/*
* Rather than doing all the pushups that would be needed to use
* set_baserel_size_estimates, just do a quick hack for rows and width.
*/
rel->rows = rel->tuples;
rel->reltarget->width = get_relation_data_width(tableOid, NULL);
root->total_table_pages = rel->pages;
/*
* Determine eval cost of the index expressions, if any. We need to
* charge twice that amount for each tuple comparison that happens during
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
/* Estimate the cost of seq scan + sort */
seqScanPath = create_seqscan_path(root, rel, NULL, 0);
cost_sort(&seqScanAndSortPath, root, NIL,
seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
comparisonCost, maintenance_work_mem, -1.0);
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false,
NULL, 1.0, false);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
/*
* get_partitioned_child_rels
* Returns a list of the RT indexes of the partitioned child relations
* with rti as the root parent RT index. Also sets
* *part_cols_updated to true if any of the root rte's updated
* columns is used in the partition key either of the relation whose RTI
* is specified or of any child relation.
*
* Note: This function might get called even for range table entries that
* are not partitioned tables; in such a case, it will simply return NIL.
*/
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated)
{
List *result = NIL;
ListCell *l;
if (part_cols_updated)
*part_cols_updated = false;
foreach(l, root->pcinfo_list)
{
PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);
if (pc->parent_relid == rti)
{
result = pc->child_rels;
if (part_cols_updated)
*part_cols_updated = pc->part_cols_updated;
break;
}
}
return result;
}
/*
* get_partitioned_child_rels_for_join
* Build and return a list containing the RTI of every partitioned
* relation which is a child of some rel included in the join.
*/
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{
List *result = NIL;
ListCell *l;
foreach(l, root->pcinfo_list)
{
PartitionedChildRelInfo *pc = lfirst(l);
if (bms_is_member(pc->parent_relid, join_relids))
result = list_concat(result, list_copy(pc->child_rels));
}
return result;
}
/*
* add_paths_to_grouping_rel
*
* Add non-partial paths to grouping relation.
*/
static void
add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual)
{
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
ListCell *lc;
if (can_sort)
{
/*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest-total path.
*/
foreach(lc, input_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash, target,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
}
/*
* Now generate a complete GroupAgg Path atop of the cheapest partial
* path. We can do this using either Gather or Gather Merge.
*/
if (grouped_rel->partial_pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
/*
* Since Gather's output is always unsorted, we'll need to sort,
* unless there's no GROUP BY clause or a degenerate (constant)
* one, in which case there will only be a single group.
*/
if (root->group_pathkeys)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
havingQual,
dNumGroups));
/*
* The point of using Gather Merge rather than Gather is that it
* can preserve the ordering of the input path, so there's no
* reason to try it unless (1) it's possible to produce more than
* one output row and (2) we want the output path to be ordered.
*/
if (parse->groupClause != NIL && root->group_pathkeys != NIL)
{
foreach(lc, grouped_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *gmpath;
double total_groups;
/*
* It's useful to consider paths that are already properly
* ordered for Gather Merge, because those don't need a
* sort. It's also useful to consider the cheapest path,
* because sorting it in parallel and then doing Gather
* Merge may be better than doing an unordered Gather
* followed by a sort. But there's no point in considering
* non-cheapest paths that aren't already sorted
* correctly.
*/
if (path != subpath &&
!pathkeys_contained_in(root->group_pathkeys,
subpath->pathkeys))
continue;
total_groups = subpath->rows * subpath->parallel_workers;
gmpath = (Path *)
create_gather_merge_path(root,
grouped_rel,
subpath,
partial_grouping_target,
root->group_pathkeys,
NULL,
&total_groups);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
}
if (can_hash)
{
Size hashaggtablesize;
/* Locate IndexOptInfo for the target index */
indexInfo = NULL;
foreach(lc, rel->indexlist)
if (parse->groupingSets)
{
indexInfo = lfirst_node(IndexOptInfo, lc);
if (indexInfo->indexoid == indexOid)
break;
/*
* Try for a hash-only groupingsets path over unsorted input.
*/
consider_groupingsets_paths(root, grouped_rel,
cheapest_path, false, true, target,
gd, agg_costs, dNumGroups);
}
else
{
hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
agg_costs,
dNumGroups);
/*
* It's possible that get_relation_info did not generate an IndexOptInfo
* for the desired index; this could happen if it's not yet reached its
* indcheckxmin usability horizon, or if it's a system index and we're
* ignoring system indexes. In such cases we should tell CLUSTER to not
* trust the index contents but use seqscan-and-sort.
* Provided that the estimated size of the hashtable does not
* exceed work_mem, we'll generate a HashAgg Path, although if we
* were unable to sort above, then we'd better generate a Path, so
* that we at least have one.
*/
if (lc == NULL) /* not in the list? */
return true; /* use sort */
if (hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
{
/*
* Rather than doing all the pushups that would be needed to use
* set_baserel_size_estimates, just do a quick hack for rows and width.
* We just need an Agg over the cheapest-total input path,
* since input order won't matter.
*/
rel->rows = rel->tuples;
rel->reltarget->width = get_relation_data_width(tableOid, NULL);
root->total_table_pages = rel->pages;
add_path(grouped_rel, (Path *)
create_agg_path(root, grouped_rel,
cheapest_path,
target,
AGG_HASHED,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
}
/*
* Determine eval cost of the index expressions, if any. We need to
* charge twice that amount for each tuple comparison that happens during
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
* Generate a HashAgg Path atop of the cheapest partial path. Once
* again, we'll only do this if it looks as though the hash table
* won't exceed work_mem.
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
if (grouped_rel->partial_pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
/* Estimate the cost of seq scan + sort */
seqScanPath = create_seqscan_path(root, rel, NULL, 0);
cost_sort(&seqScanAndSortPath, root, NIL,
seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
comparisonCost, maintenance_work_mem, -1.0);
hashaggtablesize = estimate_hashagg_tablesize(path,
agg_final_costs,
dNumGroups);
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false,
NULL, 1.0, false);
if (hashaggtablesize < work_mem * 1024L)
{
double total_groups = path->rows * path->parallel_workers;
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
AGG_HASHED,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
}
}
}
}
/*
* get_partitioned_child_rels
* Returns a list of the RT indexes of the partitioned child relations
* with rti as the root parent RT index. Also sets
* *part_cols_updated to true if any of the root rte's updated
* columns is used in the partition key either of the relation whose RTI
* is specified or of any child relation.
* add_partial_paths_to_grouping_rel
*
* Note: This function might get called even for range table entries that
* are not partitioned tables; in such a case, it will simply return NIL.
* Add partial paths to grouping relation. These paths are not fully
* aggregated; a FinalizeAggregate step is still required.
*/
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated)
static void
add_partial_paths_to_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
List *havingQual)
{
List *result = NIL;
ListCell *l;
Query *parse = root->parse;
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
Size hashaggtablesize;
double dNumPartialGroups = 0;
ListCell *lc;
if (part_cols_updated)
*part_cols_updated = false;
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
gd);
foreach(l, root->pcinfo_list)
if (can_sort)
{
PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);
/* This should have been checked previously */
Assert(parse->hasAggs || parse->groupClause);
if (pc->parent_relid == rti)
/*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest partial path.
*/
foreach(lc, input_rel->partial_pathlist)
{
result = pc->child_rels;
if (part_cols_updated)
*part_cols_updated = pc->part_cols_updated;
break;
Path *path = (Path *) lfirst(lc);
bool is_sorted;
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_partial_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause,
NIL,
dNumPartialGroups));
}
}
}
return result;
if (can_hash)
{
/* Checked above */
Assert(parse->hasAggs || parse->groupClause);
hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path,
agg_partial_costs,
dNumPartialGroups);
/*
* Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem.
*/
if (hashaggtablesize < work_mem * 1024L)
{
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
cheapest_partial_path,
partial_grouping_target,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
}
}
}
/*
* get_partitioned_child_rels_for_join
* Build and return a list containing the RTI of every partitioned
* relation which is a child of some rel included in the join.
* can_parallel_agg
*
* Determines whether or not parallel grouping and/or aggregation is possible.
* Returns true when possible, false otherwise.
*/
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
static bool
can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs)
{
List *result = NIL;
ListCell *l;
Query *parse = root->parse;
foreach(l, root->pcinfo_list)
if (!grouped_rel->consider_parallel)
{
PartitionedChildRelInfo *pc = lfirst(l);
if (bms_is_member(pc->parent_relid, join_relids))
result = list_concat(result, list_copy(pc->child_rels));
/* Not even parallel-safe. */
return false;
}
else if (input_rel->partial_pathlist == NIL)
{
/* Nothing to use as input for partial aggregate. */
return false;
}
else if (!parse->hasAggs && parse->groupClause == NIL)
{
/*
* We don't know how to do parallel aggregation unless we have either
* some aggregates or a grouping clause.
*/
return false;
}
else if (parse->groupingSets)
{
/* We don't know how to do grouping sets in parallel. */
return false;
}
else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
{
/* Insufficient support for partial mode. */
return false;
}
return result;
/* Everything looks good. */
return true;
}