Commit 9fd8b7d6 authored by Robert Haas's avatar Robert Haas

Factor some code out of create_grouping_paths.

This is preparatory refactoring to prepare the way for partition-wise
aggregate, which will reuse the new subroutines for child grouping
rels.  It also does not seem like a bad idea on general principle,
as the function was getting pretty long.

Jeevan Chalke.  The larger patch series of which this patch is a part
was reviewed and tested by Antonin Houska, Rajkumar Raghuwanshi,
Ashutosh Bapat, David Rowley, Dilip Kumar, Konstantin Knizhnik,
Pascal Legrand, and me.  Some cosmetic changes by me.

Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
parent 4971d2a3
...@@ -185,6 +185,26 @@ static PathTarget *make_sort_input_target(PlannerInfo *root, ...@@ -185,6 +185,26 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
bool *have_postponed_srfs); bool *have_postponed_srfs);
static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
List *targets, List *targets_contain_srfs); List *targets, List *targets_contain_srfs);
static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual);
static void add_partial_paths_to_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
List *havingQual);
static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
/***************************************************************************** /*****************************************************************************
...@@ -3610,15 +3630,11 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3610,15 +3630,11 @@ create_grouping_paths(PlannerInfo *root,
PathTarget *partial_grouping_target = NULL; PathTarget *partial_grouping_target = NULL;
AggClauseCosts agg_partial_costs; /* parallel only */ AggClauseCosts agg_partial_costs; /* parallel only */
AggClauseCosts agg_final_costs; /* parallel only */ AggClauseCosts agg_final_costs; /* parallel only */
Size hashaggtablesize;
double dNumGroups; double dNumGroups;
double dNumPartialGroups = 0;
bool can_hash; bool can_hash;
bool can_sort; bool can_sort;
bool try_parallel_aggregation; bool try_parallel_aggregation;
ListCell *lc;
/* For now, do all work in the (GROUP_AGG, NULL) upperrel */ /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
...@@ -3754,44 +3770,11 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3754,44 +3770,11 @@ create_grouping_paths(PlannerInfo *root,
(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
/* /*
* If grouped_rel->consider_parallel is true, then paths that we generate * Figure out whether a PartialAggregate/Finalize Aggregate execution
* for this grouping relation could be run inside of a worker, but that * strategy is viable.
* doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
* execution strategy. Figure that out.
*/ */
if (!grouped_rel->consider_parallel) try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel,
{ agg_costs);
/* Not even parallel-safe. */
try_parallel_aggregation = false;
}
else if (input_rel->partial_pathlist == NIL)
{
/* Nothing to use as input for partial aggregate. */
try_parallel_aggregation = false;
}
else if (!parse->hasAggs && parse->groupClause == NIL)
{
/*
* We don't know how to do parallel aggregation unless we have either
* some aggregates or a grouping clause.
*/
try_parallel_aggregation = false;
}
else if (parse->groupingSets)
{
/* We don't know how to do grouping sets in parallel. */
try_parallel_aggregation = false;
}
else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
{
/* Insufficient support for partial mode. */
try_parallel_aggregation = false;
}
else
{
/* Everything looks good. */
try_parallel_aggregation = true;
}
/* /*
* Before generating paths for grouped_rel, we first generate any possible * Before generating paths for grouped_rel, we first generate any possible
...@@ -3803,8 +3786,6 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3803,8 +3786,6 @@ create_grouping_paths(PlannerInfo *root,
*/ */
if (try_parallel_aggregation) if (try_parallel_aggregation)
{ {
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
/* /*
* Build target list for partial aggregate paths. These paths cannot * Build target list for partial aggregate paths. These paths cannot
* just emit the same tlist as regular aggregate paths, because (1) we * just emit the same tlist as regular aggregate paths, because (1) we
...@@ -3814,11 +3795,6 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3814,11 +3795,6 @@ create_grouping_paths(PlannerInfo *root,
*/ */
partial_grouping_target = make_partial_grouping_target(root, target); partial_grouping_target = make_partial_grouping_target(root, target);
/* Estimate number of partial groups. */
dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
gd);
/* /*
* Collect statistics about aggregates for estimating costs of * Collect statistics about aggregates for estimating costs of
* performing aggregation in parallel. * performing aggregation in parallel.
...@@ -3841,480 +3817,141 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3841,480 +3817,141 @@ create_grouping_paths(PlannerInfo *root,
&agg_final_costs); &agg_final_costs);
} }
if (can_sort) add_partial_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
{ partial_grouping_target,
/* This was checked before setting try_parallel_aggregation */ &agg_partial_costs, &agg_final_costs,
Assert(parse->hasAggs || parse->groupClause); gd, can_sort, can_hash,
(List *) parse->havingQual);
}
/* /* Build final grouping paths */
* Use any available suitably-sorted path as input, and also add_paths_to_grouping_rel(root, input_rel, grouped_rel, target,
* consider sorting the cheapest partial path. partial_grouping_target, agg_costs,
*/ &agg_final_costs, gd, can_sort, can_hash,
foreach(lc, input_rel->partial_pathlist) dNumGroups, (List *) parse->havingQual);
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
is_sorted = pathkeys_contained_in(root->group_pathkeys, /* Give a helpful error if we failed to find any implementation */
path->pathkeys); if (grouped_rel->pathlist == NIL)
if (path == cheapest_partial_path || is_sorted) ereport(ERROR,
{ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/* Sort the cheapest partial path, if it isn't already */ errmsg("could not implement GROUP BY"),
if (!is_sorted) errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs) /*
add_partial_path(grouped_rel, (Path *) * If there is an FDW that's responsible for all baserels of the query,
create_agg_path(root, * let it consider adding ForeignPaths.
grouped_rel, */
path, if (grouped_rel->fdwroutine &&
partial_grouping_target, grouped_rel->fdwroutine->GetForeignUpperPaths)
parse->groupClause ? AGG_SORTED : AGG_PLAIN, grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
AGGSPLIT_INITIAL_SERIAL, input_rel, grouped_rel);
parse->groupClause,
NIL,
&agg_partial_costs,
dNumPartialGroups));
else
add_partial_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause,
NIL,
dNumPartialGroups));
}
}
}
if (can_hash) /* Let extensions possibly add some more paths */
{ if (create_upper_paths_hook)
/* Checked above */ (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
Assert(parse->hasAggs || parse->groupClause); input_rel, grouped_rel);
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
hashaggtablesize = /*
estimate_hashagg_tablesize(cheapest_partial_path, * We've been using the partial pathlist for the grouped relation to hold
&agg_partial_costs, * partially aggregated paths, but that's actually a little bit bogus
dNumPartialGroups); * because it's unsafe for later planning stages -- like ordered_rel ---
* to get the idea that they can use these partial paths as if they didn't
* need a FinalizeAggregate step. Zap the partial pathlist at this stage
* so we don't get confused.
*/
grouped_rel->partial_pathlist = NIL;
/* return grouped_rel;
* Tentatively produce a partial HashAgg Path, depending on if it }
* looks as if the hash table will fit in work_mem.
*/
if (hashaggtablesize < work_mem * 1024L)
{
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
cheapest_partial_path,
partial_grouping_target,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
&agg_partial_costs,
dNumPartialGroups));
}
}
}
/* Build final grouping paths */
if (can_sort) /*
* For a given input path, consider the possible ways of doing grouping sets on
* it, by combinations of hashing and sorting. This can be called multiple
* times, so it's important that it not scribble on input. No result is
* returned, but any generated paths are added to grouped_rel.
*/
static void
consider_groupingsets_paths(PlannerInfo *root,
RelOptInfo *grouped_rel,
Path *path,
bool is_sorted,
bool can_hash,
PathTarget *target,
grouping_sets_data *gd,
const AggClauseCosts *agg_costs,
double dNumGroups)
{
Query *parse = root->parse;
/*
* If we're not being offered sorted input, then only consider plans that
* can be done entirely by hashing.
*
* We can hash everything if it looks like it'll fit in work_mem. But if
* the input is actually sorted despite not being advertised as such, we
* prefer to make use of that in order to use less memory.
*
* If none of the grouping sets are sortable, then ignore the work_mem
* limit and generate a path anyway, since otherwise we'll just fail.
*/
if (!is_sorted)
{ {
/* List *new_rollups = NIL;
* Use any available suitably-sorted path as input, and also consider RollupData *unhashed_rollup = NULL;
* sorting the cheapest-total path. List *sets_data;
*/ List *empty_sets_data = NIL;
foreach(lc, input_rel->pathlist) List *empty_sets = NIL;
{ ListCell *lc;
Path *path = (Path *) lfirst(lc); ListCell *l_start = list_head(gd->rollups);
bool is_sorted; AggStrategy strat = AGG_HASHED;
Size hashsize;
double exclude_groups = 0.0;
is_sorted = pathkeys_contained_in(root->group_pathkeys, Assert(can_hash);
path->pathkeys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* Now decide what to stick atop it */ if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
if (parse->groupingSets) {
{ unhashed_rollup = lfirst_node(RollupData, l_start);
consider_groupingsets_paths(root, grouped_rel, exclude_groups = unhashed_rollup->numGroups;
path, true, can_hash, target, l_start = lnext(l_start);
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
(List *) parse->havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
(List *) parse->havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
} }
hashsize = estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups - exclude_groups);
/* /*
* Now generate a complete GroupAgg Path atop of the cheapest partial * gd->rollups is empty if we have only unsortable columns to work
* path. We can do this using either Gather or Gather Merge. * with. Override work_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
*/ */
if (grouped_rel->partial_pathlist) if (hashsize > work_mem * 1024L && gd->rollups)
{ return; /* nope, won't fit */
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root, /*
grouped_rel, * We need to burst the existing rollups list into individual grouping
path, * sets and recompute a groupClause for each set.
partial_grouping_target, */
NULL, sets_data = list_copy(gd->unsortable_sets);
&total_groups);
for_each_cell(lc, l_start)
{
RollupData *rollup = lfirst_node(RollupData, lc);
/* /*
* Since Gather's output is always unsorted, we'll need to sort, * If we find an unhashable rollup that's not been skipped by the
* unless there's no GROUP BY clause or a degenerate (constant) * "actually sorted" check above, we can't cope; we'd need sorted
* one, in which case there will only be a single group. * input (with a different sort order) but we can't get that here.
*/ * So bail out; we'll get a valid path from the is_sorted case
if (root->group_pathkeys) * instead.
path = (Path *) create_sort_path(root, *
grouped_rel, * The mere presence of empty grouping sets doesn't make a rollup
path, * unhashable (see preprocess_grouping_sets), we handle those
root->group_pathkeys, * specially below.
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
(List *) parse->havingQual,
dNumGroups));
/*
* The point of using Gather Merge rather than Gather is that it
* can preserve the ordering of the input path, so there's no
* reason to try it unless (1) it's possible to produce more than
* one output row and (2) we want the output path to be ordered.
*/
if (parse->groupClause != NIL && root->group_pathkeys != NIL)
{
foreach(lc, grouped_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *gmpath;
double total_groups;
/*
* It's useful to consider paths that are already properly
* ordered for Gather Merge, because those don't need a
* sort. It's also useful to consider the cheapest path,
* because sorting it in parallel and then doing Gather
* Merge may be better than doing an unordered Gather
* followed by a sort. But there's no point in
* considering non-cheapest paths that aren't already
* sorted correctly.
*/
if (path != subpath &&
!pathkeys_contained_in(root->group_pathkeys,
subpath->pathkeys))
continue;
total_groups = subpath->rows * subpath->parallel_workers;
gmpath = (Path *)
create_gather_merge_path(root,
grouped_rel,
subpath,
partial_grouping_target,
root->group_pathkeys,
NULL,
&total_groups);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause,
(List *) parse->havingQual,
dNumGroups));
}
}
}
}
if (can_hash)
{
if (parse->groupingSets)
{
/*
* Try for a hash-only groupingsets path over unsorted input.
*/
consider_groupingsets_paths(root, grouped_rel,
cheapest_path, false, true, target,
gd, agg_costs, dNumGroups);
}
else
{
hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
agg_costs,
dNumGroups);
/*
* Provided that the estimated size of the hashtable does not
* exceed work_mem, we'll generate a HashAgg Path, although if we
* were unable to sort above, then we'd better generate a Path, so
* that we at least have one.
*/
if (hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
{
/*
* We just need an Agg over the cheapest-total input path,
* since input order won't matter.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root, grouped_rel,
cheapest_path,
target,
AGG_HASHED,
AGGSPLIT_SIMPLE,
parse->groupClause,
(List *) parse->havingQual,
agg_costs,
dNumGroups));
}
}
/*
* Generate a HashAgg Path atop of the cheapest partial path. Once
* again, we'll only do this if it looks as though the hash table
* won't exceed work_mem.
*/
if (grouped_rel->partial_pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
hashaggtablesize = estimate_hashagg_tablesize(path,
&agg_final_costs,
dNumGroups);
if (hashaggtablesize < work_mem * 1024L)
{
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
AGG_HASHED,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
(List *) parse->havingQual,
&agg_final_costs,
dNumGroups));
}
}
}
/* Give a helpful error if we failed to find any implementation */
if (grouped_rel->pathlist == NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
/*
* If there is an FDW that's responsible for all baserels of the query,
* let it consider adding ForeignPaths.
*/
if (grouped_rel->fdwroutine &&
grouped_rel->fdwroutine->GetForeignUpperPaths)
grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
/* Let extensions possibly add some more paths */
if (create_upper_paths_hook)
(*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
input_rel, grouped_rel);
/* Now choose the best path(s) */
set_cheapest(grouped_rel);
/*
* We've been using the partial pathlist for the grouped relation to hold
* partially aggregated paths, but that's actually a little bit bogus
* because it's unsafe for later planning stages -- like ordered_rel ---
* to get the idea that they can use these partial paths as if they didn't
* need a FinalizeAggregate step. Zap the partial pathlist at this stage
* so we don't get confused.
*/
grouped_rel->partial_pathlist = NIL;
return grouped_rel;
}
/*
* For a given input path, consider the possible ways of doing grouping sets on
* it, by combinations of hashing and sorting. This can be called multiple
* times, so it's important that it not scribble on input. No result is
* returned, but any generated paths are added to grouped_rel.
*/
static void
consider_groupingsets_paths(PlannerInfo *root,
RelOptInfo *grouped_rel,
Path *path,
bool is_sorted,
bool can_hash,
PathTarget *target,
grouping_sets_data *gd,
const AggClauseCosts *agg_costs,
double dNumGroups)
{
Query *parse = root->parse;
/*
* If we're not being offered sorted input, then only consider plans that
* can be done entirely by hashing.
*
* We can hash everything if it looks like it'll fit in work_mem. But if
* the input is actually sorted despite not being advertised as such, we
* prefer to make use of that in order to use less memory.
*
* If none of the grouping sets are sortable, then ignore the work_mem
* limit and generate a path anyway, since otherwise we'll just fail.
*/
if (!is_sorted)
{
List *new_rollups = NIL;
RollupData *unhashed_rollup = NULL;
List *sets_data;
List *empty_sets_data = NIL;
List *empty_sets = NIL;
ListCell *lc;
ListCell *l_start = list_head(gd->rollups);
AggStrategy strat = AGG_HASHED;
Size hashsize;
double exclude_groups = 0.0;
Assert(can_hash);
if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
{
unhashed_rollup = lfirst_node(RollupData, l_start);
exclude_groups = unhashed_rollup->numGroups;
l_start = lnext(l_start);
}
hashsize = estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups - exclude_groups);
/*
* gd->rollups is empty if we have only unsortable columns to work
* with. Override work_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
*/
if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
/*
* We need to burst the existing rollups list into individual grouping
* sets and recompute a groupClause for each set.
*/
sets_data = list_copy(gd->unsortable_sets);
for_each_cell(lc, l_start)
{
RollupData *rollup = lfirst_node(RollupData, lc);
/*
* If we find an unhashable rollup that's not been skipped by the
* "actually sorted" check above, we can't cope; we'd need sorted
* input (with a different sort order) but we can't get that here.
* So bail out; we'll get a valid path from the is_sorted case
* instead.
*
* The mere presence of empty grouping sets doesn't make a rollup
* unhashable (see preprocess_grouping_sets), we handle those
* specially below.
*/ */
if (!rollup->hashable) if (!rollup->hashable)
return; return;
...@@ -5971,246 +5608,693 @@ adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, ...@@ -5971,246 +5608,693 @@ adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
rel->cheapest_total_path = newpath; rel->cheapest_total_path = newpath;
} }
/* Likewise for partial paths, if any */ /* Likewise for partial paths, if any */
foreach(lc, rel->partial_pathlist) foreach(lc, rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *newpath = subpath;
ListCell *lc1,
*lc2;
Assert(subpath->param_info == NULL);
forboth(lc1, targets, lc2, targets_contain_srfs)
{
PathTarget *thistarget = lfirst_node(PathTarget, lc1);
bool contains_srfs = (bool) lfirst_int(lc2);
/* If this level doesn't contain SRFs, do regular projection */
if (contains_srfs)
newpath = (Path *) create_set_projection_path(root,
rel,
newpath,
thistarget);
else
{
/* avoid apply_projection_to_path, in case of multiple refs */
newpath = (Path *) create_projection_path(root,
rel,
newpath,
thistarget);
}
}
lfirst(lc) = newpath;
}
}
/*
* expression_planner
* Perform planner's transformations on a standalone expression.
*
* Various utility commands need to evaluate expressions that are not part
* of a plannable query. They can do so using the executor's regular
* expression-execution machinery, but first the expression has to be fed
* through here to transform it from parser output to something executable.
*
* Currently, we disallow sublinks in standalone expressions, so there's no
* real "planning" involved here. (That might not always be true though.)
* What we must do is run eval_const_expressions to ensure that any function
* calls are converted to positional notation and function default arguments
* get inserted. The fact that constant subexpressions get simplified is a
* side-effect that is useful when the expression will get evaluated more than
* once. Also, we must fix operator function IDs.
*
* Note: this must not make any damaging changes to the passed-in expression
* tree. (It would actually be okay to apply fix_opfuncids to it, but since
* we first do an expression_tree_mutator-based walk, what is returned will
* be a new node tree.)
*/
Expr *
expression_planner(Expr *expr)
{
Node *result;
/*
* Convert named-argument function calls, insert default arguments and
* simplify constant subexprs
*/
result = eval_const_expressions(NULL, (Node *) expr);
/* Fill in opfuncid values if missing */
fix_opfuncids(result);
return (Expr *) result;
}
/*
* plan_cluster_use_sort
* Use the planner to decide how CLUSTER should implement sorting
*
* tableOid is the OID of a table to be clustered on its index indexOid
* (which is already known to be a btree index). Decide whether it's
* cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
* Return true to use sorting, false to use an indexscan.
*
* Note: caller had better already hold some type of lock on the table.
*/
bool
plan_cluster_use_sort(Oid tableOid, Oid indexOid)
{
PlannerInfo *root;
Query *query;
PlannerGlobal *glob;
RangeTblEntry *rte;
RelOptInfo *rel;
IndexOptInfo *indexInfo;
QualCost indexExprCost;
Cost comparisonCost;
Path *seqScanPath;
Path seqScanAndSortPath;
IndexPath *indexScanPath;
ListCell *lc;
/* We can short-circuit the cost comparison if indexscans are disabled */
if (!enable_indexscan)
return true; /* use sort */
/* Set up mostly-dummy planner state */
query = makeNode(Query);
query->commandType = CMD_SELECT;
glob = makeNode(PlannerGlobal);
root = makeNode(PlannerInfo);
root->parse = query;
root->glob = glob;
root->query_level = 1;
root->planner_cxt = CurrentMemoryContext;
root->wt_param_id = -1;
/* Build a minimal RTE for the rel */
rte = makeNode(RangeTblEntry);
rte->rtekind = RTE_RELATION;
rte->relid = tableOid;
rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
rte->lateral = false;
rte->inh = false;
rte->inFromCl = true;
query->rtable = list_make1(rte);
/* Set up RTE/RelOptInfo arrays */
setup_simple_rel_arrays(root);
/* Build RelOptInfo */
rel = build_simple_rel(root, 1, NULL);
/* Locate IndexOptInfo for the target index */
indexInfo = NULL;
foreach(lc, rel->indexlist)
{
indexInfo = lfirst_node(IndexOptInfo, lc);
if (indexInfo->indexoid == indexOid)
break;
}
/*
* It's possible that get_relation_info did not generate an IndexOptInfo
* for the desired index; this could happen if it's not yet reached its
* indcheckxmin usability horizon, or if it's a system index and we're
* ignoring system indexes. In such cases we should tell CLUSTER to not
* trust the index contents but use seqscan-and-sort.
*/
if (lc == NULL) /* not in the list? */
return true; /* use sort */
/*
* Rather than doing all the pushups that would be needed to use
* set_baserel_size_estimates, just do a quick hack for rows and width.
*/
rel->rows = rel->tuples;
rel->reltarget->width = get_relation_data_width(tableOid, NULL);
root->total_table_pages = rel->pages;
/*
* Determine eval cost of the index expressions, if any. We need to
* charge twice that amount for each tuple comparison that happens during
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
/* Estimate the cost of seq scan + sort */
seqScanPath = create_seqscan_path(root, rel, NULL, 0);
cost_sort(&seqScanAndSortPath, root, NIL,
seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
comparisonCost, maintenance_work_mem, -1.0);
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false,
NULL, 1.0, false);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
/*
* get_partitioned_child_rels
* Returns a list of the RT indexes of the partitioned child relations
* with rti as the root parent RT index. Also sets
* *part_cols_updated to true if any of the root rte's updated
* columns is used in the partition key either of the relation whose RTI
* is specified or of any child relation.
*
* Note: This function might get called even for range table entries that
* are not partitioned tables; in such a case, it will simply return NIL.
*/
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated)
{
List *result = NIL;
ListCell *l;
if (part_cols_updated)
*part_cols_updated = false;
foreach(l, root->pcinfo_list)
{ {
Path *subpath = (Path *) lfirst(lc); PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);
Path *newpath = subpath;
ListCell *lc1,
*lc2;
Assert(subpath->param_info == NULL); if (pc->parent_relid == rti)
forboth(lc1, targets, lc2, targets_contain_srfs)
{ {
PathTarget *thistarget = lfirst_node(PathTarget, lc1); result = pc->child_rels;
bool contains_srfs = (bool) lfirst_int(lc2); if (part_cols_updated)
*part_cols_updated = pc->part_cols_updated;
/* If this level doesn't contain SRFs, do regular projection */ break;
if (contains_srfs)
newpath = (Path *) create_set_projection_path(root,
rel,
newpath,
thistarget);
else
{
/* avoid apply_projection_to_path, in case of multiple refs */
newpath = (Path *) create_projection_path(root,
rel,
newpath,
thistarget);
}
} }
lfirst(lc) = newpath;
} }
return result;
} }
/* /*
* expression_planner * get_partitioned_child_rels_for_join
* Perform planner's transformations on a standalone expression. * Build and return a list containing the RTI of every partitioned
* * relation which is a child of some rel included in the join.
* Various utility commands need to evaluate expressions that are not part
* of a plannable query. They can do so using the executor's regular
* expression-execution machinery, but first the expression has to be fed
* through here to transform it from parser output to something executable.
*
* Currently, we disallow sublinks in standalone expressions, so there's no
* real "planning" involved here. (That might not always be true though.)
* What we must do is run eval_const_expressions to ensure that any function
* calls are converted to positional notation and function default arguments
* get inserted. The fact that constant subexpressions get simplified is a
* side-effect that is useful when the expression will get evaluated more than
* once. Also, we must fix operator function IDs.
*
* Note: this must not make any damaging changes to the passed-in expression
* tree. (It would actually be okay to apply fix_opfuncids to it, but since
* we first do an expression_tree_mutator-based walk, what is returned will
* be a new node tree.)
*/ */
Expr * List *
expression_planner(Expr *expr) get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{ {
Node *result; List *result = NIL;
ListCell *l;
/* foreach(l, root->pcinfo_list)
* Convert named-argument function calls, insert default arguments and {
* simplify constant subexprs PartitionedChildRelInfo *pc = lfirst(l);
*/
result = eval_const_expressions(NULL, (Node *) expr);
/* Fill in opfuncid values if missing */ if (bms_is_member(pc->parent_relid, join_relids))
fix_opfuncids(result); result = list_concat(result, list_copy(pc->child_rels));
}
return (Expr *) result; return result;
} }
/* /*
* plan_cluster_use_sort * add_paths_to_grouping_rel
* Use the planner to decide how CLUSTER should implement sorting
*
* tableOid is the OID of a table to be clustered on its index indexOid
* (which is already known to be a btree index). Decide whether it's
* cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
* Return true to use sorting, false to use an indexscan.
* *
* Note: caller had better already hold some type of lock on the table. * Add non-partial paths to grouping relation.
*/ */
bool static void
plan_cluster_use_sort(Oid tableOid, Oid indexOid) add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, PathTarget *target,
PathTarget *partial_grouping_target,
const AggClauseCosts *agg_costs,
const AggClauseCosts *agg_final_costs,
grouping_sets_data *gd, bool can_sort, bool can_hash,
double dNumGroups, List *havingQual)
{ {
PlannerInfo *root; Query *parse = root->parse;
Query *query; Path *cheapest_path = input_rel->cheapest_total_path;
PlannerGlobal *glob;
RangeTblEntry *rte;
RelOptInfo *rel;
IndexOptInfo *indexInfo;
QualCost indexExprCost;
Cost comparisonCost;
Path *seqScanPath;
Path seqScanAndSortPath;
IndexPath *indexScanPath;
ListCell *lc; ListCell *lc;
/* We can short-circuit the cost comparison if indexscans are disabled */ if (can_sort)
if (!enable_indexscan) {
return true; /* use sort */ /*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest-total path.
*/
foreach(lc, input_rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
bool is_sorted;
/* Set up mostly-dummy planner state */ is_sorted = pathkeys_contained_in(root->group_pathkeys,
query = makeNode(Query); path->pathkeys);
query->commandType = CMD_SELECT; if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
glob = makeNode(PlannerGlobal); /* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash, target,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
}
root = makeNode(PlannerInfo); /*
root->parse = query; * Now generate a complete GroupAgg Path atop of the cheapest partial
root->glob = glob; * path. We can do this using either Gather or Gather Merge.
root->query_level = 1; */
root->planner_cxt = CurrentMemoryContext; if (grouped_rel->partial_pathlist)
root->wt_param_id = -1; {
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
double total_groups = path->rows * path->parallel_workers;
path = (Path *) create_gather_path(root,
grouped_rel,
path,
partial_grouping_target,
NULL,
&total_groups);
/*
* Since Gather's output is always unsorted, we'll need to sort,
* unless there's no GROUP BY clause or a degenerate (constant)
* one, in which case there will only be a single group.
*/
if (root->group_pathkeys)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
target,
parse->groupClause,
havingQual,
dNumGroups));
/*
* The point of using Gather Merge rather than Gather is that it
* can preserve the ordering of the input path, so there's no
* reason to try it unless (1) it's possible to produce more than
* one output row and (2) we want the output path to be ordered.
*/
if (parse->groupClause != NIL && root->group_pathkeys != NIL)
{
foreach(lc, grouped_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
Path *gmpath;
double total_groups;
/*
* It's useful to consider paths that are already properly
* ordered for Gather Merge, because those don't need a
* sort. It's also useful to consider the cheapest path,
* because sorting it in parallel and then doing Gather
* Merge may be better than doing an unordered Gather
* followed by a sort. But there's no point in considering
* non-cheapest paths that aren't already sorted
* correctly.
*/
if (path != subpath &&
!pathkeys_contained_in(root->group_pathkeys,
subpath->pathkeys))
continue;
/* Build a minimal RTE for the rel */ total_groups = subpath->rows * subpath->parallel_workers;
rte = makeNode(RangeTblEntry);
rte->rtekind = RTE_RELATION;
rte->relid = tableOid;
rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
rte->lateral = false;
rte->inh = false;
rte->inFromCl = true;
query->rtable = list_make1(rte);
/* Set up RTE/RelOptInfo arrays */ gmpath = (Path *)
setup_simple_rel_arrays(root); create_gather_merge_path(root,
grouped_rel,
subpath,
partial_grouping_target,
root->group_pathkeys,
NULL,
&total_groups);
/* Build RelOptInfo */ if (parse->hasAggs)
rel = build_simple_rel(root, 1, NULL); add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
gmpath,
target,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
}
/* Locate IndexOptInfo for the target index */ if (can_hash)
indexInfo = NULL;
foreach(lc, rel->indexlist)
{ {
indexInfo = lfirst_node(IndexOptInfo, lc); Size hashaggtablesize;
if (indexInfo->indexoid == indexOid)
break;
}
/* if (parse->groupingSets)
* It's possible that get_relation_info did not generate an IndexOptInfo {
* for the desired index; this could happen if it's not yet reached its /*
* indcheckxmin usability horizon, or if it's a system index and we're * Try for a hash-only groupingsets path over unsorted input.
* ignoring system indexes. In such cases we should tell CLUSTER to not */
* trust the index contents but use seqscan-and-sort. consider_groupingsets_paths(root, grouped_rel,
*/ cheapest_path, false, true, target,
if (lc == NULL) /* not in the list? */ gd, agg_costs, dNumGroups);
return true; /* use sort */ }
else
{
hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
agg_costs,
dNumGroups);
/* /*
* Rather than doing all the pushups that would be needed to use * Provided that the estimated size of the hashtable does not
* set_baserel_size_estimates, just do a quick hack for rows and width. * exceed work_mem, we'll generate a HashAgg Path, although if we
*/ * were unable to sort above, then we'd better generate a Path, so
rel->rows = rel->tuples; * that we at least have one.
rel->reltarget->width = get_relation_data_width(tableOid, NULL); */
if (hashaggtablesize < work_mem * 1024L ||
grouped_rel->pathlist == NIL)
{
/*
* We just need an Agg over the cheapest-total input path,
* since input order won't matter.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root, grouped_rel,
cheapest_path,
target,
AGG_HASHED,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
}
root->total_table_pages = rel->pages; /*
* Generate a HashAgg Path atop of the cheapest partial path. Once
* again, we'll only do this if it looks as though the hash table
* won't exceed work_mem.
*/
if (grouped_rel->partial_pathlist)
{
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
/* hashaggtablesize = estimate_hashagg_tablesize(path,
* Determine eval cost of the index expressions, if any. We need to agg_final_costs,
* charge twice that amount for each tuple comparison that happens during dNumGroups);
* the sort, since tuplesort.c will have to re-evaluate the index
* expressions each time. (XXX that's pretty inefficient...)
*/
cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
/* Estimate the cost of seq scan + sort */ if (hashaggtablesize < work_mem * 1024L)
seqScanPath = create_seqscan_path(root, rel, NULL, 0); {
cost_sort(&seqScanAndSortPath, root, NIL, double total_groups = path->rows * path->parallel_workers;
seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
comparisonCost, maintenance_work_mem, -1.0);
/* Estimate the cost of index scan */ path = (Path *) create_gather_path(root,
indexScanPath = create_index_path(root, indexInfo, grouped_rel,
NIL, NIL, NIL, NIL, NIL, path,
ForwardScanDirection, false, partial_grouping_target,
NULL, 1.0, false); NULL,
&total_groups);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
target,
AGG_HASHED,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
}
}
}
} }
/* /*
* get_partitioned_child_rels * add_partial_paths_to_grouping_rel
* Returns a list of the RT indexes of the partitioned child relations
* with rti as the root parent RT index. Also sets
* *part_cols_updated to true if any of the root rte's updated
* columns is used in the partition key either of the relation whose RTI
* is specified or of any child relation.
* *
* Note: This function might get called even for range table entries that * Add partial paths to grouping relation. These paths are not fully
* are not partitioned tables; in such a case, it will simply return NIL. * aggregated; a FinalizeAggregate step is still required.
*/ */
List * static void
get_partitioned_child_rels(PlannerInfo *root, Index rti, add_partial_paths_to_grouping_rel(PlannerInfo *root,
bool *part_cols_updated) RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
PathTarget *target,
PathTarget *partial_grouping_target,
AggClauseCosts *agg_partial_costs,
AggClauseCosts *agg_final_costs,
grouping_sets_data *gd,
bool can_sort,
bool can_hash,
List *havingQual)
{ {
List *result = NIL; Query *parse = root->parse;
ListCell *l; Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
Size hashaggtablesize;
double dNumPartialGroups = 0;
ListCell *lc;
if (part_cols_updated) /* Estimate number of partial groups. */
*part_cols_updated = false; dNumPartialGroups = get_number_of_groups(root,
cheapest_partial_path->rows,
gd);
foreach(l, root->pcinfo_list) if (can_sort)
{ {
PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l); /* This should have been checked previously */
Assert(parse->hasAggs || parse->groupClause);
if (pc->parent_relid == rti) /*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest partial path.
*/
foreach(lc, input_rel->partial_pathlist)
{ {
result = pc->child_rels; Path *path = (Path *) lfirst(lc);
if (part_cols_updated) bool is_sorted;
*part_cols_updated = pc->part_cols_updated;
break; is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_partial_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
partial_grouping_target,
parse->groupClause,
NIL,
dNumPartialGroups));
}
} }
} }
return result; if (can_hash)
{
/* Checked above */
Assert(parse->hasAggs || parse->groupClause);
hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path,
agg_partial_costs,
dNumPartialGroups);
/*
* Tentatively produce a partial HashAgg Path, depending on if it
* looks as if the hash table will fit in work_mem.
*/
if (hashaggtablesize < work_mem * 1024L)
{
add_partial_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
cheapest_partial_path,
partial_grouping_target,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
}
}
} }
/* /*
* get_partitioned_child_rels_for_join * can_parallel_agg
* Build and return a list containing the RTI of every partitioned *
* relation which is a child of some rel included in the join. * Determines whether or not parallel grouping and/or aggregation is possible.
* Returns true when possible, false otherwise.
*/ */
List * static bool
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids) can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs)
{ {
List *result = NIL; Query *parse = root->parse;
ListCell *l;
foreach(l, root->pcinfo_list) if (!grouped_rel->consider_parallel)
{ {
PartitionedChildRelInfo *pc = lfirst(l); /* Not even parallel-safe. */
return false;
if (bms_is_member(pc->parent_relid, join_relids)) }
result = list_concat(result, list_copy(pc->child_rels)); else if (input_rel->partial_pathlist == NIL)
{
/* Nothing to use as input for partial aggregate. */
return false;
}
else if (!parse->hasAggs && parse->groupClause == NIL)
{
/*
* We don't know how to do parallel aggregation unless we have either
* some aggregates or a grouping clause.
*/
return false;
}
else if (parse->groupingSets)
{
/* We don't know how to do grouping sets in parallel. */
return false;
}
else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
{
/* Insufficient support for partial mode. */
return false;
} }
return result; /* Everything looks good. */
return true;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment