Commit 0a2bc5d6 authored by Heikki Linnakangas

Move per-agg and per-trans duplicate finding to the planner.

This has the advantage that the cost estimates for aggregates can count
the number of calls to transition and final functions correctly.

Bump catalog version, because views can contain Aggrefs.

Reviewed-by: Andres Freund
Discussion: https://www.postgresql.org/message-id/b2e3536b-1dbc-8303-c97e-89cb0b4a9a48%40iki.fi
parent e522024b
......@@ -32,6 +32,7 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
......@@ -2944,16 +2945,7 @@ estimate_path_cost_size(PlannerInfo *root,
MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
if (root->parse->hasAggs)
{
get_agg_clause_costs(root, (Node *) fpinfo->grouped_tlist,
AGGSPLIT_SIMPLE, &aggcosts);
/*
* The cost of aggregates in the HAVING qual will be the same
* for each child as it is for the parent, so there's no need
* to use a translated version of havingQual.
*/
get_agg_clause_costs(root, (Node *) root->parse->havingQual,
AGGSPLIT_SIMPLE, &aggcosts);
get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &aggcosts);
}
/* Get number of grouping columns and possible number of groups */
......
......@@ -99,8 +99,7 @@ static void ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
* the same as the per-query context of the associated ExprContext.
*
* Any Aggref, WindowFunc, or SubPlan nodes found in the tree are added to
* the lists of such nodes held by the parent PlanState (or more accurately,
* the AggrefExprState etc. nodes created for them are added).
* the lists of such nodes held by the parent PlanState.
*
* Note: there is no ExecEndExpr function; we assume that any resource
* cleanup needed will be handled by just releasing the memory context
......@@ -779,18 +778,15 @@ ExecInitExprRec(Expr *node, ExprState *state,
case T_Aggref:
{
Aggref *aggref = (Aggref *) node;
AggrefExprState *astate = makeNode(AggrefExprState);
scratch.opcode = EEOP_AGGREF;
scratch.d.aggref.astate = astate;
astate->aggref = aggref;
scratch.d.aggref.aggno = aggref->aggno;
if (state->parent && IsA(state->parent, AggState))
{
AggState *aggstate = (AggState *) state->parent;
aggstate->aggs = lappend(aggstate->aggs, astate);
aggstate->numaggs++;
aggstate->aggs = lappend(aggstate->aggs, aggref);
}
else
{
......
......@@ -1494,12 +1494,12 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
* Returns a Datum whose value is the precomputed aggregate value
* found in the given expression context.
*/
AggrefExprState *aggref = op->d.aggref.astate;
int aggno = op->d.aggref.aggno;
Assert(econtext->ecxt_aggvalues != NULL);
*op->resvalue = econtext->ecxt_aggvalues[aggref->aggno];
*op->resnull = econtext->ecxt_aggnulls[aggref->aggno];
*op->resvalue = econtext->ecxt_aggvalues[aggno];
*op->resnull = econtext->ecxt_aggnulls[aggno];
EEO_NEXT();
}
......
This diff is collapsed.
......@@ -1849,20 +1849,11 @@ llvm_compile_expr(ExprState *state)
case EEOP_AGGREF:
{
AggrefExprState *aggref = op->d.aggref.astate;
LLVMValueRef v_aggnop;
LLVMValueRef v_aggno;
LLVMValueRef value,
isnull;
/*
* At this point aggref->aggno is not yet set (it's set up
* in ExecInitAgg() after initializing the expression). So
* load it from memory each time round.
*/
v_aggnop = l_ptr_const(&aggref->aggno,
l_ptr(LLVMInt32Type()));
v_aggno = LLVMBuildLoad(b, v_aggnop, "v_aggno");
v_aggno = l_int32_const(op->d.aggref.aggno);
/* load agg value / null */
value = l_load_gep1(b, v_aggvalues, v_aggno, "aggvalue");
......
......@@ -1492,6 +1492,8 @@ _copyAggref(const Aggref *from)
COPY_SCALAR_FIELD(aggkind);
COPY_SCALAR_FIELD(agglevelsup);
COPY_SCALAR_FIELD(aggsplit);
COPY_SCALAR_FIELD(aggno);
COPY_SCALAR_FIELD(aggtransno);
COPY_LOCATION_FIELD(location);
return newnode;
......
......@@ -232,6 +232,8 @@ _equalAggref(const Aggref *a, const Aggref *b)
COMPARE_SCALAR_FIELD(aggkind);
COMPARE_SCALAR_FIELD(agglevelsup);
COMPARE_SCALAR_FIELD(aggsplit);
COMPARE_SCALAR_FIELD(aggno);
COMPARE_SCALAR_FIELD(aggtransno);
COMPARE_LOCATION_FIELD(location);
return true;
......
......@@ -1153,6 +1153,8 @@ _outAggref(StringInfo str, const Aggref *node)
WRITE_CHAR_FIELD(aggkind);
WRITE_UINT_FIELD(agglevelsup);
WRITE_ENUM_FIELD(aggsplit, AggSplit);
WRITE_INT_FIELD(aggno);
WRITE_INT_FIELD(aggtransno);
WRITE_LOCATION_FIELD(location);
}
......
......@@ -615,6 +615,8 @@ _readAggref(void)
READ_CHAR_FIELD(aggkind);
READ_UINT_FIELD(agglevelsup);
READ_ENUM_FIELD(aggsplit, AggSplit);
READ_INT_FIELD(aggno);
READ_INT_FIELD(aggtransno);
READ_LOCATION_FIELD(location);
READ_DONE();
......
......@@ -2439,7 +2439,8 @@ cost_agg(Path *path, PlannerInfo *root,
* than or equal to one, all groups are expected to fit in memory;
* otherwise we expect to spill.
*/
hashentrysize = hash_agg_entry_size(aggcosts->numAggs, input_width,
hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
input_width,
aggcosts->transitionSpace);
hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit,
&ngroups_limit, &num_partitions);
......
......@@ -47,7 +47,7 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
static bool find_minmax_aggs_walker(Node *node, List **context);
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
Oid eqop, Oid sortop, bool nulls_first);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
......@@ -66,7 +66,8 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* query_planner(), because we generate indexscan paths by cloning the
* planner's state and invoking query_planner() on a modified version of
* the query parsetree. Thus, all preprocessing needed before query_planner()
* must already be done.
* must already be done. This relies on the list of aggregates in
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
preprocess_minmax_aggregates(PlannerInfo *root)
......@@ -140,9 +141,7 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
*/
aggs_list = NIL;
if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list))
return;
if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
if (!can_minmax_aggs(root, &aggs_list))
return;
/*
......@@ -227,38 +226,33 @@ preprocess_minmax_aggregates(PlannerInfo *root)
}
/*
* find_minmax_aggs_walker
* Recursively scan the Aggref nodes in an expression tree, and check
* that each one is a MIN/MAX aggregate. If so, build a list of the
* can_minmax_aggs
* Walk through all the aggregates in the query, and check
* if they are all MIN/MAX aggregates. If so, build a list of the
* distinct aggregate calls in the tree.
*
* Returns true if a non-MIN/MAX aggregate is found, false otherwise.
* (This seemingly-backward definition is used because expression_tree_walker
* aborts the scan on true return, which is what we want.)
*
* Found aggregates are added to the list at *context; it's up to the caller
* to initialize the list to NIL.
* Returns false if a non-MIN/MAX aggregate is found, true otherwise.
*
* This does not descend into subqueries, and so should be used only after
* reduction of sublinks to subplans. There mustn't be outer-aggregate
* references either.
*/
static bool
find_minmax_aggs_walker(Node *node, List **context)
can_minmax_aggs(PlannerInfo *root, List **context)
{
if (node == NULL)
return false;
if (IsA(node, Aggref))
ListCell *lc;
foreach(lc, root->agginfos)
{
Aggref *aggref = (Aggref *) node;
AggInfo *agginfo = (AggInfo *) lfirst(lc);
Aggref *aggref = agginfo->representative_aggref;
Oid aggsortop;
TargetEntry *curTarget;
MinMaxAggInfo *mminfo;
ListCell *l;
Assert(aggref->agglevelsup == 0);
if (list_length(aggref->args) != 1)
return true; /* it couldn't be MIN/MAX */
return false; /* it couldn't be MIN/MAX */
/*
* ORDER BY is usually irrelevant for MIN/MAX, but it can change the
......@@ -274,7 +268,7 @@ find_minmax_aggs_walker(Node *node, List **context)
* quickly.
*/
if (aggref->aggorder != NIL)
return true;
return false;
/* note: we do not care if DISTINCT is mentioned ... */
/*
......@@ -283,30 +277,19 @@ find_minmax_aggs_walker(Node *node, List **context)
* now, just punt.
*/
if (aggref->aggfilter != NULL)
return true;
return false;
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))
return true; /* not a MIN/MAX aggregate */
return false; /* not a MIN/MAX aggregate */
curTarget = (TargetEntry *) linitial(aggref->args);
if (contain_mutable_functions((Node *) curTarget->expr))
return true; /* not potentially indexable */
return false; /* not potentially indexable */
if (type_is_rowtype(exprType((Node *) curTarget->expr)))
return true; /* IS NOT NULL would have weird semantics */
/*
* Check whether it's already in the list, and add it if not.
*/
foreach(l, *context)
{
mminfo = (MinMaxAggInfo *) lfirst(l);
if (mminfo->aggfnoid == aggref->aggfnoid &&
equal(mminfo->target, curTarget->expr))
return false;
}
return false; /* IS NOT NULL would have weird semantics */
mminfo = makeNode(MinMaxAggInfo);
mminfo->aggfnoid = aggref->aggfnoid;
......@@ -318,16 +301,8 @@ find_minmax_aggs_walker(Node *node, List **context)
mminfo->param = NULL;
*context = lappend(*context, mminfo);
/*
* We need not recurse into the argument, since it can't contain any
* aggregates.
*/
return false;
}
Assert(!IsA(node, SubLink));
return expression_tree_walker(node, find_minmax_aggs_walker,
(void *) context);
return true;
}
/*
......@@ -368,6 +343,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
subroot->plan_params = NIL;
subroot->outer_params = NULL;
subroot->init_plans = NIL;
subroot->agginfos = NIL;
subroot->aggtransinfos = NIL;
subroot->parse = parse = copyObject(root->parse);
IncrementVarSublevelsUp((Node *) parse, 1, 1);
......
......@@ -152,7 +152,6 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
bool target_parallel_safe,
const AggClauseCosts *agg_costs,
grouping_sets_data *gd);
static bool is_degenerate_grouping(PlannerInfo *root);
static void create_degenerate_grouping_paths(PlannerInfo *root,
......@@ -228,8 +227,7 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
GroupPathExtraData *extra,
bool force_rel_creation);
static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
static bool can_partial_agg(PlannerInfo *root,
const AggClauseCosts *agg_costs);
static bool can_partial_agg(PlannerInfo *root);
static void apply_scanjoin_target_to_paths(PlannerInfo *root,
RelOptInfo *rel,
List *scanjoin_targets,
......@@ -1944,7 +1942,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
bool scanjoin_target_parallel_safe;
bool scanjoin_target_same_exprs;
bool have_grouping;
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
grouping_sets_data *gset_data = NULL;
......@@ -1975,25 +1972,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
root->processed_tlist = preprocess_targetlist(root);
/*
* Collect statistics about aggregates for estimating costs, and mark
* all the aggregates with resolved aggtranstypes. We must do this
* before slicing and dicing the tlist into various pathtargets, else
* some copies of the Aggref nodes might escape being marked with the
* correct transtypes.
*
* Note: currently, we do not detect duplicate aggregates here. This
* may result in somewhat-overestimated cost, which is fine for our
* purposes since all Paths will get charged the same. But at some
* point we might wish to do that detection in the planner, rather
* than during executor startup.
* Mark all the aggregates with resolved aggtranstypes, and detect
* aggregates that are duplicates or can share transition state. We
* must do this before slicing and dicing the tlist into various
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
if (parse->hasAggs)
{
get_agg_clause_costs(root, (Node *) root->processed_tlist,
AGGSPLIT_SIMPLE, &agg_costs);
get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
&agg_costs);
preprocess_aggrefs(root, (Node *) root->processed_tlist);
preprocess_aggrefs(root, (Node *) parse->havingQual);
}
/*
......@@ -2198,7 +2186,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel,
grouping_target,
grouping_target_parallel_safe,
&agg_costs,
gset_data);
/* Fix things up if grouping_target contains SRFs */
if (parse->hasTargetSRFs)
......@@ -3790,7 +3777,6 @@ get_number_of_groups(PlannerInfo *root,
*
* input_rel: contains the source-data Paths
* target: the pathtarget for the result Paths to compute
* agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
* gd: grouping sets data including list of grouping sets and their clauses
*
* Note: all Paths in input_rel are expected to return the target computed
......@@ -3801,12 +3787,15 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
bool target_parallel_safe,
const AggClauseCosts *agg_costs,
grouping_sets_data *gd)
{
Query *parse = root->parse;
RelOptInfo *grouped_rel;
RelOptInfo *partially_grouped_rel;
AggClauseCosts agg_costs;
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
/*
* Create grouping relation to hold fully aggregated grouping and/or
......@@ -3862,14 +3851,14 @@ create_grouping_paths(PlannerInfo *root,
* the other gating conditions, so we want to do it last.
*/
if ((parse->groupClause != NIL &&
agg_costs->numOrderedAggs == 0 &&
root->numOrderedAggs == 0 &&
(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
flags |= GROUPING_CAN_USE_HASH;
/*
* Determine whether partial aggregation is possible.
*/
if (can_partial_agg(root, agg_costs))
if (can_partial_agg(root))
flags |= GROUPING_CAN_PARTIAL_AGG;
extra.flags = flags;
......@@ -3890,7 +3879,7 @@ create_grouping_paths(PlannerInfo *root,
extra.patype = PARTITIONWISE_AGGREGATE_NONE;
create_ordinary_grouping_paths(root, input_rel, grouped_rel,
agg_costs, gd, &extra,
&agg_costs, gd, &extra,
&partially_grouped_rel);
}
......@@ -4248,7 +4237,8 @@ consider_groupingsets_paths(PlannerInfo *root,
l_start = lnext(gd->rollups, l_start);
}
hashsize = estimate_hashagg_tablesize(path,
hashsize = estimate_hashagg_tablesize(root,
path,
agg_costs,
dNumGroups - exclude_groups);
......@@ -4382,7 +4372,8 @@ consider_groupingsets_paths(PlannerInfo *root,
/*
* Account first for space needed for groups we can't sort at all.
*/
availspace -= estimate_hashagg_tablesize(path,
availspace -= estimate_hashagg_tablesize(root,
path,
agg_costs,
gd->dNumHashGroups);
......@@ -4433,7 +4424,8 @@ consider_groupingsets_paths(PlannerInfo *root,
if (rollup->hashable)
{
double sz = estimate_hashagg_tablesize(path,
double sz = estimate_hashagg_tablesize(root,
path,
agg_costs,
rollup->numGroups);
......@@ -6926,20 +6918,12 @@ create_partial_grouping_paths(PlannerInfo *root,
MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
if (parse->hasAggs)
{
List *partial_target_exprs;
/* partial phase */
partial_target_exprs = partially_grouped_rel->reltarget->exprs;
get_agg_clause_costs(root, (Node *) partial_target_exprs,
AGGSPLIT_INITIAL_SERIAL,
get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
agg_partial_costs);
/* final phase */
get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs,
AGGSPLIT_FINAL_DESERIAL,
agg_final_costs);
get_agg_clause_costs(root, extra->havingQual,
AGGSPLIT_FINAL_DESERIAL,
get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
agg_final_costs);
}
......@@ -7324,7 +7308,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
* Returns true when possible, false otherwise.
*/
static bool
can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs)
can_partial_agg(PlannerInfo *root)
{
Query *parse = root->parse;
......@@ -7341,7 +7325,7 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs)
/* We don't know how to do grouping sets in parallel. */
return false;
}
else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
{
/* Insufficient support for partial mode. */
return false;
......
......@@ -13,6 +13,7 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = \
prepagg.o \
prepjointree.o \
prepqual.o \
preptlist.o \
......
This diff is collapsed.
This diff is collapsed.
......@@ -769,6 +769,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
aggref->aggkind = aggkind;
/* agglevelsup will be set by transformAggregateCall */
aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
aggref->aggno = -1; /* planner will set aggno and aggtransno */
aggref->aggtransno = -1;
aggref->location = location;
/*
......
......@@ -3839,12 +3839,14 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
* won't store them. Is this a problem?
*/
double
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
double dNumGroups)
estimate_hashagg_tablesize(PlannerInfo *root, Path *path,
const AggClauseCosts *agg_costs, double dNumGroups)
{
Size hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
path->pathtarget->width,
agg_costs->transitionSpace);
Size hashentrysize;
hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
path->pathtarget->width,
agg_costs->transitionSpace);
/*
* Note that this disregards the effect of fill-factor and growth policy
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202011231
#define CATALOG_VERSION_NO 202011241
#endif
......@@ -564,8 +564,7 @@ typedef struct ExprEvalStep
/* for EEOP_AGGREF */
struct
{
/* out-of-line state, modified by nodeAgg.c */
AggrefExprState *astate;
int aggno;
} aggref;
/* for EEOP_GROUPING_FUNC */
......
......@@ -746,17 +746,6 @@ typedef tuplehash_iterator TupleHashIterator;
* ----------------------------------------------------------------
*/
/* ----------------
* AggrefExprState node
* ----------------
*/
typedef struct AggrefExprState
{
NodeTag type;
Aggref *aggref; /* expression plan node */
int aggno; /* ID number for agg within its plan node */
} AggrefExprState;
/* ----------------
* WindowFuncExprState node
* ----------------
......
......@@ -206,10 +206,9 @@ typedef enum NodeTag
* Most Expr-based plan nodes do not have a corresponding expression state
* node, they're fully handled within execExpr* - but sometimes the state
* needs to be shared with other parts of the executor, as for example
* with AggrefExprState, which nodeAgg.c has to modify.
* with SubPlanState, which nodeSubplan.c has to modify.
*/
T_ExprState,
T_AggrefExprState,
T_WindowFuncExprState,
T_SetExprState,
T_SubPlanState,
......
......@@ -55,10 +55,6 @@ typedef struct QualCost
*/
typedef struct AggClauseCosts
{
int numAggs; /* total number of aggregate functions */
int numOrderedAggs; /* number w/ DISTINCT/ORDER BY/WITHIN GROUP */
bool hasNonPartial; /* does any agg not support partial mode? */
bool hasNonSerial; /* is any partial agg non-serializable? */
QualCost transCost; /* total per-input-row execution costs */
QualCost finalCost; /* total per-aggregated-row costs */
Size transitionSpace; /* space for pass-by-ref transition data */
......@@ -348,6 +344,15 @@ struct PlannerInfo
bool hasAlternativeSubPlans; /* true if we've made any of those */
bool hasRecursion; /* true if planning a recursive WITH item */
/*
* Information about aggregates. Filled by preprocess_aggrefs().
*/
List *agginfos; /* AggInfo structs */
List *aggtransinfos; /* AggTransInfo structs */
int numOrderedAggs; /* number w/ DISTINCT/ORDER BY/WITHIN GROUP */
bool hasNonPartialAggs; /* does any agg not support partial mode? */
bool hasNonSerialAggs; /* is any partial agg non-serializable? */
/* These fields are used only when hasRecursion is true: */
int wt_param_id; /* PARAM_EXEC ID for the work table */
struct Path *non_recursive_path; /* a path for non-recursive term */
......@@ -2549,4 +2554,71 @@ typedef struct JoinCostWorkspace
double inner_rows_total;
} JoinCostWorkspace;
/*
* AggInfo holds information about an aggregate that needs to be computed.
* Multiple Aggrefs in a query can refer to the same AggInfo by having the
* same 'aggno' value, so that the aggregate is computed only once.
*/
typedef struct AggInfo
{
/*
* Link to an Aggref expr this state value is for.
*
* There can be multiple identical Aggrefs sharing the same per-agg. This
* points to the first one of them.
*/
Aggref *representative_aggref;
/*
* NOTE(review): presumably identifies the AggTransInfo (transition state)
* used by this aggregate, corresponding to Aggref.aggtransno -- confirm
* against the code that fills in this struct.
*/
int transno;
/*
* "shareable" is false if this agg cannot share state values with other
* aggregates because the final function is read-write.
*/
bool shareable;
/* Oid of the final function or InvalidOid */
Oid finalfn_oid;
} AggInfo;
/*
* AggTransInfo holds information about transition state that is used by one
* or more aggregates in the query. Multiple aggregates can share the same
* transition state, if they have the same inputs and the same transition
* function. Aggrefs that share the same transition info have the same
* 'aggtransno' value.
*/
typedef struct AggTransInfo
{
/*
* NOTE(review): presumably the aggregate's argument list and FILTER
* expression, i.e. the "inputs" that must match for transition state to
* be shared -- confirm against the code that fills in this struct.
*/
List *args;
Expr *aggfilter;
/* Oid of the state transition function */
Oid transfn_oid;
/* Oid of the serialization function or InvalidOid */
Oid serialfn_oid;
/* Oid of the deserialization function or InvalidOid */
Oid deserialfn_oid;
/* Oid of the combine function or InvalidOid */
Oid combinefn_oid;
/* Oid of state value's datatype */
Oid aggtranstype;
/*
* NOTE(review): the fields below look like further properties of the
* state value's datatype (typmod, length, pass-by-value flag) plus a
* transition-space figure -- meanings inferred from the names only;
* confirm before relying on them.
*/
int32 aggtranstypmod;
int transtypeLen;
bool transtypeByVal;
int32 aggtransspace;
/*
* initial value from pg_aggregate entry
*
* NOTE(review): initValueIsNull presumably flags that there is no
* (non-null) initial value, in which case initValue should not be used --
* confirm.
*/
Datum initValue;
bool initValueIsNull;
} AggTransInfo;
#endif /* PATHNODES_H */
......@@ -305,6 +305,12 @@ typedef struct Param
* a crosscheck that the Aggrefs match the plan; but note that when aggsplit
* indicates a non-final mode, aggtype reflects the transition data type
* not the SQL-level output type of the aggregate.
*
* aggno and aggtransno are -1 in the parse stage, and are set in planning.
* Aggregates with the same 'aggno' represent the same aggregate expression,
* and can share the result. Aggregates with the same 'aggtransno' but different
* 'aggno' can share the same transition state; only the final function needs
* to be called separately.
*/
typedef struct Aggref
{
......@@ -326,6 +332,8 @@ typedef struct Aggref
char aggkind; /* aggregate kind (see pg_aggregate.h) */
Index agglevelsup; /* > 0 if agg belongs to outer query */
AggSplit aggsplit; /* expected agg-splitting mode of parent Agg */
int aggno; /* unique ID within the Agg node */
int aggtransno; /* unique ID of transition state in the Agg */
int location; /* token location, or -1 if unknown */
} Aggref;
......
......@@ -24,8 +24,6 @@ typedef struct
} WindowFuncLists;
extern bool contain_agg_clause(Node *clause);
extern void get_agg_clause_costs(PlannerInfo *root, Node *clause,
AggSplit aggsplit, AggClauseCosts *costs);
extern bool contain_window_function(Node *clause);
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
......
......@@ -38,9 +38,17 @@ extern List *preprocess_targetlist(PlannerInfo *root);
extern PlanRowMark *get_plan_rowmark(List *rowmarks, Index rtindex);
/*
* prototypes for prepagg.c
*/
extern void get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit,
AggClauseCosts *agg_costs);
extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
/*
* prototypes for prepunion.c
*/
extern RelOptInfo *plan_set_operations(PlannerInfo *root);
#endif /* PREP_H */
......@@ -200,7 +200,7 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,
Selectivity *bucketsize_frac);
extern double estimate_hashagg_tablesize(Path *path,
extern double estimate_hashagg_tablesize(PlannerInfo *root, Path *path,
const AggClauseCosts *agg_costs,
double dNumGroups);
......
......@@ -1412,11 +1412,12 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) <
(4 rows)
-- Test when parent can produce parallel paths but not any (or some) of its children
-- (Use one more aggregate to tilt the cost estimates for the plan we want)
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
-------------------------------------------------------------------------------------------
Sort
......@@ -1436,21 +1437,21 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
-> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_2
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | sum | count
----+------+--------------------+-------+-------
0 | 5000 | 5.0000000000000000 | 5000 | 1000
1 | 6000 | 6.0000000000000000 | 7000 | 1000
10 | 5000 | 5.0000000000000000 | 15000 | 1000
11 | 6000 | 6.0000000000000000 | 17000 | 1000
20 | 5000 | 5.0000000000000000 | 25000 | 1000
21 | 6000 | 6.0000000000000000 | 27000 | 1000
(6 rows)
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
----------------------------------------------------------------------------------
Sort
......@@ -1470,15 +1471,15 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
-> Seq Scan on pagg_tab_para_p3 pagg_tab_para_3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | sum | count
----+------+--------------------+-------+-------
0 | 5000 | 5.0000000000000000 | 5000 | 1000
1 | 6000 | 6.0000000000000000 | 7000 | 1000
10 | 5000 | 5.0000000000000000 | 15000 | 1000
11 | 6000 | 6.0000000000000000 | 17000 | 1000
20 | 5000 | 5.0000000000000000 | 25000 | 1000
21 | 6000 | 6.0000000000000000 | 27000 | 1000
(6 rows)
-- Reset parallelism parameters to get partitionwise aggregation plan.
......
......@@ -308,20 +308,21 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) <
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-- Test when parent can produce parallel paths but not any (or some) of its children
-- (Use one more aggregate to tilt the cost estimates for the plan we want)
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
......
......@@ -50,7 +50,6 @@ AggStatePerPhase
AggStatePerTrans
AggStrategy
Aggref
AggrefExprState
AlenState
Alias
AllocBlock
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment