Commit b5635948 authored by Andrew Gierth

Support hashed aggregation with grouping sets.

This extends the Aggregate node with two new features: HashAggregate
can now run multiple hashtables concurrently, and a new strategy
MixedAggregate populates hashtables while doing sorted grouping.

The planner will now attempt to save as many sorts as possible when
planning grouping sets queries, while not exceeding work_mem for the
estimated combined sizes of all hashtables used.  No SQL-level changes
are required.  There should be no user-visible impact other than the
new EXPLAIN output and possible changes to result ordering when ORDER
BY was not used (which affected a few regression tests).  The
enable_hashagg option is respected.

Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
parent f0a6046b
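For orientation, a sketch of the new plan shapes (hypothetical table t(a int, b int, v int), not from this commit's tests; the "Hash Key" lines and the MixedAggregate node are the new EXPLAIN output this commit introduces):

-- All grouping sets hashable: one HashAggregate, one Hash Key line per set.
create table t (a int, b int, v int);
explain (costs off)
select a, b, sum(v) from t group by grouping sets ((a), (b));
--  HashAggregate
--    Hash Key: a
--    Hash Key: b
--    ->  Seq Scan on t

-- A rollup includes the () set, which is never hashed; the planner can hash
-- (a) while computing the grand total in the same pass via MixedAggregate.
explain (costs off)
select a, sum(v) from t group by rollup (a);
--  MixedAggregate
--    Hash Key: a
--    Group Key: ()
--    ->  Seq Scan on t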
@@ -3277,15 +3277,18 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
 explain (verbose, costs off)
 select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
-                                             QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
-   Output: c2, sum(c1)
-   Group Key: ft1.c2
-   Group Key: ()
-   ->  Foreign Scan on public.ft1
-         Output: c2, c1
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+                                  QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+   Output: c2, (sum(c1))
+   Sort Key: ft1.c2
+   ->  MixedAggregate
+         Output: c2, sum(c1)
+         Hash Key: ft1.c2
+         Group Key: ()
+         ->  Foreign Scan on public.ft1
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
 select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
  c2 | sum
@@ -3299,15 +3302,18 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
 explain (verbose, costs off)
 select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
-                                             QUERY PLAN
----------------------------------------------------------------------------------------------------
- GroupAggregate
-   Output: c2, sum(c1)
-   Group Key: ft1.c2
-   Group Key: ()
-   ->  Foreign Scan on public.ft1
-         Output: c2, c1
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(7 rows)
+                                  QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+   Output: c2, (sum(c1))
+   Sort Key: ft1.c2
+   ->  MixedAggregate
+         Output: c2, sum(c1)
+         Hash Key: ft1.c2
+         Group Key: ()
+         ->  Foreign Scan on public.ft1
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
 select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
  c2 | sum
@@ -3321,19 +3327,18 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
 explain (verbose, costs off)
 select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
-                                                  QUERY PLAN
--------------------------------------------------------------------------------------------------------------
+                                    QUERY PLAN
+----------------------------------------------------------------------------------
  Sort
    Output: c2, c6, (sum(c1))
    Sort Key: ft1.c2, ft1.c6
-   ->  GroupAggregate
+   ->  HashAggregate
          Output: c2, c6, sum(c1)
-         Group Key: ft1.c2
-         Sort Key: ft1.c6
-           Group Key: ft1.c6
+         Hash Key: ft1.c2
+         Hash Key: ft1.c6
          ->  Foreign Scan on public.ft1
                Output: c2, c6, c1
-               Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
-(11 rows)
+               Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
+(10 rows)
 select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
  c2 | c6 | sum
...
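The fallback paths remain reachable: with enable_hashagg off (or when the estimated combined hashtable sizes exceed work_mem), grouping sets keep the pre-existing sort-based plans. A sketch, again on the hypothetical table t:

-- Hypothetical: hashing disabled, so the old sorted strategy is used.
set enable_hashagg = off;
explain (costs off)
select a, sum(v) from t group by rollup (a);
--  GroupAggregate
--    Group Key: a
--    Group Key: ()
--    ->  Sort
--          Sort Key: a
--          ->  Seq Scan on t
reset enable_hashagg;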
@@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				pname = "HashAggregate";
 				strategy = "Hashed";
 				break;
+			case AGG_MIXED:
+				pname = "MixedAggregate";
+				strategy = "Mixed";
+				break;
 			default:
 				pname = "Aggregate ???";
 				strategy = "???";
@@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate,
 	ListCell   *lc;
 	List	   *gsets = aggnode->groupingSets;
 	AttrNumber *keycols = aggnode->grpColIdx;
+	const char *keyname;
+	const char *keysetname;
+
+	if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
+	{
+		keyname = "Hash Key";
+		keysetname = "Hash Keys";
+	}
+	else
+	{
+		keyname = "Group Key";
+		keysetname = "Group Keys";
+	}
 
 	ExplainOpenGroup("Grouping Set", NULL, true, es);
@@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate,
 		es->indent++;
 	}
 
-	ExplainOpenGroup("Group Keys", "Group Keys", false, es);
+	ExplainOpenGroup(keysetname, keysetname, false, es);
 
 	foreach(lc, gsets)
 	{
@@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate,
 		}
 
 		if (!result && es->format == EXPLAIN_FORMAT_TEXT)
-			ExplainPropertyText("Group Key", "()", es);
+			ExplainPropertyText(keyname, "()", es);
 		else
-			ExplainPropertyListNested("Group Key", result, es);
+			ExplainPropertyListNested(keyname, result, es);
 	}
 
-	ExplainCloseGroup("Group Keys", "Group Keys", false, es);
+	ExplainCloseGroup(keysetname, keysetname, false, es);
 
 	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
 		es->indent--;
...
@@ -122,12 +122,19 @@
  * specific).
  *
  * Where more complex grouping sets are used, we break them down into
- * "phases", where each phase has a different sort order.  During each
- * phase but the last, the input tuples are additionally stored in a
- * tuplesort which is keyed to the next phase's sort order; during each
- * phase but the first, the input tuples are drawn from the previously
- * sorted data.  (The sorting of the data for the first phase is handled by
- * the planner, as it might be satisfied by underlying nodes.)
+ * "phases", where each phase has a different sort order (except phase 0
+ * which is reserved for hashing).  During each phase but the last, the
+ * input tuples are additionally stored in a tuplesort which is keyed to the
+ * next phase's sort order; during each phase but the first, the input
+ * tuples are drawn from the previously sorted data.  (The sorting of the
+ * data for the first phase is handled by the planner, as it might be
+ * satisfied by underlying nodes.)
+ *
+ * Hashing can be mixed with sorted grouping.  To do this, we have an
+ * AGG_MIXED strategy that populates the hashtables during the first sorted
+ * phase, and switches to reading them out after completing all sort phases.
+ * We can also support AGG_HASHED with multiple hash tables and no sorting
+ * at all.
  *
  * From the perspective of aggregate transition and final functions, the
  * only issue regarding grouping sets is this: a single call site (flinfo)
@@ -139,7 +146,54 @@
  * sensitive to the grouping set for which the aggregate function is
  * currently being called.
  *
- * TODO: AGG_HASHED doesn't support multiple grouping sets yet.
+ * Plan structure:
+ *
+ * What we get from the planner is actually one "real" Agg node which is
+ * part of the plan tree proper, but which optionally has an additional list
+ * of Agg nodes hung off the side via the "chain" field.  This is because an
+ * Agg node happens to be a convenient representation of all the data we
+ * need for grouping sets.
+ *
+ * For many purposes, we treat the "real" node as if it were just the first
+ * node in the chain.  The chain must be ordered such that hashed entries
+ * come before sorted/plain entries; the real node is marked AGG_MIXED if
+ * there are both types present (in which case the real node describes one
+ * of the hashed groupings, other AGG_HASHED nodes may optionally follow in
+ * the chain, followed in turn by AGG_SORTED or (one) AGG_PLAIN node).  If
+ * the real node is marked AGG_HASHED or AGG_SORTED, then all the chained
+ * nodes must be of the same type; if it is AGG_PLAIN, there can be no
+ * chained nodes.
+ *
+ * We collect all hashed nodes into a single "phase", numbered 0, and create
+ * a sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node.
+ * Phase 0 is allocated even if there are no hashes, but remains unused in
+ * that case.
+ *
+ * AGG_HASHED nodes actually refer to only a single grouping set each,
+ * because for each hashed grouping we need a separate grpColIdx and
+ * numGroups estimate.  AGG_SORTED nodes represent a "rollup", a list of
+ * grouping sets that share a sort order.  Each AGG_SORTED node other than
+ * the first one has an associated Sort node which describes the sort order
+ * to be used; the first sorted node takes its input from the outer subtree,
+ * which the planner has already arranged to provide ordered data.
+ *
+ * Memory and ExprContext usage:
+ *
+ * Because we're accumulating aggregate values across input rows, we need to
+ * use more memory contexts than just simple input/output tuple contexts.
+ * In fact, for a rollup, we need a separate context for each grouping set
+ * so that we can reset the inner (finer-grained) aggregates on their group
+ * boundaries while continuing to accumulate values for outer
+ * (coarser-grained) groupings.  On top of this, we might be simultaneously
+ * populating hashtables; however, we only need one context for all the
+ * hashtables.
+ *
+ * So we create an array, aggcontexts, with an ExprContext for each grouping
+ * set in the largest rollup that we're going to process, and use the
+ * per-tuple memory context of those ExprContexts to store the aggregate
+ * transition values.  hashcontext is the single context created to support
+ * all hash tables.
+ *
  *
  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
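To make the phase layout above concrete, a hypothetical sketch (table t2(a, b, c, v) assumed; the actual plan depends on work_mem and estimates): if memory only allows hashing one set, the planner can sort once for the rollup-compatible sets and hash the remaining one, so phase 0 holds the hashed grouping and phase 1 the sorted rollup:

-- Hypothetical: one hashed grouping (phase 0) plus one sorted rollup phase.
explain (costs off)
select a, b, c, sum(v) from t2
group by grouping sets ((a, b), (a), (c), ());
--  MixedAggregate
--    Hash Key: c
--    Group Key: a, b
--    Group Key: a
--    Group Key: ()
--    ->  Sort
--          Sort Key: a, b
--          ->  Seq Scan on t2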
@@ -432,6 +486,7 @@ typedef struct AggStatePerGroupData
  */
 typedef struct AggStatePerPhaseData
 {
+	AggStrategy aggstrategy;	/* strategy for this phase */
 	int			numsets;		/* number of grouping sets (or 0) */
 	int		   *gset_lengths;	/* lengths of grouping sets */
 	Bitmapset **grouped_cols;	/* column groupings for rollup */
@@ -440,7 +495,30 @@ typedef struct AggStatePerPhaseData
 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
 } AggStatePerPhaseData;
 
+/*
+ * AggStatePerHashData - per-hashtable state
+ *
+ * When doing grouping sets with hashing, we have one of these for each
+ * grouping set.  (When doing hashing without grouping sets, we have just one of
+ * them.)
+ */
+typedef struct AggStatePerHashData
+{
+	TupleHashTable hashtable;	/* hash table with one entry per group */
+	TupleHashIterator hashiter; /* for iterating through hash table */
+	TupleTableSlot *hashslot;	/* slot for loading hash table */
+	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
+	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */
+	int			numCols;		/* number of hash key columns */
+	int			numhashGrpCols; /* number of columns in hash table */
+	int			largestGrpColIdx;	/* largest col required for hashing */
+	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
+	AttrNumber *hashGrpColIdxHash;	/* indices in hashtbl tuples */
+	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
+} AggStatePerHashData;
+
+static void select_current_set(AggState *aggstate, int setno, bool is_hash);
 static void initialize_phase(AggState *aggstate, int newphase);
 static TupleTableSlot *fetch_input_tuple(AggState *aggstate);
 static void initialize_aggregates(AggState *aggstate,
@@ -449,7 +527,8 @@ static void initialize_aggregates(AggState *aggstate,
 static void advance_transition_function(AggState *aggstate,
 					AggStatePerTrans pertrans,
 					AggStatePerGroup pergroupstate);
-static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup,
+				   AggStatePerGroup *pergroups);
 static void advance_combine_function(AggState *aggstate,
 					 AggStatePerTrans pertrans,
 					 AggStatePerGroup pergroupstate);
@@ -473,14 +552,13 @@ static void prepare_projection_slot(AggState *aggstate,
 					int currentSet);
 static void finalize_aggregates(AggState *aggstate,
 					AggStatePerAgg peragg,
-					AggStatePerGroup pergroup,
-					int currentSet);
+					AggStatePerGroup pergroup);
 static TupleTableSlot *project_aggregates(AggState *aggstate);
 static Bitmapset *find_unaggregated_cols(AggState *aggstate);
 static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
 static void build_hash_table(AggState *aggstate);
-static TupleHashEntryData *lookup_hash_entry(AggState *aggstate,
-				  TupleTableSlot *inputslot);
+static TupleHashEntryData *lookup_hash_entry(AggState *aggstate);
+static AggStatePerGroup *lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -501,13 +579,31 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
 
 /*
- * Switch to phase "newphase", which must either be 0 (to reset) or
- * current_phase + 1.  Juggle the tuplesorts accordingly.
+ * Select the current grouping set; affects current_set and
+ * curaggcontext.
+ */
+static void
+select_current_set(AggState *aggstate, int setno, bool is_hash)
+{
+	if (is_hash)
+		aggstate->curaggcontext = aggstate->hashcontext;
+	else
+		aggstate->curaggcontext = aggstate->aggcontexts[setno];
+
+	aggstate->current_set = setno;
+}
+
+/*
+ * Switch to phase "newphase", which must either be 0 or 1 (to reset) or
+ * current_phase + 1.  Juggle the tuplesorts accordingly.
+ *
+ * Phase 0 is for hashing, which we currently handle last in the AGG_MIXED
+ * case, so when entering phase 0, all we need to do is drop open sorts.
  */
 static void
 initialize_phase(AggState *aggstate, int newphase)
 {
-	Assert(newphase == 0 || newphase == aggstate->current_phase + 1);
+	Assert(newphase <= 1 || newphase == aggstate->current_phase + 1);
 
 	/*
 	 * Whatever the previous state, we're now done with whatever input
@@ -519,7 +615,7 @@ initialize_phase(AggState *aggstate, int newphase)
 		aggstate->sort_in = NULL;
 	}
 
-	if (newphase == 0)
+	if (newphase <= 1)
 	{
 		/*
 		 * Discard any existing output tuplesort.
@@ -546,7 +642,7 @@ initialize_phase(AggState *aggstate, int newphase)
 	 * If this isn't the last phase, we need to sort appropriately for the
 	 * next phase in sequence.
 	 */
-	if (newphase < aggstate->numphases - 1)
+	if (newphase > 0 && newphase < aggstate->numphases - 1)
 	{
 		Sort	   *sortnode = aggstate->phases[newphase + 1].sortnode;
 		PlanState  *outerNode = outerPlanState(aggstate);
@@ -567,7 +663,7 @@ initialize_phase(AggState *aggstate, int newphase)
 }
 
 /*
- * Fetch a tuple from either the outer plan (for phase 0) or from the sorter
+ * Fetch a tuple from either the outer plan (for phase 1) or from the sorter
  * populated by the previous phase.  Copy it to the sorter for the next phase
  * if any.
  */
@@ -595,8 +691,8 @@ fetch_input_tuple(AggState *aggstate)
 /*
  * (Re)Initialize an individual aggregate.
  *
- * This function handles only one grouping set (already set in
- * aggstate->current_set).
+ * This function handles only one grouping set, already set in
+ * aggstate->current_set.
 *
 * When called, CurrentMemoryContext should be the per-query context.
 */
@@ -653,7 +749,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 		MemoryContext oldContext;
 
 		oldContext = MemoryContextSwitchTo(
-			aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			aggstate->curaggcontext->ecxt_per_tuple_memory);
 		pergroupstate->transValue = datumCopy(pertrans->initValue,
 											  pertrans->transtypeByVal,
 											  pertrans->transtypeLen);
@@ -676,8 +772,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 *
 * If there are multiple grouping sets, we initialize only the first numReset
 * of them (the grouping sets are ordered so that the most specific one, which
- * is reset most often, is first).  As a convenience, if numReset is < 1, we
- * reinitialize all sets.
+ * is reset most often, is first).  As a convenience, if numReset is 0, we
+ * reinitialize all sets.  numReset is -1 to initialize a hashtable entry, in
+ * which case the caller must have used select_current_set appropriately.
 *
 * When called, CurrentMemoryContext should be the per-query context.
 */
@@ -689,26 +786,38 @@ initialize_aggregates(AggState *aggstate,
 	int			transno;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
 	int			setno = 0;
+	int			numTrans = aggstate->numtrans;
 	AggStatePerTrans transstates = aggstate->pertrans;
 
-	if (numReset < 1)
+	if (numReset == 0)
 		numReset = numGroupingSets;
 
-	for (transno = 0; transno < aggstate->numtrans; transno++)
+	for (transno = 0; transno < numTrans; transno++)
 	{
 		AggStatePerTrans pertrans = &transstates[transno];
 
-		for (setno = 0; setno < numReset; setno++)
+		if (numReset < 0)
 		{
 			AggStatePerGroup pergroupstate;
 
-			pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))];
-			aggstate->current_set = setno;
+			pergroupstate = &pergroup[transno];
 
 			initialize_aggregate(aggstate, pertrans, pergroupstate);
 		}
+		else
+		{
+			for (setno = 0; setno < numReset; setno++)
+			{
+				AggStatePerGroup pergroupstate;
+
+				pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+				select_current_set(aggstate, setno, false);
+
+				initialize_aggregate(aggstate, pertrans, pergroupstate);
+			}
+		}
 	}
 }
 
 /*
@@ -757,7 +866,7 @@ advance_transition_function(AggState *aggstate,
 		 * do not need to pfree the old transValue, since it's NULL.
 		 */
 		oldContext = MemoryContextSwitchTo(
-			aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			aggstate->curaggcontext->ecxt_per_tuple_memory);
 		pergroupstate->transValue = datumCopy(fcinfo->arg[1],
 											  pertrans->transtypeByVal,
 											  pertrans->transtypeLen);
@@ -807,7 +916,7 @@ advance_transition_function(AggState *aggstate,
 	{
 		if (!fcinfo->isnull)
 		{
-			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
 			if (DatumIsReadWriteExpandedObject(newVal,
 											   false,
 											   pertrans->transtypeLen) &&
@@ -838,17 +947,21 @@ advance_transition_function(AggState *aggstate,
 /*
  * Advance each aggregate transition state for one input tuple.  The input
  * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is
- * accessible to ExecEvalExpr.  pergroup is the array of per-group structs to
- * use (this might be in a hashtable entry).
+ * accessible to ExecEvalExpr.
+ *
+ * We have two sets of transition states to handle: one for sorted aggregation
+ * and one for hashed; we do them both here, to avoid multiple evaluation of
+ * the inputs.
  *
  * When called, CurrentMemoryContext should be the per-query context.
 */
 static void
-advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups)
 {
 	int			transno;
 	int			setno = 0;
 	int			numGroupingSets = Max(aggstate->phase->numsets, 1);
+	int			numHashes = aggstate->num_hashes;
 	int			numTrans = aggstate->numtrans;
 	TupleTableSlot *slot = aggstate->evalslot;
@@ -880,6 +993,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 		{
 			/* DISTINCT and/or ORDER BY case */
 			Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
+			Assert(!pergroups);
 
 			/*
 			 * If the transfn is strict, we want to check for nullity before
@@ -940,16 +1054,39 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 				fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
 			}
 
-			for (setno = 0; setno < numGroupingSets; setno++)
+			if (pergroup)
 			{
-				AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)];
+				/* advance transition states for ordered grouping */
 
-				aggstate->current_set = setno;
+				for (setno = 0; setno < numGroupingSets; setno++)
+				{
+					AggStatePerGroup pergroupstate;
 
-				advance_transition_function(aggstate, pertrans, pergroupstate);
+					select_current_set(aggstate, setno, false);
+
+					pergroupstate = &pergroup[transno + (setno * numTrans)];
+
+					advance_transition_function(aggstate, pertrans, pergroupstate);
+				}
+			}
+
+			if (pergroups)
+			{
+				/* advance transition states for hashed grouping */
+
+				for (setno = 0; setno < numHashes; setno++)
+				{
+					AggStatePerGroup pergroupstate;
+
+					select_current_set(aggstate, setno, true);
+
+					pergroupstate = &pergroups[setno][transno];
+
+					advance_transition_function(aggstate, pertrans, pergroupstate);
+				}
 			}
 		}
 	}
 }
 
 /*
@@ -967,7 +1104,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 	TupleTableSlot *slot;
 
 	/* combine not supported with grouping sets */
-	Assert(aggstate->phase->numsets == 0);
+	Assert(aggstate->phase->numsets <= 1);
 
 	/* compute input for all aggregates */
 	slot = ExecProject(aggstate->evalproj);
@@ -1060,7 +1197,7 @@ advance_combine_function(AggState *aggstate,
 		if (!pertrans->transtypeByVal)
 		{
 			oldContext = MemoryContextSwitchTo(
-				aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+				aggstate->curaggcontext->ecxt_per_tuple_memory);
 			pergroupstate->transValue = datumCopy(fcinfo->arg[1],
 												  pertrans->transtypeByVal,
 												  pertrans->transtypeLen);
@@ -1105,7 +1242,7 @@ advance_combine_function(AggState *aggstate,
 	{
 		if (!fcinfo->isnull)
 		{
-			MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory);
+			MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
 			if (DatumIsReadWriteExpandedObject(newVal,
 											   false,
 											   pertrans->transtypeLen) &&
@@ -1559,15 +1696,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet
 /*
  * Compute the final value of all aggregates for one group.
  *
- * This function handles only one grouping set at a time.
+ * This function handles only one grouping set at a time, which the caller must
+ * have selected.  It's also the caller's responsibility to adjust the supplied
+ * pergroup parameter to point to the current set's transvalues.
 *
 * Results are stored in the output econtext aggvalues/aggnulls.
 */
 static void
 finalize_aggregates(AggState *aggstate,
 					AggStatePerAgg peraggs,
-					AggStatePerGroup pergroup,
-					int currentSet)
+					AggStatePerGroup pergroup)
 {
 	ExprContext *econtext = aggstate->ss.ps.ps_ExprContext;
 	Datum	   *aggvalues = econtext->ecxt_aggvalues;
@@ -1575,11 +1713,6 @@ finalize_aggregates(AggState *aggstate,
 	int			aggno;
 	int			transno;
 
-	Assert(currentSet == 0 ||
-		   ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
-
-	aggstate->current_set = currentSet;
-
 	/*
 	 * If there were any DISTINCT and/or ORDER BY aggregates, sort their
 	 * inputs and run the transition functions.
@@ -1589,11 +1722,12 @@ finalize_aggregates(AggState *aggstate,
 		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+		pergroupstate = &pergroup[transno];
 
 		if (pertrans->numSortCols > 0)
 		{
-			Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+			Assert(aggstate->aggstrategy != AGG_HASHED &&
+				   aggstate->aggstrategy != AGG_MIXED);
 
 			if (pertrans->numInputs == 1)
 				process_ordered_aggregate_single(aggstate,
@@ -1615,7 +1749,7 @@ finalize_aggregates(AggState *aggstate,
 		int			transno = peragg->transno;
 		AggStatePerGroup pergroupstate;
 
-		pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))];
+		pergroupstate = &pergroup[transno];
 
 		if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
 			finalize_partialaggregate(aggstate, peragg, pergroupstate,
@@ -1697,7 +1831,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
 }
 
 /*
- * Initialize the hash table to empty.
+ * Initialize the hash table(s) to empty.
 *
 * To implement hashed aggregation, we need a hashtable that stores a
 * representative tuple and an array of AggStatePerGroup structs for each
@@ -1705,29 +1839,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
 * GROUP BY columns.  The per-group data is allocated in lookup_hash_entry(),
 * for each entry.
 *
- * The hash table always lives in the aggcontext memory context.
+ * We have a separate hashtable and associated perhash data structure for each
+ * grouping set for which we're doing hashing.
+ *
+ * The hash tables always live in the hashcontext's per-tuple memory context
+ * (there is only one of these for all tables together, since they are all
+ * reset at the same time).
 */
 static void
 build_hash_table(AggState *aggstate)
 {
-	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
 	MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	Size		additionalsize;
+	int			i;
 
-	Assert(node->aggstrategy == AGG_HASHED);
-	Assert(node->numGroups > 0);
+	Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED);
 
-	additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData);
+	additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData);
 
-	aggstate->hashtable = BuildTupleHashTable(node->numCols,
-											  aggstate->hashGrpColIdxHash,
-											  aggstate->phase->eqfunctions,
-											  aggstate->hashfunctions,
-											  node->numGroups,
-											  additionalsize,
-											  aggstate->aggcontexts[0]->ecxt_per_tuple_memory,
-											  tmpmem,
-											  DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+	for (i = 0; i < aggstate->num_hashes; ++i)
+	{
+		AggStatePerHash perhash = &aggstate->perhash[i];
+
+		Assert(perhash->aggnode->numGroups > 0);
+
+		perhash->hashtable = BuildTupleHashTable(perhash->numCols,
+												 perhash->hashGrpColIdxHash,
+												 perhash->eqfunctions,
+												 perhash->hashfunctions,
+												 perhash->aggnode->numGroups,
+												 additionalsize,
+												 aggstate->hashcontext->ecxt_per_tuple_memory,
+												 tmpmem,
+												 DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+	}
 }
 
 /*
@@ -1750,72 +1895,98 @@ build_hash_table(AggState *aggstate)
 * the array is preserved over ExecReScanAgg, so we allocate it in the
 * per-query context (unlike the hash table itself).
 */
-static List *
+static void
 find_hash_columns(AggState *aggstate)
 {
-	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
-	Bitmapset  *colnos;
-	List	   *collist;
-	TupleDesc	hashDesc;
+	Bitmapset  *base_colnos;
 	List	   *outerTlist = outerPlanState(aggstate)->plan->targetlist;
-	List	   *hashTlist = NIL;
-	int			i;
-
-	aggstate->largestGrpColIdx = 0;
+	int			numHashes = aggstate->num_hashes;
+	int			j;
 
 	/* Find Vars that will be needed in tlist and qual */
-	colnos = find_unaggregated_cols(aggstate);
-
-	/* Add in all the grouping columns */
-	for (i = 0; i < node->numCols; i++)
-		colnos = bms_add_member(colnos, node->grpColIdx[i]);
-
-	/* Convert to list, using lcons so largest element ends up first */
-	collist = NIL;
-
-	aggstate->hashGrpColIdxInput =
-		palloc(bms_num_members(colnos) * sizeof(AttrNumber));
-	aggstate->hashGrpColIdxHash =
-		palloc(node->numCols * sizeof(AttrNumber));
-
-	/*
-	 * First build mapping for columns directly hashed. These are the first,
-	 * because they'll be accessed when computing hash values and comparing
-	 * tuples for exact matches. We also build simple mapping for
-	 * execGrouping, so it knows where to find the to-be-hashed / compared
-	 * columns in the input.
-	 */
-	for (i = 0; i < node->numCols; i++)
-	{
-		aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i];
-		aggstate->hashGrpColIdxHash[i] = i + 1;
-		aggstate->numhashGrpCols++;
-		/* delete already mapped columns */
-		bms_del_member(colnos, node->grpColIdx[i]);
-	}
-
-	/* and add the remaining columns */
-	while ((i = bms_first_member(colnos)) >= 0)
-	{
-		aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i;
-		aggstate->numhashGrpCols++;
-	}
-
-	/* and build a tuple descriptor for the hashtable */
-	for (i = 0; i < aggstate->numhashGrpCols; i++)
-	{
-		int			varNumber = aggstate->hashGrpColIdxInput[i] - 1;
-
-		hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
-		aggstate->largestGrpColIdx =
-			Max(varNumber + 1, aggstate->largestGrpColIdx);
-	}
-
-	hashDesc = ExecTypeFromTL(hashTlist, false);
-	ExecSetSlotDescriptor(aggstate->hashslot, hashDesc);
-	list_free(hashTlist);
-	bms_free(colnos);
-
-	return collist;
+	base_colnos = find_unaggregated_cols(aggstate);
+
+	for (j = 0; j < numHashes; ++j)
+	{
+		AggStatePerHash perhash = &aggstate->perhash[j];
+		Bitmapset  *colnos = bms_copy(base_colnos);
+		AttrNumber *grpColIdx = perhash->aggnode->grpColIdx;
+		List	   *hashTlist = NIL;
+		TupleDesc	hashDesc;
+		int			i;
+
+		perhash->largestGrpColIdx = 0;
+
+		/*
+		 * If we're doing grouping sets, then some Vars might be referenced in
+		 * tlist/qual for the benefit of other grouping sets, but not needed
+		 * when hashing; i.e. prepare_projection_slot will null them out, so
+		 * there'd be no point storing them.  Use prepare_projection_slot's
+		 * logic to determine which.
+		 */
+		if (aggstate->phases[0].grouped_cols)
+		{
+			Bitmapset  *grouped_cols = aggstate->phases[0].grouped_cols[j];
+			ListCell   *lc;
+
+			foreach(lc, aggstate->all_grouped_cols)
+			{
+				int			attnum = lfirst_int(lc);
+
+				if (!bms_is_member(attnum, grouped_cols))
+					colnos = bms_del_member(colnos, attnum);
+			}
+		}
+
+		/* Add in all the grouping columns */
+		for (i = 0; i < perhash->numCols; i++)
+			colnos = bms_add_member(colnos, grpColIdx[i]);
+
+		perhash->hashGrpColIdxInput =
+			palloc(bms_num_members(colnos) * sizeof(AttrNumber));
+		perhash->hashGrpColIdxHash =
+			palloc(perhash->numCols * sizeof(AttrNumber));
+
+		/*
+		 * First build mapping for columns directly hashed. These are the
+		 * first, because they'll be accessed when computing hash values and
+		 * comparing tuples for exact matches. We also build simple mapping
+		 * for execGrouping, so it knows where to find the to-be-hashed /
+		 * compared columns in the input.
+		 */
+		for (i = 0; i < perhash->numCols; i++)
+		{
+			perhash->hashGrpColIdxInput[i] = grpColIdx[i];
+			perhash->hashGrpColIdxHash[i] = i + 1;
+			perhash->numhashGrpCols++;
+			/* delete already mapped columns */
+			bms_del_member(colnos, grpColIdx[i]);
+		}
+
+		/* and add the remaining columns */
+		while ((i = bms_first_member(colnos)) >= 0)
+		{
+			perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i;
+			perhash->numhashGrpCols++;
+		}
+
+		/* and build a tuple descriptor for the hashtable */
+		for (i = 0; i < perhash->numhashGrpCols; i++)
+		{
+			int			varNumber = perhash->hashGrpColIdxInput[i] - 1;
+
+			hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber));
+			perhash->largestGrpColIdx =
+				Max(varNumber + 1, perhash->largestGrpColIdx);
+		}
+
+		hashDesc = ExecTypeFromTL(hashTlist, false);
+		ExecSetSlotDescriptor(perhash->hashslot, hashDesc);
+
+		list_free(hashTlist);
+		bms_free(colnos);
+	}
+
+	bms_free(base_colnos);
 }
/* /*
...@@ -1840,26 +2011,30 @@ hash_agg_entry_size(int numAggs) ...@@ -1840,26 +2011,30 @@ hash_agg_entry_size(int numAggs)
} }
/* /*
* Find or create a hashtable entry for the tuple group containing the * Find or create a hashtable entry for the tuple group containing the current
* given tuple. * tuple (already set in tmpcontext's outertuple slot), in the current grouping
* set (which the caller must have selected - note that initialize_aggregate
* depends on this).
* *
* When called, CurrentMemoryContext should be the per-query context. * When called, CurrentMemoryContext should be the per-query context.
*/ */
static TupleHashEntryData * static TupleHashEntryData *
lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) lookup_hash_entry(AggState *aggstate)
{ {
TupleTableSlot *hashslot = aggstate->hashslot; TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
TupleTableSlot *hashslot = perhash->hashslot;
TupleHashEntryData *entry; TupleHashEntryData *entry;
bool isnew; bool isnew;
int i; int i;
/* transfer just the needed columns into hashslot */ /* transfer just the needed columns into hashslot */
slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx); slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot); ExecClearTuple(hashslot);
for (i = 0; i < aggstate->numhashGrpCols; i++) for (i = 0; i < perhash->numhashGrpCols; i++)
{ {
int varNumber = aggstate->hashGrpColIdxInput[i] - 1; int varNumber = perhash->hashGrpColIdxInput[i] - 1;
hashslot->tts_values[i] = inputslot->tts_values[varNumber]; hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber]; hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
...@@ -1867,21 +2042,43 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) ...@@ -1867,21 +2042,43 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
ExecStoreVirtualTuple(hashslot); ExecStoreVirtualTuple(hashslot);
/* find or create the hashtable entry using the filtered tuple */ /* find or create the hashtable entry using the filtered tuple */
entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew); entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew) if (isnew)
{ {
entry->additional = (AggStatePerGroup) entry->additional = (AggStatePerGroup)
MemoryContextAlloc(aggstate->hashtable->tablecxt, MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans); sizeof(AggStatePerGroupData) * aggstate->numtrans);
/* initialize aggregates for new tuple group */ /* initialize aggregates for new tuple group */
initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional, initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional,
0); -1);
} }
return entry; return entry;
} }
/*
* Look up hash entries for the current tuple in all hashed grouping sets,
* returning an array of pergroup pointers suitable for advance_aggregates.
*
* Be aware that lookup_hash_entry can reset the tmpcontext.
*/
static AggStatePerGroup *
lookup_hash_entries(AggState *aggstate)
{
int numHashes = aggstate->num_hashes;
AggStatePerGroup *pergroup = aggstate->hash_pergroup;
int setno;
for (setno = 0; setno < numHashes; setno++)
{
select_current_set(aggstate, setno, true);
pergroup[setno] = lookup_hash_entry(aggstate)->additional;
}
return pergroup;
}
/* /*
* ExecAgg - * ExecAgg -
* *
@@ -1898,19 +2095,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 TupleTableSlot *
 ExecAgg(AggState *node)
 {
-	TupleTableSlot *result;
+	TupleTableSlot *result = NULL;
 
 	if (!node->agg_done)
 	{
 		/* Dispatch based on strategy */
-		switch (node->phase->aggnode->aggstrategy)
+		switch (node->phase->aggstrategy)
 		{
 			case AGG_HASHED:
 				if (!node->table_filled)
 					agg_fill_hash_table(node);
+				/* FALLTHROUGH */
+			case AGG_MIXED:
 				result = agg_retrieve_hash_table(node);
 				break;
-			default:
+			case AGG_PLAIN:
+			case AGG_SORTED:
 				result = agg_retrieve_direct(node);
 				break;
 		}
@@ -1933,6 +2133,7 @@ agg_retrieve_direct(AggState *aggstate)
 	ExprContext *tmpcontext;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
+	AggStatePerGroup *hash_pergroups = NULL;
 	TupleTableSlot *outerslot;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
@@ -2019,6 +2220,19 @@ agg_retrieve_direct(AggState *aggstate)
 				node = aggstate->phase->aggnode;
 				numReset = numGroupingSets;
 			}
+			else if (aggstate->aggstrategy == AGG_MIXED)
+			{
+				/*
+				 * Mixed mode; we've output all the grouped stuff and have
+				 * full hashtables, so switch to outputting those.
+				 */
+				initialize_phase(aggstate, 0);
+				aggstate->table_filled = true;
+				ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+									   &aggstate->perhash[0].hashiter);
+				select_current_set(aggstate, 0, true);
+				return agg_retrieve_hash_table(aggstate);
+			}
 			else
 			{
 				aggstate->agg_done = true;
@@ -2055,7 +2269,7 @@ agg_retrieve_direct(AggState *aggstate)
 			 *----------
 			 */
 			if (aggstate->input_done ||
-				(node->aggstrategy == AGG_SORTED &&
+				(node->aggstrategy != AGG_PLAIN &&
 				 aggstate->projected_set != -1 &&
 				 aggstate->projected_set < (numGroupingSets - 1) &&
 				 nextSetSize > 0 &&
@@ -2168,10 +2382,22 @@ agg_retrieve_direct(AggState *aggstate)
 				 */
 				for (;;)
 				{
+					/*
+					 * During phase 1 only of a mixed agg, we need to update
+					 * hashtables as well in advance_aggregates.
+					 */
+					if (aggstate->aggstrategy == AGG_MIXED &&
+						aggstate->current_phase == 1)
+					{
+						hash_pergroups = lookup_hash_entries(aggstate);
+					}
+					else
+						hash_pergroups = NULL;
+
 					if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
 						combine_aggregates(aggstate, pergroup);
 					else
-						advance_aggregates(aggstate, pergroup);
+						advance_aggregates(aggstate, pergroup, hash_pergroups);
 
 					/* Reset per-input-tuple context after each tuple */
 					ResetExprContext(tmpcontext);
@@ -2198,7 +2424,7 @@ agg_retrieve_direct(AggState *aggstate)
 			 * If we are grouping, check whether we've crossed a group
 			 * boundary.
 			 */
-			if (node->aggstrategy == AGG_SORTED)
+			if (node->aggstrategy != AGG_PLAIN)
 			{
 				if (!execTuplesMatch(firstSlot,
 									 outerslot,
@@ -2231,7 +2457,11 @@ agg_retrieve_direct(AggState *aggstate)
 		prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet);
 
-		finalize_aggregates(aggstate, peragg, pergroup, currentSet);
+		select_current_set(aggstate, currentSet, false);
+
+		finalize_aggregates(aggstate,
+							peragg,
+							pergroup + (currentSet * aggstate->numtrans));
 
 		/*
 		 * If there's no row to project right now, we must continue rather
@@ -2247,21 +2477,13 @@ agg_retrieve_direct(AggState *aggstate)
 }
 
 /*
- * ExecAgg for hashed case: phase 1, read input and build hash table
+ * ExecAgg for hashed case: read input and build hash table
 */
 static void
 agg_fill_hash_table(AggState *aggstate)
 {
-	ExprContext *tmpcontext;
-	TupleHashEntryData *entry;
 	TupleTableSlot *outerslot;
-
-	/*
-	 * get state info from node
-	 *
-	 * tmpcontext is the per-input-tuple expression context
-	 */
-	tmpcontext = aggstate->tmpcontext;
+	ExprContext *tmpcontext = aggstate->tmpcontext;
 
 	/*
 	 * Process each outer-plan tuple, and then fetch the next one, until we
@@ -2269,32 +2491,40 @@ agg_fill_hash_table(AggState *aggstate)
 	 */
 	for (;;)
 	{
+		AggStatePerGroup *pergroups;
+
 		outerslot = fetch_input_tuple(aggstate);
 		if (TupIsNull(outerslot))
 			break;
 
-		/* set up for advance_aggregates call */
+		/* set up for lookup_hash_entries and advance_aggregates */
 		tmpcontext->ecxt_outertuple = outerslot;
 
-		/* Find or build hashtable entry for this tuple's group */
-		entry = lookup_hash_entry(aggstate, outerslot);
+		/* Find or build hashtable entries */
+		pergroups = lookup_hash_entries(aggstate);
 
 		/* Advance the aggregates */
 		if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
-			combine_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+			combine_aggregates(aggstate, pergroups[0]);
 		else
-			advance_aggregates(aggstate, (AggStatePerGroup) entry->additional);
+			advance_aggregates(aggstate, NULL, pergroups);
 
-		/* Reset per-input-tuple context after each tuple */
-		ResetExprContext(tmpcontext);
+		/*
+		 * Reset per-input-tuple context after each tuple, but note that the
+		 * hash lookups do this too
+		 */
+		ResetExprContext(aggstate->tmpcontext);
 	}
 
 	aggstate->table_filled = true;
-	/* Initialize to walk the hash table */
-	ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+	/* Initialize to walk the first hash table */
+	select_current_set(aggstate, 0, true);
+	ResetTupleHashIterator(aggstate->perhash[0].hashtable,
+						   &aggstate->perhash[0].hashiter);
 }
 
 /*
- * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ * ExecAgg for hashed case: retrieving groups from hash table
 */
 static TupleTableSlot *
 agg_retrieve_hash_table(AggState *aggstate)
@@ -2305,17 +2535,22 @@ agg_retrieve_hash_table(AggState *aggstate)
 	TupleHashEntryData *entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
-	TupleTableSlot *hashslot;
+	AggStatePerHash perhash;
 
 	/*
-	 * get state info from node
+	 * get state info from node.
+	 *
+	 * econtext is the per-output-tuple expression context.
 	 */
-	/* econtext is the per-output-tuple expression context */
 	econtext = aggstate->ss.ps.ps_ExprContext;
 	peragg = aggstate->peragg;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
-	hashslot = aggstate->hashslot;
+
+	/*
+	 * Note that perhash (and therefore anything accessed through it) can
+	 * change inside the loop, as we change between grouping sets.
+	 */
+	perhash = &aggstate->perhash[aggstate->current_set];
 
 	/*
 	 * We loop retrieving groups until we find one satisfying
@@ -2323,18 +2558,38 @@ agg_retrieve_hash_table(AggState *aggstate)
 	 */
 	while (!aggstate->agg_done)
 	{
+		TupleTableSlot *hashslot = perhash->hashslot;
 		int			i;
 
 		/*
 		 * Find the next entry in the hash table
 		 */
-		entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter);
+		entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter);
 		if (entry == NULL)
 		{
-			/* No more entries in hashtable, so done */
-			aggstate->agg_done = TRUE;
-			return NULL;
+			int			nextset = aggstate->current_set + 1;
+
+			if (nextset < aggstate->num_hashes)
+			{
+				/*
+				 * Switch to next grouping set, reinitialize, and restart the
+				 * loop.
+				 */
+				select_current_set(aggstate, nextset, true);
+
+				perhash = &aggstate->perhash[aggstate->current_set];
+
+				ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter);
+
+				continue;
+			}
+			else
+			{
+				/* No more hashtables, so done */
+				aggstate->agg_done = TRUE;
+				return NULL;
+			}
 		}
 
 		/*
 		 * Clear the per-output-tuple context for each group
@@ -2356,9 +2611,9 @@ agg_retrieve_hash_table(AggState *aggstate)
 		memset(firstSlot->tts_isnull, true,
 			   firstSlot->tts_tupleDescriptor->natts * sizeof(bool));
 
-		for (i = 0; i < aggstate->numhashGrpCols; i++)
+		for (i = 0; i < perhash->numhashGrpCols; i++)
 		{
-			int			varNumber = aggstate->hashGrpColIdxInput[i] - 1;
+			int			varNumber = perhash->hashGrpColIdxInput[i] - 1;
 
 			firstSlot->tts_values[varNumber] = hashslot->tts_values[i];
 			firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i];
@@ -2367,14 +2622,18 @@ agg_retrieve_hash_table(AggState *aggstate)
 		pergroup = (AggStatePerGroup) entry->additional;
 
-		finalize_aggregates(aggstate, peragg, pergroup, 0);
-
 		/*
 		 * Use the representative input tuple for any references to
 		 * non-aggregated input columns in the qual and tlist.
 		 */
 		econtext->ecxt_outertuple = firstSlot;
 
+		prepare_projection_slot(aggstate,
+								econtext->ecxt_outertuple,
+								aggstate->current_set);
+
+		finalize_aggregates(aggstate, peragg, pergroup);
+
 		result = project_aggregates(aggstate);
 		if (result)
 			return result;
...@@ -2388,7 +2647,8 @@ agg_retrieve_hash_table(AggState *aggstate) ...@@ -2388,7 +2647,8 @@ agg_retrieve_hash_table(AggState *aggstate)
* ExecInitAgg * ExecInitAgg
* *
* Creates the run-time information for the agg node produced by the * Creates the run-time information for the agg node produced by the
* planner and initializes its outer subtree * planner and initializes its outer subtree.
*
* ----------------- * -----------------
*/ */
AggState * AggState *
...@@ -2403,14 +2663,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ...@@ -2403,14 +2663,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
transno, transno,
aggno; aggno;
int phase; int phase;
int phaseidx;
List *combined_inputeval; List *combined_inputeval;
ListCell *l; ListCell *l;
Bitmapset *all_grouped_cols = NULL; Bitmapset *all_grouped_cols = NULL;
int numGroupingSets = 1; int numGroupingSets = 1;
int numPhases; int numPhases;
int numHashes;
int column_offset; int column_offset;
int i = 0; int i = 0;
int j = 0; int j = 0;
bool use_hashing = (node->aggstrategy == AGG_HASHED ||
node->aggstrategy == AGG_MIXED);
/* check for unsupported flags */ /* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
...@@ -2425,9 +2689,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ...@@ -2425,9 +2689,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->aggs = NIL; aggstate->aggs = NIL;
aggstate->numaggs = 0; aggstate->numaggs = 0;
aggstate->numtrans = 0; aggstate->numtrans = 0;
aggstate->aggstrategy = node->aggstrategy;
aggstate->aggsplit = node->aggsplit; aggstate->aggsplit = node->aggsplit;
aggstate->maxsets = 0; aggstate->maxsets = 0;
aggstate->hashfunctions = NULL;
aggstate->projected_set = -1; aggstate->projected_set = -1;
aggstate->current_set = 0; aggstate->current_set = 0;
aggstate->peragg = NULL; aggstate->peragg = NULL;
...@@ -2437,18 +2701,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ...@@ -2437,18 +2701,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
aggstate->agg_done = false; aggstate->agg_done = false;
aggstate->pergroup = NULL; aggstate->pergroup = NULL;
aggstate->grp_firstTuple = NULL; aggstate->grp_firstTuple = NULL;
aggstate->hashtable = NULL;
aggstate->sort_in = NULL; aggstate->sort_in = NULL;
aggstate->sort_out = NULL; aggstate->sort_out = NULL;
+	/*
+	 * phases[0] always exists, but is dummy in sorted/plain mode
+	 */
+	numPhases = (use_hashing ? 1 : 2);
+	numHashes = (use_hashing ? 1 : 0);
+
 	/*
 	 * Calculate the maximum number of grouping sets in any phase; this
-	 * determines the size of some allocations.
+	 * determines the size of some allocations.  Also calculate the number of
+	 * phases, since all hashed/mixed nodes contribute to only a single phase.
 	 */
 	if (node->groupingSets)
 	{
-		Assert(node->aggstrategy != AGG_HASHED);
 		numGroupingSets = list_length(node->groupingSets);
 
 		foreach(l, node->chain)
@@ -2457,22 +2725,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			numGroupingSets = Max(numGroupingSets,
 								  list_length(agg->groupingSets));
+
+			/*
+			 * additional AGG_HASHED aggs become part of phase 0, but all
+			 * others add an extra phase.
+			 */
+			if (agg->aggstrategy != AGG_HASHED)
+				++numPhases;
+			else
+				++numHashes;
 		}
 	}
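As a concrete illustration of the counting above (a hypothetical plan shape, not text from the patch):

	/*
	 * Example: an AGG_MIXED node whose chain holds two AGG_HASHED aggs and
	 * one AGG_SORTED agg starts at numPhases = 1 and numHashes = 1, and
	 * leaves this block with numHashes = 3 (the node itself plus the two
	 * chained hashes) and numPhases = 2 (the hash phase 0 plus one sorted
	 * phase).
	 */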
 	aggstate->maxsets = numGroupingSets;
-	aggstate->numphases = numPhases = 1 + list_length(node->chain);
+	aggstate->numphases = numPhases;
 
 	aggstate->aggcontexts = (ExprContext **)
 		palloc0(sizeof(ExprContext *) * numGroupingSets);
 	/*
 	 * Create expression contexts.  We need three or more, one for
-	 * per-input-tuple processing, one for per-output-tuple processing, and
-	 * one for each grouping set.  The per-tuple memory context of the
-	 * per-grouping-set ExprContexts (aggcontexts) replaces the standalone
-	 * memory context formerly used to hold transition values.  We cheat a
-	 * little by using ExecAssignExprContext() to build all of them.
+	 * per-input-tuple processing, one for per-output-tuple processing, one
+	 * for all the hashtables, and one for each grouping set.  The per-tuple
+	 * memory context of the per-grouping-set ExprContexts (aggcontexts)
+	 * replaces the standalone memory context formerly used to hold transition
+	 * values.  We cheat a little by using ExecAssignExprContext() to build
+	 * all of them.
 	 *
 	 * NOTE: the details of what is stored in aggcontexts and what is stored
 	 * in the regular per-query memory context are driven by a simple
@@ -2488,14 +2766,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
 	}
+	if (use_hashing)
+	{
+		ExecAssignExprContext(estate, &aggstate->ss.ps);
+		aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+	}
+
 	ExecAssignExprContext(estate, &aggstate->ss.ps);
 
 	/*
-	 * tuple table initialization
+	 * tuple table initialization.
+	 *
+	 * For hashtables, we create some additional slots below.
 	 */
 	ExecInitScanTupleSlot(estate, &aggstate->ss);
 	ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
-	aggstate->hashslot = ExecInitExtraTupleSlot(estate);
 	aggstate->sort_slot = ExecInitExtraTupleSlot(estate);
 	/*
@@ -2559,19 +2844,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * For each phase, prepare grouping set data and fmgr lookup data for
 	 * compare functions.  Accumulate all_grouped_cols in passing.
 	 */
 	aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
 
-	for (phase = 0; phase < numPhases; ++phase)
+	aggstate->num_hashes = numHashes;
+	if (numHashes)
+	{
+		aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+		aggstate->phases[0].numsets = 0;
+		aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
+		aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
+	}
+
+	phase = 0;
+	for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
 	{
-		AggStatePerPhase phasedata = &aggstate->phases[phase];
 		Agg		   *aggnode;
 		Sort	   *sortnode;
-		int			num_sets;
 
-		if (phase > 0)
+		if (phaseidx > 0)
 		{
-			aggnode = castNode(Agg, list_nth(node->chain, phase - 1));
+			aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1));
 			sortnode = castNode(Sort, aggnode->plan.lefttree);
 		}
 		else
@@ -2580,6 +2872,41 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			sortnode = NULL;
 		}
+		Assert(phase <= 1 || sortnode);
+
+		if (aggnode->aggstrategy == AGG_HASHED
+			|| aggnode->aggstrategy == AGG_MIXED)
+		{
+			AggStatePerPhase phasedata = &aggstate->phases[0];
+			AggStatePerHash perhash;
+			Bitmapset  *cols = NULL;
+
+			Assert(phase == 0);
+			i = phasedata->numsets++;
+			perhash = &aggstate->perhash[i];
+
+			/* phase 0 always points to the "real" Agg in the hash case */
+			phasedata->aggnode = node;
+			phasedata->aggstrategy = node->aggstrategy;
+
+			/* but the actual Agg node representing this hash is saved here */
+			perhash->aggnode = aggnode;
+
+			phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
+
+			for (j = 0; j < aggnode->numCols; ++j)
+				cols = bms_add_member(cols, aggnode->grpColIdx[j]);
+
+			phasedata->grouped_cols[i] = cols;
+
+			all_grouped_cols = bms_add_members(all_grouped_cols, cols);
+			continue;
+		}
+		else
+		{
+			AggStatePerPhase phasedata = &aggstate->phases[++phase];
+			int			num_sets;
 
 			phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
 
 			if (num_sets)
@@ -2599,6 +2926,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 					phasedata->grouped_cols[i] = cols;
 					phasedata->gset_lengths[i] = current_length;
+
 					++i;
 				}
@@ -2607,7 +2935,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			}
 			else
 			{
-				Assert(phase == 0);
+				Assert(phaseidx == 0);
 				phasedata->gset_lengths = NULL;
 				phasedata->grouped_cols = NULL;
@@ -2626,8 +2954,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			}
 
 			phasedata->aggnode = aggnode;
+			phasedata->aggstrategy = aggnode->aggstrategy;
 			phasedata->sortnode = sortnode;
 		}
+	}
 	/*
 	 * Convert all_grouped_cols to a descending-order list.
@@ -2636,13 +2966,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
 		aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
 
-	/*
-	 * Initialize current phase-dependent values to initial phase
-	 */
-
-	aggstate->current_phase = 0;
-	initialize_phase(aggstate, 0);
-
 	/*
 	 * Set up aggregate-result storage in the output expr context, and also
 	 * allocate my private per-agg working storage
@@ -2657,23 +2980,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->peragg = peraggs;
 	aggstate->pertrans = pertransstates;
 	/*
 	 * Hashing can only appear in the initial phase.
 	 */
-	if (node->aggstrategy == AGG_HASHED)
+	if (use_hashing)
 	{
-		find_hash_columns(aggstate);
+		for (i = 0; i < numHashes; ++i)
+		{
+			aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate);
+
+			execTuplesHashPrepare(aggstate->perhash[i].numCols,
+								  aggstate->perhash[i].aggnode->grpOperators,
+								  &aggstate->perhash[i].eqfunctions,
+								  &aggstate->perhash[i].hashfunctions);
+		}
 
-		execTuplesHashPrepare(node->numCols,
-							  node->grpOperators,
-							  &aggstate->phases[0].eqfunctions,
-							  &aggstate->hashfunctions);
+		/* this is an array of pointers, not structures */
+		aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes);
+
+		find_hash_columns(aggstate);
 		build_hash_table(aggstate);
 		aggstate->table_filled = false;
 	}
-	else
+
+	if (node->aggstrategy != AGG_HASHED)
 	{
 		AggStatePerGroup pergroup;
@@ -2684,6 +3014,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->pergroup = pergroup;
 	}
/*
* Initialize current phase-dependent values to initial phase. The initial
* phase is 1 (first sort pass) for all strategies that use sorting (if
* hashing is being done too, then phase 0 is processed last); but if only
* hashing is being done, then phase 0 is all there is.
*/
if (node->aggstrategy == AGG_HASHED)
{
aggstate->current_phase = 0;
initialize_phase(aggstate, 0);
select_current_set(aggstate, 0, true);
}
else
{
aggstate->current_phase = 1;
initialize_phase(aggstate, 1);
select_current_set(aggstate, 0, false);
}
 	/* -----------------
 	 * Perform lookups of aggregate function info, and initialize the
 	 * unchanging fields of the per-agg and per-trans data.
@@ -3261,7 +3610,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 		 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
 		 * (yet)
 		 */
-		Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED);
+		Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED);
 
 		/* If we have only one input, we need its len/byval info. */
 		if (numInputs == 1)
@@ -3510,6 +3859,8 @@ ExecEndAgg(AggState *node)
 	/* And ensure any agg shutdown callbacks have been called */
 	for (setno = 0; setno < numGroupingSets; setno++)
 		ReScanExprContext(node->aggcontexts[setno]);
+	if (node->hashcontext)
+		ReScanExprContext(node->hashcontext);
 
 	/*
 	 * We don't actually free any ExprContexts here (see comment in
@@ -3537,7 +3888,7 @@ ExecReScanAgg(AggState *node)
 	node->agg_done = false;
 
-	if (aggnode->aggstrategy == AGG_HASHED)
+	if (node->aggstrategy == AGG_HASHED)
 	{
 		/*
 		 * In the hashed case, if we haven't yet built the hash table then we
@@ -3557,7 +3908,9 @@ ExecReScanAgg(AggState *node)
 		if (outerPlan->chgParam == NULL &&
 			!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
 		{
-			ResetTupleHashIterator(node->hashtable, &node->hashiter);
+			ResetTupleHashIterator(node->perhash[0].hashtable,
+								   &node->perhash[0].hashiter);
+			select_current_set(node, 0, true);
 			return;
 		}
 	}
@@ -3582,11 +3935,7 @@ ExecReScanAgg(AggState *node)
 	 * ExecReScan already did it. But we do need to reset our per-grouping-set
 	 * contexts, which may have transvalues stored in them.  (We use rescan
 	 * rather than just reset because transfns may have registered callbacks
-	 * that need to be run now.)
-	 *
-	 * Note that with AGG_HASHED, the hash table is allocated in a sub-context
-	 * of the aggcontext. This used to be an issue, but now, resetting a
-	 * context automatically deletes sub-contexts too.
+	 * that need to be run now.)  For the AGG_HASHED case, see below.
 	 */
 	for (setno = 0; setno < numGroupingSets; setno++)
@@ -3606,13 +3955,21 @@ ExecReScanAgg(AggState *node)
 	MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
 	MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
 
-	if (aggnode->aggstrategy == AGG_HASHED)
+	/*
+	 * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of
+	 * the hashcontext. This used to be an issue, but now, resetting a context
+	 * automatically deletes sub-contexts too.
+	 */
+	if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
 	{
+		ReScanExprContext(node->hashcontext);
+
 		/* Rebuild an empty hash table */
 		build_hash_table(node);
 		node->table_filled = false;
+		/* iterator will be reset when the table is filled */
 	}
-	else
+
+	if (node->aggstrategy != AGG_HASHED)
 	{
 		/*
 		 * Reset the per-group state (in particular, mark transvalues null)
@@ -3620,8 +3977,8 @@ ExecReScanAgg(AggState *node)
 		MemSet(node->pergroup, 0,
 			   sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
 
-		/* reset to phase 0 */
-		initialize_phase(node, 0);
+		/* reset to phase 1 */
+		initialize_phase(node, 1);
 
 		node->input_done = false;
 		node->projected_set = -1;
@@ -3662,7 +4019,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
 		if (aggcontext)
 		{
 			AggState   *aggstate = ((AggState *) fcinfo->context);
-			ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+			ExprContext *cxt = aggstate->curaggcontext;
 
 			*aggcontext = cxt->ecxt_per_tuple_memory;
 		}
@@ -3751,7 +4108,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
 		AggState   *aggstate = (AggState *) fcinfo->context;
-		ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+		ExprContext *cxt = aggstate->curaggcontext;
 
 		RegisterExprContextCallback(cxt, func, arg);
...
@@ -12,7 +12,7 @@ subdir = src/backend/lib
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
-       rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+       pairingheap.o rbtree.o stringinfo.o
 
 include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* knapsack.c
* Knapsack problem solver
*
* Given input vectors of integral item weights (must be >= 0) and values
* (double >= 0), compute the set of items which produces the greatest total
* value without exceeding a specified total weight; each item is included at
* most once (this is the 0/1 knapsack problem). Weight 0 items will always be
* included.
*
* The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
 * weight limit.  To use this with non-integral weights, or to obtain
 * approximate solutions, the caller should pre-scale the input weights to a
 * suitably small integral range; that yields approximate answers in
 * polynomial time (the general case of the exact problem is NP-hard).
*
* Copyright (c) 2017, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/lib/knapsack.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include <limits.h>
#include "lib/knapsack.h"
#include "miscadmin.h"
#include "nodes/bitmapset.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
/*
* DiscreteKnapsack
*
* The item_values input is optional; if omitted, all the items are assumed to
* have value 1.
*
* Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
* inclusion in the solution.
*
* This uses the usual dynamic-programming algorithm, adapted to reuse the
* memory on each pass (by working from larger weights to smaller). At the
* start of pass number i, the values[w] array contains the largest value
* computed with total weight <= w, using only items with indices < i; and
* sets[w] contains the bitmap of items actually used for that value. (The
* bitmapsets are all pre-initialized with an unused high bit so that memory
* allocation is done only once.)
*/
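For reference, the update rule the loop below implements is the standard 0/1 knapsack recurrence (sketched here, not text from the patch):

/*
 * After considering item i (weight w_i, value v_i), for each capacity w
 * from max_weight down to w_i:
 *
 *		values[w] = Max(values[w], values[w - w_i] + v_i)
 *
 * Iterating w downward means values[w - w_i] still reflects the previous
 * pass, which is what limits each item to a single use.
 */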
Bitmapset *
DiscreteKnapsack(int max_weight, int num_items,
int *item_weights, double *item_values)
{
MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext,
"Knapsack",
ALLOCSET_SMALL_MINSIZE,
ALLOCSET_SMALL_INITSIZE,
ALLOCSET_SMALL_MAXSIZE);
MemoryContext oldctx = MemoryContextSwitchTo(local_ctx);
double *values;
Bitmapset **sets;
Bitmapset *result;
int i,
j;
Assert(max_weight >= 0);
Assert(num_items > 0 && item_weights);
values = palloc((1 + max_weight) * sizeof(double));
sets = palloc((1 + max_weight) * sizeof(Bitmapset *));
for (i = 0; i <= max_weight; ++i)
{
values[i] = 0;
sets[i] = bms_make_singleton(num_items);
}
for (i = 0; i < num_items; ++i)
{
int iw = item_weights[i];
double iv = item_values ? item_values[i] : 1;
for (j = max_weight; j >= iw; --j)
{
int ow = j - iw;
if (values[j] <= values[ow] + iv)
{
/* copy sets[ow] to sets[j] without realloc */
if (j != ow)
{
sets[j] = bms_del_members(sets[j], sets[j]);
sets[j] = bms_add_members(sets[j], sets[ow]);
}
sets[j] = bms_add_member(sets[j], i);
values[j] = values[ow] + iv;
}
}
}
MemoryContextSwitchTo(oldctx);
result = bms_del_member(bms_copy(sets[max_weight]), num_items);
MemoryContextDelete(local_ctx);
return result;
}
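A minimal usage sketch (hypothetical weights and values; knapsack_example is an illustration, not part of the patch):

/*
 * With a weight budget of 100, items 0 and 1 (combined weight 80, combined
 * value 5.0) beat any other feasible combination, including {0,2} at 4.0.
 */
static void
knapsack_example(void)
{
	int			item_weights[] = {30, 50, 60};
	double		item_values[] = {3.0, 2.0, 1.0};
	Bitmapset  *chosen;

	chosen = DiscreteKnapsack(100, 3, item_weights, item_values);

	Assert(bms_is_member(0, chosen));
	Assert(bms_is_member(1, chosen));
	Assert(!bms_is_member(2, chosen));

	bms_free(chosen);
}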
@@ -21,6 +21,7 @@
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "nodes/pg_list.h"
 
 #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
@@ -457,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b)
 	return false;
 }
/*
* bms_overlap_list - does a set overlap an integer list?
*/
bool
bms_overlap_list(const Bitmapset *a, const List *b)
{
ListCell *lc;
int wordnum,
bitnum;
if (a == NULL || b == NIL)
return false;
foreach(lc, b)
{
int x = lfirst_int(lc);
if (x < 0)
elog(ERROR, "negative bitmapset member not allowed");
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
if (wordnum < a->nwords)
if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
return true;
}
return false;
}
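A quick usage sketch (hypothetical values, not part of the patch): the function reports whether any list member is also a set member, erroring out on negative list entries.

	{
		Bitmapset  *a = bms_make_singleton(3);
		List	   *b = list_make2_int(1, 3);

		Assert(bms_overlap_list(a, b));		/* 3 is in both */
		Assert(!bms_overlap_list(a, NIL));	/* empty list never overlaps */
		Assert(!bms_overlap_list(NULL, b));	/* NULL set never overlaps */
	}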
 /*
  * bms_nonempty_difference - do sets have a nonempty difference?
  */
...
@@ -1941,6 +1941,28 @@ _outAggPath(StringInfo str, const AggPath *node)
 	WRITE_NODE_FIELD(qual);
 }
static void
_outRollupData(StringInfo str, const RollupData *node)
{
WRITE_NODE_TYPE("ROLLUP");
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(gsets);
WRITE_NODE_FIELD(gsets_data);
WRITE_FLOAT_FIELD(numGroups, "%.0f");
WRITE_BOOL_FIELD(hashable);
WRITE_BOOL_FIELD(is_hashed);
}
static void
_outGroupingSetData(StringInfo str, const GroupingSetData *node)
{
WRITE_NODE_TYPE("GSDATA");
WRITE_NODE_FIELD(set);
WRITE_FLOAT_FIELD(numGroups, "%.0f");
}
 static void
 _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
 {
@@ -1949,8 +1971,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
 	_outPathInfo(str, (const Path *) node);
 
 	WRITE_NODE_FIELD(subpath);
-	WRITE_NODE_FIELD(rollup_groupclauses);
-	WRITE_NODE_FIELD(rollup_lists);
+	WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
+	WRITE_NODE_FIELD(rollups);
 	WRITE_NODE_FIELD(qual);
 }
@@ -3961,14 +3983,18 @@ outNode(StringInfo str, const void *obj)
 		case T_PlannerParamItem:
 			_outPlannerParamItem(str, obj);
 			break;
case T_RollupData:
_outRollupData(str, obj);
break;
case T_GroupingSetData:
_outGroupingSetData(str, obj);
break;
 		case T_StatisticExtInfo:
 			_outStatisticExtInfo(str, obj);
 			break;
 		case T_ExtensibleNode:
 			_outExtensibleNode(str, obj);
 			break;
 		case T_CreateStmt:
 			_outCreateStmt(str, obj);
 			break;
...
@@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
 		total_cost = startup_cost + cpu_tuple_cost;
 		output_tuples = 1;
 	}
-	else if (aggstrategy == AGG_SORTED)
+	else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
 	{
 		/* Here we are able to deliver output on-the-fly */
 		startup_cost = input_startup_cost;
 		total_cost = input_total_cost;
+		if (aggstrategy == AGG_MIXED && !enable_hashagg)
+		{
+			startup_cost += disable_cost;
+			total_cost += disable_cost;
+		}
 		/* calcs phrased this way to match HASHED case, see note above */
 		total_cost += aggcosts->transCost.startup;
 		total_cost += aggcosts->transCost.per_tuple * input_tuples;
...
@@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 {
 	Agg		   *plan;
 	Plan	   *subplan;
-	List	   *rollup_groupclauses = best_path->rollup_groupclauses;
-	List	   *rollup_lists = best_path->rollup_lists;
+	List	   *rollups = best_path->rollups;
 	AttrNumber *grouping_map;
 	int			maxref;
 	List	   *chain;
-	ListCell   *lc,
-			   *lc2;
+	ListCell   *lc;
 
 	/* Shouldn't get here without grouping sets */
 	Assert(root->parse->groupingSets);
-	Assert(rollup_lists != NIL);
-	Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+	Assert(rollups != NIL);
 
 	/*
 	 * Agg can project, so no need to be terribly picky about child tlist, but
@@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	 * costs will be shown by EXPLAIN.
 	 */
 	chain = NIL;
-	if (list_length(rollup_groupclauses) > 1)
+	if (list_length(rollups) > 1)
 	{
-		forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+		ListCell   *lc2 = lnext(list_head(rollups));
+		bool		is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+		for_each_cell(lc, lc2)
 		{
-			List	   *groupClause = (List *) lfirst(lc);
-			List	   *gsets = (List *) lfirst(lc2);
+			RollupData *rollup = lfirst(lc);
 			AttrNumber *new_grpColIdx;
-			Plan	   *sort_plan;
+			Plan	   *sort_plan = NULL;
 			Plan	   *agg_plan;
+			AggStrategy strat;
 
-			/* We want to iterate over all but the last rollup list elements */
-			if (lnext(lc) == NULL)
-				break;
+			new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
 
-			new_grpColIdx = remap_groupColIdx(root, groupClause);
-
-			sort_plan = (Plan *)
-				make_sort_from_groupcols(groupClause,
-										 new_grpColIdx,
-										 subplan);
+			if (!rollup->is_hashed && !is_first_sort)
+			{
+				sort_plan = (Plan *)
+					make_sort_from_groupcols(rollup->groupClause,
+											 new_grpColIdx,
+											 subplan);
+			}
+
+			if (!rollup->is_hashed)
+				is_first_sort = false;
+
+			if (rollup->is_hashed)
+				strat = AGG_HASHED;
+			else if (list_length(linitial(rollup->gsets)) == 0)
+				strat = AGG_PLAIN;
+			else
+				strat = AGG_SORTED;
 
 			agg_plan = (Plan *) make_agg(NIL,
 										 NIL,
-										 AGG_SORTED,
+										 strat,
 										 AGGSPLIT_SIMPLE,
-										 list_length((List *) linitial(gsets)),
+										 list_length((List *) linitial(rollup->gsets)),
 										 new_grpColIdx,
-										 extract_grouping_ops(groupClause),
-										 gsets,
+										 extract_grouping_ops(rollup->groupClause),
+										 rollup->gsets,
 										 NIL,
-										 0,		/* numGroups not needed */
+										 rollup->numGroups,
 										 sort_plan);
 
 			/*
-			 * Nuke stuff we don't need to avoid bloating debug output.
+			 * Remove stuff we don't need to avoid bloating debug output.
 			 */
-			sort_plan->targetlist = NIL;
-			sort_plan->lefttree = NULL;
+			if (sort_plan)
+			{
+				sort_plan->targetlist = NIL;
+				sort_plan->lefttree = NULL;
+			}
 
 			chain = lappend(chain, agg_plan);
 		}
 	}
 
 	/*
-	 * Now make the final Agg node
+	 * Now make the real Agg node
 	 */
 	{
-		List	   *groupClause = (List *) llast(rollup_groupclauses);
-		List	   *gsets = (List *) llast(rollup_lists);
+		RollupData *rollup = linitial(rollups);
 		AttrNumber *top_grpColIdx;
 		int			numGroupCols;
 
-		top_grpColIdx = remap_groupColIdx(root, groupClause);
+		top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
 
-		numGroupCols = list_length((List *) linitial(gsets));
+		numGroupCols = list_length((List *) linitial(rollup->gsets));
 
 		plan = make_agg(build_path_tlist(root, &best_path->path),
 						best_path->qual,
-						(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+						best_path->aggstrategy,
 						AGGSPLIT_SIMPLE,
 						numGroupCols,
 						top_grpColIdx,
-						extract_grouping_ops(groupClause),
-						gsets,
+						extract_grouping_ops(rollup->groupClause),
+						rollup->gsets,
 						chain,
-						0,		/* numGroups not needed */
+						rollup->numGroups,
 						subplan);
 
 		/* Copy cost data from Path to Plan */
...
@@ -30,6 +30,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
 	List	   *groupClause;	/* overrides parse->groupClause */
 } standard_qp_extra;
/*
* Data specific to grouping sets
*/
typedef struct
{
	List	   *rollups;		/* list of RollupData */
	List	   *hash_sets_idx;	/* remapped index form of unsortable sets */
	double		dNumHashGroups; /* estimated number of hashed groups */
	bool		any_hashable;	/* at least one grouping set is hashable */
	Bitmapset  *unsortable_refs;	/* tleSortGroupRefs lacking a sort op */
	Bitmapset  *unhashable_refs;	/* tleSortGroupRefs that aren't hashable */
	List	   *unsortable_sets;	/* GroupingSetData for unsortable sets */
	int		   *tleref_to_colnum_map;	/* sortgroupref -> groupClause index */
} grouping_sets_data;
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
 static void inheritance_planner(PlannerInfo *root);
 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
 				 double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+						 int *tleref_to_colnum_map);
 static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
 static double get_number_of_groups(PlannerInfo *root,
 					 double path_rows,
-					 List *rollup_lists,
-					 List *rollup_groupclauses);
+					 grouping_sets_data *gd);
 static Size estimate_hashagg_tablesize(Path *path,
 						   const AggClauseCosts *agg_costs,
 						   double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
 					  const AggClauseCosts *agg_costs,
-					  List *rollup_lists,
-					  List *rollup_groupclauses);
+					  grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+							RelOptInfo *grouped_rel,
+							Path *path,
+							bool is_sorted,
+							bool can_hash,
+							PathTarget *target,
+							grouping_sets_data *gd,
+							const AggClauseCosts *agg_costs,
+							double dNumGroups);
 static RelOptInfo *create_window_paths(PlannerInfo *root,
 					   RelOptInfo *input_rel,
 					   PathTarget *input_target,
@@ -1540,8 +1567,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	AggClauseCosts agg_costs;
 	WindowFuncLists *wflists = NULL;
 	List	   *activeWindows = NIL;
-	List	   *rollup_lists = NIL;
-	List	   *rollup_groupclauses = NIL;
+	grouping_sets_data *gset_data = NULL;
 	standard_qp_extra qp_extra;
 
 	/* A recursive query should always have setOperations */
@@ -1550,84 +1576,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	/* Preprocess grouping sets and GROUP BY clause, if any */
 	if (parse->groupingSets)
 	{
-		int		   *tleref_to_colnum_map;
-		List	   *sets;
-		int			maxref;
-		ListCell   *lc;
-		ListCell   *lc2;
-		ListCell   *lc_set;
-
-		parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
-		/* Identify max SortGroupRef in groupClause, for array sizing */
-		maxref = 0;
-		foreach(lc, parse->groupClause)
-		{
-			SortGroupClause *gc = lfirst(lc);
-
-			if (gc->tleSortGroupRef > maxref)
-				maxref = gc->tleSortGroupRef;
-		}
-
-		/* Allocate workspace array for remapping */
-		tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
-		/* Examine the rollup sets */
-		sets = extract_rollup_sets(parse->groupingSets);
-
-		foreach(lc_set, sets)
-		{
-			List	   *current_sets = (List *) lfirst(lc_set);
-			List	   *groupclause;
-			int			ref;
-
-			/*
-			 * Reorder the current list of grouping sets into correct
-			 * prefix order.  If only one aggregation pass is needed, try
-			 * to make the list match the ORDER BY clause; if more than
-			 * one pass is needed, we don't bother with that.
-			 */
-			current_sets = reorder_grouping_sets(current_sets,
-												 (list_length(sets) == 1
-												  ? parse->sortClause
-												  : NIL));
-
-			/*
-			 * Order the groupClause appropriately.  If the first grouping
-			 * set is empty, this can match regular GROUP BY
-			 * preprocessing, otherwise we have to force the groupClause
-			 * to match that grouping set's order.
-			 */
-			groupclause = preprocess_groupclause(root,
-												 linitial(current_sets));
-
-			/*
-			 * Now that we've pinned down an order for the groupClause for
-			 * this list of grouping sets, we need to remap the entries in
-			 * the grouping sets from sortgrouprefs to plain indices
-			 * (0-based) into the groupClause for this collection of
-			 * grouping sets.
-			 */
-			ref = 0;
-			foreach(lc, groupclause)
-			{
-				SortGroupClause *gc = lfirst(lc);
-
-				tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
-			}
-
-			foreach(lc, current_sets)
-			{
-				foreach(lc2, (List *) lfirst(lc))
-				{
-					lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
-				}
-			}
-
-			/* Save the reordered sets and corresponding groupclauses */
-			rollup_lists = lcons(current_sets, rollup_lists);
-			rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
-		}
+		gset_data = preprocess_grouping_sets(root);
 	}
 	else
 	{
@@ -1721,8 +1670,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	/* Set up data needed by standard_qp_callback */
 	qp_extra.tlist = tlist;
 	qp_extra.activeWindows = activeWindows;
-	qp_extra.groupClause =
-		parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+	qp_extra.groupClause = (gset_data
+							? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+							: parse->groupClause);
 	/*
 	 * Generate the best unsorted and presorted paths for the scan/join
@@ -1922,8 +1872,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 											current_rel,
 											grouping_target,
 											&agg_costs,
-											rollup_lists,
-											rollup_groupclauses);
+											gset_data);
 
 		/* Fix things up if grouping_target contains SRFs */
 		if (parse->hasTargetSRFs)
 			adjust_paths_for_srfs(root, current_rel,
@@ -1960,7 +1909,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		current_rel = create_distinct_paths(root,
 											current_rel);
 	}
 
 	}							/* end of if (setOperations) */
 
 	/*
@@ -2113,6 +2061,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	/* Note: currently, we leave it to callers to do set_cheapest() */
 }
/*
* Do preprocessing for groupingSets clause and related data. This handles the
* preliminary steps of expanding the grouping sets, organizing them into lists
* of rollups, and preparing annotations which will later be filled in with
* size estimates.
*/
static grouping_sets_data *
preprocess_grouping_sets(PlannerInfo *root)
{
Query *parse = root->parse;
List *sets;
int maxref = 0;
ListCell *lc;
ListCell *lc_set;
grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
gd->any_hashable = false;
gd->unhashable_refs = NULL;
gd->unsortable_refs = NULL;
gd->unsortable_sets = NIL;
if (parse->groupClause)
{
ListCell *lc;
foreach(lc, parse->groupClause)
{
SortGroupClause *gc = lfirst(lc);
Index ref = gc->tleSortGroupRef;
if (ref > maxref)
maxref = ref;
if (!gc->hashable)
gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
if (!OidIsValid(gc->sortop))
gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
}
}
/* Allocate workspace array for remapping */
gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
/*
* If we have any unsortable sets, we must extract them before trying to
* prepare rollups. Unsortable sets don't go through
* reorder_grouping_sets, so we must apply the GroupingSetData annotation
* here.
*/
if (!bms_is_empty(gd->unsortable_refs))
{
List *sortable_sets = NIL;
foreach(lc, parse->groupingSets)
{
List *gset = lfirst(lc);
if (bms_overlap_list(gd->unsortable_refs, gset))
{
GroupingSetData *gs = makeNode(GroupingSetData);
gs->set = gset;
gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
/*
* We must enforce here that an unsortable set is hashable;
* later code assumes this. Parse analysis only checks that
* every individual column is either hashable or sortable.
*
* Note that passing this test doesn't guarantee we can
* generate a plan; there might be other showstoppers.
*/
if (bms_overlap_list(gd->unhashable_refs, gset))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
}
else
sortable_sets = lappend(sortable_sets, gset);
}
if (sortable_sets)
sets = extract_rollup_sets(sortable_sets);
else
sets = NIL;
}
else
sets = extract_rollup_sets(parse->groupingSets);
foreach(lc_set, sets)
{
List *current_sets = (List *) lfirst(lc_set);
RollupData *rollup = makeNode(RollupData);
GroupingSetData *gs;
/*
* Reorder the current list of grouping sets into correct prefix
* order. If only one aggregation pass is needed, try to make the
* list match the ORDER BY clause; if more than one pass is needed, we
* don't bother with that.
*
* Note that this reorders the sets from smallest-member-first to
* largest-member-first, and applies the GroupingSetData annotations,
* though the data will be filled in later.
*/
current_sets = reorder_grouping_sets(current_sets,
(list_length(sets) == 1
? parse->sortClause
: NIL));
/*
* Get the initial (and therefore largest) grouping set.
*/
gs = linitial(current_sets);
/*
* Order the groupClause appropriately. If the first grouping set is
* empty, then the groupClause must also be empty; otherwise we have
* to force the groupClause to match that grouping set's order.
*
* (The first grouping set can be empty even though parse->groupClause
* is not empty only if all non-empty grouping sets are unsortable.
* The groupClauses for hashed grouping sets are built later on.)
*/
if (gs->set)
rollup->groupClause = preprocess_groupclause(root, gs->set);
else
rollup->groupClause = NIL;
/*
* Is it hashable? We pretend empty sets are hashable even though we
* actually force them not to be hashed later. But don't bother if
* there's nothing but empty sets (since in that case we can't hash
* anything).
*/
if (gs->set &&
!bms_overlap_list(gd->unhashable_refs, gs->set))
{
rollup->hashable = true;
gd->any_hashable = true;
}
/*
* Now that we've pinned down an order for the groupClause for this
* list of grouping sets, we need to remap the entries in the grouping
* sets from sortgrouprefs to plain indices (0-based) into the
* groupClause for this collection of grouping sets. We keep the
* original form for later use, though.
*/
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
current_sets,
gd->tleref_to_colnum_map);
rollup->gsets_data = current_sets;
gd->rollups = lappend(gd->rollups, rollup);
}
if (gd->unsortable_sets)
{
/*
* We have not yet pinned down a groupclause for this, but we will
* need index-based lists for estimation purposes. Construct
* hash_sets_idx based on the entire original groupclause for now.
*/
gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
gd->unsortable_sets,
gd->tleref_to_colnum_map);
gd->any_hashable = true;
}
return gd;
}
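A worked example of the decomposition this performs (hypothetical query, not from the patch):

/*
 * GROUP BY GROUPING SETS ((a,b), (a), (c), ()) cannot be ordered into one
 * prefix chain, so extract_rollup_sets() splits it into two families such
 * as {(a,b), (a), ()} and {(c)}.  Each family becomes one RollupData here,
 * with reorder_grouping_sets() putting the largest set first and the empty
 * set riding along in the first rollup.
 */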
/*
* Given a groupclause and a list of GroupingSetData, return equivalent sets
* (without annotation) mapped to indexes into the given groupclause.
*/
static List *
remap_to_groupclause_idx(List *groupClause,
List *gsets,
int *tleref_to_colnum_map)
{
int ref = 0;
List *result = NIL;
ListCell *lc;
foreach(lc, groupClause)
{
SortGroupClause *gc = lfirst(lc);
tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
}
foreach(lc, gsets)
{
List *set = NIL;
ListCell *lc2;
GroupingSetData *gs = lfirst(lc);
foreach(lc2, gs->set)
{
set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
}
result = lappend(result, set);
}
return result;
}
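A small worked example of the remapping (hypothetical sortgrouprefs, not from the patch):

/*
 * If the groupClause entries carry tleSortGroupRefs (4, 7, 2), the map
 * becomes 4 -> 0, 7 -> 1, 2 -> 2; a grouping set stored as sortgrouprefs
 * (7, 4) then comes back as column indexes (1, 0).
 */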
 /*
  * Detect whether a plan node is a "dummy" plan created when a relation
@@ -3028,7 +3191,7 @@ extract_rollup_sets(List *groupingSets)
 
 /*
  * Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships.  Also inserts the GroupingSetData annotations.
  *
  * The input must be ordered with smallest sets first; the result is returned
  * with largest sets first.  Note that the result shares no list substructure
@@ -3051,6 +3214,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 	{
 		List	   *candidate = lfirst(lc);
 		List	   *new_elems = list_difference_int(candidate, previous);
+		GroupingSetData *gs = makeNode(GroupingSetData);
 
 		if (list_length(new_elems) > 0)
 		{
@@ -3078,7 +3242,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 			}
 		}
 
-		result = lcons(list_copy(previous), result);
+		gs->set = list_copy(previous);
+		result = lcons(gs, result);
 		list_free(new_elems);
 	}
...@@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra) ...@@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 * Estimate number of groups produced by grouping clauses (1 if not grouping)
 *
 * path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- *		or NIL if not doing grouping sets
+ * gsets: grouping set data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
 */
static double
get_number_of_groups(PlannerInfo *root,
 					 double path_rows,
-					 List *rollup_lists,
-					 List *rollup_groupclauses)
+					 grouping_sets_data *gd)
{
	Query	   *parse = root->parse;
	double		dNumGroups;
@@ -3193,28 +3359,60 @@ get_number_of_groups(PlannerInfo *root,
 	if (parse->groupingSets)
 	{
 		/* Add up the estimates for each grouping set */
-		ListCell   *lc,
-				   *lc2;
+		ListCell   *lc;
+		ListCell   *lc2;
 
 		dNumGroups = 0;
-		forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+		foreach(lc, gd->rollups)
 		{
-			List	   *groupClause = (List *) lfirst(lc);
-			List	   *gsets = (List *) lfirst(lc2);
-			ListCell   *lc3;
+			RollupData *rollup = lfirst(lc);
+			ListCell   *lc;
 
-			groupExprs = get_sortgrouplist_exprs(groupClause,
+			groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
 												 parse->targetList);
 
-			foreach(lc3, gsets)
+			rollup->numGroups = 0.0;
+
+			forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
 			{
-				List	   *gset = (List *) lfirst(lc3);
+				List	   *gset = (List *) lfirst(lc);
+				GroupingSetData *gs = lfirst(lc2);
+				double		numGroups = estimate_num_groups(root,
+															groupExprs,
+															path_rows,
+															&gset);
 
-				dNumGroups += estimate_num_groups(root,
-												  groupExprs,
-												  path_rows,
-												  &gset);
+				gs->numGroups = numGroups;
+				rollup->numGroups += numGroups;
 			}
+
+			dNumGroups += rollup->numGroups;
+		}
+
+		if (gd->hash_sets_idx)
+		{
+			ListCell   *lc;
+
+			gd->dNumHashGroups = 0;
+
+			groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+												 parse->targetList);
+
+			forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+			{
+				List	   *gset = (List *) lfirst(lc);
+				GroupingSetData *gs = lfirst(lc2);
+				double		numGroups = estimate_num_groups(root,
+															groupExprs,
+															path_rows,
+															&gset);
+
+				gs->numGroups = numGroups;
+				gd->dNumHashGroups += numGroups;
+			}
+
+			dNumGroups += gd->dNumHashGroups;
 		}
 	}
} }
else else
...@@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root, ...@@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root,
* estimate_hashagg_tablesize * estimate_hashagg_tablesize
* estimate the number of bytes that a hash aggregate hashtable will * estimate the number of bytes that a hash aggregate hashtable will
* require based on the agg_costs, path width and dNumGroups. * require based on the agg_costs, path width and dNumGroups.
*
* XXX this may be over-estimating the size now that hashagg knows to omit
* unneeded columns from the hashtable. Also for mixed-mode grouping sets,
* grouping columns not in the hashed set are counted here even though hashagg
* won't store them. Is this a problem?
*/ */
static Size static Size
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
...@@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel, RelOptInfo *input_rel,
PathTarget *target, PathTarget *target,
const AggClauseCosts *agg_costs, const AggClauseCosts *agg_costs,
List *rollup_lists, grouping_sets_data *gd)
List *rollup_groupclauses)
{ {
Query *parse = root->parse; Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path; Path *cheapest_path = input_rel->cheapest_total_path;
...@@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root,
*/ */
dNumGroups = get_number_of_groups(root, dNumGroups = get_number_of_groups(root,
cheapest_path->rows, cheapest_path->rows,
rollup_lists, gd);
rollup_groupclauses);
/* /*
* Determine whether it's possible to perform sort-based implementations * Determine whether it's possible to perform sort-based implementations
...@@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root, ...@@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root,
* grouping_is_sortable() is trivially true, and all the * grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll consider * pathkeys_contained_in() tests will succeed too, so that we'll consider
* every surviving input path.) * every surviving input path.)
*
* If we have grouping sets, we might be able to sort some but not all of
* them; in this case, we need can_sort to be true as long as we must
* consider any sorted-input plan.
*/ */
-	can_sort = grouping_is_sortable(parse->groupClause);
+	can_sort = (gd && gd->rollups != NIL)
+		|| grouping_is_sortable(parse->groupClause);
/* /*
* Determine whether we should consider hash-based implementations of * Determine whether we should consider hash-based implementations of
* grouping. * grouping.
* *
-	 * Hashed aggregation only applies if we're grouping.  We currently can't
-	 * hash if there are grouping sets, though.
+	 * Hashed aggregation only applies if we're grouping.  If we have grouping
+	 * sets, some groups might be hashable but others not; in this case we set
* can_hash true as long as there is nothing globally preventing us from
* hashing (and we should therefore consider plans with hashes).
* *
 	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
 	 * aggregates.  (Doing so would imply storing *all* the input values in
@@ -3440,9 +3648,8 @@ create_grouping_paths(PlannerInfo *root,
 	 * other gating conditions, so we want to do it last.
 	 */
 	can_hash = (parse->groupClause != NIL &&
-				parse->groupingSets == NIL &&
 				agg_costs->numOrderedAggs == 0 &&
-				grouping_is_hashable(parse->groupClause));
+				(gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
 	/*
 	 * If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3508,8 +3715,7 @@ create_grouping_paths(PlannerInfo *root,
 		/* Estimate number of partial groups. */
 		dNumPartialGroups = get_number_of_groups(root,
 												 cheapest_partial_path->rows,
-												 NIL,
-												 NIL);
+												 gd);
 		/*
 		 * Collect statistics about aggregates for estimating costs of
@@ -3642,20 +3848,9 @@ create_grouping_paths(PlannerInfo *root,
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
 			{
-				/*
-				 * We have grouping sets, possibly with aggregation.  Make
-				 * a GroupingSetsPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_groupingsets_path(root,
-												  grouped_rel,
-												  path,
-												  target,
-												  (List *) parse->havingQual,
-												  rollup_lists,
-												  rollup_groupclauses,
-												  agg_costs,
-												  dNumGroups));
+				consider_groupingsets_paths(root, grouped_rel,
+											path, true, can_hash, target,
+											gd, agg_costs, dNumGroups);
 			}
 			else if (parse->hasAggs)
 			{
@@ -3815,23 +4010,34 @@ create_grouping_paths(PlannerInfo *root,
 	}
 
 	if (can_hash)
 	{
+		if (parse->groupingSets)
+		{
+			/*
+			 * Try for a hash-only groupingsets path over unsorted input.
+			 */
+			consider_groupingsets_paths(root, grouped_rel,
+										cheapest_path, false, true, target,
+										gd, agg_costs, dNumGroups);
+		}
+		else
+		{
 			hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
 														  agg_costs,
 														  dNumGroups);
 
 			/*
-			 * Provided that the estimated size of the hashtable does not exceed
-			 * work_mem, we'll generate a HashAgg Path, although if we were unable
-			 * to sort above, then we'd better generate a Path, so that we at
-			 * least have one.
+			 * Provided that the estimated size of the hashtable does not
+			 * exceed work_mem, we'll generate a HashAgg Path, although if we
+			 * were unable to sort above, then we'd better generate a Path, so
+			 * that we at least have one.
 			 */
 			if (hashaggtablesize < work_mem * 1024L ||
 				grouped_rel->pathlist == NIL)
 			{
 				/*
-				 * We just need an Agg over the cheapest-total input path, since
-				 * input order won't matter.
+				 * We just need an Agg over the cheapest-total input path,
+				 * since input order won't matter.
 				 */
 				add_path(grouped_rel, (Path *)
 						 create_agg_path(root, grouped_rel,
@@ -3844,6 +4050,7 @@ create_grouping_paths(PlannerInfo *root,
 												 agg_costs,
 												 dNumGroups));
 			}
+		}
 		/*
 		 * Generate a HashAgg Path atop of the cheapest partial path.  Once
@@ -3921,6 +4128,344 @@ create_grouping_paths(PlannerInfo *root,
 	return grouped_rel;
 }
/*
* For a given input path, consider the possible ways of doing grouping sets on
* it, by combinations of hashing and sorting. This can be called multiple
* times, so it's important that it not scribble on input. No result is
* returned, but any generated paths are added to grouped_rel.
*/
static void
consider_groupingsets_paths(PlannerInfo *root,
RelOptInfo *grouped_rel,
Path *path,
bool is_sorted,
bool can_hash,
PathTarget *target,
grouping_sets_data *gd,
const AggClauseCosts *agg_costs,
double dNumGroups)
{
Query *parse = root->parse;
/*
* If we're not being offered sorted input, then only consider plans that
* can be done entirely by hashing.
*
* We can hash everything if it looks like it'll fit in work_mem. But if
* the input is actually sorted despite not being advertised as such, we
* prefer to make use of that in order to use less memory.
*
* If none of the grouping sets are sortable, then ignore the work_mem
* limit and generate a path anyway, since otherwise we'll just fail.
*/
if (!is_sorted)
{
List *new_rollups = NIL;
RollupData *unhashed_rollup = NULL;
List *sets_data;
List *empty_sets_data = NIL;
List *empty_sets = NIL;
ListCell *lc;
ListCell *l_start = list_head(gd->rollups);
AggStrategy strat = AGG_HASHED;
Size hashsize;
double exclude_groups = 0.0;
Assert(can_hash);
if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
{
unhashed_rollup = lfirst(l_start);
exclude_groups = unhashed_rollup->numGroups;
l_start = lnext(l_start);
}
hashsize = estimate_hashagg_tablesize(path,
agg_costs,
dNumGroups - exclude_groups);
/*
* gd->rollups is empty if we have only unsortable columns to work
* with. Override work_mem in that case; otherwise, we'll rely on the
* sorted-input case to generate usable mixed paths.
*/
if (hashsize > work_mem * 1024L && gd->rollups)
return; /* nope, won't fit */
/*
* We need to burst the existing rollups list into individual grouping
* sets and recompute a groupClause for each set.
*/
sets_data = list_copy(gd->unsortable_sets);
for_each_cell(lc, l_start)
{
RollupData *rollup = lfirst(lc);
/*
* If we find an unhashable rollup that's not been skipped by the
* "actually sorted" check above, we can't cope; we'd need sorted
* input (with a different sort order) but we can't get that here.
* So bail out; we'll get a valid path from the is_sorted case
* instead.
*
* The mere presence of empty grouping sets doesn't make a rollup
			 * unhashable (see preprocess_grouping_sets); we handle those
			 * specially below.
*/
if (!rollup->hashable)
return;
else
sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
}
foreach(lc, sets_data)
{
GroupingSetData *gs = lfirst(lc);
List *gset = gs->set;
RollupData *rollup;
if (gset == NIL)
{
/* Empty grouping sets can't be hashed. */
empty_sets_data = lappend(empty_sets_data, gs);
empty_sets = lappend(empty_sets, NIL);
}
else
{
rollup = makeNode(RollupData);
rollup->groupClause = preprocess_groupclause(root, gset);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
gd->tleref_to_colnum_map);
rollup->numGroups = gs->numGroups;
rollup->hashable = true;
rollup->is_hashed = true;
new_rollups = lappend(new_rollups, rollup);
}
}
/*
* If we didn't find anything nonempty to hash, then bail. We'll
* generate a path from the is_sorted case.
*/
if (new_rollups == NIL)
return;
/*
* If there were empty grouping sets they should have been in the
* first rollup.
*/
Assert(!unhashed_rollup || !empty_sets);
if (unhashed_rollup)
{
new_rollups = lappend(new_rollups, unhashed_rollup);
strat = AGG_MIXED;
}
else if (empty_sets)
{
RollupData *rollup = makeNode(RollupData);
rollup->groupClause = NIL;
rollup->gsets_data = empty_sets_data;
rollup->gsets = empty_sets;
rollup->numGroups = list_length(empty_sets);
rollup->hashable = false;
rollup->is_hashed = false;
new_rollups = lappend(new_rollups, rollup);
strat = AGG_MIXED;
}
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
strat,
new_rollups,
agg_costs,
dNumGroups));
return;
}
/*
* If we have sorted input but nothing we can do with it, bail.
*/
if (list_length(gd->rollups) == 0)
return;
/*
* Given sorted input, we try to make two paths: one sorted and one mixed
* sort/hash. (We need to try both because hashagg might be disabled, or
* some columns might not be sortable.)
*
* can_hash is passed in as false if some obstacle elsewhere (such as
* ordered aggs) means that we shouldn't consider hashing at all.
*/
if (can_hash && gd->any_hashable)
{
List *rollups = NIL;
List *hash_sets = list_copy(gd->unsortable_sets);
double availspace = (work_mem * 1024.0);
ListCell *lc;
/*
* Account first for space needed for groups we can't sort at all.
*/
availspace -= (double) estimate_hashagg_tablesize(path,
agg_costs,
gd->dNumHashGroups);
if (availspace > 0 && list_length(gd->rollups) > 1)
{
double scale;
int num_rollups = list_length(gd->rollups);
int k_capacity;
int *k_weights = palloc(num_rollups * sizeof(int));
Bitmapset *hash_items = NULL;
int i;
/*
* We treat this as a knapsack problem: the knapsack capacity
* represents work_mem, the item weights are the estimated memory
* usage of the hashtables needed to implement a single rollup, and
* we really ought to use the cost saving as the item value;
* however, currently the costs assigned to sort nodes don't
* reflect the comparison costs well, and so we treat all items as
* of equal value (each rollup we hash instead saves us one sort).
*
* To use the discrete knapsack, we need to scale the values to a
* reasonably small bounded range. We choose to allow a 5% error
* margin; we have no more than 4096 rollups in the worst possible
* case, which with a 5% error margin will require a bit over 42MB
* of workspace. (Anyone wanting to plan queries that complex had
* better have the memory for it. In more reasonable cases, with
* no more than a couple of dozen rollups, the memory usage will
* be negligible.)
*
* k_capacity is naturally bounded, but we clamp the values for
* scale and weight (below) to avoid overflows or underflows (or
* uselessly trying to use a scale factor less than 1 byte).
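*
* As a worked example (hypothetical numbers, not from the patch): with
* availspace = 4MB and num_rollups = 10, scale = Max(4194304/200, 1.0)
* ~= 20972, so k_capacity = floor(4194304/scale) = 200; each weight
* unit then stands for roughly 0.5% of the available space.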
*/
scale = Max(availspace / (20.0 * num_rollups), 1.0);
k_capacity = (int) floor(availspace / scale);
/*
* We leave the first rollup out of consideration since it's the
* one that matches the input sort order. We assign indexes "i"
* to only those entries considered for hashing; the second loop,
* below, must use the same condition.
*/
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
RollupData *rollup = lfirst(lc);
if (rollup->hashable)
{
double sz = estimate_hashagg_tablesize(path,
agg_costs,
rollup->numGroups);
/*
* If sz is enormous, but work_mem (and hence scale) is
* small, avoid integer overflow here.
*/
k_weights[i] = (int) Min(floor(sz / scale),
k_capacity + 1.0);
++i;
}
}
/*
* Apply knapsack algorithm; compute the set of items which
* maximizes the value stored (in this case the number of sorts
* saved) while keeping the total size (approximately) within
* capacity.
*/
if (i > 0)
hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
if (!bms_is_empty(hash_items))
{
rollups = list_make1(linitial(gd->rollups));
i = 0;
for_each_cell(lc, lnext(list_head(gd->rollups)))
{
RollupData *rollup = lfirst(lc);
if (rollup->hashable)
{
if (bms_is_member(i, hash_items))
hash_sets = list_concat(hash_sets,
list_copy(rollup->gsets_data));
else
rollups = lappend(rollups, rollup);
++i;
}
else
rollups = lappend(rollups, rollup);
}
}
}
if (!rollups && hash_sets)
rollups = list_copy(gd->rollups);
foreach(lc, hash_sets)
{
GroupingSetData *gs = lfirst(lc);
RollupData *rollup = makeNode(RollupData);
Assert(gs->set != NIL);
rollup->groupClause = preprocess_groupclause(root, gs->set);
rollup->gsets_data = list_make1(gs);
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
rollup->gsets_data,
gd->tleref_to_colnum_map);
rollup->numGroups = gs->numGroups;
rollup->hashable = true;
rollup->is_hashed = true;
rollups = lcons(rollup, rollups);
}
if (rollups)
{
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
AGG_MIXED,
rollups,
agg_costs,
dNumGroups));
}
}
/*
* Now try the simple sorted case.
*/
if (!gd->unsortable_sets)
add_path(grouped_rel, (Path *)
create_groupingsets_path(root,
grouped_rel,
path,
target,
(List *) parse->havingQual,
AGG_SORTED,
gd->rollups,
agg_costs,
dNumGroups));
}
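A hedged sketch of the intended call pattern follows; the create_grouping_paths call sites are outside this excerpt, so the variable names here (path, cheapest_path, gd) are assumptions. The point is that the function may safely be invoked once per candidate input path, since it never modifies its inputs:

/* Hypothetical call sites, mirroring the header comment's contract. */
consider_groupingsets_paths(root, grouped_rel,
                            path, true /* is_sorted */, can_hash,
                            target, gd, agg_costs, dNumGroups);
consider_groupingsets_paths(root, grouped_rel,
                            cheapest_path, false /* is_sorted */, can_hash,
                            target, gd, agg_costs, dNumGroups);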
/*
 * create_window_paths
 *
...
...@@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root,
 * 'subpath' is the path representing the source of data
 * 'target' is the PathTarget to be computed
 * 'having_qual' is the HAVING quals if any
 * 'rollups' is a list of RollupData nodes
 * 'agg_costs' contains cost info about the aggregate functions to be computed
 * 'numGroups' is the estimated total number of groups
 */
GroupingSetsPath *
create_groupingsets_path(PlannerInfo *root,
...@@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
AggStrategy aggstrategy,
List *rollups,
const AggClauseCosts *agg_costs,
double numGroups)
{
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
ListCell *lc;
bool is_first = true;
bool is_first_sort = true;
/* The topmost generated Plan node will be an Agg */
pathnode->path.pathtype = T_Agg;
...@@ -2727,55 +2728,90 @@ create_groupingsets_path(PlannerInfo *root,
pathnode->path.parallel_workers = subpath->parallel_workers;
pathnode->subpath = subpath;
/*
 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
 * to AGG_HASHED, here if possible.
 */
if (aggstrategy == AGG_SORTED &&
list_length(rollups) == 1 &&
((RollupData *) linitial(rollups))->groupClause == NIL)
aggstrategy = AGG_PLAIN;
if (aggstrategy == AGG_MIXED &&
list_length(rollups) == 1)
aggstrategy = AGG_HASHED;
/*
 * Output will be in sorted order by group_pathkeys if, and only if, there
 * is a single rollup operation on a non-empty list of grouping
 * expressions.
 */
if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
pathnode->path.pathkeys = root->group_pathkeys;
else
pathnode->path.pathkeys = NIL;
pathnode->aggstrategy = aggstrategy;
pathnode->rollups = rollups;
pathnode->qual = having_qual;
Assert(rollups != NIL);
Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
foreach(lc, rollups)
{
RollupData *rollup = lfirst(lc);
List *gsets = rollup->gsets;
int numGroupCols = list_length(linitial(gsets));
/*
 * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
 * (already-sorted) input, and following ones do their own sort.
 *
 * In AGG_HASHED mode, there is one rollup for each grouping set.
 *
 * In AGG_MIXED mode, the first rollups are hashed, the first
 * non-hashed one takes the (already-sorted) input, and following ones
 * do their own sort.
 */
if (is_first)
{
cost_agg(&pathnode->path, root,
aggstrategy,
agg_costs,
numGroupCols,
rollup->numGroups,
subpath->startup_cost,
subpath->total_cost,
subpath->rows);
is_first = false;
if (!rollup->is_hashed)
is_first_sort = false;
}
else
{
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
if (rollup->is_hashed || is_first_sort)
{
/*
 * Account for cost of aggregation, but don't charge input
 * cost again
 */
cost_agg(&agg_path, root,
rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
agg_costs,
numGroupCols,
rollup->numGroups,
0.0, 0.0,
subpath->rows);
if (!rollup->is_hashed)
is_first_sort = false;
}
else
{
/* Account for cost of sort, but don't charge input cost again */
cost_sort(&sort_path, root, NIL,
0.0,
...@@ -2786,16 +2822,16 @@ create_groupingsets_path(PlannerInfo *root,
-1.0);
/* Account for cost of aggregation */
cost_agg(&agg_path, root,
AGG_SORTED,
agg_costs,
numGroupCols,
rollup->numGroups,
sort_path.startup_cost,
sort_path.total_cost,
sort_path.rows);
}
pathnode->path.total_cost += agg_path.total_cost;
pathnode->path.rows += agg_path.rows;
...
/*
* knapsack.h
*
* Copyright (c) 2017, PostgreSQL Global Development Group
*
* src/include/lib/knapsack.h
*/
#ifndef KNAPSACK_H
#define KNAPSACK_H
#include "postgres.h"
#include "nodes/bitmapset.h"
extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
int *item_weights, double *item_values);
#endif /* KNAPSACK_H */
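A minimal usage sketch for DiscreteKnapsack, mirroring the weight-scaling logic in consider_groupingsets_paths above. The table sizes here are hypothetical; passing NULL for item_values, as the planner call does, treats every item as equally valuable (each hashed rollup saves one sort):

#include "postgres.h"
#include <math.h>
#include "lib/knapsack.h"
#include "nodes/bitmapset.h"

static void
choose_hashable_rollups_sketch(void)
{
	double		availspace = 4096.0 * 1024.0;	/* pretend work_mem = 4MB */
	double		sizes[] = {1.5e6, 2.5e6, 3.0e6};	/* hypothetical estimates */
	int			num_items = 3;
	double		scale = Max(availspace / (20.0 * num_items), 1.0);
	int			k_capacity = (int) floor(availspace / scale);
	int			k_weights[3];
	Bitmapset  *hash_items;
	int			i;

	/* scale each size estimate down to a small integer weight */
	for (i = 0; i < num_items; i++)
		k_weights[i] = (int) Min(floor(sizes[i] / scale), k_capacity + 1.0);

	/* NULL item_values: all items treated as equally valuable */
	hash_items = DiscreteKnapsack(k_capacity, num_items, k_weights, NULL);

	for (i = 0; i < num_items; i++)
		elog(DEBUG1, "rollup %d: %s", i,
			 bms_is_member(i, hash_items) ? "hash" : "sort");
}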
...@@ -20,6 +20,11 @@
#ifndef BITMAPSET_H
#define BITMAPSET_H
/*
 * Forward decl to save including pg_list.h
 */
struct List;
/*
 * Data representation
 */
...@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
...
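A small illustration of the new bms_overlap_list, which is assumed here, by analogy with bms_overlap, to report whether any integer in the List is a member of the Bitmapset:

#include "postgres.h"
#include "nodes/bitmapset.h"
#include "nodes/pg_list.h"

static bool
overlap_list_sketch(void)
{
	Bitmapset  *a = bms_make_singleton(3);
	List	   *b = list_make2_int(2, 3);

	return bms_overlap_list(a, b);	/* true: 3 appears in both */
}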
...@@ -1699,6 +1699,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
...@@ -1706,15 +1707,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
...@@ -1726,21 +1729,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
int num_hashes;
AggStatePerHash perhash;
AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
...
...@@ -261,6 +261,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
T_RollupData,
T_GroupingSetData,
T_StatisticExtInfo,
/*
...@@ -724,7 +726,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
AGG_HASHED, /* grouped agg, use internal hashtable */
AGG_MIXED /* grouped agg, hash and sort both used */
} AggStrategy;
/*
...
...@@ -758,7 +758,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
...
...@@ -1417,18 +1417,38 @@ typedef struct AggPath
List *qual; /* quals (HAVING quals), if any */
} AggPath;
/*
 * Various annotations used for grouping sets in the planner.
 */
typedef struct GroupingSetData
{
NodeTag type;
List *set; /* grouping set as list of sortgrouprefs */
double numGroups; /* est. number of result groups */
} GroupingSetData;
typedef struct RollupData
{
NodeTag type;
List *groupClause; /* applicable subset of parse->groupClause */
List *gsets; /* lists of integer indexes into groupClause */
List *gsets_data; /* list of GroupingSetData */
double numGroups; /* est. number of result groups */
bool hashable; /* can be hashed */
bool is_hashed; /* to be implemented as a hashagg */
} RollupData;
/*
 * GroupingSetsPath represents a GROUPING SETS aggregation
 */
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
AggStrategy aggstrategy; /* basic strategy */
List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
...
...@@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
AggStrategy aggstrategy,
List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
...
...@@ -13,6 +13,13 @@ copy gstest2 from stdin;
create temp table gstest3 (a integer, b integer, c integer, d integer);
copy gstest3 from stdin;
alter table gstest3 add primary key (a);
create temp table gstest4(id integer, v integer,
unhashable_col bit(4), unsortable_col xid);
insert into gstest4
values (1,1,b'0000','1'), (2,2,b'0001','1'),
(3,4,b'0010','2'), (4,8,b'0011','2'),
(5,16,b'0000','2'), (6,32,b'0001','2'),
(7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
returns setof record
...@@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer)
end;
$f$ language plpgsql;
-- basic functionality
set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
...@@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
1 | 2 | 0 | 25 | 2 | 13
1 | 3 | 0 | 14 | 1 | 14
2 | 3 | 0 | 15 | 1 | 15
3 | 3 | 0 | 16 | 1 | 16
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
| | 3 | 25 | 2 | 13
| | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
| | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
| | 3 | 16 | 1 | 16
| | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
| | 3 | 37 | 2 | 19
(21 rows)
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
...@@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
-- hashing support
set enable_hashagg = true;
-- failure cases
select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
ERROR: could not implement GROUP BY
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
ERROR: could not implement GROUP BY
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
-- simple cases
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | | 1 | 60 | 5 | 14
2 | | 1 | 15 | 1 | 15
3 | | 1 | 33 | 2 | 17
4 | | 1 | 37 | 2 | 19
| 1 | 2 | 58 | 4 | 19
| 2 | 2 | 25 | 2 | 13
| 3 | 2 | 45 | 3 | 16
| 4 | 2 | 17 | 1 | 17
(8 rows)
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Sort
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
-> HashAggregate
Hash Key: "*VALUES*".column1
Hash Key: "*VALUES*".column2
-> Values Scan on "*VALUES*"
(6 rows)
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by cube(a,b) order by 3,1,2;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
1 | 2 | 0 | 25 | 2 | 13
1 | 3 | 0 | 14 | 1 | 14
2 | 3 | 0 | 15 | 1 | 15
3 | 3 | 0 | 16 | 1 | 16
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
1 | | 1 | 60 | 5 | 14
2 | | 1 | 15 | 1 | 15
3 | | 1 | 33 | 2 | 17
4 | | 1 | 37 | 2 | 19
| 1 | 2 | 58 | 4 | 19
| 2 | 2 | 25 | 2 | 13
| 3 | 2 | 45 | 3 | 16
| 4 | 2 | 17 | 1 | 17
| | 3 | 145 | 10 | 19
(16 rows)
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by cube(a,b) order by 3,1,2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Sort
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2
-> MixedAggregate
Hash Key: "*VALUES*".column1, "*VALUES*".column2
Hash Key: "*VALUES*".column1
Hash Key: "*VALUES*".column2
Group Key: ()
-> Values Scan on "*VALUES*"
(8 rows)
-- shouldn't try and hash
explain (costs off)
select a, b, grouping(a,b), array_agg(v order by v)
from gstest1 group by cube(a,b);
QUERY PLAN
----------------------------------------------------------
GroupAggregate
Group Key: "*VALUES*".column1, "*VALUES*".column2
Group Key: "*VALUES*".column1
Group Key: ()
Sort Key: "*VALUES*".column2
Group Key: "*VALUES*".column2
-> Sort
Sort Key: "*VALUES*".column1, "*VALUES*".column2
-> Values Scan on "*VALUES*"
(9 rows)
-- mixed hashable/sortable cases
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
order by 3, 5;
unhashable_col | unsortable_col | grouping | count | sum
----------------+----------------+----------+-------+-----
0000 | | 1 | 2 | 17
0001 | | 1 | 2 | 34
0010 | | 1 | 2 | 68
0011 | | 1 | 2 | 136
| 2 | 2 | 4 | 60
| 1 | 2 | 4 | 195
(6 rows)
explain (costs off)
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
order by 3,5;
QUERY PLAN
------------------------------------------------------------------
Sort
Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
-> MixedAggregate
Hash Key: unsortable_col
Group Key: unhashable_col
-> Sort
Sort Key: unhashable_col
-> Seq Scan on gstest4
(8 rows)
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
order by 3,5;
unhashable_col | unsortable_col | grouping | count | sum
----------------+----------------+----------+-------+-----
0000 | | 1 | 1 | 1
0001 | | 1 | 1 | 2
0010 | | 1 | 1 | 4
0011 | | 1 | 1 | 8
0000 | | 1 | 1 | 16
0001 | | 1 | 1 | 32
0010 | | 1 | 1 | 64
0011 | | 1 | 1 | 128
| 1 | 2 | 1 | 1
| 1 | 2 | 1 | 2
| 2 | 2 | 1 | 4
| 2 | 2 | 1 | 8
| 2 | 2 | 1 | 16
| 2 | 2 | 1 | 32
| 1 | 2 | 1 | 64
| 1 | 2 | 1 | 128
(16 rows)
explain (costs off)
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
order by 3,5;
QUERY PLAN
------------------------------------------------------------------
Sort
Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v))
-> MixedAggregate
Hash Key: v, unsortable_col
Group Key: v, unhashable_col
-> Sort
Sort Key: v, unhashable_col
-> Seq Scan on gstest4
(8 rows)
-- empty input: first is 0 rows, second 1, third 3 etc.
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
a | b | sum | count
---+---+-----+-------
(0 rows)
explain (costs off)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
QUERY PLAN
--------------------------------
HashAggregate
Hash Key: a, b
Hash Key: a
-> Seq Scan on gstest_empty
(4 rows)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
a | b | sum | count
---+---+-----+-------
| | | 0
(1 row)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
a | b | sum | count
---+---+-----+-------
| | | 0
| | | 0
| | | 0
(3 rows)
explain (costs off)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
QUERY PLAN
--------------------------------
MixedAggregate
Hash Key: a, b
Group Key: ()
Group Key: ()
Group Key: ()
-> Seq Scan on gstest_empty
(6 rows)
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
sum | count
-----+-------
| 0
| 0
| 0
(3 rows)
explain (costs off)
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
QUERY PLAN
--------------------------------
Aggregate
Group Key: ()
Group Key: ()
Group Key: ()
-> Seq Scan on gstest_empty
(5 rows)
-- check that functionally dependent cols are not nulled
select a, d, grouping(a,b,c)
from gstest3
group by grouping sets ((a,b), (a,c));
a | d | grouping
---+---+----------
1 | 1 | 1
2 | 2 | 1
1 | 1 | 2
2 | 2 | 2
(4 rows)
explain (costs off)
select a, d, grouping(a,b,c)
from gstest3
group by grouping sets ((a,b), (a,c));
QUERY PLAN
---------------------------
HashAggregate
Hash Key: a, b
Hash Key: a, c
-> Seq Scan on gstest3
(4 rows)
-- simple rescan tests
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by grouping sets (a,b);
a | b | sum
---+---+-----
2 | | 6
1 | | 3
| 2 | 3
| 3 | 3
| 1 | 3
(5 rows)
explain (costs off)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by grouping sets (a,b);
QUERY PLAN
------------------------------------------
HashAggregate
Hash Key: gstest_data.a
Hash Key: gstest_data.b
-> Nested Loop
-> Values Scan on "*VALUES*"
-> Function Scan on gstest_data
(6 rows)
select *
from (values (1),(2)) v(x),
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
ERROR: aggregate functions are not allowed in FROM clause of their own query level
LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ...
^
explain (costs off)
select *
from (values (1),(2)) v(x),
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
ERROR: aggregate functions are not allowed in FROM clause of their own query level
LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x...
^
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | 1 | 0 | 21 | 2 | 11
1 | 2 | 0 | 25 | 2 | 13
1 | 3 | 0 | 14 | 1 | 14
2 | 3 | 0 | 15 | 1 | 15
3 | 3 | 0 | 16 | 1 | 16
3 | 4 | 0 | 17 | 1 | 17
4 | 1 | 0 | 37 | 2 | 19
| | 3 | 21 | 2 | 11
| | 3 | 21 | 2 | 11
| | 3 | 25 | 2 | 13
| | 3 | 25 | 2 | 13
| | 3 | 14 | 1 | 14
| | 3 | 14 | 1 | 14
| | 3 | 15 | 1 | 15
| | 3 | 15 | 1 | 15
| | 3 | 16 | 1 | 16
| | 3 | 16 | 1 | 16
| | 3 | 17 | 1 | 17
| | 3 | 17 | 1 | 17
| | 3 | 37 | 2 | 19
| | 3 | 37 | 2 | 19
(21 rows)
explain (costs off)
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
QUERY PLAN
-------------------------------------------------------------------------------------------
Sort
Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3))
-> HashAggregate
Hash Key: "*VALUES*".column1, "*VALUES*".column2
Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1)
Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2)
-> Values Scan on "*VALUES*"
(7 rows)
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
a | b | sum | rsum
---+---+-----+------
1 | 1 | 8 | 8
1 | 2 | 2 | 10
1 | | 10 | 20
2 | 2 | 2 | 22
2 | | 2 | 24
| 1 | 8 | 32
| 2 | 4 | 36
| | 12 | 48
(8 rows)
explain (costs off)
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
QUERY PLAN
---------------------------------------------
Sort
Sort Key: (sum((sum(c))) OVER (?)), a, b
-> WindowAgg
-> Sort
Sort Key: a, b
-> MixedAggregate
Hash Key: a, b
Hash Key: a
Hash Key: b
Group Key: ()
-> Seq Scan on gstest2
(11 rows)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
a | b | sum
---+---+-----
1 | 1 | 1
1 | 2 | 1
1 | 3 | 1
1 | | 3
2 | 1 | 2
2 | 2 | 2
2 | 3 | 2
2 | | 6
| 1 | 3
| 2 | 3
| 3 | 3
| | 9
(12 rows)
explain (costs off)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
QUERY PLAN
------------------------------------------------
Sort
Sort Key: gstest_data.a, gstest_data.b
-> MixedAggregate
Hash Key: gstest_data.a, gstest_data.b
Hash Key: gstest_data.a
Hash Key: gstest_data.b
Group Key: ()
-> Nested Loop
-> Values Scan on "*VALUES*"
-> Function Scan on gstest_data
(10 rows)
-- More rescan tests
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
a | a | four | ten | count
---+---+------+-----+-------
1 | 1 | 0 | 0 | 50
1 | 1 | 0 | 2 | 50
1 | 1 | 0 | 4 | 50
1 | 1 | 0 | 6 | 50
1 | 1 | 0 | 8 | 50
1 | 1 | 0 | | 250
1 | 1 | 1 | 1 | 50
1 | 1 | 1 | 3 | 50
1 | 1 | 1 | 5 | 50
1 | 1 | 1 | 7 | 50
1 | 1 | 1 | 9 | 50
1 | 1 | 1 | | 250
1 | 1 | 2 | 0 | 50
1 | 1 | 2 | 2 | 50
1 | 1 | 2 | 4 | 50
1 | 1 | 2 | 6 | 50
1 | 1 | 2 | 8 | 50
1 | 1 | 2 | | 250
1 | 1 | 3 | 1 | 50
1 | 1 | 3 | 3 | 50
1 | 1 | 3 | 5 | 50
1 | 1 | 3 | 7 | 50
1 | 1 | 3 | 9 | 50
1 | 1 | 3 | | 250
1 | 1 | | 0 | 100
1 | 1 | | 1 | 100
1 | 1 | | 2 | 100
1 | 1 | | 3 | 100
1 | 1 | | 4 | 100
1 | 1 | | 5 | 100
1 | 1 | | 6 | 100
1 | 1 | | 7 | 100
1 | 1 | | 8 | 100
1 | 1 | | 9 | 100
1 | 1 | | | 1000
2 | 2 | 0 | 0 | 50
2 | 2 | 0 | 2 | 50
2 | 2 | 0 | 4 | 50
2 | 2 | 0 | 6 | 50
2 | 2 | 0 | 8 | 50
2 | 2 | 0 | | 250
2 | 2 | 1 | 1 | 50
2 | 2 | 1 | 3 | 50
2 | 2 | 1 | 5 | 50
2 | 2 | 1 | 7 | 50
2 | 2 | 1 | 9 | 50
2 | 2 | 1 | | 250
2 | 2 | 2 | 0 | 50
2 | 2 | 2 | 2 | 50
2 | 2 | 2 | 4 | 50
2 | 2 | 2 | 6 | 50
2 | 2 | 2 | 8 | 50
2 | 2 | 2 | | 250
2 | 2 | 3 | 1 | 50
2 | 2 | 3 | 3 | 50
2 | 2 | 3 | 5 | 50
2 | 2 | 3 | 7 | 50
2 | 2 | 3 | 9 | 50
2 | 2 | 3 | | 250
2 | 2 | | 0 | 100
2 | 2 | | 1 | 100
2 | 2 | | 2 | 100
2 | 2 | | 3 | 100
2 | 2 | | 4 | 100
2 | 2 | | 5 | 100
2 | 2 | | 6 | 100
2 | 2 | | 7 | 100
2 | 2 | | 8 | 100
2 | 2 | | 9 | 100
2 | 2 | | | 1000
(70 rows)
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
array
------------------------------------------------------------------------------------------------------------------------------------------------------
{"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"}
{"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"}
(2 rows)
-- Rescan logic changes when there are no empty grouping sets, so test
-- that too:
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
a | a | four | ten | count
---+---+------+-----+-------
1 | 1 | 0 | | 250
1 | 1 | 1 | | 250
1 | 1 | 2 | | 250
1 | 1 | 3 | | 250
1 | 1 | | 0 | 100
1 | 1 | | 1 | 100
1 | 1 | | 2 | 100
1 | 1 | | 3 | 100
1 | 1 | | 4 | 100
1 | 1 | | 5 | 100
1 | 1 | | 6 | 100
1 | 1 | | 7 | 100
1 | 1 | | 8 | 100
1 | 1 | | 9 | 100
2 | 2 | 0 | | 250
2 | 2 | 1 | | 250
2 | 2 | 2 | | 250
2 | 2 | 3 | | 250
2 | 2 | | 0 | 100
2 | 2 | | 1 | 100
2 | 2 | | 2 | 100
2 | 2 | | 3 | 100
2 | 2 | | 4 | 100
2 | 2 | | 5 | 100
2 | 2 | | 6 | 100
2 | 2 | | 7 | 100
2 | 2 | | 8 | 100
2 | 2 | | 9 | 100
(28 rows)
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
array
---------------------------------------------------------------------------------
{"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"}
{"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"}
(2 rows)
-- test the knapsack
set work_mem = '64kB';
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
QUERY PLAN
-------------------------------
MixedAggregate
Hash Key: two
Hash Key: four
Hash Key: ten
Hash Key: hundred
Group Key: unique1
Sort Key: twothousand
Group Key: twothousand
Sort Key: thousand
Group Key: thousand
-> Sort
Sort Key: unique1
-> Seq Scan on tenk1
(13 rows)
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
QUERY PLAN
-------------------------------
MixedAggregate
Hash Key: two
Hash Key: four
Hash Key: ten
Hash Key: hundred
Group Key: unique1
-> Sort
Sort Key: unique1
-> Seq Scan on tenk1
(9 rows)
set work_mem = '384kB';
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
QUERY PLAN
-------------------------------
MixedAggregate
Hash Key: two
Hash Key: four
Hash Key: ten
Hash Key: hundred
Hash Key: thousand
Group Key: unique1
Sort Key: twothousand
Group Key: twothousand
-> Sort
Sort Key: unique1
-> Seq Scan on tenk1
(12 rows)
-- end -- end
...@@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU
(6 rows)
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
dataa | b | g | count
-------+-----+---+-------
...@@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
b | bar | | 2
b | | | 2
| | | 6
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
a | | 2 | 2
b | | 2 | 1
| | 2 | 3
(24 rows)
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
dataa | b | g | count
-------+-----+---+-------
a | foo | | 2
a | | | 4
a | | 2 | 2
a | bar | 1 | 1
a | bar | 2 | 1
a | bar | | 2
a | foo | 1 | 1
a | foo | 2 | 1
a | | 1 | 2
b | bar | 1 | 1
b | | | 2
b | | 1 | 1
b | bar | 2 | 1
b | bar | | 2
b | | 2 | 1
| | 2 | 3
| | | 6
| bar | 1 | 2
| bar | 2 | 2
| bar | | 4
| foo | 1 | 1
| foo | 2 | 1
| foo | | 2
| | 1 | 3
(24 rows)
...@@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d
a | bar | 1 | 1
a | foo | 1 | 1
b | bar | 1 | 1
| bar | 1 | 2
| foo | 1 | 1
a | | 1 | 2
b | | 1 | 1
| | 1 | 3
a | | 2 | 2
b | | 2 | 1
| bar | 2 | 2
| | 2 | 3
| foo | 2 | 1
a | bar | 2 | 1
a | foo | 2 | 1
b | bar | 2 | 1
a | | | 4
b | bar | | 2
b | | | 2
| | | 6
a | foo | | 2
a | bar | | 2
| bar | | 4
| foo | | 2
(24 rows)
reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
INSERT INTO fewmore VALUES(generate_series(4,5));
...
...@@ -31,6 +31,14 @@ copy gstest3 from stdin;
\.
alter table gstest3 add primary key (a);
create temp table gstest4(id integer, v integer,
unhashable_col bit(4), unsortable_col xid);
insert into gstest4
values (1,1,b'0000','1'), (2,2,b'0001','1'),
(3,4,b'0010','2'), (4,8,b'0011','2'),
(5,16,b'0000','2'), (6,32,b'0001','2'),
(7,64,b'0010','1'), (8,128,b'0011','1');
create temp table gstest_empty (a integer, b integer, v integer);
create function gstest_data(v integer, out a integer, out b integer)
...@@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer)
-- basic functionality
set enable_hashagg = false; -- test hashing explicitly later
-- simple rollup with multiple plain aggregates, with and without ordering
-- (and with ordering differing from grouping)
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by rollup (a,b);
select a, b, grouping(a,b), sum(v), count(*), max(v)
...@@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a);
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1);
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
...@@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
select sum(ten) from onek group by two, rollup(four::text) order by 1;
select sum(ten) from onek group by rollup(four::text), two order by 1;
-- hashing support
set enable_hashagg = true;
-- failure cases
select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
-- simple cases
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by cube(a,b) order by 3,1,2;
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by cube(a,b) order by 3,1,2;
-- shouldn't try and hash
explain (costs off)
select a, b, grouping(a,b), array_agg(v order by v)
from gstest1 group by cube(a,b);
-- mixed hashable/sortable cases
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
order by 3, 5;
explain (costs off)
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((unhashable_col),(unsortable_col))
order by 3,5;
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
order by 3,5;
explain (costs off)
select unhashable_col, unsortable_col,
grouping(unhashable_col, unsortable_col),
count(*), sum(v)
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col))
order by 3,5;
-- empty input: first is 0 rows, second 1, third 3 etc.
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
explain (costs off)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a);
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
explain (costs off)
select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),());
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
explain (costs off)
select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
-- check that functionally dependent cols are not nulled
select a, d, grouping(a,b,c)
from gstest3
group by grouping sets ((a,b), (a,c));
explain (costs off)
select a, d, grouping(a,b,c)
from gstest3
group by grouping sets ((a,b), (a,c));
-- simple rescan tests
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by grouping sets (a,b);
explain (costs off)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by grouping sets (a,b);
select *
from (values (1),(2)) v(x),
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
explain (costs off)
select *
from (values (1),(2)) v(x),
lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s;
-- Tests for chained aggregates
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
explain (costs off)
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6;
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
explain (costs off)
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
explain (costs off)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
-- More rescan tests
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
-- Rescan logic changes when there are no empty grouping sets, so test
-- that too:
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);
-- test the knapsack
set work_mem = '64kB';
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,hundred,ten,four,two);
set work_mem = '384kB';
explain (costs off)
select unique1,
count(two), count(four), count(ten),
count(hundred), count(thousand), count(twothousand),
count(*)
from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two);
-- end
...@@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se
SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1;
-- grouping sets are a bit special, they produce NULLs in columns not actually NULL
set enable_hashagg = false;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa;
SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g;
reset enable_hashagg;
-- data modification
CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data;
...