Commit 2103b7ba authored by Tom Lane

Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do hashed aggregation, but there's not yet planner support for it.
parent fc9814d1
This diff is collapsed.
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* locate group boundaries. * locate group boundaries.
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent) ...@@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent)
*/ */
grpstate = makeNode(GroupState); grpstate = makeNode(GroupState);
node->grpstate = grpstate; node->grpstate = grpstate;
grpstate->grp_useFirstTuple = FALSE;
grpstate->grp_done = FALSE;
grpstate->grp_firstTuple = NULL; grpstate->grp_firstTuple = NULL;
grpstate->grp_done = FALSE;
/* /*
* create expression context * create expression context
...@@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) ...@@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
{ {
GroupState *grpstate = node->grpstate; GroupState *grpstate = node->grpstate;
grpstate->grp_useFirstTuple = FALSE;
grpstate->grp_done = FALSE; grpstate->grp_done = FALSE;
if (grpstate->grp_firstTuple != NULL) if (grpstate->grp_firstTuple != NULL)
{ {
......
...@@ -7,7 +7,8 @@ ...@@ -7,7 +7,8 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $ * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,8 +32,6 @@ ...@@ -31,8 +32,6 @@
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
static uint32 hashFunc(Datum key, int typLen, bool byVal);
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecHash * ExecHash
* *
...@@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable, ...@@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
/* /*
* We reset the eval context each time to reclaim any memory leaked in * We reset the eval context each time to reclaim any memory leaked in
* the hashkey expression or hashFunc itself. * the hashkey expression or ComputeHashFunc itself.
*/ */
ResetExprContext(econtext); ResetExprContext(econtext);
...@@ -550,7 +549,7 @@ ExecHashGetBucket(HashJoinTable hashtable, ...@@ -550,7 +549,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
bucketno = 0; bucketno = 0;
else else
{ {
bucketno = hashFunc(keyval, bucketno = ComputeHashFunc(keyval,
(int) hashtable->typLen, (int) hashtable->typLen,
hashtable->typByVal) hashtable->typByVal)
% (uint32) hashtable->totalbuckets; % (uint32) hashtable->totalbuckets;
...@@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate, ...@@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate,
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* hashFunc * ComputeHashFunc
* *
* the hash function for hash joins * the hash function for hash joins (also used for hash aggregation)
* *
* XXX this probably ought to be replaced with datatype-specific * XXX this probably ought to be replaced with datatype-specific
* hash functions, such as those already implemented for hash indexes. * hash functions, such as those already implemented for hash indexes.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
static uint32 uint32
hashFunc(Datum key, int typLen, bool byVal) ComputeHashFunc(Datum key, int typLen, bool byVal)
{ {
unsigned char *k; unsigned char *k;
...@@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal) ...@@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal)
} }
else else
{ {
elog(ERROR, "hashFunc: Invalid typLen %d", typLen); elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
k = NULL; /* keep compiler quiet */ k = NULL; /* keep compiler quiet */
} }
} }
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -524,6 +524,7 @@ _copyAgg(Agg *from) ...@@ -524,6 +524,7 @@ _copyAgg(Agg *from)
memcpy(newnode->grpColIdx, from->grpColIdx, memcpy(newnode->grpColIdx, from->grpColIdx,
from->numCols * sizeof(AttrNumber)); from->numCols * sizeof(AttrNumber));
} }
newnode->numGroups = from->numGroups;
return newnode; return newnode;
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $
* *
* NOTES * NOTES
* Every (plan) node in POSTGRES has an associated "out" routine which * Every (plan) node in POSTGRES has an associated "out" routine which
...@@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node) ...@@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node)
{ {
appendStringInfo(str, " AGG "); appendStringInfo(str, " AGG ");
_outPlanInfo(str, (Plan *) node); _outPlanInfo(str, (Plan *) node);
appendStringInfo(str, " :aggstrategy %d :numCols %d ", appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ",
(int) node->aggstrategy, node->numCols); (int) node->aggstrategy, node->numCols, node->numGroups);
} }
static void static void
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy, ...@@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
plan->plan_rows *= 0.1; plan->plan_rows *= 0.1;
if (plan->plan_rows < 1) if (plan->plan_rows < 1)
plan->plan_rows = 1; plan->plan_rows = 1;
node->numGroups = (long) plan->plan_rows;
} }
plan->state = (EState *) NULL; plan->state = (EState *) NULL;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction)
AttrNumber *groupColIdx = NULL; AttrNumber *groupColIdx = NULL;
Path *cheapest_path; Path *cheapest_path;
Path *sorted_path; Path *sorted_path;
bool use_hashed_grouping = false;
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */ /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
tlist = preprocess_targetlist(tlist, tlist = preprocess_targetlist(tlist,
...@@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction)
group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
/*
* Consider whether we might want to use hashed grouping.
*/
if (parse->groupClause)
{
/*
* Executor doesn't support hashed aggregation with DISTINCT
* aggregates. (Doing so would imply storing *all* the input
* values in the hash table, which seems like a certain loser.)
*/
if (parse->hasAggs &&
(contain_distinct_agg_clause((Node *) tlist) ||
contain_distinct_agg_clause(parse->havingQual)))
use_hashed_grouping = false;
else
{
#if 0 /* much more to do here */
/* TEMPORARY HOTWIRE FOR TESTING */
use_hashed_grouping = true;
#endif
}
}
/* /*
* Select the best path and create a plan to execute it. * Select the best path and create a plan to execute it.
* *
...@@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction)
} }
/* /*
* If any aggregate is present, insert the Agg node, plus an explicit * Insert AGG or GROUP node if needed, plus an explicit sort step
* sort if necessary. * if necessary.
* *
* HAVING clause, if any, becomes qual of the Agg node * HAVING clause, if any, becomes qual of the Agg node
*/ */
if (parse->hasAggs) if (use_hashed_grouping)
{ {
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(tlist,
(List *) parse->havingQual,
AGG_HASHED,
length(parse->groupClause),
groupColIdx,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy; AggStrategy aggstrategy;
if (parse->groupClause) if (parse->groupClause)
{ {
aggstrategy = AGG_SORTED;
/*
* Add an explicit sort if we couldn't make the path come out
* the way the AGG node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{ {
result_plan = make_groupsortplan(parse, result_plan = make_groupsortplan(parse,
...@@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction)
result_plan); result_plan);
current_pathkeys = group_pathkeys; current_pathkeys = group_pathkeys;
} }
aggstrategy = AGG_SORTED;
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
} }
else else
{
aggstrategy = AGG_PLAIN; aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(tlist, result_plan = (Plan *) make_agg(tlist,
(List *) parse->havingQual, (List *) parse->havingQual,
...@@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction) ...@@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction)
length(parse->groupClause), length(parse->groupClause),
groupColIdx, groupColIdx,
result_plan); result_plan);
/*
* Note: plain or grouped Agg does not affect any existing
* sort order of the tuples
*/
} }
else else
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $
* *
* HISTORY * HISTORY
* AUTHOR DATE MAJOR EVENT * AUTHOR DATE MAJOR EVENT
...@@ -46,6 +46,7 @@ typedef struct ...@@ -46,6 +46,7 @@ typedef struct
} check_subplans_for_ungrouped_vars_context; } check_subplans_for_ungrouped_vars_context;
static bool contain_agg_clause_walker(Node *node, void *context); static bool contain_agg_clause_walker(Node *node, void *context);
static bool contain_distinct_agg_clause_walker(Node *node, void *context);
static bool pull_agg_clause_walker(Node *node, List **listptr); static bool pull_agg_clause_walker(Node *node, List **listptr);
static bool expression_returns_set_walker(Node *node, void *context); static bool expression_returns_set_walker(Node *node, void *context);
static bool contain_subplans_walker(Node *node, void *context); static bool contain_subplans_walker(Node *node, void *context);
...@@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context) ...@@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context)
return expression_tree_walker(node, contain_agg_clause_walker, context); return expression_tree_walker(node, contain_agg_clause_walker, context);
} }
/*
 * contain_distinct_agg_clause
 *	  Report whether an expression tree holds at least one Aggref that is
 *	  marked DISTINCT.
 *
 * 'clause' is the root of the tree to inspect; a NULL clause yields false.
 * The actual search is delegated to contain_distinct_agg_clause_walker,
 * which needs no context and is therefore handed NULL.
 */
bool
contain_distinct_agg_clause(Node *clause)
{
	bool		found_distinct;

	found_distinct = contain_distinct_agg_clause_walker(clause, NULL);
	return found_distinct;
}
/*
 * Walker for contain_distinct_agg_clause.
 *
 * Returns true for an Aggref node whose aggdistinct flag is set; otherwise
 * hands the node to expression_tree_walker with this same function as the
 * callback.  'context' is unused and merely passed along.
 */
static bool
contain_distinct_agg_clause_walker(Node *node, void *context)
{
	/* Nothing to examine in an empty subtree. */
	if (node == NULL)
		return false;

	/* A DISTINCT aggregate settles the question; stop descending here. */
	if (IsA(node, Aggref) && ((Aggref *) node)->aggdistinct)
		return true;

	/* Any other node (including a non-DISTINCT Aggref): keep walking. */
	return expression_tree_walker(node,
								  contain_distinct_agg_clause_walker,
								  context);
}
/* /*
* pull_agg_clause * pull_agg_clause
* Recursively pulls all Aggref nodes from an expression tree. * Recursively pulls all Aggref nodes from an expression tree.
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: nodeHash.h,v 1.24 2002/06/20 20:29:49 momjian Exp $ * $Id: nodeHash.h,v 1.25 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
int *virtualbuckets, int *virtualbuckets,
int *physicalbuckets, int *physicalbuckets,
int *numbatches); int *numbatches);
extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal);
#endif /* NODEHASH_H */ #endif /* NODEHASH_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: execnodes.h,v 1.76 2002/11/06 00:00:44 tgl Exp $ * $Id: execnodes.h,v 1.77 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -661,12 +661,18 @@ typedef struct MaterialState ...@@ -661,12 +661,18 @@ typedef struct MaterialState
* *
* csstate.css_ScanTupleSlot refers to output of underlying plan. * csstate.css_ScanTupleSlot refers to output of underlying plan.
* *
* Note: the associated ExprContext contains ecxt_aggvalues and ecxt_aggnulls * Note: csstate.cstate.cs_ExprContext contains ecxt_aggvalues and
* arrays, which hold the computed agg values for the current input group * ecxt_aggnulls arrays, which hold the computed agg values for the current
* during evaluation of an Agg node's output tuple(s). * input group during evaluation of an Agg node's output tuple(s). We
* create a second ExprContext, tmpcontext, in which to evaluate input
* expressions and run the aggregate transition functions.
* ------------------------- * -------------------------
*/ */
typedef struct AggStatePerAggData *AggStatePerAgg; /* private in nodeAgg.c */ /* these structs are private in nodeAgg.c: */
typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggHashEntryData *AggHashEntry;
typedef struct AggHashTableData *AggHashTable;
typedef struct AggState typedef struct AggState
{ {
...@@ -674,13 +680,18 @@ typedef struct AggState ...@@ -674,13 +680,18 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */ List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */ int numaggs; /* length of list (could be zero!) */
FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */ AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerAgg peragg; /* per-Aggref working state */ MemoryContext aggcontext; /* memory context for long-lived data */
MemoryContext tup_cxt; /* context for per-output-tuple ExprContext *tmpcontext; /* econtext for input expressions */
* expressions */
MemoryContext agg_cxt[2]; /* pair of expression eval memory contexts */
int which_cxt; /* 0 or 1, indicates current agg_cxt */
bool agg_done; /* indicates completion of Agg scan */ bool agg_done; /* indicates completion of Agg scan */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
/* these fields are used in AGG_HASHED mode: */
AggHashTable hashtable; /* hash table with one entry per group */
bool table_filled; /* hash table filled yet? */
AggHashEntry next_hash_entry; /* next entry in current chain */
int next_hash_bucket; /* next chain */
} AggState; } AggState;
/* --------------------- /* ---------------------
...@@ -691,9 +702,8 @@ typedef struct GroupState ...@@ -691,9 +702,8 @@ typedef struct GroupState
{ {
CommonScanState csstate; /* its first field is NodeTag */ CommonScanState csstate; /* its first field is NodeTag */
FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */
bool grp_useFirstTuple; /* first tuple not processed yet */
bool grp_done;
HeapTuple grp_firstTuple; /* copy of first tuple of current group */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */
bool grp_done; /* indicates completion of Group scan */
} GroupState; } GroupState;
/* ---------------- /* ----------------
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: plannodes.h,v 1.59 2002/11/06 00:00:44 tgl Exp $ * $Id: plannodes.h,v 1.60 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -349,6 +349,7 @@ typedef struct Agg ...@@ -349,6 +349,7 @@ typedef struct Agg
AggStrategy aggstrategy; AggStrategy aggstrategy;
int numCols; /* number of grouping columns */ int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */ AttrNumber *grpColIdx; /* their indexes in the target list */
long numGroups; /* estimated number of groups in input */
AggState *aggstate; AggState *aggstate;
} Agg; } Agg;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: clauses.h,v 1.54 2002/09/11 14:48:55 tgl Exp $ * $Id: clauses.h,v 1.55 2002/11/06 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -40,6 +40,7 @@ extern Expr *make_ands_explicit(List *andclauses); ...@@ -40,6 +40,7 @@ extern Expr *make_ands_explicit(List *andclauses);
extern List *make_ands_implicit(Expr *clause); extern List *make_ands_implicit(Expr *clause);
extern bool contain_agg_clause(Node *clause); extern bool contain_agg_clause(Node *clause);
extern bool contain_distinct_agg_clause(Node *clause);
extern List *pull_agg_clause(Node *clause); extern List *pull_agg_clause(Node *clause);
extern bool expression_returns_set(Node *clause); extern bool expression_returns_set(Node *clause);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment