Commit f6dba10e authored by Tom Lane's avatar Tom Lane

First phase of implementing hash-based grouping/aggregation.  An AGG plan
node now does its own grouping of the input rows, and has no need for a
preceding GROUP node in the plan pipeline.  This allows elimination of
the misnamed tuplePerGroup option for GROUP, and actually saves more code
in nodeGroup.c than it costs in nodeAgg.c, as well as being presumably
faster.  Restructure the API of query_planner so that we do not commit to
using a sorted or unsorted plan in query_planner; instead grouping_planner
makes the decision.  (Right now it isn't any smarter than query_planner
was, but that will change as soon as it has the option to select a hash-
based aggregation step.)  Despite all the hackery, no initdb needed since
only in-memory node types changed.
parent a8c18b98
......@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/commands/explain.c,v 1.89 2002/10/14 04:26:54 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/explain.c,v 1.90 2002/11/06 00:00:43 tgl Exp $
*
*/
......@@ -275,7 +275,21 @@ explain_outNode(StringInfo str, Plan *plan, Plan *outer_plan,
pname = "Group";
break;
case T_Agg:
pname = "Aggregate";
switch (((Agg *) plan)->aggstrategy)
{
case AGG_PLAIN:
pname = "Aggregate";
break;
case AGG_SORTED:
pname = "GroupAggregate";
break;
case AGG_HASHED:
pname = "HashAggregate";
break;
default:
pname = "Aggregate ???";
break;
}
break;
case T_Unique:
pname = "Unique";
......
This diff is collapsed.
......@@ -15,7 +15,7 @@
* locate group boundaries.
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.47 2002/06/20 20:29:28 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -31,147 +31,14 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
static TupleTableSlot *ExecGroupEveryTuple(Group *node);
static TupleTableSlot *ExecGroupOneTuple(Group *node);
/* ---------------------------------------
/*
* ExecGroup -
*
* There are two modes in which tuples are returned by ExecGroup. If
* tuplePerGroup is TRUE, every tuple from the same group will be
* returned, followed by a NULL at the end of each group. This is
* useful for Agg node which needs to aggregate over tuples of the same
* group. (eg. SELECT salary, count(*) FROM emp GROUP BY salary)
*
* If tuplePerGroup is FALSE, only one tuple per group is returned. The
* tuple returned contains only the group columns. NULL is returned only
* at the end when no more groups are present. This is useful when
* the query does not involve aggregates. (eg. SELECT salary FROM emp
* GROUP BY salary)
* ------------------------------------------
* Return one tuple for each group of matching input tuples.
*/
TupleTableSlot *
ExecGroup(Group *node)
{
if (node->tuplePerGroup)
return ExecGroupEveryTuple(node);
else
return ExecGroupOneTuple(node);
}
/*
* ExecGroupEveryTuple -
* return every tuple with a NULL between each group
*/
static TupleTableSlot *
ExecGroupEveryTuple(Group *node)
{
GroupState *grpstate;
EState *estate;
ExprContext *econtext;
TupleDesc tupdesc;
HeapTuple outerTuple = NULL;
HeapTuple firsttuple;
TupleTableSlot *outerslot;
ProjectionInfo *projInfo;
TupleTableSlot *resultSlot;
/*
* get state info from node
*/
grpstate = node->grpstate;
if (grpstate->grp_done)
return NULL;
estate = node->plan.state;
econtext = grpstate->csstate.cstate.cs_ExprContext;
tupdesc = ExecGetScanType(&grpstate->csstate);
/*
* We need not call ResetExprContext here because execTuplesMatch will
* reset the per-tuple memory context once per input tuple.
*/
/* if we haven't returned first tuple of a new group yet ... */
if (grpstate->grp_useFirstTuple)
{
grpstate->grp_useFirstTuple = FALSE;
/*
* note we rely on subplan to hold ownership of the tuple for as
* long as we need it; we don't copy it.
*/
ExecStoreTuple(grpstate->grp_firstTuple,
grpstate->csstate.css_ScanTupleSlot,
InvalidBuffer, false);
}
else
{
outerslot = ExecProcNode(outerPlan(node), (Plan *) node);
if (TupIsNull(outerslot))
{
grpstate->grp_done = TRUE;
return NULL;
}
outerTuple = outerslot->val;
firsttuple = grpstate->grp_firstTuple;
if (firsttuple == NULL)
{
/* this should occur on the first call only */
grpstate->grp_firstTuple = heap_copytuple(outerTuple);
}
else
{
/*
* Compare with first tuple and see if this tuple is of the
* same group.
*/
if (!execTuplesMatch(firsttuple, outerTuple,
tupdesc,
node->numCols, node->grpColIdx,
grpstate->eqfunctions,
econtext->ecxt_per_tuple_memory))
{
/*
* No; save the tuple to return it next time, and return
* NULL
*/
grpstate->grp_useFirstTuple = TRUE;
heap_freetuple(firsttuple);
grpstate->grp_firstTuple = heap_copytuple(outerTuple);
return NULL; /* signifies the end of the group */
}
}
/*
* note we rely on subplan to hold ownership of the tuple for as
* long as we need it; we don't copy it.
*/
ExecStoreTuple(outerTuple,
grpstate->csstate.css_ScanTupleSlot,
InvalidBuffer, false);
}
/*
* form a projection tuple, store it in the result tuple slot and
* return it.
*/
projInfo = grpstate->csstate.cstate.cs_ProjInfo;
econtext->ecxt_scantuple = grpstate->csstate.css_ScanTupleSlot;
resultSlot = ExecProject(projInfo, NULL);
return resultSlot;
}
/*
* ExecGroupOneTuple -
* returns one tuple per group, a NULL at the end when there are no more
* tuples.
*/
static TupleTableSlot *
ExecGroupOneTuple(Group *node)
{
GroupState *grpstate;
EState *estate;
......@@ -198,10 +65,11 @@ ExecGroupOneTuple(Group *node)
* reset the per-tuple memory context once per input tuple.
*/
/* If we don't already have first tuple of group, fetch it */
/* this should occur on the first call only */
firsttuple = grpstate->grp_firstTuple;
if (firsttuple == NULL)
{
/* this should occur on the first call only */
outerslot = ExecProcNode(outerPlan(node), (Plan *) node);
if (TupIsNull(outerslot))
{
......@@ -213,7 +81,7 @@ ExecGroupOneTuple(Group *node)
}
/*
* find all tuples that belong to a group
* Scan over all tuples that belong to this group
*/
for (;;)
{
......@@ -239,22 +107,18 @@ ExecGroupOneTuple(Group *node)
}
/*
* form a projection tuple, store it in the result tuple slot and
* return it.
*/
projInfo = grpstate->csstate.cstate.cs_ProjInfo;
/*
* note we rely on subplan to hold ownership of the tuple for as long
* as we need it; we don't copy it.
* form a projection tuple based on the (copied) first tuple of the
* group, and store it in the result tuple slot.
*/
ExecStoreTuple(firsttuple,
grpstate->csstate.css_ScanTupleSlot,
InvalidBuffer, false);
InvalidBuffer,
false);
econtext->ecxt_scantuple = grpstate->csstate.css_ScanTupleSlot;
projInfo = grpstate->csstate.cstate.cs_ProjInfo;
resultSlot = ExecProject(projInfo, NULL);
/* save outerTuple if we are not done yet */
/* save first tuple of next group, if we are not done yet */
if (!grpstate->grp_done)
{
heap_freetuple(firsttuple);
......@@ -386,14 +250,14 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
}
/*****************************************************************************
* Code shared with nodeUnique.c
* Code shared with nodeUnique.c and nodeAgg.c
*****************************************************************************/
/*
* execTuplesMatch
* Return true if two tuples match in all the indicated fields.
* This is used to detect group boundaries in nodeGroup, and to
* decide whether two tuples are distinct or not in nodeUnique.
* This is used to detect group boundaries in nodeGroup and nodeAgg,
* and to decide whether two tuples are distinct or not in nodeUnique.
*
* tuple1, tuple2: the tuples to compare
* tupdesc: tuple descriptor applying to both tuples
......@@ -425,7 +289,8 @@ execTuplesMatch(HeapTuple tuple1,
* We cannot report a match without checking all the fields, but we
* can report a non-match as soon as we find unequal fields. So,
* start comparing at the last field (least significant sort key).
* That's the most likely to be different...
* That's the most likely to be different if we are dealing with
* sorted input.
*/
result = true;
......
......@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.214 2002/10/14 22:14:34 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -497,10 +497,10 @@ _copyGroup(Group *from)
CopyPlanFields((Plan *) from, (Plan *) newnode);
newnode->tuplePerGroup = from->tuplePerGroup;
newnode->numCols = from->numCols;
newnode->grpColIdx = palloc(from->numCols * sizeof(AttrNumber));
memcpy(newnode->grpColIdx, from->grpColIdx, from->numCols * sizeof(AttrNumber));
memcpy(newnode->grpColIdx, from->grpColIdx,
from->numCols * sizeof(AttrNumber));
return newnode;
}
......@@ -516,6 +516,15 @@ _copyAgg(Agg *from)
CopyPlanFields((Plan *) from, (Plan *) newnode);
newnode->aggstrategy = from->aggstrategy;
newnode->numCols = from->numCols;
if (from->numCols > 0)
{
newnode->grpColIdx = palloc(from->numCols * sizeof(AttrNumber));
memcpy(newnode->grpColIdx, from->grpColIdx,
from->numCols * sizeof(AttrNumber));
}
return newnode;
}
......@@ -1280,6 +1289,29 @@ _copyAppendPath(AppendPath *from)
return newnode;
}
/* ----------------
 *		_copyResultPath
 *
 *		Build and return a copy of the ResultPath 'from'.  The fields
 *		shared with the Path superclass are handled by CopyPathFields;
 *		the ResultPath-specific fields (subpath, constantqual) are
 *		copied with Node_Copy.
 * ----------------
 */
static ResultPath *
_copyResultPath(ResultPath *from)
{
ResultPath *newnode = makeNode(ResultPath);
/*
 * copy the node superclass fields (ResultPath is cast to Path for this)
 */
CopyPathFields((Path *) from, (Path *) newnode);
/*
 * copy remainder of node: the two fields that ResultPath adds to Path.
 * NOTE(review): Node_Copy presumably makes a deep copy of each field;
 * confirm against the macro definition.
 */
Node_Copy(from, newnode, subpath);
Node_Copy(from, newnode, constantqual);
return newnode;
}
/* ----------------
* CopyJoinPathFields
*
......@@ -2878,6 +2910,9 @@ copyObject(void *from)
case T_AppendPath:
retval = _copyAppendPath(from);
break;
case T_ResultPath:
retval = _copyResultPath(from);
break;
case T_NestPath:
retval = _copyNestPath(from);
break;
......
......@@ -20,7 +20,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.161 2002/10/14 22:14:34 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.162 2002/11/06 00:00:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -463,6 +463,18 @@ _equalAppendPath(AppendPath *a, AppendPath *b)
return true;
}
/*
 * _equalResultPath
 *	  Two ResultPaths are equal when their Path superclass fields match
 *	  and both their subpath and constantqual fields compare equal.
 *	  The checks short-circuit in that order.
 */
static bool
_equalResultPath(ResultPath *a, ResultPath *b)
{
	return _equalPath((Path *) a, (Path *) b) &&
		equal(a->subpath, b->subpath) &&
		equal(a->constantqual, b->constantqual);
}
static bool
_equalJoinPath(JoinPath *a, JoinPath *b)
{
......@@ -2103,6 +2115,9 @@ equal(void *a, void *b)
case T_AppendPath:
retval = _equalAppendPath(a, b);
break;
case T_ResultPath:
retval = _equalResultPath(a, b);
break;
case T_IndexOptInfo:
retval = _equalIndexOptInfo(a, b);
break;
......
......@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.176 2002/10/14 22:14:34 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $
*
* NOTES
* Every (plan) node in POSTGRES has an associated "out" routine which
......@@ -597,6 +597,8 @@ _outAgg(StringInfo str, Agg *node)
{
appendStringInfo(str, " AGG ");
_outPlanInfo(str, (Plan *) node);
appendStringInfo(str, " :aggstrategy %d :numCols %d ",
(int) node->aggstrategy, node->numCols);
}
static void
......@@ -604,11 +606,7 @@ _outGroup(StringInfo str, Group *node)
{
appendStringInfo(str, " GRP ");
_outPlanInfo(str, (Plan *) node);
/* the actual Group fields */
appendStringInfo(str, " :numCols %d :tuplePerGroup %s ",
node->numCols,
booltostr(node->tuplePerGroup));
appendStringInfo(str, " :numCols %d ", node->numCols);
}
static void
......@@ -1114,6 +1112,26 @@ _outAppendPath(StringInfo str, AppendPath *node)
_outNode(str, node->subpaths);
}
/*
 * _outResultPath
 *	  Append the text form of a ResultPath (a subclass of Path) to 'str':
 *	  the RESULTPATH label and the scalar Path fields first, followed by
 *	  the pathkeys, subpath and constantqual sub-nodes, each introduced
 *	  by its field label.
 */
static void
_outResultPath(StringInfo str, ResultPath *node)
{
	Path	   *base = &node->path;

	/* label plus the scalar fields inherited from Path */
	appendStringInfo(str,
					 " RESULTPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
					 base->pathtype,
					 base->startup_cost,
					 base->total_cost);
	_outNode(str, base->pathkeys);

	/* fields specific to ResultPath */
	appendStringInfo(str, " :subpath ");
	_outNode(str, node->subpath);

	appendStringInfo(str, " :constantqual ");
	_outNode(str, node->constantqual);
}
/*
* NestPath is a subclass of Path
*/
......@@ -1717,6 +1735,9 @@ _outNode(StringInfo str, void *obj)
case T_AppendPath:
_outAppendPath(str, obj);
break;
case T_ResultPath:
_outResultPath(str, obj);
break;
case T_NestPath:
_outNestPath(str, obj);
break;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.135 2002/10/14 22:14:34 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.136 2002/11/06 00:00:44 tgl Exp $
*
* NOTES
* Most of the read functions for plan nodes are tested. (In fact, they
......@@ -696,17 +696,6 @@ _readSort(void)
return local_node;
}
static Agg *
_readAgg(void)
{
Agg *local_node;
local_node = makeNode(Agg);
_getPlan((Plan *) local_node);
return local_node;
}
/* ----------------
* _readHash
*
......@@ -1880,6 +1869,45 @@ _readAppendPath(void)
return local_node;
}
/* ----------------
 *		_readResultPath
 *
 *		ResultPath is a subclass of Path.
 *
 *		Builds a ResultPath from the token stream.  Fields are consumed
 *		in a fixed order: pathtype, startup_cost, total_cost, pathkeys,
 *		subpath, constantqual.  Each field is preceded by a label token
 *		that is fetched and then discarded without being checked, so the
 *		input must present the fields in exactly this order.
 *
 *		The numeric fields are converted with atoi/atof, which do not
 *		report malformed input.
 * ----------------
 */
static ResultPath *
_readResultPath(void)
{
ResultPath *local_node;
char *token;
int length;
local_node = makeNode(ResultPath);
/* scalar fields inherited from Path: skip the label, convert the value */
token = pg_strtok(&length); /* get :pathtype */
token = pg_strtok(&length); /* now read it */
local_node->path.pathtype = atoi(token);
token = pg_strtok(&length); /* get :startup_cost */
token = pg_strtok(&length); /* now read it */
local_node->path.startup_cost = (Cost) atof(token);
token = pg_strtok(&length); /* get :total_cost */
token = pg_strtok(&length); /* now read it */
local_node->path.total_cost = (Cost) atof(token);
/* node-valued fields: skip the label, then let nodeRead fetch the value */
token = pg_strtok(&length); /* get :pathkeys */
local_node->path.pathkeys = nodeRead(true); /* now read it */
token = pg_strtok(&length); /* get :subpath */
local_node->subpath = nodeRead(true); /* now read it */
token = pg_strtok(&length); /* get :constantqual */
local_node->constantqual = nodeRead(true); /* now read it */
return local_node;
}
/* ----------------
* _readNestPath
*
......@@ -2196,8 +2224,6 @@ parsePlanString(void)
return_value = _readFromExpr();
else if (length == 8 && strncmp(token, "JOINEXPR", length) == 0)
return_value = _readJoinExpr();
else if (length == 3 && strncmp(token, "AGG", length) == 0)
return_value = _readAgg();
else if (length == 4 && strncmp(token, "HASH", length) == 0)
return_value = _readHash();
else if (length == 6 && strncmp(token, "RESDOM", length) == 0)
......@@ -2240,6 +2266,8 @@ parsePlanString(void)
return_value = _readTidPath();
else if (length == 10 && strncmp(token, "APPENDPATH", length) == 0)
return_value = _readAppendPath();
else if (length == 10 && strncmp(token, "RESULTPATH", length) == 0)
return_value = _readResultPath();
else if (length == 8 && strncmp(token, "NESTPATH", length) == 0)
return_value = _readNestPath();
else if (length == 9 && strncmp(token, "MERGEPATH", length) == 0)
......
......@@ -219,11 +219,9 @@ planner()
pull out constant quals, which can be used to gate execution of the
whole plan (if any are found, we make a top-level Result node
to do the gating)
make a simplified target list that only contains Vars, no expressions
---subplanner()
make list of base relations used in query
split up the qual into restrictions (a=1) and joins (b=c)
find qual clauses that enable merge and hash joins
make list of base relations used in query
split up the qual into restrictions (a=1) and joins (b=c)
find qual clauses that enable merge and hash joins
----make_one_rel()
set_base_rel_pathlist()
find scan and all index paths for each base relation
......@@ -239,7 +237,7 @@ planner()
cheapest path for each newly constructed joinrel.
Loop back if this wasn't the top join level.
Back at query_planner:
put back constant quals and non-simplified target list
put back any constant quals by adding a Result node
Back at grouping_planner:
do grouping(GROUP)
do aggregates
......@@ -257,8 +255,11 @@ RelOptInfo - a relation or joined relations
JoinInfo - join clauses, including the relids needed for the join
Path - every way to generate a RelOptInfo(sequential,index,joins)
SeqScan - a plain Path node with nodeTag = T_SeqScan
SeqScan - a plain Path node with pathtype = T_SeqScan
IndexPath - index scans
TidPath - scan by CTID
AppendPath - append multiple subpaths together
ResultPath - a Result plan (used for variable-free tlist or qual)
NestPath - nested-loop joins
MergePath - merge joins
HashPath - hash joins
......@@ -276,10 +277,10 @@ generated during the optimization process are marked with their sort order
It is also possible to avoid an explicit sort step to implement a user's
ORDER BY clause if the final path has the right ordering already, so the
sort ordering is of interest even at the top level. subplanner() will
sort ordering is of interest even at the top level. query_planner() will
look for the cheapest path with a sort order matching the desired order,
and will compare its cost to the cost of using the cheapest-overall path
and doing an explicit sort.
and grouping_planner() will compare its cost to the cost of using the
cheapest-overall path and doing an explicit sort.
When we are generating paths for a particular RelOptInfo, we discard a path
if it is more expensive than another known path that has the same or better
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_misc.c,v 1.34 2002/09/04 20:31:20 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.35 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -19,19 +19,17 @@
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
*/
#include "postgres.h"
#include "optimizer/geqo_misc.h"
#include "nodes/print.h"
#ifdef GEQO_DEBUG
static float avg_pool(Pool *pool);
/* avg_pool
*
/*
* avg_pool
*/
static float
avg_pool(Pool *pool)
......@@ -81,7 +79,6 @@ print_pool(FILE *fp, Pool *pool, int start, int stop)
/* print_gen
*
* printout for chromosome: best, worst, mean, average
*
*/
void
print_gen(FILE *fp, Pool *pool, int generation)
......@@ -121,133 +118,4 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene)
fprintf(fp, "\n");
}
/*************************************************************
Debug output subroutines
*************************************************************/
void
geqo_print_joinclauses(Query *root, List *clauses)
{
List *l;
foreach(l, clauses)
{
RestrictInfo *c = lfirst(l);
print_expr((Node *) c->clause, root->rtable);
if (lnext(l))
printf(" ");
}
}
void
geqo_print_path(Query *root, Path *path, int indent)
{
char *ptype = NULL;
JoinPath *jp;
bool join = false;
int i;
for (i = 0; i < indent; i++)
printf("\t");
switch (nodeTag(path))
{
case T_Path:
ptype = "SeqScan";
join = false;
break;
case T_IndexPath:
ptype = "IdxScan";
join = false;
break;
case T_NestPath:
ptype = "Nestloop";
join = true;
break;
case T_MergePath:
ptype = "MergeJoin";
join = true;
break;
case T_HashPath:
ptype = "HashJoin";
join = true;
break;
default:
break;
}
if (join)
{
jp = (JoinPath *) path;
printf("%s rows=%.0f cost=%.2f..%.2f\n",
ptype, path->parent->rows,
path->startup_cost, path->total_cost);
switch (nodeTag(path))
{
case T_MergePath:
case T_HashPath:
for (i = 0; i < indent + 1; i++)
printf("\t");
printf(" clauses=(");
geqo_print_joinclauses(root, jp->joinrestrictinfo);
printf(")\n");
if (nodeTag(path) == T_MergePath)
{
MergePath *mp = (MergePath *) path;
if (mp->outersortkeys || mp->innersortkeys)
{
for (i = 0; i < indent + 1; i++)
printf("\t");
printf(" sortouter=%d sortinner=%d\n",
((mp->outersortkeys) ? 1 : 0),
((mp->innersortkeys) ? 1 : 0));
}
}
break;
default:
break;
}
geqo_print_path(root, jp->outerjoinpath, indent + 1);
geqo_print_path(root, jp->innerjoinpath, indent + 1);
}
else
{
int relid = lfirsti(path->parent->relids);
printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
ptype, relid, path->parent->rows,
path->startup_cost, path->total_cost);
if (IsA(path, IndexPath))
{
printf(" pathkeys=");
print_pathkeys(path->pathkeys, root->rtable);
}
}
}
void
geqo_print_rel(Query *root, RelOptInfo *rel)
{
List *l;
printf("______________________________\n");
printf("(");
foreach(l, rel->relids)
printf("%d ", lfirsti(l));
printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
printf("\tpath list:\n");
foreach(l, rel->pathlist)
geqo_print_path(root, lfirst(l), 1);
printf("\n\tcheapest startup path:\n");
geqo_print_path(root, rel->cheapest_startup_path, 1);
printf("\n\tcheapest total path:\n");
geqo_print_path(root, rel->cheapest_total_path, 1);
}
#endif /* GEQO_DEBUG */
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.88 2002/09/04 20:31:20 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.89 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -742,6 +742,14 @@ print_path(Query *root, Path *path, int indent)
ptype = "TidScan";
join = false;
break;
case T_AppendPath:
ptype = "Append";
join = false;
break;
case T_ResultPath:
ptype = "Result";
join = false;
break;
case T_NestPath:
ptype = "Nestloop";
join = true;
......@@ -762,10 +770,15 @@ print_path(Query *root, Path *path, int indent)
for (i = 0; i < indent; i++)
printf("\t");
printf("%s(", ptype);
print_relids(path->parent->relids);
printf(") rows=%.0f cost=%.2f..%.2f\n",
path->parent->rows, path->startup_cost, path->total_cost);
printf("%s", ptype);
if (path->parent)
{
printf("(");
print_relids(path->parent->relids);
printf(") rows=%.0f", path->parent->rows);
}
printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
if (path->pathkeys)
{
......@@ -785,7 +798,7 @@ print_path(Query *root, Path *path, int indent)
print_restrictclauses(root, jp->joinrestrictinfo);
printf("\n");
if (nodeTag(path) == T_MergePath)
if (IsA(path, MergePath))
{
MergePath *mp = (MergePath *) path;
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.119 2002/09/18 21:35:21 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -34,6 +34,7 @@
static Scan *create_scan_plan(Query *root, Path *best_path);
static Join *create_join_plan(Query *root, JoinPath *best_path);
static Append *create_append_plan(Query *root, AppendPath *best_path);
static Result *create_result_plan(Query *root, ResultPath *best_path);
static SeqScan *create_seqscan_plan(Path *best_path, List *tlist,
List *scan_clauses);
static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path,
......@@ -135,6 +136,10 @@ create_plan(Query *root, Path *best_path)
plan = (Plan *) create_append_plan(root,
(AppendPath *) best_path);
break;
case T_Result:
plan = (Plan *) create_result_plan(root,
(ResultPath *) best_path);
break;
default:
elog(ERROR, "create_plan: unknown pathtype %d",
best_path->pathtype);
......@@ -342,6 +347,35 @@ create_append_plan(Query *root, AppendPath *best_path)
return plan;
}
/*
 * create_result_plan
 *	  Build a Result plan node for 'best_path', recursively creating the
 *	  plan for its subpath when it has one.
 *
 *	  The target list comes from the path's parent rel if there is one;
 *	  otherwise it is left as NIL (to be filled in later).  The path's
 *	  constantqual is handed to make_result along with the subplan
 *	  (NULL when there is no subpath).
 *
 *	  Returns the new Result node.
 */
static Result *
create_result_plan(Query *root, ResultPath *best_path)
{
	List	   *tlist;
	Plan	   *subplan;

	/* no parent rel means no tlist yet; it will be filled in later */
	tlist = best_path->path.parent ?
		best_path->path.parent->targetlist : NIL;

	subplan = best_path->subpath ?
		create_plan(root, best_path->subpath) : NULL;

	return make_result(tlist, (Node *) best_path->constantqual, subplan);
}
/*****************************************************************************
*
......@@ -1605,11 +1639,16 @@ make_material(List *tlist, Plan *lefttree)
}
Agg *
make_agg(List *tlist, List *qual, Plan *lefttree)
make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
int ngrp, AttrNumber *grpColIdx, Plan *lefttree)
{
Agg *node = makeNode(Agg);
Plan *plan = &node->plan;
node->aggstrategy = aggstrategy;
node->numCols = ngrp;
node->grpColIdx = grpColIdx;
copy_plan_costsize(plan, lefttree);
/*
......@@ -1621,22 +1660,21 @@ make_agg(List *tlist, List *qual, Plan *lefttree)
length(pull_agg_clause((Node *) qual)));
/*
* We will produce a single output tuple if the input is not a Group,
* We will produce a single output tuple if not grouping,
* and a tuple per group otherwise. For now, estimate the number of
* groups as 10% of the number of tuples --- bogus, but how to do
* better? (Note we assume the input Group node is in "tuplePerGroup"
* mode, so it didn't reduce its row count already.)
* better?
*/
if (IsA(lefttree, Group))
if (aggstrategy == AGG_PLAIN)
{
plan->plan_rows *= 0.1;
if (plan->plan_rows < 1)
plan->plan_rows = 1;
plan->plan_rows = 1;
plan->startup_cost = plan->total_cost;
}
else
{
plan->plan_rows = 1;
plan->startup_cost = plan->total_cost;
plan->plan_rows *= 0.1;
if (plan->plan_rows < 1)
plan->plan_rows = 1;
}
plan->state = (EState *) NULL;
......@@ -1650,7 +1688,6 @@ make_agg(List *tlist, List *qual, Plan *lefttree)
Group *
make_group(List *tlist,
bool tuplePerGroup,
int ngrp,
AttrNumber *grpColIdx,
Plan *lefttree)
......@@ -1667,25 +1704,18 @@ make_group(List *tlist,
plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
/*
* If tuplePerGroup (which is named exactly backwards) is true, we
* will return all the input tuples, so the input node's row count is
* OK. Otherwise, we'll return only one tuple from each group. For
* now, estimate the number of groups as 10% of the number of tuples
* Estimate the number of groups as 10% of the number of tuples
* --- bogus, but how to do better?
*/
if (!tuplePerGroup)
{
plan->plan_rows *= 0.1;
if (plan->plan_rows < 1)
plan->plan_rows = 1;
}
plan->plan_rows *= 0.1;
if (plan->plan_rows < 1)
plan->plan_rows = 1;
plan->state = (EState *) NULL;
plan->qual = NULL;
plan->targetlist = tlist;
plan->lefttree = lefttree;
plan->righttree = (Plan *) NULL;
node->tuplePerGroup = tuplePerGroup;
node->numCols = ngrp;
node->grpColIdx = grpColIdx;
......@@ -1883,9 +1913,6 @@ make_result(List *tlist,
Result *node = makeNode(Result);
Plan *plan = &node->plan;
#ifdef NOT_USED
tlist = generate_fjoin(tlist);
#endif
if (subplan)
copy_plan_costsize(plan, subplan);
else
......@@ -1906,57 +1933,3 @@ make_result(List *tlist,
return node;
}
#ifdef NOT_USED
List *
generate_fjoin(List *tlist)
{
List tlistP;
List newTlist = NIL;
List fjoinList = NIL;
int nIters = 0;
/*
* Break the target list into elements with Iter nodes, and those
* without them.
*/
foreach(tlistP, tlist)
{
List tlistElem;
tlistElem = lfirst(tlistP);
if (IsA(lsecond(tlistElem), Iter))
{
nIters++;
fjoinList = lappend(fjoinList, tlistElem);
}
else
newTlist = lappend(newTlist, tlistElem);
}
/*
* if we have an Iter node then we need to flatten.
*/
if (nIters > 0)
{
List *inner;
List *tempList;
Fjoin *fjoinNode;
DatumPtr results = (DatumPtr) palloc(nIters * sizeof(Datum));
BoolPtr alwaysDone = (BoolPtr) palloc(nIters * sizeof(bool));
inner = lfirst(fjoinList);
fjoinList = lnext(fjoinList);
fjoinNode = (Fjoin) MakeFjoin(false,
nIters,
inner,
results,
alwaysDone);
tempList = lcons(fjoinNode, fjoinList);
newTlist = lappend(newTlist, tempList);
}
return newTlist;
return tlist; /* do nothing for now - ay 10/94 */
}
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.78 2002/06/20 20:29:31 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.79 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -405,7 +405,6 @@ create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*
*/
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths)
......@@ -433,6 +432,41 @@ create_append_path(RelOptInfo *rel, List *subpaths)
return pathnode;
}
/*
 * create_result_path
 *	  Build and return a ResultPath (pathtype T_Result) for 'rel', which
 *	  may be NULL.
 *
 *	  When a subpath is given, the new path inherits its pathkeys and
 *	  its startup and total costs.  Without a subpath, the path has no
 *	  pathkeys, zero startup cost, and a total cost of cpu_tuple_cost.
 */
ResultPath *
create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
{
	ResultPath *result = makeNode(ResultPath);

	result->path.pathtype = T_Result;
	result->path.parent = rel;	/* may be NULL */
	result->subpath = subpath;
	result->constantqual = constantqual;

	if (!subpath)
	{
		/* nothing underneath: unordered, and charge for one tuple */
		result->path.pathkeys = NIL;
		result->path.startup_cost = 0;
		result->path.total_cost = cpu_tuple_cost;
	}
	else
	{
		/* pass the subpath's ordering and costs straight through */
		result->path.pathkeys = subpath->pathkeys;
		result->path.startup_cost = subpath->startup_cost;
		result->path.total_cost = subpath->total_cost;
	}

	return result;
}
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: execnodes.h,v 1.75 2002/09/04 20:31:42 momjian Exp $
* $Id: execnodes.h,v 1.76 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -673,6 +673,8 @@ typedef struct AggState
CommonScanState csstate; /* its first field is NodeTag */
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
AggStatePerAgg peragg; /* per-Aggref working state */
MemoryContext tup_cxt; /* context for per-output-tuple
* expressions */
......@@ -691,7 +693,7 @@ typedef struct GroupState
FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */
bool grp_useFirstTuple; /* first tuple not processed yet */
bool grp_done;
HeapTuple grp_firstTuple;
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
} GroupState;
/* ----------------
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nodes.h,v 1.120 2002/10/11 04:16:44 momjian Exp $
* $Id: nodes.h,v 1.121 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -82,6 +82,7 @@ typedef enum NodeTag
T_HashPath,
T_TidPath,
T_AppendPath,
T_ResultPath,
T_PathKeyItem,
T_RestrictInfo,
T_JoinInfo,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: parsenodes.h,v 1.209 2002/10/14 22:14:35 tgl Exp $
* $Id: parsenodes.h,v 1.210 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -101,7 +101,7 @@ typedef struct Query
List *join_rel_list; /* list of join-relation RelOptInfos */
List *equi_key_list; /* list of lists of equijoined
* PathKeyItems */
List *query_pathkeys; /* pathkeys for query_planner()'s result */
List *query_pathkeys; /* desired pathkeys for query_planner() */
} Query;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: plannodes.h,v 1.58 2002/09/04 20:31:44 momjian Exp $
* $Id: plannodes.h,v 1.59 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -140,17 +140,23 @@ typedef struct Plan
* ===============
*/
/* all plan nodes "derive" from the Plan structure by having the
Plan structure as the first field. This ensures that everything works
when nodes are cast to Plan's. (node pointers are frequently cast to Plan*
when passed around generically in the executor */
/*
* all plan nodes "derive" from the Plan structure by having the
* Plan structure as the first field. This ensures that everything works
* when nodes are cast to Plan's. (node pointers are frequently cast to Plan*
* when passed around generically in the executor)
*/
/* ----------------
* Result node -
* If no outer plan, evaluate a variable-free targetlist.
* If outer plan, return tuples from outer plan that satisfy
* given quals (we can also do a level of projection)
* If outer plan, return tuples from outer plan (after a level of
* projection as shown by targetlist).
*
* If resconstantqual isn't NULL, it represents a one-time qualification
* test (i.e., one that doesn't depend on any variables from the outer plan,
* so needs to be evaluated only once).
* ----------------
*/
typedef struct Result
......@@ -318,30 +324,45 @@ typedef struct HashJoin
/* ---------------
* aggregate node
*
* An Agg node implements plain or grouped aggregation. For grouped
* aggregation, we can work with presorted input or unsorted input;
* the latter strategy uses an internal hashtable.
*
* Notice the lack of any direct info about the aggregate functions to be
* computed. They are found by scanning the node's tlist and quals during
* executor startup. (It is possible that there are no aggregate functions;
* this could happen if they get optimized away by constant-folding, or if
* we are using the Agg node to implement hash-based grouping.)
* ---------------
*/
/*
 * AggStrategy selects how an Agg node processes its input.  The values are
 * written out explicitly; they are the same ones the compiler would assign
 * implicitly (0, 1, 2).
 */
typedef enum AggStrategy
{
	AGG_PLAIN = 0,				/* simple agg across all input rows */
	AGG_SORTED = 1,				/* grouped agg, input must be sorted */
	AGG_HASHED = 2				/* grouped agg, use internal hashtable */
} AggStrategy;
/*
 * Plan node for plain or grouped aggregation.  The Plan struct comes first
 * so that a pointer to this node can be cast to Plan *.
 */
typedef struct Agg
{
Plan plan;
AggStrategy aggstrategy; /* AGG_PLAIN, AGG_SORTED or AGG_HASHED */
int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */
AggState *aggstate; /* NOTE(review): presumably the executor's run-time
 * state for this node -- confirm */
} Agg;
/* ---------------
 *		group node -
 *		Used for queries with GROUP BY (but no aggregates) specified.
 *		The input must be presorted according to the grouping columns.
 * ---------------
 */
typedef struct Group
{
	Plan		plan;			/* first field, so the node can be cast to
								 * Plan * */
	int			numCols;		/* number of grouping columns */
	AttrNumber *grpColIdx;		/* their indexes in the target list */
	GroupState *grpstate;		/* NOTE(review): presumably the executor's
								 * run-time state for this node -- confirm */
} Group;
......
/*-------------------------------------------------------------------------
*
* relation.h
* Definitions for internal planner nodes.
* Definitions for planner's internal data structures.
*
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: relation.h,v 1.67 2002/09/04 20:31:44 momjian Exp $
* $Id: relation.h,v 1.68 2002/11/06 00:00:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -402,6 +402,19 @@ typedef struct AppendPath
List *subpaths; /* list of component Paths */
} AppendPath;
/*
* ResultPath represents use of a Result plan node, either to compute a
* variable-free targetlist or to gate execution of a subplan with a
* one-time (variable-free) qual condition. Note that in the former case
* path.parent will be NULL; in the latter case it is copied from the subpath.
*/
typedef struct ResultPath
{
Path path; /* generic Path fields; create_result_path sets
 * path.pathtype to T_Result */
Path *subpath; /* input path; NULL when there is no input */
List *constantqual; /* qual list handed to create_result_path;
 * presumably the one-time (variable-free)
 * qual -- confirm */
} ResultPath;
/*
* All join-type paths share these fields.
*/
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_misc.h,v 1.21 2002/09/04 20:31:45 momjian Exp $
* $Id: geqo_misc.h,v 1.22 2002/11/06 00:00:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -32,9 +32,6 @@ extern void print_pool(FILE *fp, Pool *pool, int start, int stop);
extern void print_gen(FILE *fp, Pool *pool, int generation);
extern void print_edge_table(FILE *fp, Edge *edge_table, int num_gene);
extern void geqo_print_rel(Query *root, RelOptInfo *rel);
extern void geqo_print_path(Query *root, Path *path, int indent);
extern void geqo_print_joinclauses(Query *root, List *clauses);
#endif /* GEQO_DEBUG */
#endif /* GEQO_MISC_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: pathnode.h,v 1.44 2002/06/20 20:29:51 momjian Exp $
* $Id: pathnode.h,v 1.45 2002/11/06 00:00:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -35,6 +35,8 @@ extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
extern TidPath *create_tidscan_path(Query *root, RelOptInfo *rel,
List *tideval);
extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath,
List *constantqual);
extern Path *create_subqueryscan_path(RelOptInfo *rel);
extern Path *create_functionscan_path(Query *root, RelOptInfo *rel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: planmain.h,v 1.60 2002/09/04 20:31:45 momjian Exp $
* $Id: planmain.h,v 1.61 2002/11/06 00:00:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -20,7 +20,8 @@
/*
* prototypes for plan/planmain.c
*/
/*
 * query_planner has no return value.  NOTE(review): cheapest_path and
 * sorted_path are presumably output arguments through which the chosen
 * paths are handed back -- confirm against planmain.c.
 */
extern void query_planner(Query *root, List *tlist, double tuple_fraction,
			  Path **cheapest_path, Path **sorted_path);
/*
* prototypes for plan/createplan.c
......@@ -33,9 +34,10 @@ extern Sort *make_sort(Query *root, List *tlist,
Plan *lefttree, int keycount);
extern Sort *make_sort_from_pathkeys(Query *root, List *tlist,
Plan *lefttree, List *pathkeys);
/*
 * Constructors for Agg and Group plan nodes.  NOTE(review): ngrp and
 * grpColIdx presumably supply the node's grouping-column count and
 * target-list indexes -- confirm against createplan.c.
 */
extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
		 int ngrp, AttrNumber *grpColIdx, Plan *lefttree);
extern Group *make_group(List *tlist, int ngrp, AttrNumber *grpColIdx,
		   Plan *lefttree);
extern Material *make_material(List *tlist, Plan *lefttree);
extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList);
extern Limit *make_limit(List *tlist, Plan *lefttree,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment