Commit addc42c3 authored by Tom Lane

Create the planner mechanism for optimizing simple MIN and MAX queries

into indexscans on matching indexes.  For the moment, it only handles
int4 and text datatypes; next step is to add a column to pg_aggregate
so that all MIN/MAX aggregates can be handled.  Per my recent proposal.
parent c3294f1c
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.172 2005/03/28 00:58:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.173 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -54,7 +54,6 @@
((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)
static List *group_clauses_by_indexkey(IndexOptInfo *index);
static List *group_clauses_by_indexkey_for_join(Query *root,
IndexOptInfo *index,
Relids outer_relids,
......@@ -72,8 +71,6 @@ static bool pred_test_simple_clause(Expr *predicate, Node *clause);
static Relids indexable_outerrelids(IndexOptInfo *index);
static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index,
List *clausegroups);
static bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opclass,
......@@ -234,7 +231,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
* clauses matching column C, because the executor couldn't use them anyway.
* Therefore, there are no empty sublists in the result.
*/
static List *
List *
group_clauses_by_indexkey(IndexOptInfo *index)
{
List *clausegroup_list = NIL;
......@@ -1774,7 +1771,7 @@ make_expr_from_indexclauses(List *indexclauses)
* indexcol: the column number of the index (counting from 0)
* index: the index of interest
*/
static bool
bool
match_index_to_operand(Node *operand,
int indexcol,
IndexOptInfo *index)
......
......@@ -4,7 +4,7 @@
# Makefile for optimizer/plan
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.12 2003/11/29 19:51:50 pgsql Exp $
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.13 2005/04/11 23:06:55 tgl Exp $
#
#-------------------------------------------------------------------------
......@@ -12,7 +12,8 @@ subdir = src/backend/optimizer/plan
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o
OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
setrefs.o subselect.o
all: SUBSYS.o
......
/*-------------------------------------------------------------------------
*
* planagg.c
* Special planning for aggregate queries.
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.1 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/subselect.h"
#include "parser/parsetree.h"
#include "parser/parse_clause.h"
#include "parser/parse_expr.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
/*
 * MinMaxAggInfo - working state for one distinct MIN/MAX aggregate call.
 *
 * find_minmax_aggs_walker() creates one zero-filled entry per distinct
 * (aggfnoid, target) pair and sets aggfnoid, aggsortop and target.
 * build_minmax_path() later sets path and pathcost, and make_agg_subplan()
 * sets param once the replacement subplan has been built.
 */
typedef struct
{
Oid aggfnoid; /* pg_proc Oid of the aggregate function */
Oid aggsortop; /* Oid of the aggregate's associated sort operator */
Expr *target; /* the expression being aggregated (the Aggref's argument) */
IndexPath *path; /* chosen index access path; NULL until build_minmax_path runs */
Cost pathcost; /* estimated cost of fetching just the first row via path */
Param *param; /* Param that stands for the subplan's output value */
} MinMaxAggInfo;
/*
 * Forward declarations for the file-local helpers.  The walker and mutator
 * take a "List **" context that points at the list of MinMaxAggInfo entries.
 */
static bool find_minmax_aggs_walker(Node *node, List **context);
static bool build_minmax_path(Query *root, RelOptInfo *rel,
MinMaxAggInfo *info);
static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
IndexOptInfo *index, int indexcol);
static void make_agg_subplan(Query *root, MinMaxAggInfo *info,
List *constant_quals);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid fetch_agg_sort_op(Oid aggfnoid);
/*
 * optimize_minmax_aggregates
 *	  Try to compute MIN/MAX aggregates with index scans instead of an Agg.
 *
 * The idea is to turn each MIN/MAX call into a sub-select shaped like
 *		(SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
 * which, when tab.col has a usable index, can beat the generic plan that
 * has to read every input row.
 *
 * Inputs are the Query, its preprocessed targetlist, and the best Path that
 * was found for feeding an ordinary Agg node.  A Plan is returned only when
 * every aggregate in the query can be handled this way AND the estimated
 * cost does not exceed that of the generic aggregation; in all other cases
 * the result is NULL and the caller should plan the query normally.
 */
Plan *
optimize_minmax_aggregates(Query *root, List *tlist, Path *best_path)
{
	RangeTblRef *rtref;
	RangeTblEntry *rtentry;
	RelOptInfo *baserel;
	List	   *minmax_aggs = NIL;
	List	   *gating_quals = NIL;
	ListCell   *lc;
	Cost		index_cost = 0;
	Path		generic_agg;
	QualCost	tlist_eval_cost;
	Node	   *having;
	Plan	   *result;

	/* A query without aggregates is of no interest here */
	if (!root->hasAggs)
		return NULL;

	Assert(!root->setOperations);		/* shouldn't get here if a setop */
	Assert(root->rowMarks == NIL);		/* nor if FOR UPDATE */

	/*
	 * GROUP BY is not supported: the available grouping implementations
	 * read all the rows anyway, so optimizing MIN/MAX would gain little.
	 */
	if (root->groupClause)
		return NULL;

	/*
	 * The FROM list must consist of exactly one plain table.  Join
	 * conditions can't be handled reasonably.  (Cartesian-product joins
	 * could perhaps be supported, but it hardly seems worth the trouble.)
	 */
	Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
	if (list_length(root->jointree->fromlist) != 1)
		return NULL;
	rtref = (RangeTblRef *) linitial(root->jointree->fromlist);
	if (!IsA(rtref, RangeTblRef))
		return NULL;
	rtentry = rt_fetch(rtref->rtindex, root->rtable);
	if (rtentry->rtekind != RTE_RELATION)
		return NULL;
	baserel = find_base_rel(root, rtref->rtindex);

	/*
	 * Subplans or volatile functions in WHERE also disqualify the query.
	 * That may be overly paranoid, but it is not entirely clear that the
	 * transformation is safe in their presence.
	 */
	if (contain_subplans(root->jointree->quals) ||
		contain_volatile_functions(root->jointree->quals))
		return NULL;

	/*
	 * This optimization applies only occasionally, so the work is split
	 * into passes that let us bail out as cheaply as possible:
	 *
	 * 1. Collect every aggregate in the tlist and HAVING qual, checking
	 *	  that each one is a MIN/MAX aggregate.
	 * 2. For each aggregate, look for an index that can serve it and build
	 *	  an IndexPath.  (Partial success is useless, because the remaining
	 *	  aggregates would force a scan of all rows anyway.)
	 * 3. Having costs for all of them, compare against the generic method;
	 *	  only if we win do we actually build Plans.
	 */

	/* Pass 1: collect the aggregates */
	if (find_minmax_aggs_walker((Node *) tlist, &minmax_aggs) ||
		find_minmax_aggs_walker(root->havingQual, &minmax_aggs))
		return NULL;

	/* Pass 2: find an index path for each of them */
	foreach(lc, minmax_aggs)
	{
		MinMaxAggInfo *agginfo = (MinMaxAggInfo *) lfirst(lc);

		if (!build_minmax_path(root, baserel, agginfo))
			return NULL;
		index_cost += agginfo->pathcost;
	}

	/*
	 * Compare costs.  Targetlist evaluation cost is left out on both
	 * sides: best_path's cost doesn't include it, and it should be the
	 * same whichever method is used.
	 */
	cost_agg(&generic_agg, root, AGG_PLAIN, list_length(minmax_aggs),
			 0, 0,
			 best_path->startup_cost, best_path->total_cost,
			 best_path->parent->rows);

	if (generic_agg.total_cost < index_cost)
		return NULL;			/* too expensive */

	/*
	 * We are committed to the optimized plan now.  query_planner may have
	 * pulled non-variable WHERE clauses out of the basic plan; if so they
	 * sit in a gating Result node on top of best_path.  Such clauses must
	 * be re-applied as a gating Result within each sub-plan to keep the
	 * semantics right.  (Ordinary WHERE clauses get into the sub-plans via
	 * create_plan.)
	 */
	if (IsA(best_path, ResultPath))
	{
		ResultPath *gating = (ResultPath *) best_path;

		Assert(gating->subpath != NULL);
		gating_quals = gating->constantqual;
	}

	/* Pass 3: build the subplans and their output Params */
	foreach(lc, minmax_aggs)
	{
		MinMaxAggInfo *agginfo = (MinMaxAggInfo *) lfirst(lc);

		make_agg_subplan(root, agginfo, gating_quals);
	}

	/* Make the targetlist and HAVING qual refer to the subplan outputs */
	tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
													  &minmax_aggs);
	having = replace_aggs_with_params_mutator(root->havingQual,
											  &minmax_aggs);

	/* The top of the plan is nothing more than a Result node */
	result = (Plan *) make_result(tlist, having, NULL);

	/* make_result did the rest; add in the tlist's evaluation cost */
	cost_qual_eval(&tlist_eval_cost, tlist);
	result->startup_cost += tlist_eval_cost.startup;
	result->total_cost += tlist_eval_cost.startup + tlist_eval_cost.per_tuple;

	return result;
}
/*
 * find_minmax_aggs_walker
 *	  Walk an expression tree looking at Aggref nodes; verify that every
 *	  one of them is a MIN/MAX aggregate and collect the distinct calls.
 *
 * The return value is TRUE when an aggregate that is NOT MIN/MAX turns up,
 * and FALSE otherwise.  The sense looks inverted, but it is what we need:
 * expression_tree_walker stops scanning as soon as TRUE is returned.
 *
 * Each distinct aggregate found is appended to the list at *context, which
 * the caller must have initialized to NIL.
 *
 * Subqueries are not descended into, so this must only be applied after
 * sublinks have been reduced to subplans.  Outer-level aggregate references
 * must not be present either.
 */
static bool
find_minmax_aggs_walker(Node *node, List **context)
{
	Aggref	   *agg;
	Oid			sortop;
	MinMaxAggInfo *entry;
	ListCell   *lc;

	if (node == NULL)
		return false;

	/* Anything that isn't an Aggref is just recursed through */
	if (!IsA(node, Aggref))
	{
		Assert(!IsA(node, SubLink));
		return expression_tree_walker(node, find_minmax_aggs_walker,
									  (void *) context);
	}

	agg = (Aggref *) node;
	Assert(agg->agglevelsup == 0);

	/* foo(*) is surely not optimizable */
	if (agg->aggstar)
		return true;

	/* note: whether DISTINCT was specified makes no difference to us */

	sortop = fetch_agg_sort_op(agg->aggfnoid);
	if (!OidIsValid(sortop))
		return true;			/* not a MIN/MAX aggregate */

	/* If an identical call is already recorded, there's nothing to add */
	foreach(lc, *context)
	{
		entry = (MinMaxAggInfo *) lfirst(lc);
		if (entry->aggfnoid == agg->aggfnoid &&
			equal(entry->target, agg->target))
			return false;
	}

	/* First sighting: make a new zeroed entry and append it */
	entry = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
	entry->aggfnoid = agg->aggfnoid;
	entry->aggsortop = sortop;
	entry->target = agg->target;
	*context = lappend(*context, entry);

	/*
	 * The aggregate's argument cannot contain further aggregates, so
	 * there is no point in recursing into it.
	 */
	return false;
}
/*
* build_minmax_path
* Given a MIN/MAX aggregate, try to find an index it can be optimized
* with. Build a Path describing the best such index path.
*
* Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
* is filled in.
*
* XXX look at sharing more code with indxpath.c.
*
* Note: check_partial_indexes() must have been run previously.
*/
static bool
build_minmax_path(Query *root, RelOptInfo *rel, MinMaxAggInfo *info)
{
IndexPath *best_path = NULL;
Cost best_cost = 0;
ListCell *l;
foreach(l, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
ScanDirection indexscandir = NoMovementScanDirection;
int indexcol;
int prevcol;
List *restrictclauses;
IndexPath *new_path;
Cost new_cost;
/* Ignore non-btree indexes */
if (index->relam != BTREE_AM_OID)
continue;
/* Ignore partial indexes that do not match the query */
if (index->indpred != NIL && !index->predOK)
continue;
/*
* Look for a match to one of the index columns. (In a stupidly
* designed index, there could be multiple matches, but we only
* care about the first one.)
*/
for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
{
indexscandir = match_agg_to_index_col(info, index, indexcol);
if (!ScanDirectionIsNoMovement(indexscandir))
break;
}
if (ScanDirectionIsNoMovement(indexscandir))
continue;
/*
* If the match is not at the first index column, we have to verify
* that there are "x = something" restrictions on all the earlier
* index columns. Since we'll need the restrictclauses list anyway
* to build the path, it's convenient to extract that first and then
* look through it for the equality restrictions.
*/
restrictclauses = group_clauses_by_indexkey(index);
if (list_length(restrictclauses) < indexcol)
continue; /* definitely haven't got enough */
for (prevcol = 0; prevcol < indexcol; prevcol++)
{
List *rinfos = (List *) list_nth(restrictclauses, prevcol);
ListCell *ll;
foreach(ll, rinfos)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
int strategy;
Assert(is_opclause(rinfo->clause));
strategy =
get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
index->classlist[prevcol]);
if (strategy == BTEqualStrategyNumber)
break;
}
if (ll == NULL)
break; /* none are Equal for this index col */
}
if (prevcol < indexcol)
continue; /* didn't find all Equal clauses */
/*
* Build the access path. We don't bother marking it with pathkeys.
*/
new_path = create_index_path(root, index,
restrictclauses,
NIL,
indexscandir);
/*
* Estimate actual cost of fetching just one row.
*/
if (new_path->rows > 1.0)
new_cost = new_path->path.startup_cost +
(new_path->path.total_cost - new_path->path.startup_cost)
* 1.0 / new_path->rows;
else
new_cost = new_path->path.total_cost;
/*
* Keep if first or if cheaper than previous best.
*/
if (best_path == NULL || new_cost < best_cost)
{
best_path = new_path;
best_cost = new_cost;
}
}
info->path = best_path;
info->pathcost = best_cost;
return (best_path != NULL);
}
/*
 * match_agg_to_index_col
 *	  Decide whether an aggregate can be computed from a given index column.
 *
 * Two things are required for a match: the aggregate's argument must be the
 * same expression as the index column, and the aggregate's sort operator
 * must be the LessThan or GreaterThan member of the column's opclass.
 *
 * The result tells which way to scan: ForwardScanDirection for a LessThan
 * match, BackwardScanDirection for a GreaterThan match, and
 * NoMovementScanDirection when the column does not match at all.
 */
static ScanDirection
match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
{
	/* The aggregated expression has to be what the column indexes */
	if (!match_index_to_operand((Node *) info->target, indexcol, index))
		return NoMovementScanDirection;

	/* See which opclass member, if any, the sort operator is */
	switch (get_op_opclass_strategy(info->aggsortop,
									index->classlist[indexcol]))
	{
		case BTLessStrategyNumber:
			return ForwardScanDirection;
		case BTGreaterStrategyNumber:
			return BackwardScanDirection;
		default:
			return NoMovementScanDirection;
	}
}
/*
 * make_agg_subplan
 *	  Build the InitPlan that replaces one MIN/MAX aggregate call.
 *
 * root is the original query, info describes the aggregate (info->path must
 * already have been set by build_minmax_path), and constant_quals is the
 * list of non-variable WHERE clauses, if any, that must gate the subplan.
 *
 * On return, info->param is the Param representing the subplan's output.
 */
static void
make_agg_subplan(Query *root, MinMaxAggInfo *info, List *constant_quals)
{
	Query	   *subquery;
	Plan	   *plan;
	TargetEntry *tle;
	SortClause *sortcl;
	NullTest   *ntest;

	/*
	 * Generate a suitably modified Query node.  Much of the work here is
	 * probably unnecessary in the normal case, but we want to make it look
	 * good if someone tries to EXPLAIN the result.
	 */
	subquery = (Query *) copyObject(root);
	subquery->commandType = CMD_SELECT;
	subquery->resultRelation = 0;
	subquery->resultRelations = NIL;
	subquery->into = NULL;
	subquery->hasAggs = false;
	subquery->groupClause = NIL;
	subquery->havingQual = NULL;
	subquery->hasHavingQual = false;
	subquery->distinctClause = NIL;

	/* single tlist entry that is the aggregate target */
	tle = makeTargetEntry(copyObject(info->target),
						  1,
						  pstrdup("agg_target"),
						  false);
	subquery->targetList = list_make1(tle);

	/* set up the appropriate ORDER BY entry */
	sortcl = makeNode(SortClause);
	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subquery->targetList);
	sortcl->sortop = info->aggsortop;
	subquery->sortClause = list_make1(sortcl);

	/* set up LIMIT 1 */
	subquery->limitOffset = NULL;
	subquery->limitCount = (Node *) makeConst(INT4OID, sizeof(int4),
											  Int32GetDatum(1),
											  false, true);

	/*
	 * Generate the plan for the subquery.  We already have a Path for
	 * the basic indexscan, but we have to convert it to a Plan and
	 * attach a LIMIT node above it.
	 */
	plan = create_plan(subquery, (Path *) info->path);

	plan->targetlist = copyObject(subquery->targetList);

	/*
	 * We must add a "target IS NOT NULL" filter to the scan.  A btree index
	 * contains entries for NULLs, and the scan can reach them before any
	 * non-null entry (a backward scan starts at the high end, which is
	 * where NULLs sort).  Without the filter, LIMIT 1 would then hand back
	 * a NULL, contrary to the standard behavior of MIN/MAX, which ignore
	 * NULL inputs.
	 *
	 * XXX the selectivity of this filter is not reflected in the cost
	 * estimate made earlier; in most cases the fraction of NULLs isn't
	 * high enough to change the decision.
	 */
	ntest = makeNode(NullTest);
	ntest->nulltesttype = IS_NOT_NULL;
	ntest->arg = copyObject(info->target);

	plan->qual = lcons(ntest, plan->qual);

	/*
	 * Non-variable quals go into a gating Result above the scan.  This is
	 * done at the Plan stage, rather than by wrapping the Path, so that
	 * the IS NOT NULL filter above lands on the scan node itself.
	 */
	if (constant_quals)
		plan = (Plan *) make_result(copyObject(plan->targetlist),
									copyObject(constant_quals),
									plan);

	plan = (Plan *) make_limit(plan,
							   subquery->limitOffset,
							   subquery->limitCount);

	/*
	 * Convert the plan into an InitPlan, and make a Param for its result.
	 */
	info->param = SS_make_initplan_from_plan(subquery, plan,
											 exprType((Node *) tle->expr),
											 -1);
}
/*
 * replace_aggs_with_params_mutator
 *	  Copy an expression tree, substituting for every Aggref the Param
 *	  that carries the output of its replacement subplan.
 *
 * *context is the list of MinMaxAggInfo entries; failing to find an entry
 * for some Aggref is reported as an error.
 */
static Node *
replace_aggs_with_params_mutator(Node *node, List **context)
{
	Aggref	   *agg;
	ListCell   *lc;

	if (node == NULL)
		return NULL;

	/* Everything except Aggrefs is simply copied recursively */
	if (!IsA(node, Aggref))
	{
		Assert(!IsA(node, SubLink));
		return expression_tree_mutator(node, replace_aggs_with_params_mutator,
									   (void *) context);
	}

	/* Look up the entry recorded for this aggregate call */
	agg = (Aggref *) node;
	foreach(lc, *context)
	{
		MinMaxAggInfo *entry = (MinMaxAggInfo *) lfirst(lc);

		if (entry->aggfnoid == agg->aggfnoid &&
			equal(entry->target, agg->target))
			return (Node *) entry->param;
	}

	elog(ERROR, "failed to re-find aggregate info record");
	return NULL;				/* not reached; keeps the compiler quiet */
}
/*
 * fetch_agg_sort_op
 *	  Look up the sort operator associated with an aggregate function.
 *
 * The result is the operator's OID, or InvalidOid when the aggregate has
 * no associated sort operator.
 */
static Oid
fetch_agg_sort_op(Oid aggfnoid)
{
#ifdef NOT_YET
	HeapTuple	tup;
	Oid			sortop;

	/* look the aggregate up in pg_aggregate */
	tup = SearchSysCache(AGGFNOID,
						 ObjectIdGetDatum(aggfnoid),
						 0, 0, 0);
	if (!HeapTupleIsValid(tup))
		return InvalidOid;
	sortop = ((Form_pg_aggregate) GETSTRUCT(tup))->aggsortop;
	ReleaseSysCache(tup);

	return sortop;
#else

	/*
	 * XXX stub implementation for testing: hardwire a few cases.
	 */
	switch (aggfnoid)
	{
		case 2132:				/* min(int4) -> int4lt */
			return 97;
		case 2116:				/* max(int4) -> int4gt */
			return 521;
		case 2145:				/* min(text) -> text_lt */
			return 664;
		case 2129:				/* max(text) -> text_gt */
			return 666;
		default:
			return InvalidOid;
	}
#endif
}
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.184 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -362,43 +362,12 @@ subquery_planner(Query *parse, double tuple_fraction)
/*
* If any subplans were generated, or if we're inside a subplan, build
* initPlan list and extParam/allParam sets for plan nodes.
* initPlan list and extParam/allParam sets for plan nodes, and attach
* the initPlans to the top plan node.
*/
if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
{
Cost initplan_cost = 0;
/* Prepare extParam/allParam sets for all nodes in tree */
SS_finalize_plan(plan, parse->rtable);
/*
* SS_finalize_plan doesn't handle initPlans, so we have to
* manually attach them to the topmost plan node, and add their
* extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
/* Return to outer subquery context */
PlannerQueryLevel--;
PlannerInitPlan = saved_initplan;
......@@ -692,6 +661,7 @@ grouping_planner(Query *parse, double tuple_fraction)
double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
Path *best_path;
double dNumGroups = 0;
long numGroups = 0;
AggClauseCounts agg_counts;
......@@ -959,114 +929,175 @@ grouping_planner(Query *parse, double tuple_fraction)
}
/*
* Select the best path and create a plan to execute it.
*
* If we are doing hashed grouping, we will always read all the input
* tuples, so use the cheapest-total path. Otherwise, trust
* query_planner's decision about which to use.
* Select the best path. If we are doing hashed grouping, we will
* always read all the input tuples, so use the cheapest-total
* path. Otherwise, trust query_planner's decision about which to use.
*/
if (sorted_path && !use_hashed_grouping)
{
result_plan = create_plan(parse, sorted_path);
current_pathkeys = sorted_path->pathkeys;
}
if (use_hashed_grouping || !sorted_path)
best_path = cheapest_path;
else
{
result_plan = create_plan(parse, cheapest_path);
current_pathkeys = cheapest_path->pathkeys;
}
best_path = sorted_path;
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
* Check to see if it's possible to optimize MIN/MAX aggregates.
* If so, we will forget all the work we did so far to choose a
* "regular" path ... but we had to do it anyway to be able to
* tell which way is cheaper.
*/
if (need_tlist_eval)
result_plan = optimize_minmax_aggregates(parse,
tlist,
best_path);
if (result_plan != NULL)
{
/*
* optimize_minmax_aggregates generated the full plan, with
* the right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
}
else
{
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
* Normal case --- create a plan according to query_planner's
* results.
*/
if (!is_projection_capable_plan(result_plan))
result_plan = create_plan(parse, best_path);
current_pathkeys = best_path->pathkeys;
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
*/
if (need_tlist_eval)
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
*/
result_plan->targetlist = sub_tlist;
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*/
result_plan->targetlist = sub_tlist;
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
/*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
if (parse->groupClause)
{
if (!pathkeys_contained_in(group_pathkeys,
current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
if (parse->groupClause)
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
}
else
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus
* the appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
......@@ -1076,75 +1107,34 @@ grouping_planner(Query *parse, double tuple_fraction)
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else
else if (parse->hasHavingQual)
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus the
* appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else if (parse->hasHavingQual)
{
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
} /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
/*
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.96 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -915,14 +915,16 @@ process_sublinks_mutator(Node *node, bool *isTopQual)
/*
* SS_finalize_plan - do final sublink processing for a completed Plan.
*
* This recursively computes the extParam and allParam sets
* for every Plan node in the given plan tree.
* This recursively computes the extParam and allParam sets for every Plan
* node in the given plan tree. It also attaches any generated InitPlans
* to the top plan node.
*/
void
SS_finalize_plan(Plan *plan, List *rtable)
{
Bitmapset *outer_params = NULL;
Bitmapset *valid_params = NULL;
Cost initplan_cost = 0;
int paramid;
ListCell *l;
......@@ -959,6 +961,33 @@ SS_finalize_plan(Plan *plan, List *rtable)
bms_free(outer_params);
bms_free(valid_params);
/*
* Finally, attach any initPlans to the topmost plan node,
* and add their extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
PlannerInitPlan = NIL; /* make sure they're not attached twice */
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
/*
......@@ -1165,3 +1194,75 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
return expression_tree_walker(node, finalize_primnode,
(void *) context);
}
/*
 * SS_make_initplan_from_plan - turn an already-built plan tree into an InitPlan
 *
 * The plan is expected to yield a single scalar of type resulttype /
 * resulttypmod.  It gets wrapped in an EXPR_SUBLINK SubPlan node, which is
 * appended to the initplan list of the current query level.
 *
 * Returns a freshly generated Param standing for the initplan's output; its
 * paramid is recorded as the sole entry of the SubPlan's setParam list.
 *
 * The caller must not have run SS_finalize_plan on the plan already, since
 * that is done here.
 */
Param *
SS_make_initplan_from_plan(Query *root, Plan *plan,
						   Oid resulttype, int32 resulttypmod)
{
	List	   *outer_initplans = PlannerInitPlan;
	SubPlan    *splan;
	Param	   *output_param;
	Bitmapset  *pending;
	int			id;

	/*
	 * Temporarily pretend we are one subquery level further down, with no
	 * initplans pending.  This exists only so that SS_finalize_plan does
	 * not get confused; the outer level's list is stashed away meanwhile.
	 */
	PlannerQueryLevel++;
	PlannerInitPlan = NIL;

	/* Have extParam/allParam computed for the nodes of the plan tree. */
	SS_finalize_plan(plan, root->rtable);

	/* Back to the outer subquery context. */
	PlannerInitPlan = outer_initplans;
	PlannerQueryLevel--;

	/*
	 * Build the SubPlan wrapper, give it a unique plan ID, and queue it on
	 * the outer level's list of InitPlans.
	 */
	splan = makeNode(SubPlan);
	splan->subLinkType = EXPR_SUBLINK;
	splan->plan = plan;
	splan->plan_id = PlannerPlanId++;
	splan->rtable = root->rtable;

	PlannerInitPlan = lappend(PlannerInitPlan, splan);

	/*
	 * Collect into parParam the params that the current query level has to
	 * pass down to this child plan.  (In current usage there probably
	 * aren't any.)  The walk runs over a scratch copy of plan->extParam,
	 * which is released afterwards.
	 */
	pending = bms_copy(plan->extParam);
	for (id = bms_first_member(pending);
		 id >= 0;
		 id = bms_first_member(pending))
	{
		PlannerParamItem *pitem = list_nth(PlannerParamList, id);

		/* only params belonging to the current level are of interest */
		if (pitem->abslevel != PlannerQueryLevel)
			continue;

		splan->parParam = lappend_int(splan->parParam, id);
	}
	bms_free(pending);

	/* Finally, manufacture the Param that carries the subplan's output. */
	output_param = generate_new_param(resulttype, resulttypmod);
	splan->setParam = list_make1_int(output_param->paramid);

	return output_param;
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.122 2005/03/31 22:46:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.123 2005/04/11 23:06:56 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
......@@ -53,6 +53,31 @@ op_in_opclass(Oid opno, Oid opclass)
0, 0);
}
/*
 * get_op_opclass_strategy
 *
 *		Report the strategy number that operator "opno" carries within
 *		operator class "opclass".
 *
 * The answer comes from the AMOPOPID syscache entry for the
 * (opno, opclass) pair.  When no such entry exists, i.e. the operator is
 * not a member of the opclass, the result is 0.
 */
int
get_op_opclass_strategy(Oid opno, Oid opclass)
{
	int			strategy = 0;	/* answer when there is no cache entry */
	HeapTuple	amop_entry;

	amop_entry = SearchSysCache(AMOPOPID,
								ObjectIdGetDatum(opno),
								ObjectIdGetDatum(opclass),
								0, 0);
	if (HeapTupleIsValid(amop_entry))
	{
		/* copy the field out before letting go of the cache entry */
		strategy = ((Form_pg_amop) GETSTRUCT(amop_entry))->amopstrategy;
		ReleaseSysCache(amop_entry);
	}

	return strategy;
}
/*
* get_op_opclass_properties
*
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.80 2005/03/27 06:29:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -38,8 +38,11 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
extern void create_index_paths(Query *root, RelOptInfo *rel);
extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype);
extern List *group_clauses_by_indexkey(IndexOptInfo *index);
extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index,
Expr *orsubclause);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern List *expand_indexqual_conditions(IndexOptInfo *index,
List *clausegroups);
extern void check_partial_indexes(Query *root, RelOptInfo *rel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,6 +23,12 @@
extern void query_planner(Query *root, List *tlist, double tuple_fraction,
Path **cheapest_path, Path **sorted_path);
/*
* prototypes for plan/planagg.c
*/
extern Plan *optimize_minmax_aggregates(Query *root, List *tlist,
Path *best_path);
/*
* prototypes for plan/createplan.c
*/
......
......@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.23 2004/12/31 22:03:36 pgsql Exp $
* $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.24 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -24,5 +24,7 @@ extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
extern Node *SS_replace_correlation_vars(Node *expr);
extern Node *SS_process_sublinks(Node *expr, bool isQual);
extern void SS_finalize_plan(Plan *plan, List *rtable);
extern Param *SS_make_initplan_from_plan(Query *root, Plan *plan,
Oid resulttype, int32 resulttypmod);
#endif /* SUBSELECT_H */
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.96 2005/03/31 22:46:27 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.97 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -25,6 +25,7 @@ typedef enum IOFuncSelector
} IOFuncSelector;
extern bool op_in_opclass(Oid opno, Oid opclass);
extern int get_op_opclass_strategy(Oid opno, Oid opclass);
extern void get_op_opclass_properties(Oid opno, Oid opclass,
int *strategy, Oid *subtype,
bool *recheck);
......
......@@ -293,3 +293,58 @@ FROM bool_test;
t | t | f | | f | t
(1 row)
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 < 42;
max
-----
41
(1 row)
select max(unique1) from tenk1 where unique1 > 42;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 > 42000;
max
-----
(1 row)
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
max
------
9033
(1 row)
select min(tenthous) from tenk1 where thousand = 33;
min
-----
33
(1 row)
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
f1 | gt
-------------+----
0 | 1
123456 |
-123456 | 0
2147483647 |
-2147483647 | 0
(5 rows)
......@@ -12,6 +12,7 @@ CREATE INDEX onek_stringu1 ON onek USING btree(stringu1 name_ops);
CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);
......
......@@ -180,3 +180,23 @@ SELECT
BOOL_OR(NOT b2) AS "f",
BOOL_OR(NOT b3) AS "t"
FROM bool_test;
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
select max(unique1) from tenk1 where unique1 < 42;
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42000;
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
select min(tenthous) from tenk1 where thousand = 33;
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
......@@ -20,6 +20,8 @@ CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment