Commit 8df08c84 authored by Tom Lane's avatar Tom Lane

Reimplement planner's handling of MIN/MAX aggregate optimization (again).

Instead of playing cute games with pathkeys, just build a direct
representation of the intended sub-select, and feed it through
query_planner to get a Path for the index access.  This is a bit slower
than 9.1's previous method, since we'll duplicate most of the overhead of
query_planner; but since the whole optimization only applies to rather
simple single-table queries, that probably won't be much of a problem in
practice.  The advantage is that we get to do the right thing when there's
a partial index that needs the implicit IS NOT NULL clause to be usable.
Also, although this makes planagg.c be a bit more closely tied to the
ordering of operations in grouping_planner, we can get rid of some coupling
to lower-level parts of the planner.  Per complaint from Marti Raudsepp.
parent 6d8096e2
......@@ -1930,22 +1930,6 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from)
return newnode;
}
/*
* _copyMinMaxAggInfo
*/
static MinMaxAggInfo *
_copyMinMaxAggInfo(MinMaxAggInfo *from)
{
MinMaxAggInfo *newnode = makeNode(MinMaxAggInfo);
COPY_SCALAR_FIELD(aggfnoid);
COPY_SCALAR_FIELD(aggsortop);
COPY_NODE_FIELD(target);
COPY_NODE_FIELD(pathkeys);
return newnode;
}
/* ****************************************************************
* parsenodes.h copy functions
* ****************************************************************
......@@ -4129,9 +4113,6 @@ copyObject(void *from)
case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from);
break;
case T_MinMaxAggInfo:
retval = _copyMinMaxAggInfo(from);
break;
/*
* VALUE NODES
......
......@@ -886,17 +886,6 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b)
return true;
}
static bool
_equalMinMaxAggInfo(MinMaxAggInfo *a, MinMaxAggInfo *b)
{
COMPARE_SCALAR_FIELD(aggfnoid);
COMPARE_SCALAR_FIELD(aggsortop);
COMPARE_NODE_FIELD(target);
COMPARE_NODE_FIELD(pathkeys);
return true;
}
/*
* Stuff from parsenodes.h
......@@ -2690,9 +2679,6 @@ equal(void *a, void *b)
case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b);
break;
case T_MinMaxAggInfo:
retval = _equalMinMaxAggInfo(a, b);
break;
case T_List:
case T_IntList:
......
......@@ -1914,7 +1914,10 @@ _outMinMaxAggInfo(StringInfo str, MinMaxAggInfo *node)
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggsortop);
WRITE_NODE_FIELD(target);
WRITE_NODE_FIELD(pathkeys);
/* We intentionally omit subroot --- too large, not interesting enough */
WRITE_NODE_FIELD(path);
WRITE_FLOAT_FIELD(pathcost, "%.2f");
WRITE_NODE_FIELD(param);
}
static void
......
......@@ -41,6 +41,13 @@
#define IsBooleanOpfamily(opfamily) \
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
/* Whether to use ScalarArrayOpExpr to build index qualifications */
typedef enum
{
SAOP_FORBID, /* Do not use ScalarArrayOpExpr */
SAOP_ALLOW, /* OK to use ScalarArrayOpExpr */
SAOP_REQUIRE /* Require ScalarArrayOpExpr */
} SaOpControl;
/* Whether we are looking for plain indexscan, bitmap scan, or either */
typedef enum
......@@ -78,6 +85,11 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
static List *group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses,
Relids outer_relids,
SaOpControl saop_control,
bool *found_clause);
static bool match_clause_to_indexcol(IndexOptInfo *index,
int indexcol,
RestrictInfo *rinfo,
......@@ -1060,7 +1072,7 @@ find_list_position(Node *node, List **nodelist)
* from multiple places. Defend against redundant outputs by using
* list_append_unique_ptr (pointer equality should be good enough).
*/
List *
static List *
group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses,
Relids outer_relids,
......
......@@ -905,39 +905,6 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
return pathkeys;
}
/****************************************************************************
* PATHKEYS AND AGGREGATES
****************************************************************************/
/*
* make_pathkeys_for_aggregate
* Generate a pathkeys list (always a 1-item list) that represents
* the sort order needed by a MIN/MAX aggregate
*
* This is only called before EquivalenceClass merging, so we can assume
* we are not supposed to canonicalize.
*/
List *
make_pathkeys_for_aggregate(PlannerInfo *root,
Expr *aggtarget,
Oid aggsortop)
{
PathKey *pathkey;
/*
* We arbitrarily set nulls_first to false. Actually, a MIN/MAX agg can
* use either nulls ordering option, but that is dealt with elsewhere.
*/
pathkey = make_pathkey_from_sortop(root,
aggtarget,
aggsortop,
false, /* nulls_first */
0,
true,
false);
return list_make1(pathkey);
}
/****************************************************************************
* PATHKEYS AND MERGECLAUSES
****************************************************************************/
......@@ -1407,11 +1374,10 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
* PATHKEY USEFULNESS CHECKS
*
* We only want to remember as many of the pathkeys of a path as have some
* potential use, which can include subsequent mergejoins, meeting the query's
* requested output ordering, or implementing MIN/MAX aggregates. This
* ensures that add_path() won't consider a path to have a usefully different
* ordering unless it really is useful. These routines check for usefulness
* of given pathkeys.
* potential use, either for subsequent mergejoins or for meeting the query's
* requested output ordering. This ensures that add_path() won't consider
* a path to have a usefully different ordering unless it really is useful.
* These routines check for usefulness of given pathkeys.
****************************************************************************/
/*
......@@ -1553,50 +1519,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
return 0; /* path ordering not useful */
}
/*
* pathkeys_useful_for_minmax
* Count the number of pathkeys that are useful for implementing
* some MIN/MAX aggregate.
*
* Like pathkeys_useful_for_ordering, this is a yes-or-no affair, but
* there could be several MIN/MAX aggregates and we can match to any one.
*
* We can't use pathkeys_contained_in() because we would like to match
* pathkeys regardless of the nulls_first setting. However, we know that
* MIN/MAX aggregates will have at most one item in their pathkeys, so it's
* not too complicated to match by brute force.
*/
static int
pathkeys_useful_for_minmax(PlannerInfo *root, List *pathkeys)
{
PathKey *pathkey;
ListCell *lc;
if (pathkeys == NIL)
return 0; /* unordered path */
pathkey = (PathKey *) linitial(pathkeys);
foreach(lc, root->minmax_aggs)
{
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
PathKey *mmpathkey;
/* Ignore minmax agg if its pathkey turned out to be redundant */
if (mminfo->pathkeys == NIL)
continue;
Assert(list_length(mminfo->pathkeys) == 1);
mmpathkey = (PathKey *) linitial(mminfo->pathkeys);
if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
mmpathkey->pk_strategy == pathkey->pk_strategy)
return 1;
}
return 0; /* path ordering not useful */
}
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
......@@ -1608,15 +1530,11 @@ truncate_useless_pathkeys(PlannerInfo *root,
{
int nuseful;
int nuseful2;
int nuseful3;
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
nuseful3 = pathkeys_useful_for_minmax(root, pathkeys);
if (nuseful3 > nuseful)
nuseful = nuseful3;
/*
* Note: not safe to modify input list destructively, but we can avoid
......@@ -1642,8 +1560,8 @@ truncate_useless_pathkeys(PlannerInfo *root,
*
* We could make the test more complex, for example checking to see if any of
* the joinclauses are really mergejoinable, but that likely wouldn't win
* often enough to repay the extra cycles. Queries with no join, sort, or
* aggregate at all are reasonably common, so this much work seems worthwhile.
* often enough to repay the extra cycles. Queries with neither a join nor
* a sort are reasonably common, though, so this much work seems worthwhile.
*/
bool
has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
......@@ -1652,7 +1570,5 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
return true; /* might be able to use pathkeys for merging */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
if (root->minmax_aggs != NIL)
return true; /* might be able to use them for MIN/MAX */
return false; /* definitely useless */
}
......@@ -5,7 +5,10 @@
*
* This module tries to replace MIN/MAX aggregate functions by subqueries
* of the form
* (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
* (SELECT col FROM tab
* WHERE col IS NOT NULL AND existing-quals
* ORDER BY col ASC/DESC
* LIMIT 1)
* Given a suitable index on tab.col, this can be much faster than the
* generic scan-all-the-rows aggregation plan. We can handle multiple
* MIN/MAX aggregates by generating multiple subqueries, and their
......@@ -26,41 +29,25 @@
#include "postgres.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/subselect.h"
#include "parser/parsetree.h"
#include "parser/parse_clause.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
/* Per-aggregate info during optimize_minmax_aggregates() */
typedef struct
{
MinMaxAggInfo *mminfo; /* info gathered by preprocessing */
Path *path; /* access path for ordered scan */
Cost pathcost; /* estimated cost to fetch first row */
Param *param; /* param for subplan's output */
} PrivateMMAggInfo;
static bool find_minmax_aggs_walker(Node *node, List **context);
static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel,
MinMaxAggInfo *mminfo);
static bool path_usable_for_agg(Path *path);
static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel,
PrivateMMAggInfo *info);
static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel,
PrivateMMAggInfo *info, Path *path);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
Oid eqop, Oid sortop, bool nulls_first);
static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo);
static Node *replace_aggs_with_params_mutator(Node *node, PlannerInfo *root);
static Oid fetch_agg_sort_op(Oid aggfnoid);
......@@ -100,7 +87,10 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
*
* We don't handle GROUP BY or windowing, because our current
* implementations of grouping require looking at all the rows anyway, and
* so there's not much point in optimizing MIN/MAX.
* so there's not much point in optimizing MIN/MAX. (Note: relaxing
* this would likely require some restructuring in grouping_planner(),
* since it performs assorted processing related to these features between
* calling preprocess_minmax_aggregates and optimize_minmax_aggregates.)
*/
if (parse->groupClause || parse->hasWindowFuncs)
return;
......@@ -139,24 +129,48 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
/*
* OK, there is at least the possibility of performing the optimization.
* Build pathkeys (and thereby EquivalenceClasses) for each aggregate.
* The existence of the EquivalenceClasses will prompt the path generation
* logic to try to build paths matching the desired sort ordering(s).
*
* Note: the pathkeys are non-canonical at this point. They'll be fixed
* later by canonicalize_all_pathkeys().
* Build an access path for each aggregate. (We must do this now because
* we need to call query_planner with a pristine copy of the current query
* tree; it'll be too late when optimize_minmax_aggregates gets called.)
* If any of the aggregates prove to be non-indexable, give up; there is
* no point in optimizing just some of them.
*/
foreach(lc, aggs_list)
{
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
Oid eqop;
bool reverse;
/*
* We'll need the equality operator that goes with the aggregate's
* ordering operator.
*/
eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
if (!OidIsValid(eqop)) /* shouldn't happen */
elog(ERROR, "could not find equality operator for ordering operator %u",
mminfo->aggsortop);
mminfo->pathkeys = make_pathkeys_for_aggregate(root,
mminfo->target,
mminfo->aggsortop);
/*
* We can use either an ordering that gives NULLS FIRST or one that
* gives NULLS LAST; furthermore there's unlikely to be much
* performance difference between them, so it doesn't seem worth
* costing out both ways if we get a hit on the first one. NULLS
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
continue;
if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
continue;
/* No indexable path for this aggregate, so fail */
return;
}
/*
* We're done until path generation is complete. Save info for later.
* (Setting root->minmax_aggs non-NIL signals we succeeded in making
* index access paths for all the aggregates.)
*/
root->minmax_aggs = aggs_list;
}
......@@ -164,11 +178,15 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
/*
* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
*
* Check to see whether all the aggregates are in fact optimizable into
* indexscans. If so, and the result is estimated to be cheaper than the
* generic aggregate method, then generate and return a Plan that does it
* Check to see whether using the aggregate indexscans is cheaper than the
* generic aggregate method. If so, generate and return a Plan that does it
* that way. Otherwise, return NULL.
*
* Note: it seems likely that the generic method will never be cheaper
* in practice, except maybe for tiny tables where it'd hardly matter.
* Should we skip even trying to build the standard plan, if
* preprocess_minmax_aggregates succeeds?
*
* We are passed the preprocessed tlist, as well as the best path devised for
* computing the input of a standard Agg node.
*/
......@@ -176,59 +194,34 @@ Plan *
optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
{
Query *parse = root->parse;
FromExpr *jtnode;
RangeTblRef *rtr;
RelOptInfo *rel;
List *aggs_list;
ListCell *lc;
Cost total_cost;
Path agg_p;
Plan *plan;
Node *hqual;
QualCost tlist_cost;
ListCell *lc;
/* Nothing to do if preprocess_minmax_aggs rejected the query */
if (root->minmax_aggs == NIL)
return NULL;
/* Re-locate the one real table identified by preprocess_minmax_aggs */
jtnode = parse->jointree;
while (IsA(jtnode, FromExpr))
{
Assert(list_length(jtnode->fromlist) == 1);
jtnode = linitial(jtnode->fromlist);
}
Assert(IsA(jtnode, RangeTblRef));
rtr = (RangeTblRef *) jtnode;
rel = find_base_rel(root, rtr->rtindex);
/*
* Examine each agg to see if we can find a suitable ordered path for it.
* Give up if any agg isn't indexable.
* Now we have enough info to compare costs against the generic aggregate
* implementation.
*
* Note that we don't include evaluation cost of the tlist here; this is
* OK since it isn't included in best_path's cost either, and should be
* the same in either case.
*/
aggs_list = NIL;
total_cost = 0;
foreach(lc, root->minmax_aggs)
{
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
PrivateMMAggInfo *info;
info = find_minmax_path(root, rel, mminfo);
if (!info)
return NULL;
aggs_list = lappend(aggs_list, info);
total_cost += info->pathcost;
total_cost += mminfo->pathcost;
}
/*
* Now we have enough info to compare costs against the generic aggregate
* implementation.
*
* Note that we don't include evaluation cost of the tlist here; this is
* OK since it isn't included in best_path's cost either, and should be
* the same in either case.
*/
cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
cost_agg(&agg_p, root, AGG_PLAIN, list_length(root->minmax_aggs),
0, 0,
best_path->startup_cost, best_path->total_cost,
best_path->parent->rows);
......@@ -241,18 +234,18 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
*
* First, generate a subplan and output Param node for each agg.
*/
foreach(lc, aggs_list)
foreach(lc, root->minmax_aggs)
{
make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc));
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
make_agg_subplan(root, mminfo);
}
/*
* Modify the targetlist and HAVING qual to reference subquery outputs
*/
tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
&aggs_list);
hqual = replace_aggs_with_params_mutator(parse->havingQual,
&aggs_list);
tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist, root);
hqual = replace_aggs_with_params_mutator(parse->havingQual, root);
/*
* We have to replace Aggrefs with Params in equivalence classes too, else
......@@ -264,7 +257,7 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
*/
mutate_eclass_expressions(root,
replace_aggs_with_params_mutator,
&aggs_list);
(void *) root);
/*
* Generate the output plan --- basically just a Result
......@@ -340,7 +333,10 @@ find_minmax_aggs_walker(Node *node, List **context)
mminfo->aggfnoid = aggref->aggfnoid;
mminfo->aggsortop = aggsortop;
mminfo->target = curTarget->expr;
mminfo->pathkeys = NIL; /* don't compute pathkeys yet */
mminfo->subroot = NULL; /* don't compute path yet */
mminfo->path = NULL;
mminfo->pathcost = 0;
mminfo->param = NULL;
*context = lappend(*context, mminfo);
......@@ -356,198 +352,162 @@ find_minmax_aggs_walker(Node *node, List **context)
}
/*
* find_minmax_path
* Given a MIN/MAX aggregate, try to find an ordered Path it can be
* build_minmax_path
* Given a MIN/MAX aggregate, try to build an indexscan Path it can be
* optimized with.
*
* If successful, build and return a PrivateMMAggInfo struct. Otherwise,
* return NULL.
* If successful, stash the best path in *mminfo and return TRUE.
* Otherwise, return FALSE.
*/
static PrivateMMAggInfo *
find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo)
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
Oid eqop, Oid sortop, bool nulls_first)
{
PrivateMMAggInfo *info;
Path *best_path = NULL;
Cost best_cost = 0;
PlannerInfo *subroot;
Query *parse;
TargetEntry *tle;
NullTest *ntest;
SortGroupClause *sortcl;
Path *cheapest_path;
Path *sorted_path;
double dNumGroups;
Cost path_cost;
double path_fraction;
PathKey *mmpathkey;
ListCell *lc;
/*
* Punt if the aggregate's pathkey turned out to be redundant, ie its
* pathkeys list is now empty. This would happen with something like
* "SELECT max(x) ... WHERE x = constant". There's no need to try to
* optimize such a case, because if there is an index that would help,
* it should already have been used with the WHERE clause.
/*----------
* Generate modified query of the form
* (SELECT col FROM tab
* WHERE col IS NOT NULL AND existing-quals
* ORDER BY col ASC/DESC
* LIMIT 1)
*----------
*/
if (mminfo->pathkeys == NIL)
return NULL;
subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
memcpy(subroot, root, sizeof(PlannerInfo));
subroot->parse = parse = (Query *) copyObject(root->parse);
/* make sure subroot planning won't change root->init_plans contents */
subroot->init_plans = list_copy(root->init_plans);
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
/* and we haven't created PlaceHolderInfos, either */
Assert(subroot->placeholder_list == NIL);
/* single tlist entry that is the aggregate target */
tle = makeTargetEntry(copyObject(mminfo->target),
(AttrNumber) 1,
pstrdup("agg_target"),
false);
parse->targetList = list_make1(tle);
/* No HAVING, no DISTINCT, no aggregates anymore */
parse->havingQual = NULL;
subroot->hasHavingQual = false;
parse->distinctClause = NIL;
parse->hasDistinctOn = false;
parse->hasAggs = false;
/* Build "target IS NOT NULL" expression */
ntest = makeNode(NullTest);
ntest->nulltesttype = IS_NOT_NULL;
ntest->arg = copyObject(mminfo->target);
/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
ntest->argisrow = false;
/* User might have had that in WHERE already */
if (!list_member((List *) parse->jointree->quals, ntest))
parse->jointree->quals = (Node *)
lcons(ntest, (List *) parse->jointree->quals);
/* Build suitable ORDER BY clause */
sortcl = makeNode(SortGroupClause);
sortcl->tleSortGroupRef = assignSortGroupRef(tle, parse->targetList);
sortcl->eqop = eqop;
sortcl->sortop = sortop;
sortcl->nulls_first = nulls_first;
sortcl->hashable = false; /* no need to make this accurate */
parse->sortClause = list_make1(sortcl);
/* set up expressions for LIMIT 1 */
parse->limitOffset = NULL;
parse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
Int64GetDatum(1), false,
FLOAT8PASSBYVAL);
/*
* Search the paths that were generated for the rel to see if there are
* any with the desired ordering. There could be multiple such paths,
* in which case take the cheapest (as measured according to how fast it
* will be to fetch the first row).
*
* We can't use pathkeys_contained_in() to check the ordering, because we
* would like to match pathkeys regardless of the nulls_first setting.
* However, we know that MIN/MAX aggregates will have at most one item in
* their pathkeys, so it's not too complicated to match by brute force.
*
* Note: this test ignores the possible costs associated with skipping
* NULL tuples. We assume that adding the not-null criterion to the
* indexqual doesn't really cost anything.
* Set up requested pathkeys.
*/
if (rel->rows > 1.0)
path_fraction = 1.0 / rel->rows;
else
path_fraction = 1.0;
subroot->group_pathkeys = NIL;
subroot->window_pathkeys = NIL;
subroot->distinct_pathkeys = NIL;
Assert(list_length(mminfo->pathkeys) == 1);
mmpathkey = (PathKey *) linitial(mminfo->pathkeys);
subroot->sort_pathkeys =
make_pathkeys_for_sortclauses(subroot,
parse->sortClause,
parse->targetList,
false);
foreach(lc, rel->pathlist)
{
Path *path = (Path *) lfirst(lc);
PathKey *pathkey;
Cost path_cost;
subroot->query_pathkeys = subroot->sort_pathkeys;
if (path->pathkeys == NIL)
continue; /* unordered path */
pathkey = (PathKey *) linitial(path->pathkeys);
/*
* Generate the best paths for this query, telling query_planner that
* we have LIMIT 1.
*/
query_planner(subroot, parse->targetList, 1.0, 1.0,
&cheapest_path, &sorted_path, &dNumGroups);
if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
mmpathkey->pk_strategy == pathkey->pk_strategy)
{
/*
* OK, it has the right ordering; is it acceptable otherwise?
* (We test in this order because the pathkey check is cheap.)
*/
if (path_usable_for_agg(path))
{
/*
* It'll work; but is it the cheapest?
*
* Note: cost calculation here should match
* compare_fractional_path_costs().
*/
path_cost = path->startup_cost +
path_fraction * (path->total_cost - path->startup_cost);
if (best_path == NULL || path_cost < best_cost)
{
best_path = path;
best_cost = path_cost;
}
}
}
/*
* Fail if no presorted path. However, if query_planner determines that
* the presorted path is also the cheapest, it will set sorted_path to
* NULL ... don't be fooled. (This is kind of a pain here, but it
* simplifies life for grouping_planner, so leave it be.)
*/
if (!sorted_path)
{
if (cheapest_path &&
pathkeys_contained_in(subroot->sort_pathkeys,
cheapest_path->pathkeys))
sorted_path = cheapest_path;
else
return false;
}
/* Fail if no suitable path */
if (best_path == NULL)
return NULL;
/* Construct private state for further processing */
info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo));
info->mminfo = mminfo;
info->path = best_path;
info->pathcost = best_cost;
info->param = NULL; /* will be set later */
return info;
}
/*
* Determine cost to get just the first row of the presorted path.
*
* Note: cost calculation here should match
* compare_fractional_path_costs().
*/
if (sorted_path->parent->rows > 1.0)
path_fraction = 1.0 / sorted_path->parent->rows;
else
path_fraction = 1.0;
/*
* To be usable, a Path needs to be an IndexPath on a btree index, or be a
* MergeAppendPath of such IndexPaths. This restriction is mainly because
* we need to be sure the index can handle an added NOT NULL constraint at
* minimal additional cost. If you wish to relax it, you'll need to improve
* add_notnull_qual() too.
*/
static bool
path_usable_for_agg(Path *path)
{
if (IsA(path, IndexPath))
{
IndexPath *ipath = (IndexPath *) path;
path_cost = sorted_path->startup_cost +
path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
/* OK if it's a btree index */
if (ipath->indexinfo->relam == BTREE_AM_OID)
return true;
}
else if (IsA(path, MergeAppendPath))
{
MergeAppendPath *mpath = (MergeAppendPath *) path;
ListCell *lc;
/* Save state for further processing */
mminfo->subroot = subroot;
mminfo->path = sorted_path;
mminfo->pathcost = path_cost;
foreach(lc, mpath->subpaths)
{
if (!path_usable_for_agg((Path *) lfirst(lc)))
return false;
}
return true;
}
return false;
return true;
}
/*
* Construct a suitable plan for a converted aggregate query
*/
static void
make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info)
make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo)
{
PlannerInfo subroot;
Query *subparse;
PlannerInfo *subroot = mminfo->subroot;
Query *subparse = subroot->parse;
Plan *plan;
TargetEntry *tle;
/*
* Generate a suitably modified query. Much of the work here is probably
* unnecessary in the normal case, but we want to make it look good if
* someone tries to EXPLAIN the result.
*/
memcpy(&subroot, root, sizeof(PlannerInfo));
subroot.parse = subparse = (Query *) copyObject(root->parse);
subparse->commandType = CMD_SELECT;
subparse->resultRelation = 0;
subparse->returningList = NIL;
subparse->utilityStmt = NULL;
subparse->intoClause = NULL;
subparse->hasAggs = false;
subparse->hasDistinctOn = false;
subparse->groupClause = NIL;
subparse->havingQual = NULL;
subparse->distinctClause = NIL;
subparse->sortClause = NIL;
subroot.hasHavingQual = false;
/* single tlist entry that is the aggregate target */
tle = makeTargetEntry(copyObject(info->mminfo->target),
1,
pstrdup("agg_target"),
false);
subparse->targetList = list_make1(tle);
/* set up expressions for LIMIT 1 */
subparse->limitOffset = NULL;
subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
Int64GetDatum(1), false,
FLOAT8PASSBYVAL);
/*
* Modify the ordered Path to add an indexed "target IS NOT NULL"
* condition to each scan. We need this to ensure we don't return a NULL,
* which'd be contrary to the standard behavior of MIN/MAX. We insist on
* it being indexed, else the Path might not be as cheap as we thought.
*/
add_notnull_qual(root, rel, info, info->path);
/*
* Generate the plan for the subquery. We already have a Path, but we have
* to convert it to a Plan and attach a LIMIT node above it.
*/
plan = create_plan(&subroot, info->path);
plan = create_plan(subroot, mminfo->path);
plan->targetlist = subparse->targetList;
......@@ -559,137 +519,28 @@ make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info)
/*
* Convert the plan into an InitPlan, and make a Param for its result.
*/
info->param = SS_make_initplan_from_plan(&subroot, plan,
exprType((Node *) tle->expr),
-1,
exprCollation((Node *) tle->expr));
mminfo->param =
SS_make_initplan_from_plan(subroot, plan,
exprType((Node *) mminfo->target),
-1,
exprCollation((Node *) mminfo->target));
/*
* Put the updated list of InitPlans back into the outer PlannerInfo.
* Make sure the initplan gets into the outer PlannerInfo, along with
* any other initplans generated by the sub-planning run. We had to
* include the outer PlannerInfo's pre-existing initplans into the
* inner one's init_plans list earlier, so make sure we don't put back
* any duplicate entries.
*/
root->init_plans = subroot.init_plans;
}
/*
* Attach a suitable NOT NULL qual to the IndexPath, or each of the member
* IndexPaths. Note we assume we can modify the paths in-place.
*/
static void
add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info,
Path *path)
{
if (IsA(path, IndexPath))
{
IndexPath *ipath = (IndexPath *) path;
Expr *target;
NullTest *ntest;
RestrictInfo *rinfo;
List *newquals;
bool found_clause;
/*
* If we are looking at a child of the original rel, we have to adjust
* the agg target expression to match the child.
*/
if (ipath->path.parent != rel)
{
AppendRelInfo *appinfo = NULL;
ListCell *lc;
/* Search for the appropriate AppendRelInfo */
foreach(lc, root->append_rel_list)
{
appinfo = (AppendRelInfo *) lfirst(lc);
if (appinfo->parent_relid == rel->relid &&
appinfo->child_relid == ipath->path.parent->relid)
break;
appinfo = NULL;
}
if (!appinfo)
elog(ERROR, "failed to find AppendRelInfo for child rel");
target = (Expr *)
adjust_appendrel_attrs((Node *) info->mminfo->target,
appinfo);
}
else
{
/* Otherwise, just make a copy (may not be necessary) */
target = copyObject(info->mminfo->target);
}
/* Build "target IS NOT NULL" expression */
ntest = makeNode(NullTest);
ntest->nulltesttype = IS_NOT_NULL;
ntest->arg = target;
/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
ntest->argisrow = false;
/*
* We can skip adding the NOT NULL qual if it duplicates either an
* already-given index condition, or a clause of the index predicate.
*/
if (list_member(get_actual_clauses(ipath->indexquals), ntest) ||
list_member(ipath->indexinfo->indpred, ntest))
return;
/* Wrap it in a RestrictInfo and prepend to existing indexquals */
rinfo = make_restrictinfo((Expr *) ntest,
true,
false,
false,
NULL,
NULL);
newquals = list_concat(list_make1(rinfo), ipath->indexquals);
/*
* We can't just stick the IS NOT NULL at the front of the list,
* though. It has to go in the right position corresponding to its
* index column, which might not be the first one. Easiest way to fix
* this is to run the quals through group_clauses_by_indexkey again.
*/
newquals = group_clauses_by_indexkey(ipath->indexinfo,
newquals,
NIL,
NULL,
SAOP_FORBID,
&found_clause);
newquals = flatten_clausegroups_list(newquals);
/* Trouble if we lost any quals */
if (list_length(newquals) != list_length(ipath->indexquals) + 1)
elog(ERROR, "add_notnull_qual failed to add NOT NULL qual");
/*
* And update the path's indexquals. Note we don't bother adding
* to indexclauses, which is OK since this is like a generated
* index qual.
*/
ipath->indexquals = newquals;
}
else if (IsA(path, MergeAppendPath))
{
MergeAppendPath *mpath = (MergeAppendPath *) path;
ListCell *lc;
foreach(lc, mpath->subpaths)
{
add_notnull_qual(root, rel, info, (Path *) lfirst(lc));
}
}
else
{
/* shouldn't get here, because of path_usable_for_agg checks */
elog(ERROR, "add_notnull_qual failed");
}
root->init_plans = list_concat_unique_ptr(root->init_plans,
subroot->init_plans);
}
/*
* Replace original aggregate calls with subplan output Params
*/
static Node *
replace_aggs_with_params_mutator(Node *node, List **context)
replace_aggs_with_params_mutator(Node *node, PlannerInfo *root)
{
if (node == NULL)
return NULL;
......@@ -697,21 +548,21 @@ replace_aggs_with_params_mutator(Node *node, List **context)
{
Aggref *aggref = (Aggref *) node;
TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
ListCell *l;
ListCell *lc;
foreach(l, *context)
foreach(lc, root->minmax_aggs)
{
PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l);
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
if (info->mminfo->aggfnoid == aggref->aggfnoid &&
equal(info->mminfo->target, curTarget->expr))
return (Node *) info->param;
if (mminfo->aggfnoid == aggref->aggfnoid &&
equal(mminfo->target, curTarget->expr))
return (Node *) mminfo->param;
}
elog(ERROR, "failed to re-find PrivateMMAggInfo record");
elog(ERROR, "failed to re-find MinMaxAggInfo record");
}
Assert(!IsA(node, SubLink));
return expression_tree_mutator(node, replace_aggs_with_params_mutator,
(void *) context);
(void *) root);
}
/*
......
......@@ -440,18 +440,9 @@ query_planner(PlannerInfo *root, List *tlist,
static void
canonicalize_all_pathkeys(PlannerInfo *root)
{
ListCell *lc;
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
foreach(lc, root->minmax_aggs)
{
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
mminfo->pathkeys = canonicalize_pathkeys(root, mminfo->pathkeys);
}
}
......@@ -1042,7 +1042,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
count_agg_clauses(parse->havingQual, &agg_counts);
/*
* Preprocess MIN/MAX aggregates, if any.
* Preprocess MIN/MAX aggregates, if any. Note: be careful about
* adding logic between here and the optimize_minmax_aggregates
* call. Anything that is needed in MIN/MAX-optimizable cases
* will have to be duplicated in planagg.c.
*/
preprocess_minmax_aggregates(root, tlist);
}
......
......@@ -1387,9 +1387,6 @@ typedef struct PlaceHolderInfo
/*
* For each potentially index-optimizable MIN/MAX aggregate function,
* root->minmax_aggs stores a MinMaxAggInfo describing it.
*
* Note: a MIN/MAX agg doesn't really care about the nulls_first property,
* so the pathkey's nulls_first flag should be ignored.
*/
typedef struct MinMaxAggInfo
{
......@@ -1398,7 +1395,10 @@ typedef struct MinMaxAggInfo
Oid aggfnoid; /* pg_proc Oid of the aggregate */
Oid aggsortop; /* Oid of its sort operator */
Expr *target; /* expression we are aggregating on */
List *pathkeys; /* pathkeys representing needed sort order */
PlannerInfo *subroot; /* modified "root" for planning the subquery */
Path *path; /* access path for subquery */
Cost pathcost; /* estimated cost to fetch first row */
Param *param; /* param for subplan's output */
} MinMaxAggInfo;
/*
......
......@@ -42,14 +42,6 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
* indxpath.c
* routines to generate index paths
*/
typedef enum
{
/* Whether to use ScalarArrayOpExpr to build index qualifications */
SAOP_FORBID, /* Do not use ScalarArrayOpExpr */
SAOP_ALLOW, /* OK to use ScalarArrayOpExpr */
SAOP_REQUIRE /* Require ScalarArrayOpExpr */
} SaOpControl;
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
......@@ -59,11 +51,6 @@ extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
Path **cheapest_startup, Path **cheapest_total);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist);
extern List *group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses,
Relids outer_relids,
SaOpControl saop_control,
bool *found_clause);
extern bool eclass_matches_any_index(EquivalenceClass *ec,
EquivalenceMember *em,
RelOptInfo *rel);
......@@ -176,9 +163,6 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
List *sortclauses,
List *tlist,
bool canonicalize);
extern List *make_pathkeys_for_aggregate(PlannerInfo *root,
Expr *aggtarget,
Oid aggsortop);
extern void initialize_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern void update_mergeclause_eclasses(PlannerInfo *root,
......
......@@ -690,32 +690,19 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
9999 | 1
(3 rows)
-- this is an interesting special case as of 9.1
explain (costs off)
select min(unique2) from tenk1 where unique2 = 42;
QUERY PLAN
-----------------------------------------------
Aggregate
-> Index Scan using tenk1_unique2 on tenk1
Index Cond: (unique2 = 42)
(3 rows)
select min(unique2) from tenk1 where unique2 = 42;
min
-----
42
(1 row)
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
create table minmaxtest2() inherits (minmaxtest);
create table minmaxtest3() inherits (minmaxtest);
create index minmaxtesti on minmaxtest(f1);
create index minmaxtest1i on minmaxtest1(f1);
create index minmaxtest2i on minmaxtest2(f1 desc);
create index minmaxtest3i on minmaxtest3(f1) where f1 is not null;
insert into minmaxtest values(11), (12);
insert into minmaxtest1 values(13), (14);
insert into minmaxtest2 values(15), (16);
insert into minmaxtest3 values(17), (18);
explain (costs off)
select min(f1), max(f1) from minmaxtest;
QUERY PLAN
......@@ -731,6 +718,8 @@ explain (costs off)
Index Cond: (f1 IS NOT NULL)
-> Index Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest
Index Cond: (f1 IS NOT NULL)
-> Index Scan using minmaxtest3i on minmaxtest3 minmaxtest
Index Cond: (f1 IS NOT NULL)
InitPlan 2 (returns $1)
-> Limit
-> Merge Append
......@@ -741,18 +730,21 @@ explain (costs off)
Index Cond: (f1 IS NOT NULL)
-> Index Scan using minmaxtest2i on minmaxtest2 minmaxtest
Index Cond: (f1 IS NOT NULL)
(21 rows)
-> Index Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest
Index Cond: (f1 IS NOT NULL)
(25 rows)
select min(f1), max(f1) from minmaxtest;
min | max
-----+-----
11 | 16
11 | 18
(1 row)
drop table minmaxtest cascade;
NOTICE: drop cascades to 2 other objects
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to table minmaxtest1
drop cascades to table minmaxtest2
drop cascades to table minmaxtest3
--
-- Test combinations of DISTINCT and/or ORDER BY
--
......
......@@ -258,22 +258,21 @@ select max(unique2) from tenk1 order by max(unique2)+1;
explain (costs off)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
-- this is an interesting special case as of 9.1
explain (costs off)
select min(unique2) from tenk1 where unique2 = 42;
select min(unique2) from tenk1 where unique2 = 42;
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
create table minmaxtest2() inherits (minmaxtest);
create table minmaxtest3() inherits (minmaxtest);
create index minmaxtesti on minmaxtest(f1);
create index minmaxtest1i on minmaxtest1(f1);
create index minmaxtest2i on minmaxtest2(f1 desc);
create index minmaxtest3i on minmaxtest3(f1) where f1 is not null;
insert into minmaxtest values(11), (12);
insert into minmaxtest1 values(13), (14);
insert into minmaxtest2 values(15), (16);
insert into minmaxtest3 values(17), (18);
explain (costs off)
select min(f1), max(f1) from minmaxtest;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment