Commit addc42c3 authored by Tom Lane

Create the planner mechanism for optimizing simple MIN and MAX queries
into indexscans on matching indexes.  For the moment, it only handles
int4 and text datatypes; next step is to add a column to pg_aggregate
so that all MIN/MAX aggregates can be handled.  Per my recent proposal.
parent c3294f1c
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.172 2005/03/28 00:58:22 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.173 2005/04/11 23:06:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -54,7 +54,6 @@ ...@@ -54,7 +54,6 @@
((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID) ((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)
static List *group_clauses_by_indexkey(IndexOptInfo *index);
static List *group_clauses_by_indexkey_for_join(Query *root, static List *group_clauses_by_indexkey_for_join(Query *root,
IndexOptInfo *index, IndexOptInfo *index,
Relids outer_relids, Relids outer_relids,
...@@ -72,8 +71,6 @@ static bool pred_test_simple_clause(Expr *predicate, Node *clause); ...@@ -72,8 +71,6 @@ static bool pred_test_simple_clause(Expr *predicate, Node *clause);
static Relids indexable_outerrelids(IndexOptInfo *index); static Relids indexable_outerrelids(IndexOptInfo *index);
static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index, static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index,
List *clausegroups); List *clausegroups);
static bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
static bool match_boolean_index_clause(Node *clause, int indexcol, static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index); IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opclass, static bool match_special_index_operator(Expr *clause, Oid opclass,
...@@ -234,7 +231,7 @@ create_index_paths(Query *root, RelOptInfo *rel) ...@@ -234,7 +231,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
* clauses matching column C, because the executor couldn't use them anyway. * clauses matching column C, because the executor couldn't use them anyway.
* Therefore, there are no empty sublists in the result. * Therefore, there are no empty sublists in the result.
*/ */
static List * List *
group_clauses_by_indexkey(IndexOptInfo *index) group_clauses_by_indexkey(IndexOptInfo *index)
{ {
List *clausegroup_list = NIL; List *clausegroup_list = NIL;
...@@ -1774,7 +1771,7 @@ make_expr_from_indexclauses(List *indexclauses) ...@@ -1774,7 +1771,7 @@ make_expr_from_indexclauses(List *indexclauses)
* indexcol: the column number of the index (counting from 0) * indexcol: the column number of the index (counting from 0)
* index: the index of interest * index: the index of interest
*/ */
static bool bool
match_index_to_operand(Node *operand, match_index_to_operand(Node *operand,
int indexcol, int indexcol,
IndexOptInfo *index) IndexOptInfo *index)
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# Makefile for optimizer/plan # Makefile for optimizer/plan
# #
# IDENTIFICATION # IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.12 2003/11/29 19:51:50 pgsql Exp $ # $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.13 2005/04/11 23:06:55 tgl Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
...@@ -12,7 +12,8 @@ subdir = src/backend/optimizer/plan ...@@ -12,7 +12,8 @@ subdir = src/backend/optimizer/plan
top_builddir = ../../../.. top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
setrefs.o subselect.o
all: SUBSYS.o all: SUBSYS.o
......
This diff is collapsed.
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.96 2005/04/11 23:06:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -915,14 +915,16 @@ process_sublinks_mutator(Node *node, bool *isTopQual) ...@@ -915,14 +915,16 @@ process_sublinks_mutator(Node *node, bool *isTopQual)
/* /*
* SS_finalize_plan - do final sublink processing for a completed Plan. * SS_finalize_plan - do final sublink processing for a completed Plan.
* *
* This recursively computes the extParam and allParam sets * This recursively computes the extParam and allParam sets for every Plan
* for every Plan node in the given plan tree. * node in the given plan tree. It also attaches any generated InitPlans
* to the top plan node.
*/ */
void void
SS_finalize_plan(Plan *plan, List *rtable) SS_finalize_plan(Plan *plan, List *rtable)
{ {
Bitmapset *outer_params = NULL; Bitmapset *outer_params = NULL;
Bitmapset *valid_params = NULL; Bitmapset *valid_params = NULL;
Cost initplan_cost = 0;
int paramid; int paramid;
ListCell *l; ListCell *l;
...@@ -959,6 +961,33 @@ SS_finalize_plan(Plan *plan, List *rtable) ...@@ -959,6 +961,33 @@ SS_finalize_plan(Plan *plan, List *rtable)
bms_free(outer_params); bms_free(outer_params);
bms_free(valid_params); bms_free(valid_params);
/*
* Finally, attach any initPlans to the topmost plan node,
* and add their extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
PlannerInitPlan = NIL; /* make sure they're not attached twice */
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
} }
/* /*
...@@ -1165,3 +1194,75 @@ finalize_primnode(Node *node, finalize_primnode_context *context) ...@@ -1165,3 +1194,75 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
return expression_tree_walker(node, finalize_primnode, return expression_tree_walker(node, finalize_primnode,
(void *) context); (void *) context);
} }
/*
 * SS_make_initplan_from_plan - turn a finished plan tree into an InitPlan
 *
 * The supplied plan is expected to produce one scalar value of type
 * resulttype/resulttypmod.  It is wrapped in an EXPR_SUBLINK SubPlan node,
 * and that node is appended to the initplan list of the current query
 * level.  The caller gets back a Param that stands for the initplan's
 * output value.
 *
 * The plan must not have been run through SS_finalize_plan yet; this
 * function takes care of that step itself.
 */
Param *
SS_make_initplan_from_plan(Query *root, Plan *plan,
						   Oid resulttype, int32 resulttypmod)
{
	List	   *outer_initplans = PlannerInitPlan;
	SubPlan    *initplan;
	Param	   *result_param;
	Bitmapset  *pending;

	/*
	 * Temporarily pretend we are one subquery level down, with an empty
	 * initplan list, so that SS_finalize_plan sees a consistent state while
	 * it processes the child plan.
	 */
	PlannerInitPlan = NIL;
	PlannerQueryLevel++;

	/* Compute the extParam/allParam sets for the child plan's nodes. */
	SS_finalize_plan(plan, root->rtable);

	/* Back to the outer query level and its own initplan list. */
	PlannerQueryLevel--;
	PlannerInitPlan = outer_initplans;

	/*
	 * Wrap the plan in a SubPlan node and register it among the outer
	 * level's InitPlans.
	 */
	initplan = makeNode(SubPlan);
	initplan->subLinkType = EXPR_SUBLINK;
	initplan->plan = plan;
	initplan->plan_id = PlannerPlanId++;	/* unique ID for this SubPlan */
	initplan->rtable = root->rtable;

	PlannerInitPlan = lappend(PlannerInitPlan, initplan);

	/*
	 * Collect into parParam those params that the current query level must
	 * hand down to the child plan.  (With present callers the list is
	 * probably empty.)  The walk is done over a scratch copy of the set, so
	 * plan->extParam itself is never passed to bms_first_member.
	 */
	pending = bms_copy(plan->extParam);
	for (;;)
	{
		int			id = bms_first_member(pending);
		PlannerParamItem *item;

		if (id < 0)
			break;				/* no members left */

		item = list_nth(PlannerParamList, id);
		if (item->abslevel == PlannerQueryLevel)
			initplan->parParam = lappend_int(initplan->parParam, id);
	}
	bms_free(pending);

	/*
	 * Finally, create the Param that carries the subplan's output and record
	 * it as the one parameter this initplan sets.
	 */
	result_param = generate_new_param(resulttype, resulttypmod);
	initplan->setParam = list_make1_int(result_param->paramid);

	return result_param;
}
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.122 2005/03/31 22:46:14 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.123 2005/04/11 23:06:56 tgl Exp $
* *
* NOTES * NOTES
* Eventually, the index information should go through here, too. * Eventually, the index information should go through here, too.
...@@ -53,6 +53,31 @@ op_in_opclass(Oid opno, Oid opclass) ...@@ -53,6 +53,31 @@ op_in_opclass(Oid opno, Oid opclass)
0, 0); 0, 0);
} }
/*
 * get_op_opclass_strategy
 *
 *		Look up the strategy number that operator "opno" has within
 *		operator class "opclass".  Returns 0 when the AMOPOPID syscache
 *		has no entry for the pair, i.e. the operator is not a member
 *		of the opclass.
 */
int
get_op_opclass_strategy(Oid opno, Oid opclass)
{
	int			strategy = 0;	/* reported when no entry is found */
	HeapTuple	amopTuple;

	amopTuple = SearchSysCache(AMOPOPID,
							   ObjectIdGetDatum(opno),
							   ObjectIdGetDatum(opclass),
							   0, 0);
	if (HeapTupleIsValid(amopTuple))
	{
		/* Copy the field out before the cache entry is released. */
		strategy = ((Form_pg_amop) GETSTRUCT(amopTuple))->amopstrategy;
		ReleaseSysCache(amopTuple);
	}

	return strategy;
}
/* /*
* get_op_opclass_properties * get_op_opclass_properties
* *
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.80 2005/03/27 06:29:49 tgl Exp $ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -38,8 +38,11 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel); ...@@ -38,8 +38,11 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
extern void create_index_paths(Query *root, RelOptInfo *rel); extern void create_index_paths(Query *root, RelOptInfo *rel);
extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel, extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype); Relids outer_relids, JoinType jointype);
extern List *group_clauses_by_indexkey(IndexOptInfo *index);
extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index, extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index,
Expr *orsubclause); Expr *orsubclause);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern List *expand_indexqual_conditions(IndexOptInfo *index, extern List *expand_indexqual_conditions(IndexOptInfo *index,
List *clausegroups); List *clausegroups);
extern void check_partial_indexes(Query *root, RelOptInfo *rel); extern void check_partial_indexes(Query *root, RelOptInfo *rel);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,6 +23,12 @@ ...@@ -23,6 +23,12 @@
extern void query_planner(Query *root, List *tlist, double tuple_fraction, extern void query_planner(Query *root, List *tlist, double tuple_fraction,
Path **cheapest_path, Path **sorted_path); Path **cheapest_path, Path **sorted_path);
/*
* prototypes for plan/planagg.c
*/
extern Plan *optimize_minmax_aggregates(Query *root, List *tlist,
Path *best_path);
/* /*
* prototypes for plan/createplan.c * prototypes for plan/createplan.c
*/ */
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.23 2004/12/31 22:03:36 pgsql Exp $ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.24 2005/04/11 23:06:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -24,5 +24,7 @@ extern Node *convert_IN_to_join(Query *parse, SubLink *sublink); ...@@ -24,5 +24,7 @@ extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
extern Node *SS_replace_correlation_vars(Node *expr); extern Node *SS_replace_correlation_vars(Node *expr);
extern Node *SS_process_sublinks(Node *expr, bool isQual); extern Node *SS_process_sublinks(Node *expr, bool isQual);
extern void SS_finalize_plan(Plan *plan, List *rtable); extern void SS_finalize_plan(Plan *plan, List *rtable);
extern Param *SS_make_initplan_from_plan(Query *root, Plan *plan,
Oid resulttype, int32 resulttypmod);
#endif /* SUBSELECT_H */ #endif /* SUBSELECT_H */
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.96 2005/03/31 22:46:27 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.97 2005/04/11 23:06:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -25,6 +25,7 @@ typedef enum IOFuncSelector ...@@ -25,6 +25,7 @@ typedef enum IOFuncSelector
} IOFuncSelector; } IOFuncSelector;
extern bool op_in_opclass(Oid opno, Oid opclass); extern bool op_in_opclass(Oid opno, Oid opclass);
extern int get_op_opclass_strategy(Oid opno, Oid opclass);
extern void get_op_opclass_properties(Oid opno, Oid opclass, extern void get_op_opclass_properties(Oid opno, Oid opclass,
int *strategy, Oid *subtype, int *strategy, Oid *subtype,
bool *recheck); bool *recheck);
......
...@@ -293,3 +293,58 @@ FROM bool_test; ...@@ -293,3 +293,58 @@ FROM bool_test;
t | t | f | | f | t t | t | f | | f | t
(1 row) (1 row)
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 < 42;
max
-----
41
(1 row)
select max(unique1) from tenk1 where unique1 > 42;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 > 42000;
max
-----
(1 row)
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
max
------
9033
(1 row)
select min(tenthous) from tenk1 where thousand = 33;
min
-----
33
(1 row)
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
f1 | gt
-------------+----
0 | 1
123456 |
-123456 | 0
2147483647 |
-2147483647 | 0
(5 rows)
...@@ -12,6 +12,7 @@ CREATE INDEX onek_stringu1 ON onek USING btree(stringu1 name_ops); ...@@ -12,6 +12,7 @@ CREATE INDEX onek_stringu1 ON onek USING btree(stringu1 name_ops);
CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops); CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops); CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops); CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops); CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops); CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);
......
...@@ -180,3 +180,23 @@ SELECT ...@@ -180,3 +180,23 @@ SELECT
BOOL_OR(NOT b2) AS "f", BOOL_OR(NOT b2) AS "f",
BOOL_OR(NOT b3) AS "t" BOOL_OR(NOT b3) AS "t"
FROM bool_test; FROM bool_test;
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
select max(unique1) from tenk1 where unique1 < 42;
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42000;
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
select min(tenthous) from tenk1 where thousand = 33;
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
...@@ -20,6 +20,8 @@ CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops); ...@@ -20,6 +20,8 @@ CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops); CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops); CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops); CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment