Create the planner mechanism for optimizing simple MIN and MAX queries
into indexscans on matching indexes. For the moment, it only handles int4 and text datatypes; next step is to add a column to pg_aggregate so that all MIN/MAX aggregates can be handled. Per my recent proposal.

Create the planner mechanism for optimizing simple MIN and MAX queries
into indexscans on matching indexes. For the moment, it only handles int4 and text datatypes; next step is to add a column to pg_aggregate so that all MIN/MAX aggregates can be handled. Per my recent proposal.
addc42c3 · Tom Lane · c3294f1c · addc42c3 · addc42c3 · addc42c3
Commit addc42c3 authored Apr 11, 2005 by Tom Lane
14 changed files
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.172 2005/03/28 00:58:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.173 2005/04/11 23:06:55 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -54,7 +54,6 @@
 	((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)


-static List *group_clauses_by_indexkey(IndexOptInfo *index);
 static List *group_clauses_by_indexkey_for_join(Query *root,
 								   IndexOptInfo *index,
 								   Relids outer_relids,
@@ -72,8 +71,6 @@ static bool pred_test_simple_clause(Expr *predicate, Node *clause);
 static Relids indexable_outerrelids(IndexOptInfo *index);
 static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index,
 						  List *clausegroups);
-static bool match_index_to_operand(Node *operand, int indexcol,
-					   IndexOptInfo *index);
 static bool match_boolean_index_clause(Node *clause, int indexcol,
 									   IndexOptInfo *index);
 static bool match_special_index_operator(Expr *clause, Oid opclass,
@@ -234,7 +231,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
 * clauses matching column C, because the executor couldn't use them anyway.
 * Therefore, there are no empty sublists in the result.
 */
-static List *
+List *
 group_clauses_by_indexkey(IndexOptInfo *index)
 {
 	List	   *clausegroup_list = NIL;
@@ -1774,7 +1771,7 @@ make_expr_from_indexclauses(List *indexclauses)
 * indexcol: the column number of the index (counting from 0)
 * index: the index of interest
 */
-static bool
+bool
 match_index_to_operand(Node *operand,
 					   int indexcol,
 					   IndexOptInfo *index)

--- a/src/backend/optimizer/plan/Makefile
+++ b/src/backend/optimizer/plan/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for optimizer/plan
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.12 2003/11/29 19:51:50 pgsql Exp $
+#    $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.13 2005/04/11 23:06:55 tgl Exp $
 #
 #-------------------------------------------------------------------------

@@ -12,7 +12,8 @@ subdir = src/backend/optimizer/plan
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global

-OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o
+OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
+	setrefs.o subselect.o

 all: SUBSYS.o


--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.96 2005/04/11 23:06:55 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -915,14 +915,16 @@ process_sublinks_mutator(Node *node, bool *isTopQual)
 /*
 * SS_finalize_plan - do final sublink processing for a completed Plan.
 *
- * This recursively computes the extParam and allParam sets
- * for every Plan node in the given plan tree.
+ * This recursively computes the extParam and allParam sets for every Plan
+ * node in the given plan tree.  It also attaches any generated InitPlans
+ * to the top plan node.
 */
 void
 SS_finalize_plan(Plan *plan, List *rtable)
 {
 	Bitmapset  *outer_params = NULL;
 	Bitmapset  *valid_params = NULL;
+	Cost		initplan_cost = 0;
 	int			paramid;
 	ListCell   *l;

@@ -959,6 +961,33 @@ SS_finalize_plan(Plan *plan, List *rtable)

 	bms_free(outer_params);
 	bms_free(valid_params);
+
+	/*
+	 * Finally, attach any initPlans to the topmost plan node,
+	 * and add their extParams to the topmost node's, too.
+	 *
+	 * We also add the total_cost of each initPlan to the startup cost of
+	 * the top node.  This is a conservative overestimate, since in
+	 * fact each initPlan might be executed later than plan startup,
+	 * or even not at all.
+	 */
+	plan->initPlan = PlannerInitPlan;
+	PlannerInitPlan = NIL;		/* make sure they're not attached twice */
+
+	foreach(l, plan->initPlan)
+	{
+		SubPlan    *initplan = (SubPlan *) lfirst(l);
+
+		plan->extParam = bms_add_members(plan->extParam,
+										 initplan->plan->extParam);
+		/* allParam must include all members of extParam */
+		plan->allParam = bms_add_members(plan->allParam,
+										 plan->extParam);
+		initplan_cost += initplan->plan->total_cost;
+	}
+
+	plan->startup_cost += initplan_cost;
+	plan->total_cost += initplan_cost;
 }

 /*
@@ -1165,3 +1194,75 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
 	return expression_tree_walker(node, finalize_primnode,
 								  (void *) context);
 }
+
+/*
+ * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
+ *
+ * The plan is expected to return a scalar value of the indicated type.
+ * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
+ * list for the current query level.  A Param that represents the initplan's
+ * output is returned.
+ *
+ * The plan must not have been through SS_finalize_plan yet; we run it here.
+ */
+Param *
+SS_make_initplan_from_plan(Query *root, Plan *plan,
+						   Oid resulttype, int32 resulttypmod)
+{
+	List	   *saved_initplan = PlannerInitPlan;
+	SubPlan    *node;
+	Param	   *prm;
+	Bitmapset  *tmpset;
+	int			paramid;
+
+	/*
+	 * Pretend to enter a new subquery level.  SS_finalize_plan attaches and
+	 * then clears PlannerInitPlan, so hide the outer level's list from it.
+	 */
+	PlannerQueryLevel++;
+	PlannerInitPlan = NIL;
+
+	/*
+	 * Build extParam/allParam sets for plan nodes.
+	 */
+	SS_finalize_plan(plan, root->rtable);
+
+	/* Return to outer subquery context, restoring its initplan list */
+	PlannerQueryLevel--;
+	PlannerInitPlan = saved_initplan;
+
+	/*
+	 * Create a SubPlan node and add it to the outer list of InitPlans.
+	 */
+	node = makeNode(SubPlan);
+	node->subLinkType = EXPR_SUBLINK;
+	node->plan = plan;
+	node->plan_id = PlannerPlanId++;	/* Assign unique ID to this
+										 * SubPlan */
+
+	node->rtable = root->rtable;
+
+	PlannerInitPlan = lappend(PlannerInitPlan, node);
+
+	/*
+	 * Make parParam list of params that current query level will pass to
+	 * this child plan.  (In current usage there probably aren't any.)
+	 */
+	tmpset = bms_copy(plan->extParam);	/* scratch copy; extParam stays intact */
+	while ((paramid = bms_first_member(tmpset)) >= 0)
+	{
+		PlannerParamItem *pitem = list_nth(PlannerParamList, paramid);
+
+		if (pitem->abslevel == PlannerQueryLevel)
+			node->parParam = lappend_int(node->parParam, paramid);
+	}
+	bms_free(tmpset);
+
+	/*
+	 * Make a Param that will be the subplan's output.
+	 */
+	prm = generate_new_param(resulttype, resulttypmod);
+	node->setParam = list_make1_int(prm->paramid);	/* the one Param it sets */
+
+	return prm;
+}
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.122 2005/03/31 22:46:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.123 2005/04/11 23:06:56 tgl Exp $
 *
 * NOTES
 *	  Eventually, the index information should go through here, too.
@@ -53,6 +53,31 @@ op_in_opclass(Oid opno, Oid opclass)
 								0, 0);
 }

+/*
+ * get_op_opclass_strategy
+ *
+ *		Get the operator's strategy number within the specified opclass,
+ *		or 0 if it's not a member of the opclass.
+ */
+int
+get_op_opclass_strategy(Oid opno, Oid opclass)
+{
+	HeapTuple	tp;
+	Form_pg_amop amop_tup;
+	int			result;
+
+	tp = SearchSysCache(AMOPOPID,
+						ObjectIdGetDatum(opno),
+						ObjectIdGetDatum(opclass),
+						0, 0);
+	if (!HeapTupleIsValid(tp))
+		return 0;				/* lookup failed: nothing to release */
+	amop_tup = (Form_pg_amop) GETSTRUCT(tp);
+	result = amop_tup->amopstrategy;	/* copy out before releasing tuple */
+	ReleaseSysCache(tp);
+	return result;
+}
+
 /*
 * get_op_opclass_properties
 *

--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.80 2005/03/27 06:29:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -38,8 +38,11 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
 extern void create_index_paths(Query *root, RelOptInfo *rel);
 extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
 					 Relids outer_relids, JoinType jointype);
+extern List *group_clauses_by_indexkey(IndexOptInfo *index);
 extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index,
 								 Expr *orsubclause);
+extern bool match_index_to_operand(Node *operand, int indexcol,
+					   IndexOptInfo *index);
 extern List *expand_indexqual_conditions(IndexOptInfo *index,
 										 List *clausegroups);
 extern void check_partial_indexes(Query *root, RelOptInfo *rel);

--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -23,6 +23,12 @@
 extern void query_planner(Query *root, List *tlist, double tuple_fraction,
 			  Path **cheapest_path, Path **sorted_path);

+/*
+ * prototypes for plan/planagg.c
+ */
+extern Plan *optimize_minmax_aggregates(Query *root, List *tlist,
+										Path *best_path);
+
 /*
 * prototypes for plan/createplan.c
 */

--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -5,7 +5,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.23 2004/12/31 22:03:36 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.24 2005/04/11 23:06:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -24,5 +24,7 @@ extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
 extern Node *SS_replace_correlation_vars(Node *expr);
 extern Node *SS_process_sublinks(Node *expr, bool isQual);
 extern void SS_finalize_plan(Plan *plan, List *rtable);
+extern Param *SS_make_initplan_from_plan(Query *root, Plan *plan,
+										 Oid resulttype, int32 resulttypmod);

 #endif   /* SUBSELECT_H */
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.96 2005/03/31 22:46:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.97 2005/04/11 23:06:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -25,6 +25,7 @@ typedef enum IOFuncSelector
 } IOFuncSelector;

 extern bool op_in_opclass(Oid opno, Oid opclass);
+extern int	get_op_opclass_strategy(Oid opno, Oid opclass);
 extern void get_op_opclass_properties(Oid opno, Oid opclass,
 						  int *strategy, Oid *subtype,
 						  bool *recheck);

--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -293,3 +293,58 @@ FROM bool_test;
 t | t | f |   | f | t
 (1 row)

+--
+-- Test several cases that should be optimized into indexscans instead of
+-- the generic aggregate implementation.  We can't actually verify that they
+-- are done as indexscans, but we can check that the results are correct.
+--
+-- Basic cases
+select max(unique1) from tenk1;
+ max  
+------
+ 9999
+(1 row)
+
+select max(unique1) from tenk1 where unique1 < 42;
+ max 
+-----
+  41
+(1 row)
+
+select max(unique1) from tenk1 where unique1 > 42;
+ max  
+------
+ 9999
+(1 row)
+
+select max(unique1) from tenk1 where unique1 > 42000;
+ max 
+-----
+    
+(1 row)
+
+-- multi-column index (uses tenk1_thous_tenthous)
+select max(tenthous) from tenk1 where thousand = 33;
+ max  
+------
+ 9033
+(1 row)
+
+select min(tenthous) from tenk1 where thousand = 33;
+ min 
+-----
+  33
+(1 row)
+
+-- check parameter propagation into an indexscan subquery
+select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
+from int4_tbl;
+     f1      | gt 
+-------------+----
+           0 |  1
+      123456 |   
+     -123456 |  0
+  2147483647 |   
+ -2147483647 |  0
+(5 rows)
+
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -12,6 +12,7 @@ CREATE INDEX onek_stringu1 ON onek USING btree(stringu1 name_ops);
 CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
 CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
 CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
+CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
 CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
 CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
 CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);

--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -180,3 +180,23 @@ SELECT
  BOOL_OR(NOT b2)  AS "f",
  BOOL_OR(NOT b3)  AS "t"
 FROM bool_test;
+
+--
+-- Test several cases that should be optimized into indexscans instead of
+-- the generic aggregate implementation.  We can't actually verify that they
+-- are done as indexscans, but we can check that the results are correct.
+--
+
+-- Basic cases
+select max(unique1) from tenk1;
+select max(unique1) from tenk1 where unique1 < 42;
+select max(unique1) from tenk1 where unique1 > 42;
+select max(unique1) from tenk1 where unique1 > 42000;
+
+-- multi-column index (uses tenk1_thous_tenthous)
+select max(tenthous) from tenk1 where thousand = 33;
+select min(tenthous) from tenk1 where thousand = 33;
+
+-- check parameter propagation into an indexscan subquery
+select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
+from int4_tbl;
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -20,6 +20,8 @@ CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);

 CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);

+CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
+
 CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);

 CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);