Postpone creation of pathkeys lists to fix bug #8049.

This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.

Postpone creation of pathkeys lists to fix bug #8049.
This patch gets rid of the concept of, and infrastructure for, non-canonical PathKeys; we now only ever create canonical pathkey lists. The need for non-canonical pathkeys came from the desire to have grouping_planner initialize query_pathkeys and related pathkey lists before calling query_planner. However, since query_planner didn't actually *do* anything with those lists before they'd been made canonical, we can get rid of the whole mess by just not creating the lists at all until the point where we formerly canonicalized them. There are several ways in which we could implement that without making query_planner itself deal with grouping/sorting features (which are supposed to be the province of grouping_planner). I chose to add a callback function to query_planner's API; other alternatives would have required adding more fields to PlannerInfo, which while not bad in itself would create an ABI break for planner-related plugins in the 9.2 release series. This still breaks ABI for anything that calls query_planner directly, but it seems somewhat unlikely that there are any such plugins. I had originally conceived of this change as merely a step on the way to fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes that bug all by itself, as per the added regression test. The reason is that now get_eclass_for_sort_expr is adding the ORDER BY expression at the end of EquivalenceClass creation not the start, and so anything that is in a multi-member EquivalenceClass has already been created with correct em_nullable_relids. I am suspicious that there are related scenarios in which we still need to teach get_eclass_for_sort_expr to compute correct nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3 to fix bugs that are only hypothetical. So for the moment, do this and stop here. Back-patch to 9.2 but not to earlier branches, since they don't exhibit this bug for lack of join-clause-movement logic that depends on em_nullable_relids being correct. (We might have to revisit that choice if any related bugs turn up.) In 9.2, don't change the signature of make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as not to risk more plugin breakage than we have to.
db9f0e1d · Tom Lane · 5fc89376 · db9f0e1d · db9f0e1d · db9f0e1d
Commit db9f0e1d authored Apr 29, 2013 by Tom Lane
12 changed files
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -728,22 +728,8 @@ _equalFromExpr(const FromExpr *a, const FromExpr *b)
 static bool
 _equalPathKey(const PathKey *a, const PathKey *b)
 {
-	/*
+	/* We assume pointer equality is sufficient to compare the eclasses */
-	 * This is normally used on non-canonicalized PathKeys, so must chase up
+	COMPARE_SCALAR_FIELD(pk_eclass);
-	 * to the topmost merged EquivalenceClass and see if those are the same
-	 * (by pointer equality).
-	 */
-	EquivalenceClass *a_eclass;
-	EquivalenceClass *b_eclass;
-	a_eclass = a->pk_eclass;
-	while (a_eclass->ec_merged)
-		a_eclass = a_eclass->ec_merged;
-	b_eclass = b->pk_eclass;
-	while (b_eclass->ec_merged)
-		b_eclass = b_eclass->ec_merged;
-	if (a_eclass != b_eclass)
-		return false;
 	COMPARE_SCALAR_FIELD(pk_opfamily);
 	COMPARE_SCALAR_FIELD(pk_strategy);
 	COMPARE_SCALAR_FIELD(pk_nulls_first);

--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -589,16 +589,6 @@ since they are easily compared to the pathkeys of a potential candidate
 path.  So, SortGroupClause lists are turned into pathkeys lists for use
 inside the optimizer.
-Because we have to generate pathkeys lists from the sort clauses before
-we've finished EquivalenceClass merging, we cannot use the pointer-equality
-method of comparing PathKeys in the earliest stages of the planning
-process.  Instead, we generate "non canonical" PathKeys that reference
-single-element EquivalenceClasses that might get merged later.  After we
-complete EquivalenceClass merging, we replace these with "canonical"
-PathKeys that reference only fully-merged classes, and after that we make
-sure we don't generate more than one copy of each "canonical" PathKey.
-Then it is safe to use pointer comparison on canonical PathKeys.
 An additional refinement we can make is to insist that canonical pathkey
 lists (sort orderings) do not mention the same EquivalenceClass more than
 once.  For example, in all these cases the second sort column is redundant,
@@ -651,12 +641,11 @@ mergejoinable clauses found in the quals.  At the end of this process,
 we know all we can know about equivalence of different variables, so
 subsequently there will be no further merging of EquivalenceClasses.
 At that point it is possible to consider the EquivalenceClasses as
-"canonical" and build canonical PathKeys that reference them.  Before
+"canonical" and build canonical PathKeys that reference them.  At this
-we reach that point (actually, before entering query_planner at all)
+time we construct PathKeys for the query's ORDER BY and related clauses.
-we also ensure that we have constructed EquivalenceClasses for all the
+(Any ordering expressions that do not appear elsewhere will result in
-expressions used in the query's ORDER BY and related clauses.  These
+the creation of new EquivalenceClasses, but this cannot result in merging
-classes might or might not get merged together, depending on what we
+existing classes, so canonical-ness is not lost.)
-find in the quals.
 Because all the EquivalenceClasses are known before we begin path
 generation, we can use them as a guide to which indexes are of interest:

--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -285,11 +285,19 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
 		}
 		/*
-		 * Case 2: need to merge ec1 and ec2.  We add ec2's items to ec1, then
+		 * Case 2: need to merge ec1 and ec2.  This should never happen after
-		 * set ec2's ec_merged link to point to ec1 and remove ec2 from the
+		 * we've built any canonical pathkeys; if it did, those pathkeys might
-		 * eq_classes list.  We cannot simply delete ec2 because that could
+		 * be rendered non-canonical by the merge.
-		 * leave dangling pointers in existing PathKeys.  We leave it behind
+		 */
-		 * with a link so that the merged EC can be found.
+		if (root->canon_pathkeys != NIL)
+			elog(ERROR, "too late to merge equivalence classes");
+		/*
+		 * We add ec2's items to ec1, then set ec2's ec_merged link to point
+		 * to ec1 and remove ec2 from the eq_classes list.  We cannot simply
+		 * delete ec2 because that could leave dangling pointers in existing
+		 * PathKeys.  We leave it behind with a link so that the merged EC can
+		 * be found.
 		 */
 		ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
 		ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -48,6 +48,7 @@
 static bool find_minmax_aggs_walker(Node *node, List **context);
 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 				  Oid eqop, Oid sortop, bool nulls_first);
+static void minmax_qp_callback(PlannerInfo *root, void *extra);
 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo);
 static Node *replace_aggs_with_params_mutator(Node *node, PlannerInfo *root);
 static Oid	fetch_agg_sort_op(Oid aggfnoid);
@@ -445,26 +446,12 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 										   Int64GetDatum(1), false,
 										   FLOAT8PASSBYVAL);
-	/*
-	 * Set up requested pathkeys.
-	 */
-	subroot->group_pathkeys = NIL;
-	subroot->window_pathkeys = NIL;
-	subroot->distinct_pathkeys = NIL;
-	subroot->sort_pathkeys =
-		make_pathkeys_for_sortclauses(subroot,
-									  parse->sortClause,
-									  parse->targetList,
-									  false);
-	subroot->query_pathkeys = subroot->sort_pathkeys;
 	/*
 	 * Generate the best paths for this query, telling query_planner that we
 	 * have LIMIT 1.
 	 */
 	query_planner(subroot, parse->targetList, 1.0, 1.0,
+				  minmax_qp_callback, NULL,
 				  &cheapest_path, &sorted_path, &dNumGroups);
 	/*
@@ -505,6 +492,24 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 	return true;
 }
+/*
+ * Compute query_pathkeys and other pathkeys during plan generation
+ */
+static void
+minmax_qp_callback(PlannerInfo *root, void *extra)
+{
+	root->group_pathkeys = NIL;
+	root->window_pathkeys = NIL;
+	root->distinct_pathkeys = NIL;
+	root->sort_pathkeys =
+		make_pathkeys_for_sortclauses(root,
+									  root->parse->sortClause,
+									  root->parse->targetList);
+	root->query_pathkeys = root->sort_pathkeys;
+}
 /*
 * Construct a suitable plan for a converted aggregate query
 */

--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -30,10 +30,6 @@
 #include "utils/selfuncs.h"
-/* Local functions */
-static void canonicalize_all_pathkeys(PlannerInfo *root);
 /*
 * query_planner
 *	  Generate a path (that is, a simplified plan) for a basic query,
@@ -55,6 +51,8 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
 * tuple_fraction is the fraction of tuples we expect will be retrieved
 * limit_tuples is a hard limit on number of tuples to retrieve,
 *		or -1 if no limit
+ * qp_callback is a function to compute query_pathkeys once it's safe to do so
+ * qp_extra is optional extra data to pass to qp_callback
 *
 * Output parameters:
 * *cheapest_path receives the overall-cheapest path for the query
@@ -63,18 +61,11 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
 * *num_groups receives the estimated number of groups, or 1 if query
 *				does not use grouping
 *
- * Note: the PlannerInfo node also includes a query_pathkeys field, which is
+ * Note: the PlannerInfo node also includes a query_pathkeys field, which
- * both an input and an output of query_planner().	The input value signals
+ * tells query_planner the sort order that is desired in the final output
- * query_planner that the indicated sort order is wanted in the final output
+ * plan.  This value is *not* available at call time, but is computed by
- * plan.  But this value has not yet been "canonicalized", since the needed
+ * qp_callback once we have completed merging the query's equivalence classes.
- * info does not get computed until we scan the qual clauses.  We canonicalize
+ * (We cannot construct canonical pathkeys until that's done.)
- * it as soon as that task is done.  (The main reason query_pathkeys is a
- * PlannerInfo field and not a passed parameter is that the low-level routines
- * in indxpath.c need to see it.)
- *
- * Note: the PlannerInfo node includes other pathkeys fields besides
- * query_pathkeys, all of which need to be canonicalized once the info is
- * available.  See canonicalize_all_pathkeys.
 *
 * tuple_fraction is interpreted as follows:
 *	  0: expect all tuples to be retrieved (normal case)
@@ -89,6 +80,7 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
 void
 query_planner(PlannerInfo *root, List *tlist,
 			  double tuple_fraction, double limit_tuples,
+			  query_pathkeys_callback qp_callback, void *qp_extra,
 			  Path **cheapest_path, Path **sorted_path,
 			  double *num_groups)
 {
@@ -118,11 +110,11 @@ query_planner(PlannerInfo *root, List *tlist,
 		*sorted_path = NULL;
 		/*
-		 * We still are required to canonicalize any pathkeys, in case it's
+		 * We still are required to call qp_callback, in case it's something
-		 * something like "SELECT 2+2 ORDER BY 1".
+		 * like "SELECT 2+2 ORDER BY 1".
 		 */
 		root->canon_pathkeys = NIL;
-		canonicalize_all_pathkeys(root);
+		(*qp_callback) (root, qp_extra);
 		return;
 	}
@@ -205,10 +197,10 @@ query_planner(PlannerInfo *root, List *tlist,
 	/*
 	 * We have completed merging equivalence sets, so it's now possible to
-	 * convert previously generated pathkeys (in particular, the requested
+	 * generate pathkeys in canonical form; so compute query_pathkeys and
-	 * query_pathkeys) to canonical form.
+	 * other pathkeys fields in PlannerInfo.
 	 */
-	canonicalize_all_pathkeys(root);
+	(*qp_callback) (root, qp_extra);
 	/*
 	 * Examine any "placeholder" expressions generated during subquery pullup.
@@ -429,19 +421,3 @@ query_planner(PlannerInfo *root, List *tlist,
 	*cheapest_path = cheapestpath;
 	*sorted_path = sortedpath;
 }
-/*
- * canonicalize_all_pathkeys
- *		Canonicalize all pathkeys that were generated before entering
- *		query_planner and then stashed in PlannerInfo.
- */
-static void
-canonicalize_all_pathkeys(PlannerInfo *root)
-{
-	root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
-	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
-	root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
-	root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
-	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
-}
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -206,7 +206,7 @@ typedef struct PlannerInfo
 	List	   *placeholder_list;		/* list of PlaceHolderInfos */
 	List	   *query_pathkeys; /* desired pathkeys for query_planner(), and
-								 * actual pathkeys afterwards */
+								 * actual pathkeys after planning */
 	List	   *group_pathkeys; /* groupClause pathkeys, if any */
 	List	   *window_pathkeys;	/* pathkeys of bottom window, if any */

--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -154,7 +154,6 @@ typedef enum
 	PATHKEYS_DIFFERENT			/* neither pathkey includes the other */
 } PathKeysComparison;
-extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys);
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
@@ -174,8 +173,7 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 					List *outer_pathkeys);
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
-							  List *tlist,
+							  List *tlist);
-							  bool canonicalize);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 								RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,

--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -21,11 +21,15 @@
 #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
 extern double cursor_tuple_fraction;
+/* query_planner callback to compute query_pathkeys */
+typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
 /*
 * prototypes for plan/planmain.c
 */
 extern void query_planner(PlannerInfo *root, List *tlist,
 			  double tuple_fraction, double limit_tuples,
+			  query_pathkeys_callback qp_callback, void *qp_extra,
 			  Path **cheapest_path, Path **sorted_path,
 			  double *num_groups);

--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2826,6 +2826,35 @@ select b.unique1 from
 (5 rows)
+explain (costs off)
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Nested Loop Left Join
+   Filter: ((COALESCE(tenk1.unique1, (-1)) + int8_tbl.q1) = 122)
+   ->  Seq Scan on int8_tbl
+   ->  Index Scan using tenk1_unique2 on tenk1
+         Index Cond: (int8_tbl.q2 = unique2)
+(5 rows)
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+ unique1 | q1  | fault 
+---------+-----+-------
+         | 123 |   122
+(1 row)
 --
 -- test handling of potential equivalence clauses above outer joins
 --

--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -749,6 +749,23 @@ select b.unique1 from
  right join int4_tbl i2 on i2.f1 = b.tenthous
  order by 1;
+explain (costs off)
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
 --
 -- test handling of potential equivalence clauses above outer joins
 --