Fix best_inner_indexscan to return both the cheapest-total-cost and

cheapest-startup-cost innerjoin indexscans, and make joinpath.c consider both of these (when different) as the inside of a nestloop join. The original design was based on the assumption that indexscan paths always have negligible startup cost, and so total cost is the only important figure of merit; an assumption that's obviously broken by bitmap indexscans. This oversight could lead to choosing poor plans in cases where fast-start behavior is more important than total cost, such as LIMIT and IN queries. 8.1-vintage brain fade exposed by an example from Chuck D.

Fix best_inner_indexscan to return both the cheapest-total-cost and
cheapest-startup-cost innerjoin indexscans, and make joinpath.c consider both of these (when different) as the inside of a nestloop join. The original design was based on the assumption that indexscan paths always have negligible startup cost, and so total cost is the only important figure of merit; an assumption that's obviously broken by bitmap indexscans. This oversight could lead to choosing poor plans in cases where fast-start behavior is more important than total cost, such as LIMIT and IN queries. 8.1-vintage brain fade exposed by an example from Chuck D.
d7153c5f · Tom Lane · 2415ad98 · d7153c5f · d7153c5f · d7153c5f
Commit d7153c5f authored May 22, 2007 by Tom Lane
5 changed files
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.306 2007/04/27 22:05:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.307 2007/05/22 01:40:33 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -1440,7 +1440,8 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
 	WRITE_NODE_TYPE("INNERINDEXSCANINFO");
 	WRITE_BITMAPSET_FIELD(other_relids);
 	WRITE_BOOL_FIELD(isouterjoin);
-	WRITE_NODE_FIELD(best_innerpath);
+	WRITE_NODE_FIELD(cheapest_startup_innerpath);
+	WRITE_NODE_FIELD(cheapest_total_innerpath);
 }
 static void

--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.221 2007/04/17 20:03:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.222 2007/05/22 01:40:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1599,26 +1599,26 @@ eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
 /*
 * best_inner_indexscan
- *	  Finds the best available inner indexscan for a nestloop join
+ *	  Finds the best available inner indexscans for a nestloop join
 *	  with the given rel on the inside and the given outer_rel outside.
- *	  May return NULL if there are no possible inner indexscans.
 *
- * We ignore ordering considerations (since a nestloop's inner scan's order
+ * *cheapest_startup gets the path with least startup cost
- * is uninteresting).  Also, we consider only total cost when deciding which
+ * *cheapest_total gets the path with least total cost (often the same path)
- * of two possible paths is better --- this assumes that all indexpaths have
+ * Both are set to NULL if there are no possible inner indexscans.
- * negligible startup cost.  (True today, but someday we might have to think
+ *
- * harder.)  Therefore, there is only one dimension of comparison and so it's
+ * We ignore ordering considerations, since a nestloop's inner scan's order
- * sufficient to return a single "best" path.
+ * is uninteresting.  Hence startup cost and total cost are the only figures
+ * of merit to consider.
 *
 * Note: create_index_paths() must have been run previously for this rel,
- * else the result will always be NULL.
+ * else the results will always be NULL.
 */
-Path *
+void
 best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
-					 RelOptInfo *outer_rel, JoinType jointype)
+					 RelOptInfo *outer_rel, JoinType jointype,
+					 Path **cheapest_startup, Path **cheapest_total)
 {
 	Relids		outer_relids;
-	Path	   *cheapest;
 	bool		isouterjoin;
 	List	   *clause_list;
 	List	   *indexpaths;
@@ -1627,6 +1627,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 	InnerIndexscanInfo *info;
 	MemoryContext oldcontext;
+	/* Initialize results for failure returns */
+	*cheapest_startup = *cheapest_total = NULL;
 	/*
 	 * Nestloop only supports inner, left, and IN joins.
 	 */
@@ -1641,14 +1644,14 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 			isouterjoin = true;
 			break;
 		default:
-			return NULL;
+			return;
 	}
 	/*
 	 * If there are no indexable joinclauses for this rel, exit quickly.
 	 */
 	if (bms_is_empty(rel->index_outer_relids))
-		return NULL;
+		return;
 	/*
 	 * Otherwise, we have to do path selection in the main planning context,
@@ -1668,17 +1671,17 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 	{
 		bms_free(outer_relids);
 		MemoryContextSwitchTo(oldcontext);
-		return NULL;
+		return;
 	}
 	/*
 	 * Look to see if we already computed the result for this set of relevant
 	 * outerrels.  (We include the isouterjoin status in the cache lookup key
 	 * for safety.	In practice I suspect this is not necessary because it
-	 * should always be the same for a given innerrel.)
+	 * should always be the same for a given combination of rels.)
 	 *
 	 * NOTE: because we cache on outer_relids rather than outer_rel->relids,
-	 * we will report the same path and hence path cost for joins with
+	 * we will report the same paths and hence path cost for joins with
 	 * different sets of irrelevant rels on the outside.  Now that cost_index
 	 * is sensitive to outer_rel->rows, this is not really right.  However the
 	 * error is probably not large.  Is it worth establishing a separate cache
@@ -1692,7 +1695,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 		{
 			bms_free(outer_relids);
 			MemoryContextSwitchTo(oldcontext);
-			return info->best_innerpath;
+			*cheapest_startup = info->cheapest_startup_innerpath;
+			*cheapest_total = info->cheapest_total_innerpath;
+			return;
 		}
 	}
@@ -1755,28 +1760,32 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 	}
 	/*
-	 * Now choose the cheapest member of indexpaths.
+	 * Now choose the cheapest members of indexpaths.
 	 */
-	cheapest = NULL;
+	if (indexpaths != NIL)
-	foreach(l, indexpaths)
+	{
+		*cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
+		for_each_cell(l, lnext(list_head(indexpaths)))
 		{
 			Path	   *path = (Path *) lfirst(l);
-		if (cheapest == NULL ||
+			if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
-			compare_path_costs(path, cheapest, TOTAL_COST) < 0)
+				*cheapest_startup = path;
-			cheapest = path;
+			if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
+				*cheapest_total = path;
+		}
 	}
-	/* Cache the result --- whether positive or negative */
+	/* Cache the results --- whether positive or negative */
 	info = makeNode(InnerIndexscanInfo);
 	info->other_relids = outer_relids;
 	info->isouterjoin = isouterjoin;
-	info->best_innerpath = cheapest;
+	info->cheapest_startup_innerpath = *cheapest_startup;
+	info->cheapest_total_innerpath = *cheapest_total;
 	rel->index_inner_paths = lcons(info, rel->index_inner_paths);
 	MemoryContextSwitchTo(oldcontext);
-	return cheapest;
 }
 /*

--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.112 2007/05/22 01:40:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -286,10 +286,11 @@ sort_inner_and_outer(PlannerInfo *root,
 *	  only outer paths that are already ordered well enough for merging).
 *
 * We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as four: one on the cheapest-total-cost
+ * In fact we may generate as many as five: one on the cheapest-total-cost
 * inner path, one on the same with materialization, one on the
- * cheapest-startup-cost inner path (if different),
+ * cheapest-startup-cost inner path (if different), one on the
- * and one on the best inner-indexscan path (if any).
+ * cheapest-total inner-indexscan path (if any), and one on the
+ * cheapest-startup inner-indexscan path (if different).
 *
 * We also consider mergejoins if mergejoin clauses are available.	We have
 * two ways to generate the inner path for a mergejoin: sort the cheapest
@@ -325,7 +326,8 @@ match_unsorted_outer(PlannerInfo *root,
 	Path	   *inner_cheapest_startup = innerrel->cheapest_startup_path;
 	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
 	Path	   *matpath = NULL;
-	Path	   *bestinnerjoin = NULL;
+	Path	   *index_cheapest_startup = NULL;
+	Path	   *index_cheapest_total = NULL;
 	ListCell   *l;
 	/*
@@ -383,17 +385,20 @@ match_unsorted_outer(PlannerInfo *root,
 				create_material_path(innerrel, inner_cheapest_total);
 		/*
-		 * Get the best innerjoin indexpath (if any) for this outer rel. It's
+		 * Get the best innerjoin indexpaths (if any) for this outer rel.
-		 * the same for all outer paths.
+		 * They're the same for all outer paths.
 		 */
 		if (innerrel->reloptkind != RELOPT_JOINREL)
 		{
 			if (IsA(inner_cheapest_total, AppendPath))
-				bestinnerjoin = best_appendrel_indexscan(root, innerrel,
+				index_cheapest_total = best_appendrel_indexscan(root,
-														 outerrel, jointype);
+																innerrel,
+																outerrel,
+																jointype);
 			else if (innerrel->rtekind == RTE_RELATION)
-				bestinnerjoin = best_inner_indexscan(root, innerrel,
+				best_inner_indexscan(root, innerrel, outerrel, jointype,
-													 outerrel, jointype);
+									 &index_cheapest_startup,
+									 &index_cheapest_total);
 		}
 	}
@@ -435,8 +440,8 @@ match_unsorted_outer(PlannerInfo *root,
 			 * Always consider a nestloop join with this outer and
 			 * cheapest-total-cost inner.  When appropriate, also consider
 			 * using the materialized form of the cheapest inner, the
-			 * cheapest-startup-cost inner path, and the best innerjoin
+			 * cheapest-startup-cost inner path, and the cheapest innerjoin
-			 * indexpath.
+			 * indexpaths.
 			 */
 			add_path(joinrel, (Path *)
 					 create_nestloop_path(root,
@@ -464,13 +469,23 @@ match_unsorted_outer(PlannerInfo *root,
 											  inner_cheapest_startup,
 											  restrictlist,
 											  merge_pathkeys));
-			if (bestinnerjoin != NULL)
+			if (index_cheapest_total != NULL)
 				add_path(joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
-											  bestinnerjoin,
+											  index_cheapest_total,
+											  restrictlist,
+											  merge_pathkeys));
+			if (index_cheapest_startup != NULL &&
+				index_cheapest_startup != index_cheapest_total)
+				add_path(joinrel, (Path *)
+						 create_nestloop_path(root,
+											  joinrel,
+											  jointype,
+											  outerpath,
+											  index_cheapest_startup,
 											  restrictlist,
 											  merge_pathkeys));
 		}
@@ -789,6 +804,9 @@ hash_inner_and_outer(PlannerInfo *root,
 *	  with the given append relation on the inside and the given outer_rel
 *	  outside.	Returns an AppendPath comprising the best inner scans, or
 *	  NULL if there are no possible inner indexscans.
+ *
+ * Note that we currently consider only cheapest-total-cost.  It's not
+ * very clear what cheapest-startup-cost might mean for an AppendPath.
 */
 static Path *
 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
@@ -804,7 +822,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
 		int			childRTindex;
 		RelOptInfo *childrel;
-		Path	   *bestinnerjoin;
+		Path	   *index_cheapest_startup;
+		Path	   *index_cheapest_total;
 		/* append_rel_list contains all append rels; ignore others */
 		if (appinfo->parent_relid != parentRTindex)
@@ -824,10 +843,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 			continue;			/* OK, we can ignore it */
 		/*
-		 * Get the best innerjoin indexpath (if any) for this child rel.
+		 * Get the best innerjoin indexpaths (if any) for this child rel.
 		 */
-		bestinnerjoin = best_inner_indexscan(root, childrel,
+		best_inner_indexscan(root, childrel, outer_rel, jointype,
-											 outer_rel, jointype);
+							 &index_cheapest_startup, &index_cheapest_total);
 		/*
 		 * If no luck on an indexpath for this rel, we'll still consider an
@@ -835,12 +854,12 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 		 * find at least one indexpath, else there's not going to be any
 		 * improvement over the base path for the appendrel.
 		 */
-		if (bestinnerjoin)
+		if (index_cheapest_total)
 			found_indexscan = true;
 		else
-			bestinnerjoin = childrel->cheapest_total_path;
+			index_cheapest_total = childrel->cheapest_total_path;
-		append_paths = lappend(append_paths, bestinnerjoin);
+		append_paths = lappend(append_paths, index_cheapest_total);
 	}
 	if (!found_indexscan)

--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.141 2007/04/21 21:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.142 2007/05/22 01:40:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -996,20 +996,20 @@ typedef struct MergeScanSelCache
 * relation includes all other relids appearing in those joinclauses.
 * The set of usable joinclauses, and thus the best inner indexscan,
 * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such path for every join.	To avoid lots of
+ * to recompute the best such paths for every join.  To avoid lots of
 * redundant computation, we cache the results of such searches.  For
 * each relation we compute the set of possible otherrelids (all relids
 * appearing in joinquals that could become indexquals for this table).
 * Two outer relations whose relids have the same intersection with this
 * set will have the same set of available joinclauses and thus the same
- * best inner indexscan for the inner relation.  By taking the intersection
+ * best inner indexscans for the inner relation.  By taking the intersection
 * before scanning the cache, we avoid recomputing when considering
 * join rels that differ only by the inclusion of irrelevant other rels.
 *
 * The search key also includes a bool showing whether the join being
 * considered is an outer join.  Since we constrain the join order for
 * outer joins, I believe that this bool can only have one possible value
- * for any particular base relation; but store it anyway to avoid confusion.
+ * for any particular lookup key; but store it anyway to avoid confusion.
 */
 typedef struct InnerIndexscanInfo
@@ -1018,8 +1018,9 @@ typedef struct InnerIndexscanInfo
 	/* The lookup key: */
 	Relids		other_relids;	/* a set of relevant other relids */
 	bool		isouterjoin;	/* true if join is outer */
-	/* Best path for this lookup key: */
+	/* Best paths for this lookup key (NULL if no available indexscans): */
-	Path	   *best_innerpath; /* best inner indexscan, or NULL if none */
+	Path	   *cheapest_startup_innerpath;	/* cheapest startup cost */
+	Path	   *cheapest_total_innerpath;	/* cheapest total cost */
 } InnerIndexscanInfo;
 /*

--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.97 2007/04/15 20:09:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.98 2007/05/22 01:40:33 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -45,8 +45,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
 extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 						 List *clauses, List *outer_clauses,
 						 RelOptInfo *outer_rel);
-extern Path *best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
+extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
-					 RelOptInfo *outer_rel, JoinType jointype);
+					 RelOptInfo *outer_rel, JoinType jointype,
+					 Path **cheapest_startup, Path **cheapest_total);
 extern List *group_clauses_by_indexkey(IndexOptInfo *index,
 						  List *clauses, List *outer_clauses,
 						  Relids outer_relids,