Use a hopefully-more-reliable method of detecting default selectivity…

…estimates when combining the estimates for a range query. As pointed out by Miquel van Smoorenburg, the existing check for an impossible combined result would quite possibly fail to detect one default and one non-default input. It seems better to use the default range query estimate in such cases. To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL. This is a bit ugly because it introduces additional coupling between clauselist_selectivity and scalarltsel/scalargtsel, but it's not like there wasn't plenty already...

Use a hopefully-more-reliable method of detecting default selectivity
estimates when combining the estimates for a range query. As pointed out by Miquel van Smoorenburg, the existing check for an impossible combined result would quite possibly fail to detect one default and one non-default input. It seems better to use the default range query estimate in such cases. To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL. This is a bit ugly because it introduces additional coupling between clauselist_selectivity and scalarltsel/scalargtsel, but it's not like there wasn't plenty already...
547bb4a7 · Tom Lane · e4387116 · 547bb4a7 · 547bb4a7 · 547bb4a7
Commit 547bb4a7 authored Nov 09, 2004 by Tom Lane
3 changed files
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 * interpreting it as a value.	Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
- * If the calculation yields zero or negative, however, we chicken out and
+ *
- * use a default estimate; that probably means that one or both
+ * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
- * selectivities is a default estimate rather than an actual range value.
+ * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
+ * yields an impossible (negative) result.
 *
 * A free side-effect is that we can recognize redundant inequalities such
 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
@@ -194,7 +195,21 @@ clauselist_selectivity(Query *root,
 		if (rqlist->have_lobound && rqlist->have_hibound)
 		{
 			/* Successfully matched a pair of range clauses */
-			Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
+			Selectivity s2;
+			/*
+			 * Exact equality to the default value probably means the
+			 * selectivity function punted.  This is not airtight but
+			 * should be good enough.
+			 */
+			if (rqlist->hibound == DEFAULT_INEQ_SEL ||
+				rqlist->lobound == DEFAULT_INEQ_SEL)
+			{
+				s2 = DEFAULT_RANGE_INEQ_SEL;
+			}
+			else
+			{
+				s2 = rqlist->hibound + rqlist->lobound - 1.0;
 				/* Adjust for double-exclusion of NULLs */
 				s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
@@ -205,8 +220,7 @@ clauselist_selectivity(Query *root,
 				 * tight range and got a bogus result due to roundoff errors.
 				 * However, if s2 is very negative, then we probably have
 				 * default selectivity estimates on one or both sides of the
-			 * range.  In that case, insert a not-so-wildly-optimistic
+				 * range that we failed to recognize above for some reason.
-			 * default estimate.
 				 */
 				if (s2 <= 0.0)
 				{
@@ -216,7 +230,7 @@ clauselist_selectivity(Query *root,
 						 * No data available --- use a default estimate that
 						 * is small, but not real small.
 						 */
-					s2 = 0.005;
+						s2 = DEFAULT_RANGE_INEQ_SEL;
 					}
 					else
 					{
@@ -227,6 +241,7 @@ clauselist_selectivity(Query *root,
 						s2 = 1.0e-10;
 					}
 				}
+			}
 			/* Merge in the selectivity of the pair of clauses */
 			s1 *= s2;
 		}

--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -111,45 +111,6 @@
 #include "utils/syscache.h"
-/*
- * Note: the default selectivity estimates are not chosen entirely at random.
- * We want them to be small enough to ensure that indexscans will be used if
- * available, for typical table densities of ~100 tuples/page.	Thus, for
- * example, 0.01 is not quite small enough, since that makes it appear that
- * nearly all pages will be hit anyway.  Also, since we sometimes estimate
- * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
- * 1/DEFAULT_EQ_SEL.
- */
-/* default selectivity estimate for equalities such as "A = b" */
-#define DEFAULT_EQ_SEL	0.005
-/* default selectivity estimate for inequalities such as "A < b" */
-#define DEFAULT_INEQ_SEL  (1.0 / 3.0)
-/* default selectivity estimate for pattern-match operators such as LIKE */
-#define DEFAULT_MATCH_SEL	0.005
-/* default number of distinct values in a table */
-#define DEFAULT_NUM_DISTINCT  200
-/* default selectivity estimate for boolean and null test nodes */
-#define DEFAULT_UNK_SEL			0.005
-#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
-/*
- * Clamp a computed probability estimate (which may suffer from roundoff or
- * estimation errors) to valid range.  Argument must be a float variable.
- */
-#define CLAMP_PROBABILITY(p) \
-	do { \
-		if (p < 0.0) \
-			p = 0.0; \
-		else if (p > 1.0) \
-			p = 1.0; \
-	} while (0)
 /* Return data from examine_variable and friends */
 typedef struct
 {

--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -19,6 +19,49 @@
 #include "nodes/parsenodes.h"
+/*
+ * Note: the default selectivity estimates are not chosen entirely at random.
+ * We want them to be small enough to ensure that indexscans will be used if
+ * available, for typical table densities of ~100 tuples/page.	Thus, for
+ * example, 0.01 is not quite small enough, since that makes it appear that
+ * nearly all pages will be hit anyway.  Also, since we sometimes estimate
+ * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
+ * 1/DEFAULT_EQ_SEL.
+ */
+/* default selectivity estimate for equalities such as "A = b" */
+#define DEFAULT_EQ_SEL	0.005
+/* default selectivity estimate for inequalities such as "A < b" */
+#define DEFAULT_INEQ_SEL  0.3333333333333333
+/* default selectivity estimate for range inequalities "A > b AND A < c" */
+#define DEFAULT_RANGE_INEQ_SEL  0.005
+/* default selectivity estimate for pattern-match operators such as LIKE */
+#define DEFAULT_MATCH_SEL	0.005
+/* default number of distinct values in a table */
+#define DEFAULT_NUM_DISTINCT  200
+/* default selectivity estimate for boolean and null test nodes */
+#define DEFAULT_UNK_SEL			0.005
+#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
+/*
+ * Clamp a computed probability estimate (which may suffer from roundoff or
+ * estimation errors) to valid range.  Argument must be a float variable.
+ */
+#define CLAMP_PROBABILITY(p) \
+	do { \
+		if (p < 0.0) \
+			p = 0.0; \
+		else if (p > 1.0) \
+			p = 1.0; \
+	} while (0)
 typedef enum
 {
 	Pattern_Type_Like, Pattern_Type_Like_IC,