Commit 547bb4a7 authored by Tom Lane

Use a hopefully-more-reliable method of detecting default selectivity
estimates when combining the estimates for a range query.  As pointed out
by Miquel van Smoorenburg, the existing check for an impossible combined
result would quite possibly fail to detect one default and one non-default
input.  It seems better to use the default range query estimate in such
cases.  To do so, add a check for an estimate of exactly DEFAULT_INEQ_SEL.
This is a bit ugly because it introduces additional coupling between
clauselist_selectivity and scalarltsel/scalargtsel, but it's not like
there wasn't plenty already...
parent e4387116
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.70 2004/08/29 05:06:43 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.71 2004/11/09 00:34:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, ...@@ -80,9 +80,10 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
* interpreting it as a value. Then the available range is 1-losel to hisel. * interpreting it as a value. Then the available range is 1-losel to hisel.
* However, this calculation double-excludes nulls, so really we need * However, this calculation double-excludes nulls, so really we need
* hisel + losel + null_frac - 1.) * hisel + losel + null_frac - 1.)
* If the calculation yields zero or negative, however, we chicken out and *
* use a default estimate; that probably means that one or both * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
* selectivities is a default estimate rather than an actual range value. * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
* yields an impossible (negative) result.
* *
* A free side-effect is that we can recognize redundant inequalities such * A free side-effect is that we can recognize redundant inequalities such
* as "x < 4 AND x < 5"; only the tighter constraint will be counted. * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
...@@ -194,7 +195,21 @@ clauselist_selectivity(Query *root, ...@@ -194,7 +195,21 @@ clauselist_selectivity(Query *root,
if (rqlist->have_lobound && rqlist->have_hibound) if (rqlist->have_lobound && rqlist->have_hibound)
{ {
/* Successfully matched a pair of range clauses */ /* Successfully matched a pair of range clauses */
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; Selectivity s2;
/*
* Exact equality to the default value probably means the
* selectivity function punted. This is not airtight but
* should be good enough.
*/
if (rqlist->hibound == DEFAULT_INEQ_SEL ||
rqlist->lobound == DEFAULT_INEQ_SEL)
{
s2 = DEFAULT_RANGE_INEQ_SEL;
}
else
{
s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */ /* Adjust for double-exclusion of NULLs */
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid); s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
...@@ -205,8 +220,7 @@ clauselist_selectivity(Query *root, ...@@ -205,8 +220,7 @@ clauselist_selectivity(Query *root,
* tight range and got a bogus result due to roundoff errors. * tight range and got a bogus result due to roundoff errors.
* However, if s2 is very negative, then we probably have * However, if s2 is very negative, then we probably have
* default selectivity estimates on one or both sides of the * default selectivity estimates on one or both sides of the
* range. In that case, insert a not-so-wildly-optimistic * range that we failed to recognize above for some reason.
* default estimate.
*/ */
if (s2 <= 0.0) if (s2 <= 0.0)
{ {
...@@ -216,7 +230,7 @@ clauselist_selectivity(Query *root, ...@@ -216,7 +230,7 @@ clauselist_selectivity(Query *root,
* No data available --- use a default estimate that * No data available --- use a default estimate that
* is small, but not real small. * is small, but not real small.
*/ */
s2 = 0.005; s2 = DEFAULT_RANGE_INEQ_SEL;
} }
else else
{ {
...@@ -227,6 +241,7 @@ clauselist_selectivity(Query *root, ...@@ -227,6 +241,7 @@ clauselist_selectivity(Query *root,
s2 = 1.0e-10; s2 = 1.0e-10;
} }
} }
}
/* Merge in the selectivity of the pair of clauses */ /* Merge in the selectivity of the pair of clauses */
s1 *= s2; s1 *= s2;
} }
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -111,45 +111,6 @@ ...@@ -111,45 +111,6 @@
#include "utils/syscache.h" #include "utils/syscache.h"
/*
* Note: the default selectivity estimates are not chosen entirely at random.
* We want them to be small enough to ensure that indexscans will be used if
* available, for typical table densities of ~100 tuples/page. Thus, for
* example, 0.01 is not quite small enough, since that makes it appear that
* nearly all pages will be hit anyway. Also, since we sometimes estimate
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
* 1/DEFAULT_EQ_SEL.
*/
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL 0.005
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/*
* Clamp a computed probability estimate (which may suffer from roundoff or
* estimation errors) to valid range. Argument must be a float variable.
*/
#define CLAMP_PROBABILITY(p) \
do { \
if (p < 0.0) \
p = 0.0; \
else if (p > 1.0) \
p = 1.0; \
} while (0)
/* Return data from examine_variable and friends */ /* Return data from examine_variable and friends */
typedef struct typedef struct
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.19 2004/08/29 05:06:59 momjian Exp $ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.20 2004/11/09 00:34:46 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,49 @@ ...@@ -19,6 +19,49 @@
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
/*
* Note: the default selectivity estimates are not chosen entirely at random.
* We want them to be small enough to ensure that indexscans will be used if
* available, for typical table densities of ~100 tuples/page. Thus, for
* example, 0.01 is not quite small enough, since that makes it appear that
* nearly all pages will be hit anyway. Also, since we sometimes estimate
* eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
* 1/DEFAULT_EQ_SEL.
*/
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL 0.005
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL 0.3333333333333333
/* default selectivity estimate for range inequalities "A > b AND A < c" */
#define DEFAULT_RANGE_INEQ_SEL 0.005
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
/*
* Clamp a computed probability estimate (which may suffer from roundoff or
* estimation errors) to valid range. Argument must be a float variable.
*/
#define CLAMP_PROBABILITY(p) \
do { \
if (p < 0.0) \
p = 0.0; \
else if (p > 1.0) \
p = 1.0; \
} while (0)
typedef enum typedef enum
{ {
Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Like, Pattern_Type_Like_IC,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment