Commit 7d08ce28 authored by Tom Lane's avatar Tom Lane

Distinguish selectivity of < from <= and > from >=.

Historically, the selectivity functions have simply not distinguished
< from <=, or > from >=, arguing that the fraction of the population that
satisfies the "=" aspect can be considered to be vanishingly small, if the
comparison value isn't any of the most-common-values for the variable.
(If it is, the code path that executes the operator against each MCV will
take care of things properly.)  But that isn't really true unless we're
dealing with a continuum of variable values, and in practice we seldom are.
If "x = const" would estimate a nonzero number of rows for a given const
value, then it follows that we ought to estimate different numbers of rows
for "x < const" and "x <= const", even if the const is not one of the MCVs.
Handling this more honestly makes a significant difference in edge cases,
such as the estimate for a tight range (x BETWEEN y AND z where y and z
are close together).

Hence, split scalarltsel into scalarltsel/scalarlesel, and similarly
split scalargtsel into scalargtsel/scalargesel.  Adjust <= and >=
operator definitions to reference the new selectivity functions.
Improve the core ineq_histogram_selectivity() function to make a
correction for equality.  (Along the way, I learned quite a bit about
exactly why that function gives good answers, which I tried to memorialize
in improved comments.)

The corresponding join selectivity functions were, and remain, just stubs.
But I chose to split them similarly, to avoid confusion and to prevent the
need for doing this exercise again if someone ever makes them less stubby.

In passing, change ineq_histogram_selectivity's clamp for extreme
probability estimates so that it varies depending on the histogram
size, instead of being hardwired at 0.0001.  With the default histogram
size of 100 entries, you still get the old clamp value, but bigger
histograms should allow us to put more faith in edge values.

Tom Lane, reviewed by Aleksander Alekseev and Kuntal Ghosh

Discussion: https://postgr.es/m/12232.1499140410@sss.pgh.pa.us
parent 089880ba
......@@ -3,7 +3,8 @@
MODULES = citext
EXTENSION = citext
DATA = citext--1.4.sql citext--1.3--1.4.sql \
DATA = citext--1.4.sql citext--1.4--1.5.sql \
citext--1.3--1.4.sql \
citext--1.2--1.3.sql citext--1.1--1.2.sql \
citext--1.0--1.1.sql citext--unpackaged--1.0.sql
PGFILEDESC = "citext - case-insensitive character string data type"
......
/* contrib/citext/citext--1.4--1.5.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION citext UPDATE TO '1.5'" to load this file. \quit
ALTER OPERATOR <= (citext, citext) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel
);
ALTER OPERATOR >= (citext, citext) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel
);
# citext extension
comment = 'data type for case-insensitive character strings'
default_version = '1.4'
default_version = '1.5'
module_pathname = '$libdir/citext'
relocatable = true
......@@ -4,7 +4,8 @@ MODULE_big = cube
OBJS= cube.o cubeparse.o $(WIN32RES)
EXTENSION = cube
DATA = cube--1.2.sql cube--1.1--1.2.sql cube--1.0--1.1.sql \
DATA = cube--1.2.sql cube--1.2--1.3.sql \
cube--1.1--1.2.sql cube--1.0--1.1.sql \
cube--unpackaged--1.0.sql
PGFILEDESC = "cube - multidimensional cube data type"
......
/* contrib/cube/cube--1.2--1.3.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION cube UPDATE TO '1.3'" to load this file. \quit
ALTER OPERATOR <= (cube, cube) SET (
RESTRICT = scalarlesel, JOIN = scalarlejoinsel
);
ALTER OPERATOR >= (cube, cube) SET (
RESTRICT = scalargesel, JOIN = scalargejoinsel
);
# cube extension
comment = 'data type for multidimensional cubes'
default_version = '1.2'
default_version = '1.3'
module_pathname = '$libdir/cube'
relocatable = true
......@@ -5,7 +5,8 @@ OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \
$(WIN32RES)
EXTENSION = hstore
DATA = hstore--1.4.sql hstore--1.3--1.4.sql hstore--1.2--1.3.sql \
DATA = hstore--1.4.sql hstore--1.4--1.5.sql \
hstore--1.3--1.4.sql hstore--1.2--1.3.sql \
hstore--1.1--1.2.sql hstore--1.0--1.1.sql \
hstore--unpackaged--1.0.sql
PGFILEDESC = "hstore - key/value pair data type"
......
/* contrib/hstore/hstore--1.4--1.5.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION hstore UPDATE TO '1.5'" to load this file. \quit
ALTER OPERATOR #<=# (hstore, hstore) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel
);
ALTER OPERATOR #>=# (hstore, hstore) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel
);
# hstore extension
comment = 'data type for storing sets of (key, value) pairs'
default_version = '1.4'
default_version = '1.5'
module_pathname = '$libdir/hstore'
relocatable = true
......@@ -3,7 +3,8 @@
MODULES = isn
EXTENSION = isn
DATA = isn--1.1.sql isn--1.0--1.1.sql isn--unpackaged--1.0.sql
DATA = isn--1.1.sql isn--1.1--1.2.sql \
isn--1.0--1.1.sql isn--unpackaged--1.0.sql
PGFILEDESC = "isn - data types for international product numbering standards"
REGRESS = isn
......
/* contrib/isn/isn--1.1--1.2.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION isn UPDATE TO '1.2'" to load this file. \quit
ALTER OPERATOR <= (ean13, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, isbn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, isbn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn13, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn13, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, ismn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, ismn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn13, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn13, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, issn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, issn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, isbn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, isbn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, ismn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, ismn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, issn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, issn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ean13, upc) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ean13, upc) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn13, isbn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn13, isbn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn13, isbn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn13, isbn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn, isbn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn, isbn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn, isbn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn, isbn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (isbn, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (isbn, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn13, ismn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn13, ismn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn13, ismn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn13, ismn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn, ismn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn, ismn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn, ismn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn, ismn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (ismn, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (ismn, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn13, issn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn13, issn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn13, issn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn13, issn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn13, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn13, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn, issn) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn, issn) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn, issn13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn, issn13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (issn, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (issn, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (upc, upc) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (upc, upc) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
ALTER OPERATOR <= (upc, ean13) SET (
RESTRICT = scalarlesel,
JOIN = scalarlejoinsel);
ALTER OPERATOR >= (upc, ean13) SET (
RESTRICT = scalargesel,
JOIN = scalargejoinsel);
# isn extension
comment = 'data types for international product numbering standards'
default_version = '1.1'
default_version = '1.2'
module_pathname = '$libdir/isn'
relocatable = true
......@@ -801,8 +801,7 @@ CREATE OPERATOR &lt; (
It is important to specify the correct commutator and negator operators,
as well as suitable restriction and join selectivity
functions, otherwise the optimizer will be unable to make effective
use of the index. Note that the less-than, equal, and
greater-than cases should use different selectivity functions.
use of the index.
</para>
<para>
......
......@@ -242,20 +242,11 @@ column OP constant
<simplelist>
<member><function>eqsel</> for <literal>=</></member>
<member><function>neqsel</> for <literal>&lt;&gt;</></member>
<member><function>scalarltsel</> for <literal>&lt;</> or <literal>&lt;=</></member>
<member><function>scalargtsel</> for <literal>&gt;</> or <literal>&gt;=</></member>
<member><function>scalarltsel</> for <literal>&lt;</></member>
<member><function>scalarlesel</> for <literal>&lt;=</></member>
<member><function>scalargtsel</> for <literal>&gt;</></member>
<member><function>scalargesel</> for <literal>&gt;=</></member>
</simplelist>
It might seem a little odd that these are the categories, but they
make sense if you think about it. <literal>=</> will typically accept only
a small fraction of the rows in a table; <literal>&lt;&gt;</> will typically reject
only a small fraction. <literal>&lt;</> will accept a fraction that depends on
where the given constant falls in the range of values for that table
column (which, it just so happens, is information collected by
<command>ANALYZE</command> and made available to the selectivity estimator).
<literal>&lt;=</> will accept a slightly larger fraction than <literal>&lt;</> for the same
comparison constant, but they're close enough to not be worth
distinguishing, especially since we're not likely to do better than a
rough guess anyhow. Similar remarks apply to <literal>&gt;</> and <literal>&gt;=</>.
</para>
<para>
......@@ -267,10 +258,12 @@ column OP constant
</para>
<para>
You can use <function>scalarltsel</> and <function>scalargtsel</> for comparisons on data types that
have some sensible means of being converted into numeric scalars for
range comparisons. If possible, add the data type to those understood
by the function <function>convert_to_scalar()</function> in <filename>src/backend/utils/adt/selfuncs.c</filename>.
You can use <function>scalarltsel</>, <function>scalarlesel</>,
<function>scalargtsel</> and <function>scalargesel</> for comparisons on
data types that have some sensible means of being converted into numeric
scalars for range comparisons. If possible, add the data type to those
understood by the function <function>convert_to_scalar()</function> in
<filename>src/backend/utils/adt/selfuncs.c</filename>.
(Eventually, this function should be replaced by per-data-type functions
identified through a column of the <classname>pg_type</> system catalog; but that hasn't happened
yet.) If you do not do this, things will still work, but the optimizer's
......@@ -310,8 +303,10 @@ table1.column1 OP table2.column2
<simplelist>
<member><function>eqjoinsel</> for <literal>=</></member>
<member><function>neqjoinsel</> for <literal>&lt;&gt;</></member>
<member><function>scalarltjoinsel</> for <literal>&lt;</> or <literal>&lt;=</></member>
<member><function>scalargtjoinsel</> for <literal>&gt;</> or <literal>&gt;=</></member>
<member><function>scalarltjoinsel</> for <literal>&lt;</></member>
<member><function>scalarlejoinsel</> for <literal>&lt;=</></member>
<member><function>scalargtjoinsel</> for <literal>&gt;</></member>
<member><function>scalargejoinsel</> for <literal>&gt;=</></member>
<member><function>areajoinsel</> for 2D area-based comparisons</member>
<member><function>positionjoinsel</> for 2D position-based comparisons</member>
<member><function>contjoinsel</> for 2D containment-based comparisons</member>
......
......@@ -71,7 +71,7 @@ static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
*
* We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
* are recognized as possible range query components if they are restriction
* opclauses whose operators have scalarltsel() or scalargtsel() as their
* opclauses whose operators have scalarltsel or a related function as their
* restriction selectivity estimator. We pair up clauses of this form that
* refer to the same variable. An unpairable clause of this kind is simply
* multiplied into the selectivity product in the normal way. But when we
......@@ -92,8 +92,8 @@ static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
* A free side-effect is that we can recognize redundant inequalities such
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
*
* Of course this is all very dependent on the behavior of
* scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
* Of course this is all very dependent on the behavior of the inequality
* selectivity functions; perhaps some day we can generalize the approach.
*/
Selectivity
clauselist_selectivity(PlannerInfo *root,
......@@ -218,17 +218,19 @@ clauselist_selectivity(PlannerInfo *root,
if (ok)
{
/*
* If it's not a "<" or ">" operator, just merge the
* If it's not a "<"/"<="/">"/">=" operator, just merge the
* selectivity in generically. But if it's the right oprrest,
* add the clause to rqlist for later processing.
*/
switch (get_oprrest(expr->opno))
{
case F_SCALARLTSEL:
case F_SCALARLESEL:
addRangeClause(&rqlist, clause,
varonleft, true, s2);
break;
case F_SCALARGTSEL:
case F_SCALARGESEL:
addRangeClause(&rqlist, clause,
varonleft, false, s2);
break;
......@@ -368,7 +370,7 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
/*------
* We have found two similar clauses, such as
* x < y AND x < z.
* x < y AND x <= z.
* Keep only the more restrictive one.
*------
*/
......@@ -388,7 +390,7 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
/*------
* We have found two similar clauses, such as
* x > y AND x > z.
* x > y AND x >= z.
* Keep only the more restrictive one.
*------
*/
......
......@@ -957,8 +957,8 @@ convert_network_to_scalar(Datum value, Oid typid)
}
/*
* Can't get here unless someone tries to use scalarltsel/scalargtsel on
* an operator with one network and one non-network operand.
* Can't get here unless someone tries to use scalarineqsel() on an
* operator with one network and one non-network operand.
*/
elog(ERROR, "unsupported type: %u", typid);
return 0;
......
This diff is collapsed.
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201709081
#define CATALOG_VERSION_NO 201709131
#endif
This diff is collapsed.
......@@ -264,6 +264,15 @@ DESCR("join selectivity of < and related operators on scalar datatypes");
DATA(insert OID = 108 ( scalargtjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ scalargtjoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of > and related operators on scalar datatypes");
DATA(insert OID = 336 ( scalarlesel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ scalarlesel _null_ _null_ _null_ ));
DESCR("restriction selectivity of <= and related operators on scalar datatypes");
DATA(insert OID = 337 ( scalargesel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ scalargesel _null_ _null_ _null_ ));
DESCR("restriction selectivity of >= and related operators on scalar datatypes");
DATA(insert OID = 386 ( scalarlejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ scalarlejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of <= and related operators on scalar datatypes");
DATA(insert OID = 398 ( scalargejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ scalargejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of >= and related operators on scalar datatypes");
DATA(insert OID = 109 ( unknownin PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 705 "2275" _null_ _null_ _null_ _null_ _null_ unknownin _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 110 ( unknownout PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "705" _null_ _null_ _null_ _null_ _null_ unknownout _null_ _null_ _null_ ));
......
......@@ -174,7 +174,7 @@ CREATE OPERATOR < (
CREATE OPERATOR <= (
leftarg = complex, rightarg = complex, procedure = complex_abs_le,
commutator = >= , negator = > ,
restrict = scalarltsel, join = scalarltjoinsel
restrict = scalarlesel, join = scalarlejoinsel
);
CREATE OPERATOR = (
leftarg = complex, rightarg = complex, procedure = complex_abs_eq,
......@@ -186,7 +186,7 @@ CREATE OPERATOR = (
CREATE OPERATOR >= (
leftarg = complex, rightarg = complex, procedure = complex_abs_ge,
commutator = <= , negator = < ,
restrict = scalargtsel, join = scalargtjoinsel
restrict = scalargesel, join = scalargejoinsel
);
CREATE OPERATOR > (
leftarg = complex, rightarg = complex, procedure = complex_abs_gt,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment