Commit 8624ff76 authored by Tom Lane's avatar Tom Lane

Further selectivity-estimation work. Speed up eqsel()

(it should just call the given operator, not look up an = operator).
Fix intltsel() so that all numeric data types are converted to double
before trying to estimate where the given comparison value is in the
known range of column values.  intltsel() still needs work, or replacement,
for non-numeric data types ... but for nonintegral numeric types it
should now be delivering reasonable estimates.
parent d35eebe7
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.36 1999/08/01 04:54:22 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.37 1999/08/02 02:05:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,8 +20,10 @@ ...@@ -20,8 +20,10 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "catalog/catname.h" #include "catalog/catname.h"
#include "catalog/pg_operator.h" #include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic.h" #include "catalog/pg_statistic.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h" #include "parser/parse_oper.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
...@@ -36,6 +38,8 @@ ...@@ -36,6 +38,8 @@
/* default selectivity estimate for inequalities such as "A < b" */ /* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL (1.0 / 3.0) #define DEFAULT_INEQ_SEL (1.0 / 3.0)
static bool convert_to_scale(Datum value, Oid typid,
double *scaleval);
static void getattproperties(Oid relid, AttrNumber attnum, static void getattproperties(Oid relid, AttrNumber attnum,
Oid *typid, Oid *typid,
int *typlen, int *typlen,
...@@ -53,6 +57,12 @@ static double getattdisbursion(Oid relid, AttrNumber attnum); ...@@ -53,6 +57,12 @@ static double getattdisbursion(Oid relid, AttrNumber attnum);
/* /*
* eqsel - Selectivity of "=" for any data types. * eqsel - Selectivity of "=" for any data types.
*
* Note: this routine is also used to estimate selectivity for some
* operators that are not "=" but have comparable selectivity behavior,
* such as "~~" (text LIKE). Even for "=" we must keep in mind that
* the left and right datatypes may differ, so the type of the given
* constant "value" may be different from the type of the attribute.
*/ */
float64 float64
eqsel(Oid opid, eqsel(Oid opid,
...@@ -81,45 +91,38 @@ eqsel(Oid opid, ...@@ -81,45 +91,38 @@ eqsel(Oid opid,
getattproperties(relid, attno, getattproperties(relid, attno,
&typid, &typlen, &typbyval, &typmod); &typid, &typlen, &typbyval, &typmod);
/* get stats for the attribute, if available */
if (getattstatistics(relid, attno, typid, typmod, if (getattstatistics(relid, attno, typid, typmod,
&nullfrac, &commonfrac, &commonval, &nullfrac, &commonfrac, &commonval,
NULL, NULL)) NULL, NULL))
{ {
if (flag & SEL_CONSTANT) if (flag & SEL_CONSTANT)
{ {
/* Is the constant the same as the most common value? */ /* Is the constant "=" to the column's most common value?
HeapTuple oprtuple; * (Although the operator may not really be "=",
Oid ltype, * we will assume that seeing whether it returns TRUE
rtype; * for the most common value is useful information.
Operator func_operator; * If you don't like it, maybe you shouldn't be using
bool mostcommon = false; * eqsel for your operator...)
/* get left and right datatypes of the operator */
oprtuple = get_operator_tuple(opid);
if (! HeapTupleIsValid(oprtuple))
elog(ERROR, "eqsel: no tuple for operator %u", opid);
ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
/* and find appropriate equality operator (no, it ain't
* necessarily opid itself...)
*/ */
func_operator = oper("=", ltype, rtype, true); RegProcedure eqproc = get_opcode(opid);
bool mostcommon;
if (func_operator != NULL) if (eqproc == (RegProcedure) NULL)
{ elog(ERROR, "eqsel: no procedure for operator %u",
RegProcedure eqproc = ((Form_pg_operator) GETSTRUCT(func_operator))->oprcode; opid);
if (flag & SEL_RIGHT) /* given value on the right? */
mostcommon = (bool) /* be careful to apply operator right way 'round */
DatumGetUInt8(fmgr(eqproc, commonval, value)); if (flag & SEL_RIGHT)
else mostcommon = (bool)
mostcommon = (bool) DatumGetUInt8(fmgr(eqproc, commonval, value));
DatumGetUInt8(fmgr(eqproc, value, commonval)); else
} mostcommon = (bool)
DatumGetUInt8(fmgr(eqproc, value, commonval));
if (mostcommon) if (mostcommon)
{ {
/* Search is for the most common value. We know the /* Constant is "=" to the most common value. We know
* selectivity exactly (or as exactly as VACUUM could * selectivity exactly (or as exactly as VACUUM could
* calculate it, anyway). * calculate it, anyway).
*/ */
...@@ -179,6 +182,10 @@ eqsel(Oid opid, ...@@ -179,6 +182,10 @@ eqsel(Oid opid,
/* /*
* neqsel - Selectivity of "!=" for any data types. * neqsel - Selectivity of "!=" for any data types.
*
* This routine is also used for some operators that are not "!="
* but have comparable selectivity behavior. See above comments
* for eqsel().
*/ */
float64 float64
neqsel(Oid opid, neqsel(Oid opid,
...@@ -196,7 +203,11 @@ neqsel(Oid opid, ...@@ -196,7 +203,11 @@ neqsel(Oid opid,
/* /*
* intltsel - Selectivity of "<" (also "<=") for integers. * intltsel - Selectivity of "<" (also "<=") for integers.
* Should work for both longs and shorts. *
* Actually, this works and is used for all numeric types, so it should
* be renamed. In fact, it is also currently called for all manner of
* non-numeric types, for which it is NOT very helpful. That needs
* to be fixed.
*/ */
float64 float64
intltsel(Oid opid, intltsel(Oid opid,
...@@ -221,53 +232,37 @@ intltsel(Oid opid, ...@@ -221,53 +232,37 @@ intltsel(Oid opid,
int32 typmod; int32 typmod;
Datum hival, Datum hival,
loval; loval;
long val, double val,
high, high,
low, low,
numerator, numerator,
denominator; denominator;
/* get left and right datatypes of the operator */ /* Get left and right datatypes of the operator so we know
* what type the constant is.
*/
oprtuple = get_operator_tuple(opid); oprtuple = get_operator_tuple(opid);
if (! HeapTupleIsValid(oprtuple)) if (! HeapTupleIsValid(oprtuple))
elog(ERROR, "intltsel: no tuple for operator %u", opid); elog(ERROR, "intltsel: no tuple for operator %u", opid);
ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft; ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright; rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
/* /* Convert the constant to a uniform comparison scale. */
* TEMPORARY HACK: this code is currently getting called for if (! convert_to_scale(value,
* a bunch of non-integral types. Give a default estimate if ((flag & SEL_RIGHT) ? rtype : ltype),
* either side is not pass-by-val. Need better solution. &val))
*/
if (! get_typbyval(ltype) || ! get_typbyval(rtype))
{ {
/* Ideally we'd produce an error here, on the grounds that
* the given operator shouldn't have intltsel registered as its
* selectivity func unless we can deal with its operand types.
* But currently, all manner of stuff is invoking intltsel,
* so give a default estimate until that can be fixed.
*/
*result = DEFAULT_INEQ_SEL; *result = DEFAULT_INEQ_SEL;
return result; return result;
} }
/* Deduce type of the constant, and convert to uniform "long" format. /* Now get info and stats about the attribute */
* Note that constant might well be a different type than attribute.
* XXX this ought to use a type-specific "convert to double" op.
*/
typid = (flag & SEL_RIGHT) ? rtype : ltype;
switch (get_typlen(typid))
{
case 1:
val = (long) DatumGetUInt8(value);
break;
case 2:
val = (long) DatumGetInt16(value);
break;
case 4:
val = (long) DatumGetInt32(value);
break;
default:
elog(ERROR, "intltsel: unsupported type %u", typid);
*result = DEFAULT_INEQ_SEL;
return result;
}
/* Now get info about the attribute */
getattproperties(relid, attno, getattproperties(relid, attno,
&typid, &typlen, &typbyval, &typmod); &typid, &typlen, &typbyval, &typmod);
...@@ -275,60 +270,61 @@ intltsel(Oid opid, ...@@ -275,60 +270,61 @@ intltsel(Oid opid,
NULL, NULL, NULL, NULL, NULL, NULL,
&loval, &hival)) &loval, &hival))
{ {
/* no stats available, so default result */
*result = DEFAULT_INEQ_SEL; *result = DEFAULT_INEQ_SEL;
return result; return result;
} }
/*
* Convert loval/hival to common "long int" representation. /* Convert the attribute's loval/hival to common scale. */
*/ if (! convert_to_scale(loval, typid, &low) ||
switch (typlen) ! convert_to_scale(hival, typid, &high))
{
/* See above comments... */
if (! typbyval)
{
pfree(DatumGetPointer(hival));
pfree(DatumGetPointer(loval));
}
*result = DEFAULT_INEQ_SEL;
return result;
}
/* release temp storage if needed */
if (! typbyval)
{ {
case 1: pfree(DatumGetPointer(hival));
low = (long) DatumGetUInt8(loval); pfree(DatumGetPointer(loval));
high = (long) DatumGetUInt8(hival);
break;
case 2:
low = (long) DatumGetInt16(loval);
high = (long) DatumGetInt16(hival);
break;
case 4:
low = (long) DatumGetInt32(loval);
high = (long) DatumGetInt32(hival);
break;
default:
elog(ERROR, "intltsel: unsupported type %u", typid);
*result = DEFAULT_INEQ_SEL;
return result;
} }
if (val < low || val > high)
if (high <= low)
{ {
/* If given value is outside the statistical range, /* If we trusted the stats fully, we could return a small or
* assume we have out-of-date stats and return a default guess. * large selec depending on which side of the single data point
* We could return a small or large value if we trusted the stats * the constant is on. But it seems better to assume that the
* more. XXX change this eventually. * stats are out of date and return a default...
*/ */
*result = DEFAULT_INEQ_SEL; *result = DEFAULT_INEQ_SEL;
}
else if (val <= low || val >= high)
{
/* If given value is outside the statistical range, return a
* small or large value; but not 0.0/1.0 since there is a chance
* the stats are out of date.
*/
if (flag & SEL_RIGHT)
*result = (val <= low) ? 0.01 : 0.99;
else
*result = (val <= low) ? 0.99 : 0.01;
} }
else else
{ {
denominator = high - low; denominator = high - low;
if (denominator <= 0)
denominator = 1;
if (flag & SEL_RIGHT) if (flag & SEL_RIGHT)
numerator = val - low; numerator = val - low;
else else
numerator = high - val; numerator = high - val;
if (numerator <= 0) /* never return a zero estimate! */ *result = numerator / denominator;
numerator = 1;
if (numerator >= denominator)
*result = 1.0;
else
*result = (double) numerator / (double) denominator;
}
if (! typbyval)
{
pfree(DatumGetPointer(hival));
pfree(DatumGetPointer(loval));
} }
} }
return result; return result;
...@@ -336,7 +332,8 @@ intltsel(Oid opid, ...@@ -336,7 +332,8 @@ intltsel(Oid opid,
/* /*
* intgtsel - Selectivity of ">" (also ">=") for integers. * intgtsel - Selectivity of ">" (also ">=") for integers.
* Should work for both longs and shorts. *
* See above comments for intltsel.
*/ */
float64 float64
intgtsel(Oid opid, intgtsel(Oid opid,
...@@ -439,6 +436,77 @@ intgtjoinsel(Oid opid, ...@@ -439,6 +436,77 @@ intgtjoinsel(Oid opid,
return result; return result;
} }
/*
* convert_to_scale
* Convert a given value of the indicated type to the comparison
* scale needed by intltsel(). Returns "true" if successful.
*
* All numeric datatypes are simply converted to their equivalent
* "double" values.
* Future extension: convert string-like types to some suitable scale.
*/
static bool
convert_to_scale(Datum value, Oid typid,
double *scaleval)
{
/* Fast-path conversions for some built-in types */
switch (typid)
{
case BOOLOID:
*scaleval = (double) DatumGetUInt8(value);
return true;
case INT2OID:
*scaleval = (double) DatumGetInt16(value);
return true;
case INT4OID:
*scaleval = (double) DatumGetInt32(value);
return true;
// case INT8OID:
case FLOAT4OID:
*scaleval = (double) (* DatumGetFloat32(value));
return true;
case FLOAT8OID:
*scaleval = (double) (* DatumGetFloat64(value));
return true;
// case NUMERICOID:
case OIDOID:
case REGPROCOID:
/* we can treat OIDs as integers... */
*scaleval = (double) DatumGetObjectId(value);
return true;
default:
{
/* See whether there is a registered type-conversion function,
* namely a procedure named "float8" with the right signature.
*/
Oid oid_array[MAXFARGS];
HeapTuple ftup;
MemSet(oid_array, 0, MAXFARGS * sizeof(Oid));
oid_array[0] = typid;
ftup = SearchSysCacheTuple(PRONAME,
PointerGetDatum("float8"),
Int32GetDatum(1),
PointerGetDatum(oid_array),
0);
if (HeapTupleIsValid(ftup) &&
((Form_pg_proc) GETSTRUCT(ftup))->prorettype == FLOAT8OID)
{
RegProcedure convertproc = (RegProcedure) ftup->t_data->t_oid;
Datum converted = (Datum) fmgr(convertproc, value);
*scaleval = (double) (* DatumGetFloat64(converted));
return true;
}
break;
}
}
/* Don't know how to convert */
return false;
}
/* /*
* getattproperties * getattproperties
* Retrieve pg_attribute properties for an attribute, * Retrieve pg_attribute properties for an attribute,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment