Commit 40608e7f authored by Tom Lane

When estimating the selectivity of an inequality "column > constant" or
"column < constant", and the comparison value is in the first or last
histogram bin or outside the histogram entirely, try to fetch the actual
column min or max value using an index scan (if there is an index on the
column).  If successful, replace the lower or upper histogram bound with
that value before carrying on with the estimate.  This limits the
estimation error caused by moving min/max values when the comparison
value is close to the min or max.  Per a complaint from Josh Berkus.

It is tempting to consider using this mechanism for mergejoinscansel as well,
but that would inject index fetches into main-line join estimation, not just
endpoint cases.  I'm refraining from that until we can get a better handle
on the costs of doing this type of lookup.
parent 89a091ed
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.125 2010/01/02 16:57:41 momjian Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.126 2010/01/04 02:44:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1006,6 +1006,7 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) ...@@ -1006,6 +1006,7 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
if (get_attstatsslot(statsTuple, node->skewColType, node->skewColTypmod, if (get_attstatsslot(statsTuple, node->skewColType, node->skewColTypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
&numbers, &nnumbers)) &numbers, &nnumbers))
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.6 2010/01/02 16:57:53 momjian Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.7 2010/01/04 02:44:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -170,6 +170,7 @@ tsquerysel(VariableStatData *vardata, Datum constval) ...@@ -170,6 +170,7 @@ tsquerysel(VariableStatData *vardata, Datum constval)
if (get_attstatsslot(vardata->statsTuple, if (get_attstatsslot(vardata->statsTuple,
TEXTOID, -1, TEXTOID, -1,
STATISTIC_KIND_MCELEM, InvalidOid, STATISTIC_KIND_MCELEM, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
&numbers, &nnumbers)) &numbers, &nnumbers))
{ {
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.266 2010/01/02 16:57:55 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.267 2010/01/04 02:44:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -92,9 +92,11 @@ ...@@ -92,9 +92,11 @@
#include <math.h> #include <math.h>
#include "access/sysattr.h" #include "access/sysattr.h"
#include "catalog/index.h"
#include "catalog/pg_opfamily.h" #include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h" #include "catalog/pg_statistic.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
...@@ -118,6 +120,7 @@ ...@@ -118,6 +120,7 @@
#include "utils/pg_locale.h" #include "utils/pg_locale.h"
#include "utils/selfuncs.h" #include "utils/selfuncs.h"
#include "utils/syscache.h" #include "utils/syscache.h"
#include "utils/tqual.h"
/* Hooks for plugins to get control when we ask for stats */ /* Hooks for plugins to get control when we ask for stats */
...@@ -130,7 +133,8 @@ static double var_eq_const(VariableStatData *vardata, Oid operator, ...@@ -130,7 +133,8 @@ static double var_eq_const(VariableStatData *vardata, Oid operator,
static double var_eq_non_const(VariableStatData *vardata, Oid operator, static double var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other, Node *other,
bool varonleft); bool varonleft);
static double ineq_histogram_selectivity(VariableStatData *vardata, static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, FmgrInfo *opproc, bool isgt,
Datum constval, Oid consttype); Datum constval, Oid consttype);
static double eqjoinsel_inner(Oid operator, static double eqjoinsel_inner(Oid operator,
...@@ -161,7 +165,12 @@ static char *convert_string_datum(Datum value, Oid typid); ...@@ -161,7 +165,12 @@ static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max); Oid sortop, Datum *min, Datum *max);
static Selectivity prefix_selectivity(VariableStatData *vardata, static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Datum *min, Datum *max);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon); Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Datum string_to_datum(const char *str, Oid datatype); static Datum string_to_datum(const char *str, Oid datatype);
...@@ -266,6 +275,7 @@ var_eq_const(VariableStatData *vardata, Oid operator, ...@@ -266,6 +275,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
if (get_attstatsslot(vardata->statsTuple, if (get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
&numbers, &nnumbers)) &numbers, &nnumbers))
{ {
...@@ -405,6 +415,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator, ...@@ -405,6 +415,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
if (get_attstatsslot(vardata->statsTuple, if (get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
NULL, NULL, NULL, NULL,
&numbers, &nnumbers)) &numbers, &nnumbers))
{ {
...@@ -514,7 +525,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, ...@@ -514,7 +525,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
* If there is a histogram, determine which bin the constant falls in, and * If there is a histogram, determine which bin the constant falls in, and
* compute the resulting contribution to selectivity. * compute the resulting contribution to selectivity.
*/ */
hist_selec = ineq_histogram_selectivity(vardata, &opproc, isgt, hist_selec = ineq_histogram_selectivity(root, vardata, &opproc, isgt,
constval, consttype); constval, consttype);
/* /*
...@@ -524,7 +535,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, ...@@ -524,7 +535,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
*/ */
selec = 1.0 - stats->stanullfrac - sumcommon; selec = 1.0 - stats->stanullfrac - sumcommon;
if (hist_selec > 0.0) if (hist_selec >= 0.0)
selec *= hist_selec; selec *= hist_selec;
else else
{ {
...@@ -575,6 +586,7 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, ...@@ -575,6 +586,7 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
get_attstatsslot(vardata->statsTuple, get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
&numbers, &nnumbers)) &numbers, &nnumbers))
{ {
...@@ -648,6 +660,7 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, ...@@ -648,6 +660,7 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
get_attstatsslot(vardata->statsTuple, get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid, STATISTIC_KIND_HISTOGRAM, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
NULL, NULL)) NULL, NULL))
{ {
...@@ -689,23 +702,24 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, ...@@ -689,23 +702,24 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
* Determine the fraction of the variable's histogram population that * Determine the fraction of the variable's histogram population that
* satisfies the inequality condition, ie, VAR < CONST or VAR > CONST. * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST.
* *
* Returns zero if there is no histogram (valid results will always be * Returns -1 if there is no histogram (valid results will always be >= 0).
* greater than zero).
* *
* Note that the result disregards both the most-common-values (if any) and * Note that the result disregards both the most-common-values (if any) and
* null entries. The caller is expected to combine this result with * null entries. The caller is expected to combine this result with
* statistics for those portions of the column population. * statistics for those portions of the column population.
*/ */
static double static double
ineq_histogram_selectivity(VariableStatData *vardata, ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, FmgrInfo *opproc, bool isgt,
Datum constval, Oid consttype) Datum constval, Oid consttype)
{ {
double hist_selec; double hist_selec;
Oid hist_op;
Datum *values; Datum *values;
int nvalues; int nvalues;
hist_selec = 0.0; hist_selec = -1.0;
/* /*
* Someday, ANALYZE might store more than one histogram per rel/att, * Someday, ANALYZE might store more than one histogram per rel/att,
...@@ -721,6 +735,7 @@ ineq_histogram_selectivity(VariableStatData *vardata, ...@@ -721,6 +735,7 @@ ineq_histogram_selectivity(VariableStatData *vardata,
get_attstatsslot(vardata->statsTuple, get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid, STATISTIC_KIND_HISTOGRAM, InvalidOid,
&hist_op,
&values, &nvalues, &values, &nvalues,
NULL, NULL)) NULL, NULL))
{ {
...@@ -732,16 +747,56 @@ ineq_histogram_selectivity(VariableStatData *vardata, ...@@ -732,16 +747,56 @@ ineq_histogram_selectivity(VariableStatData *vardata,
* actually sort-compatible with the histogram, you'll get garbage * actually sort-compatible with the histogram, you'll get garbage
* results ... but probably not any more garbage-y than you would * results ... but probably not any more garbage-y than you would
* from the old linear search.) * from the old linear search.)
*
* If the binary search accesses the first or last histogram entry,
* we try to replace that endpoint with the true column min or max
* as found by get_actual_variable_range(). This ameliorates
* misestimates when the min or max is moving as a result of
* changes since the last ANALYZE. Note that this could result
* in effectively including MCVs into the histogram that weren't
* there before, but we don't try to correct for that.
*/ */
double histfrac; double histfrac;
int lobound = 0; /* first possible slot to search */ int lobound = 0; /* first possible slot to search */
int hibound = nvalues; /* last+1 slot to search */ int hibound = nvalues; /* last+1 slot to search */
bool have_end = false;
/*
* If there are only two histogram entries, we'll want up-to-date
* values for both. (If there are more than two, we need at most
* one of them to be updated, so we deal with that within the
* loop.)
*/
if (nvalues == 2)
have_end = get_actual_variable_range(root,
vardata,
hist_op,
&values[0],
&values[1]);
while (lobound < hibound) while (lobound < hibound)
{ {
int probe = (lobound + hibound) / 2; int probe = (lobound + hibound) / 2;
bool ltcmp; bool ltcmp;
/*
* If we find ourselves about to compare to the first or last
* histogram entry, first try to replace it with the actual
* current min or max (unless we already did so above).
*/
if (probe == 0 && nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
hist_op,
&values[0],
NULL);
else if (probe == nvalues - 1 && nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
hist_op,
NULL,
&values[probe]);
ltcmp = DatumGetBool(FunctionCall2(opproc, ltcmp = DatumGetBool(FunctionCall2(opproc,
values[probe], values[probe],
constval)); constval));
...@@ -772,7 +827,7 @@ ineq_histogram_selectivity(VariableStatData *vardata, ...@@ -772,7 +827,7 @@ ineq_histogram_selectivity(VariableStatData *vardata,
double binfrac; double binfrac;
/* /*
* We have values[i-1] < constant < values[i]. * We have values[i-1] <= constant <= values[i].
* *
* Convert the constant and the two nearest bin boundary * Convert the constant and the two nearest bin boundary
* values to a uniform comparison scale, and do a linear * values to a uniform comparison scale, and do a linear
...@@ -840,13 +895,19 @@ ineq_histogram_selectivity(VariableStatData *vardata, ...@@ -840,13 +895,19 @@ ineq_histogram_selectivity(VariableStatData *vardata,
/* /*
* The histogram boundaries are only approximate to begin with, * The histogram boundaries are only approximate to begin with,
* and may well be out of date anyway. Therefore, don't believe * and may well be out of date anyway. Therefore, don't believe
* extremely small or large selectivity estimates. * extremely small or large selectivity estimates --- unless we
* got actual current endpoint values from the table.
*/ */
if (have_end)
CLAMP_PROBABILITY(hist_selec);
else
{
if (hist_selec < 0.0001) if (hist_selec < 0.0001)
hist_selec = 0.0001; hist_selec = 0.0001;
else if (hist_selec > 0.9999) else if (hist_selec > 0.9999)
hist_selec = 0.9999; hist_selec = 0.9999;
} }
}
free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
} }
...@@ -1198,7 +1259,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) ...@@ -1198,7 +1259,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Selectivity restsel; Selectivity restsel;
if (pstatus == Pattern_Prefix_Partial) if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(&vardata, vartype, prefixsel = prefix_selectivity(root, &vardata, vartype,
opfamily, prefix); opfamily, prefix);
else else
prefixsel = 1.0; prefixsel = 1.0;
...@@ -1363,6 +1424,7 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, ...@@ -1363,6 +1424,7 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
if (get_attstatsslot(vardata.statsTuple, if (get_attstatsslot(vardata.statsTuple,
vardata.atttype, vardata.atttypmod, vardata.atttype, vardata.atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
&numbers, &nnumbers) &numbers, &nnumbers)
&& nnumbers > 0) && nnumbers > 0)
...@@ -1999,6 +2061,7 @@ eqjoinsel_inner(Oid operator, ...@@ -1999,6 +2061,7 @@ eqjoinsel_inner(Oid operator,
vardata1->atttypmod, vardata1->atttypmod,
STATISTIC_KIND_MCV, STATISTIC_KIND_MCV,
InvalidOid, InvalidOid,
NULL,
&values1, &nvalues1, &values1, &nvalues1,
&numbers1, &nnumbers1); &numbers1, &nnumbers1);
} }
...@@ -2011,6 +2074,7 @@ eqjoinsel_inner(Oid operator, ...@@ -2011,6 +2074,7 @@ eqjoinsel_inner(Oid operator,
vardata2->atttypmod, vardata2->atttypmod,
STATISTIC_KIND_MCV, STATISTIC_KIND_MCV,
InvalidOid, InvalidOid,
NULL,
&values2, &nvalues2, &values2, &nvalues2,
&numbers2, &nnumbers2); &numbers2, &nnumbers2);
} }
...@@ -2232,6 +2296,7 @@ eqjoinsel_semi(Oid operator, ...@@ -2232,6 +2296,7 @@ eqjoinsel_semi(Oid operator,
vardata1->atttypmod, vardata1->atttypmod,
STATISTIC_KIND_MCV, STATISTIC_KIND_MCV,
InvalidOid, InvalidOid,
NULL,
&values1, &nvalues1, &values1, &nvalues1,
&numbers1, &nnumbers1); &numbers1, &nnumbers1);
} }
...@@ -2244,6 +2309,7 @@ eqjoinsel_semi(Oid operator, ...@@ -2244,6 +2309,7 @@ eqjoinsel_semi(Oid operator,
vardata2->atttypmod, vardata2->atttypmod,
STATISTIC_KIND_MCV, STATISTIC_KIND_MCV,
InvalidOid, InvalidOid,
NULL,
&values2, &nvalues2, &values2, &nvalues2,
&numbers2, &nnumbers2); &numbers2, &nnumbers2);
} }
...@@ -3226,7 +3292,9 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) ...@@ -3226,7 +3292,9 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
if (get_attstatsslot(vardata.statsTuple, if (get_attstatsslot(vardata.statsTuple,
vardata.atttype, vardata.atttypmod, vardata.atttype, vardata.atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL, NULL, &numbers, &nnumbers)) NULL,
NULL, NULL,
&numbers, &nnumbers))
{ {
/* /*
* The first MCV stat is for the most common value. * The first MCV stat is for the most common value.
...@@ -4339,6 +4407,18 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ...@@ -4339,6 +4407,18 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
int nvalues; int nvalues;
int i; int i;
/*
* XXX It's very tempting to try to use the actual column min and max,
* if we can get them relatively-cheaply with an index probe. However,
* since this function is called many times during join planning,
* that could have unpleasant effects on planning speed. Need more
* investigation before enabling this.
*/
#ifdef NOT_USED
if (get_actual_variable_range(root, vardata, sortop, min, max))
return true;
#endif
if (!HeapTupleIsValid(vardata->statsTuple)) if (!HeapTupleIsValid(vardata->statsTuple))
{ {
/* no stats available, so default result */ /* no stats available, so default result */
...@@ -4358,6 +4438,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ...@@ -4358,6 +4438,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
if (get_attstatsslot(vardata->statsTuple, if (get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, sortop, STATISTIC_KIND_HISTOGRAM, sortop,
NULL,
&values, &nvalues, &values, &nvalues,
NULL, NULL)) NULL, NULL))
{ {
...@@ -4372,6 +4453,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ...@@ -4372,6 +4453,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
else if (get_attstatsslot(vardata->statsTuple, else if (get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid, STATISTIC_KIND_HISTOGRAM, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
NULL, NULL)) NULL, NULL))
{ {
...@@ -4388,6 +4470,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ...@@ -4388,6 +4470,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
if (get_attstatsslot(vardata->statsTuple, if (get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod, vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid, STATISTIC_KIND_MCV, InvalidOid,
NULL,
&values, &nvalues, &values, &nvalues,
NULL, NULL)) NULL, NULL))
{ {
...@@ -4429,6 +4512,205 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ...@@ -4429,6 +4512,205 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
} }
/*
 * get_actual_variable_range
 *      Try to learn the variable's current, real minimum and/or maximum
 *      by probing the two ends of a suitable btree index.
 *
 * sortop is the "<" operator that defines the ordering of interest.
 * min and max are output pointers; pass NULL for an endpoint that is
 * not needed.
 *
 * Returns TRUE when a usable index was found and every requested endpoint
 * was fetched and stored.  Returns FALSE when the variable has no relation,
 * the relation has no usable index, or an index probe returned no tuple.
 *
 * Note: the minimum is fetched before the maximum, so on a FALSE return
 * caused by the maximum probe coming back empty, *min has already been
 * overwritten.
 */
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                          Oid sortop,
                          Datum *min, Datum *max)
{
    RelOptInfo *rel = vardata->rel;
    RangeTblEntry *rte;
    IndexOptInfo *chosen = NULL;
    ScanDirection mindir = ForwardScanDirection;
    ListCell   *cell;
    bool        found = true;
    EState     *estate;
    ExprContext *econtext;
    MemoryContext scratchcxt;
    MemoryContext callercxt;
    Relation    heapRel;
    Relation    indexRel;
    IndexInfo  *indexInfo;
    TupleTableSlot *slot;
    int16       typLen;
    bool        typByVal;
    ScanKeyData notnullkey[1];
    Datum       keyvals[INDEX_MAX_KEYS];
    bool        keynulls[INDEX_MAX_KEYS];
    Datum      *wanted[2];
    ScanDirection dirs[2];
    int         which;

    /* Nothing to probe without a relation that has indexes */
    if (rel == NULL || rel->indexlist == NIL)
        return false;

    /* Only plain relations carry indexes */
    rte = root->simple_rte_array[rel->relid];
    Assert(rte->rtekind == RTE_RELATION);

    /*
     * Phase 1: pick the first index that can answer the question.
     */
    foreach(cell, rel->indexlist)
    {
        IndexOptInfo *candidate = (IndexOptInfo *) lfirst(cell);

        /*
         * Reject anything that is not a btree, any partial index (it would
         * not describe the whole relation), and any entry without a valid
         * OID (a get_relation_info hook may have inserted fictitious
         * indexes that must not be opened).
         */
        if (candidate->relam != BTREE_AM_OID ||
            candidate->indpred != NIL ||
            !OidIsValid(candidate->indexoid))
            continue;

        /*
         * The leading column has to be ordered by sortop; a descending
         * index is acceptable, we simply read it from the other end.
         */
        if (candidate->fwdsortop[0] == sortop)
            mindir = ForwardScanDirection;
        else if (candidate->revsortop[0] == sortop)
            mindir = BackwardScanDirection;
        else
            continue;

        /* ... and that leading column has to be our variable */
        if (!match_index_to_operand(vardata->var, 0, candidate))
            continue;

        chosen = candidate;
        break;
    }

    if (chosen == NULL)
        return false;

    /*
     * Phase 2: build the executor scaffolding needed to read index entries.
     */
    estate = CreateExecutorState();
    econtext = GetPerTupleExprContext(estate);

    /* Keep all transient allocations inside the per-tuple context */
    scratchcxt = econtext->ecxt_per_tuple_memory;
    callercxt = MemoryContextSwitchTo(scratchcxt);

    /*
     * The table should already be locked with at least AccessShareLock,
     * but the index may not be, so take a lock on the index only.
     */
    heapRel = heap_open(rte->relid, NoLock);
    indexRel = index_open(chosen->indexoid, AccessShareLock);

    /* Index key description, taken from the index's pg_index data */
    indexInfo = BuildIndexInfo(indexRel);

    /* Slot through which FormIndexDatum will see each fetched heap tuple */
    slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
    econtext->ecxt_scantuple = slot;
    get_typlenbyval(vardata->atttype, &typLen, &typByVal);

    /* An IS NOT NULL key on the first column keeps nulls out of the scan */
    ScanKeyEntryInitialize(&notnullkey[0],
                           SK_ISNULL | SK_SEARCHNOTNULL,
                           1,               /* index col to scan */
                           InvalidStrategy, /* no strategy */
                           InvalidOid,      /* no strategy subtype */
                           InvalidOid,      /* no reg proc for this */
                           (Datum) 0);      /* constant */

    /*
     * Phase 3: probe the requested endpoints, minimum first.  The minimum
     * is the first tuple in sortop's direction; the maximum is the first
     * tuple in the opposite direction.  Stop as soon as a probe comes back
     * empty, since the index then has nothing to tell us.
     */
    wanted[0] = min;
    dirs[0] = mindir;
    wanted[1] = max;
    dirs[1] = -mindir;

    for (which = 0; which < 2 && found; which++)
    {
        IndexScanDesc scan;
        HeapTuple   tuple;

        if (wanted[which] == NULL)
            continue;           /* caller doesn't need this endpoint */

        scan = index_beginscan(heapRel, indexRel, SnapshotNow,
                               1, notnullkey);

        tuple = index_getnext(scan, dirs[which]);
        if (tuple == NULL)
            found = false;
        else
        {
            /* Recompute the index column values from the heap tuple */
            ExecStoreTuple(tuple, slot, InvalidBuffer, false);
            FormIndexDatum(indexInfo, slot, estate,
                           keyvals, keynulls);

            /* The scan key should have excluded nulls; verify anyway */
            if (keynulls[0])
                elog(ERROR, "found unexpected null value in index \"%s\"",
                     RelationGetRelationName(indexRel));

            /* The result must outlive our scratch context */
            MemoryContextSwitchTo(callercxt);
            *wanted[which] = datumCopy(keyvals[0], typByVal, typLen);
            MemoryContextSwitchTo(scratchcxt);
        }

        index_endscan(scan);
    }

    /* Tear down in the reverse order of setup */
    ExecDropSingleTupleTableSlot(slot);

    index_close(indexRel, AccessShareLock);
    heap_close(heapRel, NoLock);

    MemoryContextSwitchTo(callercxt);
    FreeExecutorState(estate);

    return found;
}
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* Pattern analysis functions * Pattern analysis functions
...@@ -4795,7 +5077,7 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, ...@@ -4795,7 +5077,7 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
* more useful to use the upper-bound code than not. * more useful to use the upper-bound code than not.
*/ */
static Selectivity static Selectivity
prefix_selectivity(VariableStatData *vardata, prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon) Oid vartype, Oid opfamily, Const *prefixcon)
{ {
Selectivity prefixsel; Selectivity prefixsel;
...@@ -4810,11 +5092,11 @@ prefix_selectivity(VariableStatData *vardata, ...@@ -4810,11 +5092,11 @@ prefix_selectivity(VariableStatData *vardata,
elog(ERROR, "no >= operator for opfamily %u", opfamily); elog(ERROR, "no >= operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc); fmgr_info(get_opcode(cmpopr), &opproc);
prefixsel = ineq_histogram_selectivity(vardata, &opproc, true, prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
prefixcon->constvalue, prefixcon->constvalue,
prefixcon->consttype); prefixcon->consttype);
if (prefixsel <= 0.0) if (prefixsel < 0.0)
{ {
/* No histogram is present ... return a suitable default estimate */ /* No histogram is present ... return a suitable default estimate */
return DEFAULT_MATCH_SEL; return DEFAULT_MATCH_SEL;
...@@ -4836,12 +5118,12 @@ prefix_selectivity(VariableStatData *vardata, ...@@ -4836,12 +5118,12 @@ prefix_selectivity(VariableStatData *vardata,
{ {
Selectivity topsel; Selectivity topsel;
topsel = ineq_histogram_selectivity(vardata, &opproc, false, topsel = ineq_histogram_selectivity(root, vardata, &opproc, false,
greaterstrcon->constvalue, greaterstrcon->constvalue,
greaterstrcon->consttype); greaterstrcon->consttype);
/* ineq_histogram_selectivity worked before, it shouldn't fail now */ /* ineq_histogram_selectivity worked before, it shouldn't fail now */
Assert(topsel > 0.0); Assert(topsel >= 0.0);
/* /*
* Merge the two selectivities in the same way as for a range query * Merge the two selectivities in the same way as for a range query
...@@ -5870,7 +6152,9 @@ btcostestimate(PG_FUNCTION_ARGS) ...@@ -5870,7 +6152,9 @@ btcostestimate(PG_FUNCTION_ARGS)
if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0, if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
STATISTIC_KIND_CORRELATION, STATISTIC_KIND_CORRELATION,
index->fwdsortop[0], index->fwdsortop[0],
NULL, NULL, &numbers, &nnumbers)) NULL,
NULL, NULL,
&numbers, &nnumbers))
{ {
double varCorrelation; double varCorrelation;
...@@ -5887,7 +6171,9 @@ btcostestimate(PG_FUNCTION_ARGS) ...@@ -5887,7 +6171,9 @@ btcostestimate(PG_FUNCTION_ARGS)
else if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0, else if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
STATISTIC_KIND_CORRELATION, STATISTIC_KIND_CORRELATION,
index->revsortop[0], index->revsortop[0],
NULL, NULL, &numbers, &nnumbers)) NULL,
NULL, NULL,
&numbers, &nnumbers))
{ {
double varCorrelation; double varCorrelation;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.165 2010/01/02 16:57:55 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.166 2010/01/04 02:44:40 tgl Exp $
* *
* NOTES * NOTES
* Eventually, the index information should go through here, too. * Eventually, the index information should go through here, too.
...@@ -2577,6 +2577,7 @@ get_attavgwidth(Oid relid, AttrNumber attnum) ...@@ -2577,6 +2577,7 @@ get_attavgwidth(Oid relid, AttrNumber attnum)
* atttypmod: typmod of attribute (can be 0 if values == NULL). * atttypmod: typmod of attribute (can be 0 if values == NULL).
* reqkind: STAKIND code for desired statistics slot kind. * reqkind: STAKIND code for desired statistics slot kind.
* reqop: STAOP value wanted, or InvalidOid if don't care. * reqop: STAOP value wanted, or InvalidOid if don't care.
* actualop: if not NULL, *actualop receives the actual STAOP value.
* values, nvalues: if not NULL, the slot's stavalues are extracted. * values, nvalues: if not NULL, the slot's stavalues are extracted.
* numbers, nnumbers: if not NULL, the slot's stanumbers are extracted. * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
* *
...@@ -2589,6 +2590,7 @@ bool ...@@ -2589,6 +2590,7 @@ bool
get_attstatsslot(HeapTuple statstuple, get_attstatsslot(HeapTuple statstuple,
Oid atttype, int32 atttypmod, Oid atttype, int32 atttypmod,
int reqkind, Oid reqop, int reqkind, Oid reqop,
Oid *actualop,
Datum **values, int *nvalues, Datum **values, int *nvalues,
float4 **numbers, int *nnumbers) float4 **numbers, int *nnumbers)
{ {
...@@ -2611,6 +2613,9 @@ get_attstatsslot(HeapTuple statstuple, ...@@ -2611,6 +2613,9 @@ get_attstatsslot(HeapTuple statstuple,
if (i >= STATISTIC_NUM_SLOTS) if (i >= STATISTIC_NUM_SLOTS)
return false; /* not there */ return false; /* not there */
if (actualop)
*actualop = (&stats->staop1)[i];
if (values) if (values)
{ {
val = SysCacheGetAttr(STATRELATTINH, statstuple, val = SysCacheGetAttr(STATRELATTINH, statstuple,
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.130 2010/01/02 16:58:10 momjian Exp $ * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.131 2010/01/04 02:44:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -129,6 +129,7 @@ extern int32 get_attavgwidth(Oid relid, AttrNumber attnum); ...@@ -129,6 +129,7 @@ extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
extern bool get_attstatsslot(HeapTuple statstuple, extern bool get_attstatsslot(HeapTuple statstuple,
Oid atttype, int32 atttypmod, Oid atttype, int32 atttypmod,
int reqkind, Oid reqop, int reqkind, Oid reqop,
Oid *actualop,
Datum **values, int *nvalues, Datum **values, int *nvalues,
float4 **numbers, int *nnumbers); float4 **numbers, int *nnumbers);
extern void free_attstatsslot(Oid atttype, extern void free_attstatsslot(Oid atttype,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment