Commit bfd1ffa9 authored by Tom Lane's avatar Tom Lane

Change patternsel (LIKE/regex selectivity estimation) so that if there

is a large enough histogram, it will use the number of matches in the
histogram to derive a selectivity estimate, rather than the admittedly
pretty bogus heuristics involving examining the pattern contents.  I set
'large enough' at 100, but perhaps we should change that later.  Also
apply the same technique in contrib/ltree's <@ and @> estimator.  Per
discussion with Stefan Kaltenbrunner and Matteo Beccati.
parent 06b33f0e
/*
* op function for ltree
* Teodor Sigaev <teodor@stack.net>
* $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.12 2006/05/30 22:12:13 tgl Exp $
* $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.13 2006/09/20 19:50:21 tgl Exp $
*/
#include "ltree.h"
#include <ctype.h>
#include "catalog/pg_statistic.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
......@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
FmgrInfo contproc;
double mcvsum;
double mcvsel;
double nullfrac;
fmgr_info(get_opcode(operator), &contproc);
......@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS)
&mcvsum);
/*
* We have the exact selectivity for values appearing in the MCV list;
* use the default selectivity for the rest of the population.
* If the histogram is large enough, see what fraction of it the
* constant is "<@" to, and assume that's representative of the
* non-MCV population. Otherwise use the default selectivity for
* the non-MCV population.
*/
selec = mcvsel + DEFAULT_PARENT_SEL * (1.0 - mcvsum);
selec = histogram_selectivity(&vardata, &contproc,
constval, varonleft,
100, 1);
if (selec < 0)
{
/* Nope, fall back on default */
selec = DEFAULT_PARENT_SEL;
}
else
{
/* Yes, but don't believe extremely small or large estimates. */
if (selec < 0.0001)
selec = 0.0001;
else if (selec > 0.9999)
selec = 0.9999;
}
if (HeapTupleIsValid(vardata.statsTuple))
nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
else
nullfrac = 0.0;
/*
* Now merge the results from the MCV and histogram calculations,
* realizing that the histogram covers only the non-null values that
* are not listed in MCV.
*/
selec *= 1.0 - nullfrac - mcvsum;
selec += mcvsel;
}
else
selec = DEFAULT_PARENT_SEL;
......
This diff is collapsed.
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.34 2006/07/01 22:07:23 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.35 2006/09/20 19:50:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -110,6 +110,9 @@ extern double get_variable_numdistinct(VariableStatData *vardata);
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
double *sumcommonp);
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
int min_hist_size, int n_skip);
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment