Commit bfd1ffa9 authored by Tom Lane

Change patternsel (LIKE/regex selectivity estimation) so that if there
is a large enough histogram, it will use the number of matches in the
histogram to derive a selectivity estimate, rather than the admittedly
pretty bogus heuristics involving examining the pattern contents.  I set
'large enough' at 100, but perhaps we should change that later.  Also
apply the same technique in contrib/ltree's <@ and @> estimator.  Per
discussion with Stefan Kaltenbrunner and Matteo Beccati.
parent 06b33f0e
/* /*
* op function for ltree * op function for ltree
* Teodor Sigaev <teodor@stack.net> * Teodor Sigaev <teodor@stack.net>
* $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.12 2006/05/30 22:12:13 tgl Exp $ * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.13 2006/09/20 19:50:21 tgl Exp $
*/ */
#include "ltree.h" #include "ltree.h"
#include <ctype.h> #include <ctype.h>
#include "catalog/pg_statistic.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/selfuncs.h" #include "utils/selfuncs.h"
#include "utils/syscache.h" #include "utils/syscache.h"
...@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS) ...@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
FmgrInfo contproc; FmgrInfo contproc;
double mcvsum; double mcvsum;
double mcvsel; double mcvsel;
double nullfrac;
fmgr_info(get_opcode(operator), &contproc); fmgr_info(get_opcode(operator), &contproc);
...@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS) ...@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS)
&mcvsum); &mcvsum);
/* /*
* We have the exact selectivity for values appearing in the MCV list; * If the histogram is large enough, see what fraction of it the
* use the default selectivity for the rest of the population. * constant is "<@" to, and assume that's representative of the
* non-MCV population. Otherwise use the default selectivity for
* the non-MCV population.
*/ */
selec = mcvsel + DEFAULT_PARENT_SEL * (1.0 - mcvsum); selec = histogram_selectivity(&vardata, &contproc,
constval, varonleft,
100, 1);
if (selec < 0)
{
/* Nope, fall back on default */
selec = DEFAULT_PARENT_SEL;
}
else
{
/* Yes, but don't believe extremely small or large estimates. */
if (selec < 0.0001)
selec = 0.0001;
else if (selec > 0.9999)
selec = 0.9999;
}
if (HeapTupleIsValid(vardata.statsTuple))
nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
else
nullfrac = 0.0;
/*
* Now merge the results from the MCV and histogram calculations,
* realizing that the histogram covers only the non-null values that
* are not listed in MCV.
*/
selec *= 1.0 - nullfrac - mcvsum;
selec += mcvsel;
} }
else else
selec = DEFAULT_PARENT_SEL; selec = DEFAULT_PARENT_SEL;
......
This diff is collapsed.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.34 2006/07/01 22:07:23 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.35 2006/09/20 19:50:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -110,6 +110,9 @@ extern double get_variable_numdistinct(VariableStatData *vardata); ...@@ -110,6 +110,9 @@ extern double get_variable_numdistinct(VariableStatData *vardata);
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft, Datum constval, bool varonleft,
double *sumcommonp); double *sumcommonp);
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
int min_hist_size, int n_skip);
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype, Pattern_Type ptype,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment