Change patternsel (LIKE/regex selectivity estimation) so that if there is a large enough histogram, it will use the number of matches in the histogram to derive a selectivity estimate, rather than the admittedly pretty bogus heuristics involving examining the pattern contents. I set 'large enough' at 100, but perhaps we should change that later. Also apply the same technique in contrib/ltree's <@ and @> estimator. Per discussion with Stefan Kaltenbrunner and Matteo Beccati.

Change patternsel (LIKE/regex selectivity estimation) so that if there
is a large enough histogram, it will use the number of matches in the histogram to derive a selectivity estimate, rather than the admittedly pretty bogus heuristics involving examining the pattern contents. I set 'large enough' at 100, but perhaps we should change that later. Also apply the same technique in contrib/ltree's <@ and @> estimator. Per discussion with Stefan Kaltenbrunner and Matteo Beccati.
bfd1ffa9 · Tom Lane · 06b33f0e · bfd1ffa9 · bfd1ffa9 · bfd1ffa9
Commit bfd1ffa9 authored Sep 20, 2006 by Tom Lane
Showing with 242 additions and 113 deletions

contrib/ltree/ltree_op.c +36 -4

src/backend/utils/adt/selfuncs.c +202 -108

src/include/utils/selfuncs.h +4 -1

No files found.
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
 /*
 * op function for ltree
 * Teodor Sigaev <teodor@stack.net>
- * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.12 2006/05/30 22:12:13 tgl Exp $
+ * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.13 2006/09/20 19:50:21 tgl Exp $
 */
 #include "ltree.h"
 #include <ctype.h>
+#include "catalog/pg_statistic.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
 		FmgrInfo	contproc;
 		double		mcvsum;
 		double		mcvsel;
+		double		nullfrac;
 		fmgr_info(get_opcode(operator), &contproc);
@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS)
 								 &mcvsum);
 		/*
-		 * We have the exact selectivity for values appearing in the MCV list;
+		 * If the histogram is large enough, see what fraction of it the
-		 * use the default selectivity for the rest of the population.
+		 * constant is "<@" to, and assume that's representative of the
+		 * non-MCV population.  Otherwise use the default selectivity for
+		 * the non-MCV population.
 		 */
-		selec = mcvsel + DEFAULT_PARENT_SEL * (1.0 - mcvsum);
+		selec = histogram_selectivity(&vardata, &contproc,
+									  constval, varonleft,
+									  100, 1);
+		if (selec < 0)
+		{
+			/* Nope, fall back on default */
+			selec = DEFAULT_PARENT_SEL;
+		}
+		else
+		{
+			/* Yes, but don't believe extremely small or large estimates. */
+			if (selec < 0.0001)
+				selec = 0.0001;
+			else if (selec > 0.9999)
+				selec = 0.9999;
+		}
+		if (HeapTupleIsValid(vardata.statsTuple))
+			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+		else
+			nullfrac = 0.0;
+		/*
+		 * Now merge the results from the MCV and histogram calculations,
+		 * realizing that the histogram covers only the non-null values that
+		 * are not listed in MCV.
+		 */
+		selec *= 1.0 - nullfrac - mcvsum;
+		selec += mcvsel;
 	}
 	else
 		selec = DEFAULT_PARENT_SEL;

--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.34 2006/07/01 22:07:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.35 2006/09/20 19:50:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -110,6 +110,9 @@ extern double get_variable_numdistinct(VariableStatData *vardata);
 extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 							  Datum constval, bool varonleft,
 							  double *sumcommonp);
+extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
+									Datum constval, bool varonleft,
+									int min_hist_size, int n_skip);
 extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
 					 Pattern_Type ptype,