Commit c0f7dcda authored by Tom Lane's avatar Tom Lane

Fix range-query estimation to not double-exclude NULLs, per gripe from

Ray Ontko 28-June-02.  Also, fix prefix_selectivity for NAME lefthand
variables (it was bogusly assuming binary compatibility), and adjust
make_greater_string() to not call pg_mbcliplen() with invalid multibyte
data (this last per bug report that I can't find at the moment, but it
was in July '02).
parent a3cb8746
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.51 2002/06/20 20:29:29 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.52 2002/10/19 02:56:16 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -102,7 +102,9 @@ restrictlist_selectivity(Query *root, ...@@ -102,7 +102,9 @@ restrictlist_selectivity(Query *root,
* see that hisel is the fraction of the range below the high bound, while * see that hisel is the fraction of the range below the high bound, while
* losel is the fraction above the low bound; so hisel can be interpreted * losel is the fraction above the low bound; so hisel can be interpreted
* directly as a 0..1 value but we need to convert losel to 1-losel before * directly as a 0..1 value but we need to convert losel to 1-losel before
* interpreting it as a value. Then the available range is 1-losel to hisel.) * interpreting it as a value. Then the available range is 1-losel to hisel.
* However, this calculation double-excludes nulls, so really we need
* hisel + losel + null_frac - 1.)
* If the calculation yields zero or negative, however, we chicken out and * If the calculation yields zero or negative, however, we chicken out and
* use a default estimate; that probably means that one or both * use a default estimate; that probably means that one or both
* selectivities is a default estimate rather than an actual range value. * selectivities is a default estimate rather than an actual range value.
...@@ -199,6 +201,9 @@ clauselist_selectivity(Query *root, ...@@ -199,6 +201,9 @@ clauselist_selectivity(Query *root,
/* Successfully matched a pair of range clauses */ /* Successfully matched a pair of range clauses */
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
/* Adjust for double-exclusion of NULLs */
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
/* /*
* A zero or slightly negative s2 should be converted into a * A zero or slightly negative s2 should be converted into a
* small positive value; we probably are dealing with a very * small positive value; we probably are dealing with a very
...@@ -503,12 +508,18 @@ clause_selectivity(Query *root, ...@@ -503,12 +508,18 @@ clause_selectivity(Query *root,
else if (IsA(clause, NullTest)) else if (IsA(clause, NullTest))
{ {
/* Use node specific selectivity calculation function */ /* Use node specific selectivity calculation function */
s1 = nulltestsel(root, (NullTest *) clause, varRelid); s1 = nulltestsel(root,
((NullTest *) clause)->nulltesttype,
((NullTest *) clause)->arg,
varRelid);
} }
else if (IsA(clause, BooleanTest)) else if (IsA(clause, BooleanTest))
{ {
/* Use node specific selectivity calculation function */ /* Use node specific selectivity calculation function */
s1 = booltestsel(root, (BooleanTest *) clause, varRelid); s1 = booltestsel(root,
((BooleanTest *) clause)->booltesttype,
((BooleanTest *) clause)->arg,
varRelid);
} }
else if (IsA(clause, RelabelType)) else if (IsA(clause, RelabelType))
{ {
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.118 2002/09/20 03:55:40 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.119 2002/10/19 02:56:16 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1007,10 +1007,9 @@ icnlikesel(PG_FUNCTION_ARGS) ...@@ -1007,10 +1007,9 @@ icnlikesel(PG_FUNCTION_ARGS)
* booltestsel - Selectivity of BooleanTest Node. * booltestsel - Selectivity of BooleanTest Node.
*/ */
Selectivity Selectivity
booltestsel(Query *root, BooleanTest *clause, int varRelid) booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
{ {
Var *var; Var *var;
Node *arg;
Oid relid; Oid relid;
HeapTuple statsTuple; HeapTuple statsTuple;
Datum *values; Datum *values;
...@@ -1019,10 +1018,6 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1019,10 +1018,6 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
int nnumbers; int nnumbers;
double selec; double selec;
Assert(clause && IsA(clause, BooleanTest));
arg = (Node *) clause->arg;
/* /*
* Ignore any binary-compatible relabeling (probably unnecessary, but * Ignore any binary-compatible relabeling (probably unnecessary, but
* can't hurt) * can't hurt)
...@@ -1040,7 +1035,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1040,7 +1035,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
* the possibility of a NULL value when using clause_selectivity, * the possibility of a NULL value when using clause_selectivity,
* and just assume the value is either TRUE or FALSE. * and just assume the value is either TRUE or FALSE.
*/ */
switch (clause->booltesttype) switch (booltesttype)
{ {
case IS_UNKNOWN: case IS_UNKNOWN:
selec = DEFAULT_UNK_SEL; selec = DEFAULT_UNK_SEL;
...@@ -1058,7 +1053,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1058,7 +1053,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "booltestsel: unexpected booltesttype %d", elog(ERROR, "booltestsel: unexpected booltesttype %d",
(int) clause->booltesttype); (int) booltesttype);
selec = 0.0; /* Keep compiler quiet */ selec = 0.0; /* Keep compiler quiet */
break; break;
} }
...@@ -1107,7 +1102,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1107,7 +1102,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
*/ */
freq_false = 1.0 - freq_true - freq_null; freq_false = 1.0 - freq_true - freq_null;
switch (clause->booltesttype) switch (booltesttype)
{ {
case IS_UNKNOWN: case IS_UNKNOWN:
/* select only NULL values */ /* select only NULL values */
...@@ -1135,7 +1130,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1135,7 +1130,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "booltestsel: unexpected booltesttype %d", elog(ERROR, "booltestsel: unexpected booltesttype %d",
(int) clause->booltesttype); (int) booltesttype);
selec = 0.0; /* Keep compiler quiet */ selec = 0.0; /* Keep compiler quiet */
break; break;
} }
...@@ -1151,7 +1146,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1151,7 +1146,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
* Otherwise adjust for null fraction and assume an even split * Otherwise adjust for null fraction and assume an even split
* for boolean tests. * for boolean tests.
*/ */
switch (clause->booltesttype) switch (booltesttype)
{ {
case IS_UNKNOWN: case IS_UNKNOWN:
...@@ -1176,7 +1171,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1176,7 +1171,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "booltestsel: unexpected booltesttype %d", elog(ERROR, "booltestsel: unexpected booltesttype %d",
(int) clause->booltesttype); (int) booltesttype);
selec = 0.0; /* Keep compiler quiet */ selec = 0.0; /* Keep compiler quiet */
break; break;
} }
...@@ -1190,7 +1185,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1190,7 +1185,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
* No VACUUM ANALYZE stats available, so use a default value. * No VACUUM ANALYZE stats available, so use a default value.
* (Note: not much point in recursing to clause_selectivity here.) * (Note: not much point in recursing to clause_selectivity here.)
*/ */
switch (clause->booltesttype) switch (booltesttype)
{ {
case IS_UNKNOWN: case IS_UNKNOWN:
selec = DEFAULT_UNK_SEL; selec = DEFAULT_UNK_SEL;
...@@ -1206,7 +1201,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1206,7 +1201,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "booltestsel: unexpected booltesttype %d", elog(ERROR, "booltestsel: unexpected booltesttype %d",
(int) clause->booltesttype); (int) booltesttype);
selec = 0.0; /* Keep compiler quiet */ selec = 0.0; /* Keep compiler quiet */
break; break;
} }
...@@ -1222,19 +1217,16 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid) ...@@ -1222,19 +1217,16 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
* nulltestsel - Selectivity of NullTest Node. * nulltestsel - Selectivity of NullTest Node.
*/ */
Selectivity Selectivity
nulltestsel(Query *root, NullTest *clause, int varRelid) nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid)
{ {
Var *var; Var *var;
Node *arg;
Oid relid; Oid relid;
HeapTuple statsTuple; HeapTuple statsTuple;
double selec; double selec;
double defselec; double defselec;
double freq_null; double freq_null;
Assert(clause && IsA(clause, NullTest)); switch (nulltesttype)
switch (clause->nulltesttype)
{ {
case IS_NULL: case IS_NULL:
defselec = DEFAULT_UNK_SEL; defselec = DEFAULT_UNK_SEL;
...@@ -1244,25 +1236,22 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) ...@@ -1244,25 +1236,22 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "nulltestsel: unexpected nulltesttype %d", elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
(int) clause->nulltesttype); (int) nulltesttype);
return (Selectivity) 0; /* keep compiler quiet */ return (Selectivity) 0; /* keep compiler quiet */
} }
arg = (Node *) clause->arg;
/* /*
* Ignore any binary-compatible relabeling * Ignore any binary-compatible relabeling
*/ */
if (IsA(arg, RelabelType)) if (IsA(arg, RelabelType))
arg = ((RelabelType *) arg)->arg; arg = ((RelabelType *) arg)->arg;
if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno)) if (IsA(arg, Var) &&
(varRelid == 0 || varRelid == ((Var *) arg)->varno))
var = (Var *) arg; var = (Var *) arg;
else else
{ {
/* /* punt if non-Var argument */
* punt if non-Var argument
*/
return (Selectivity) defselec; return (Selectivity) defselec;
} }
...@@ -1282,7 +1271,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) ...@@ -1282,7 +1271,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
stats = (Form_pg_statistic) GETSTRUCT(statsTuple); stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
freq_null = stats->stanullfrac; freq_null = stats->stanullfrac;
switch (clause->nulltesttype) switch (nulltesttype)
{ {
case IS_NULL: case IS_NULL:
...@@ -1301,7 +1290,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid) ...@@ -1301,7 +1290,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
break; break;
default: default:
elog(ERROR, "nulltestsel: unexpected nulltesttype %d", elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
(int) clause->nulltesttype); (int) nulltesttype);
return (Selectivity) 0; /* keep compiler quiet */ return (Selectivity) 0; /* keep compiler quiet */
} }
...@@ -2978,6 +2967,10 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon) ...@@ -2978,6 +2967,10 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon)
else else
prefix = DatumGetCString(DirectFunctionCall1(byteaout, prefixcon->constvalue)); prefix = DatumGetCString(DirectFunctionCall1(byteaout, prefixcon->constvalue));
/* If var is type NAME, must adjust type of comparison constant */
if (var->vartype == NAMEOID)
prefixcon = string_to_const(prefix, NAMEOID);
cmpargs = makeList2(var, prefixcon); cmpargs = makeList2(var, prefixcon);
/* Assume scalargtsel is appropriate for all supported types */ /* Assume scalargtsel is appropriate for all supported types */
prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel, prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel,
...@@ -3014,6 +3007,9 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon) ...@@ -3014,6 +3007,9 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon)
*/ */
prefixsel = topsel + prefixsel - 1.0; prefixsel = topsel + prefixsel - 1.0;
/* Adjust for double-exclusion of NULLs */
prefixsel += nulltestsel(root, IS_NULL, (Node *) var, var->varno);
/* /*
* A zero or slightly negative prefixsel should be converted into * A zero or slightly negative prefixsel should be converted into
* a small positive value; we probably are dealing with a very * a small positive value; we probably are dealing with a very
...@@ -3351,6 +3347,7 @@ make_greater_string(const Const *str_const) ...@@ -3351,6 +3347,7 @@ make_greater_string(const Const *str_const)
while (len > 0) while (len > 0)
{ {
unsigned char *lastchar = (unsigned char *) (workstr + len - 1); unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
unsigned char savelastchar = *lastchar;
/* /*
* Try to generate a larger string by incrementing the last byte. * Try to generate a larger string by incrementing the last byte.
...@@ -3369,6 +3366,9 @@ make_greater_string(const Const *str_const) ...@@ -3369,6 +3366,9 @@ make_greater_string(const Const *str_const)
} }
} }
/* restore last byte so we don't confuse pg_mbcliplen */
*lastchar = savelastchar;
/* /*
* Truncate off the last character, which might be more than 1 * Truncate off the last character, which might be more than 1
* byte, depending on the character encoding. * byte, depending on the character encoding.
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: selfuncs.h,v 1.8 2002/09/04 20:31:46 momjian Exp $ * $Id: selfuncs.h,v 1.9 2002/10/19 02:56:16 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,8 +66,10 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); ...@@ -66,8 +66,10 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid); extern Selectivity booltestsel(Query *root, BoolTestType booltesttype,
extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid); Node *arg, int varRelid);
extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype,
Node *arg, int varRelid);
extern void mergejoinscansel(Query *root, Node *clause, extern void mergejoinscansel(Query *root, Node *clause,
Selectivity *leftscan, Selectivity *leftscan,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment