Commit 39df0f15 authored by Tom Lane's avatar Tom Lane

Allow planner to use expression-index stats for function calls in WHERE.

Previously, a function call appearing at the top level of WHERE had a
hard-wired selectivity estimate of 0.3333333, a kludge conveniently dated
in the source code itself to July 1992.  The expectation at the time was
that somebody would soon implement estimator support functions analogous
to those for operators; but no such code has appeared, nor does it seem
likely to in the near future.  We do have an alternative solution though,
at least for immutable functions on single relations: creating an
expression index on the function call will allow ANALYZE to gather stats
about the function's selectivity.  But the code in clause_selectivity()
failed to make use of such data even if it exists.

Refactor so that that will happen.  I chose to make it try this technique
for any clause type for which clause_selectivity() doesn't have a special
case, not just functions.  To avoid adding unnecessary overhead in the
common case where we don't learn anything new, make selfuncs.c provide an
API that hooks directly to examine_variable() and then var_eq_const(),
rather than the previous coding which laboriously constructed an OpExpr
only so that it could be expensively deconstructed again.

I preserved the behavior that the default estimate for a function call
is 0.3333333.  (For any other expression node type, it's 0.5, as before.)
I had originally thought to make the default be 0.5 across the board, but
changing a default estimate that's survived for twenty-three years seems
like something not to do without a lot more testing than I care to put
into it right now.

Per a complaint from Jehan-Guillaume de Rorthais.  Back-patch into 9.5,
but not further, at least for the moment.
parent 76f965ff
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "catalog/pg_operator.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
#include "optimizer/cost.h" #include "optimizer/cost.h"
...@@ -568,18 +567,8 @@ clause_selectivity(PlannerInfo *root, ...@@ -568,18 +567,8 @@ clause_selectivity(PlannerInfo *root,
if (var->varlevelsup == 0 && if (var->varlevelsup == 0 &&
(varRelid == 0 || varRelid == (int) var->varno)) (varRelid == 0 || varRelid == (int) var->varno))
{ {
/* /* Use the restriction selectivity function for a bool Var */
* A Var at the top of a clause must be a bool Var. This is s1 = boolvarsel(root, (Node *) var, varRelid);
* equivalent to the clause reln.attribute = 't', so we compute
* the selectivity as if that is what we have.
*/
s1 = restriction_selectivity(root,
BooleanEqualOperator,
list_make2(var,
makeBoolConst(true,
false)),
InvalidOid,
varRelid);
} }
} }
else if (IsA(clause, Const)) else if (IsA(clause, Const))
...@@ -680,25 +669,6 @@ clause_selectivity(PlannerInfo *root, ...@@ -680,25 +669,6 @@ clause_selectivity(PlannerInfo *root,
if (IsA(clause, DistinctExpr)) if (IsA(clause, DistinctExpr))
s1 = 1.0 - s1; s1 = 1.0 - s1;
} }
else if (is_funcclause(clause))
{
/*
* This is not an operator, so we guess at the selectivity. THIS IS A
* HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE
* SELECTIVITIES THEMSELVES. -- JMH 7/9/92
*/
s1 = (Selectivity) 0.3333333;
}
#ifdef NOT_USED
else if (IsA(clause, SubPlan) ||
IsA(clause, AlternativeSubPlan))
{
/*
* Just for the moment! FIX ME! - vadim 02/04/98
*/
s1 = (Selectivity) 0.5;
}
#endif
else if (IsA(clause, ScalarArrayOpExpr)) else if (IsA(clause, ScalarArrayOpExpr))
{ {
/* Use node specific selectivity calculation function */ /* Use node specific selectivity calculation function */
...@@ -766,6 +736,17 @@ clause_selectivity(PlannerInfo *root, ...@@ -766,6 +736,17 @@ clause_selectivity(PlannerInfo *root,
jointype, jointype,
sjinfo); sjinfo);
} }
else
{
/*
* For anything else, see if we can consider it as a boolean variable.
* This only works if it's an immutable expression in Vars of a single
* relation; but there's no point in us checking that here because
* boolvarsel() will do it internally, and return a suitable default
* selectivity if not.
*/
s1 = boolvarsel(root, clause, varRelid);
}
/* Cache the result if possible */ /* Cache the result if possible */
if (cacheable) if (cacheable)
......
...@@ -105,6 +105,7 @@ ...@@ -105,6 +105,7 @@
#include "access/sysattr.h" #include "access/sysattr.h"
#include "catalog/index.h" #include "catalog/index.h"
#include "catalog/pg_collation.h" #include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h" #include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h" #include "catalog/pg_statistic.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
...@@ -1439,6 +1440,50 @@ icnlikesel(PG_FUNCTION_ARGS) ...@@ -1439,6 +1440,50 @@ icnlikesel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true)); PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
} }
/*
* boolvarsel - Selectivity of Boolean variable.
*
* This can actually be called on any boolean-valued expression. If it
* involves only Vars of the specified relation, and if there are statistics
* about the Var or expression (the latter is possible if it's indexed) then
* we'll produce a real estimate; otherwise it's just a default.
*/
Selectivity
boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
{
VariableStatData vardata;
double selec;
examine_variable(root, arg, varRelid, &vardata);
if (HeapTupleIsValid(vardata.statsTuple))
{
/*
* A boolean variable V is equivalent to the clause V = 't', so we
* compute the selectivity as if that is what we have.
*/
selec = var_eq_const(&vardata, BooleanEqualOperator,
BoolGetDatum(true), false, true);
}
else if (is_funcclause(arg))
{
/*
* If we have no stats and it's a function call, estimate 0.3333333.
* This seems a pretty unprincipled choice, but Postgres has been
* using that estimate for function calls since 1992. The hoariness
* of this behavior suggests that we should not be in too much hurry
* to use another value.
*/
selec = 0.3333333;
}
else
{
/* Otherwise, the default estimate is 0.5 */
selec = 0.5;
}
ReleaseVariableStats(vardata);
return selec;
}
/* /*
* booltestsel - Selectivity of BooleanTest Node. * booltestsel - Selectivity of BooleanTest Node.
*/ */
......
...@@ -164,6 +164,7 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); ...@@ -164,6 +164,7 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
Node *arg, int varRelid, Node *arg, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo); JoinType jointype, SpecialJoinInfo *sjinfo);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment