Commit 4d58a7ca authored by Tom Lane's avatar Tom Lane

Optimizer can now estimate selectivity of IS NULL, IS NOT NULL,

IS TRUE, etc, with some degree of verisimilitude.  Split out
selectivity support functions from builtins.h into a new header
file selfuncs.h, so as to reduce the number of header files builtins.h
must depend on.  Fix a few missing inclusions exposed thereby.
From Joe Conway, with some kibitzing from Tom Lane.
parent c31545af
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.75 2001/06/25 21:11:43 tgl Exp $
*
* NOTES
* some of the executor utility code such as "ExecTypeFromTL" should be
......@@ -20,6 +20,7 @@
#include "postgres.h"
#include "catalog/pg_type.h"
#include "nodes/parsenodes.h"
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
......
......@@ -7,14 +7,13 @@
* Copyright (c) 1999-2001, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.30 2001/06/13 21:44:40 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.31 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "utils/builtins.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"
......@@ -26,11 +25,12 @@
#include "catalog/pg_class.h"
#include "commands/comment.h"
#include "miscadmin.h"
#include "parser/parse.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse.h"
#include "rewrite/rewriteRemove.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/syscache.h"
......@@ -717,7 +717,7 @@ CommentOperator(char *opername, List *arguments, char *comment)
/*** Get the procedure associated with the operator ***/
data = (Form_pg_operator) GETSTRUCT(optuple);
oid = RegprocToOid(data->oprcode);
oid = data->oprcode;
if (oid == InvalidOid)
elog(ERROR, "operator '%s' does not have an underlying function", opername);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.46 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -24,6 +24,7 @@
#include "parser/parsetree.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
/* note that pg_type.h hardwires size of bool as 1 ... duplicate it */
......@@ -509,6 +510,16 @@ clause_selectivity(Query *root,
*/
s1 = (Selectivity) 0.5;
}
else if (IsA(clause, NullTest))
{
/* Use node specific selectivity calculation function */
s1 = nulltestsel(root, (NullTest *) clause, varRelid);
}
else if (IsA(clause, BooleanTest))
{
/* Use node specific selectivity calculation function */
s1 = booltestsel(root, (BooleanTest *) clause, varRelid);
}
else if (IsA(clause, RelabelType))
{
/* Not sure this case is needed, but it can't hurt */
......@@ -517,5 +528,9 @@ clause_selectivity(Query *root,
varRelid);
}
#ifdef SELECTIVITY_DEBUG
elog(NOTICE, "clause_selectivity: s1 %f", s1);
#endif /* SELECTIVITY_DEBUG */
return s1;
}
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.107 2001/06/17 02:05:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.108 2001/06/25 21:11:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -37,6 +37,7 @@
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.190 2001/06/23 00:07:34 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.191 2001/06/25 21:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -33,6 +33,7 @@
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/numeric.h"
#include "utils/relcache.h"
#include "utils/syscache.h"
......
/* -----------------------------------------------------------------------
* formatting.c
*
* $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.37 2001/05/03 22:53:07 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.38 2001/06/25 21:11:44 tgl Exp $
*
*
* Portions Copyright (c) 1999-2000, PostgreSQL Global Development Group
......@@ -67,21 +67,23 @@
#define DEBUG_elog_output NOTICE
***/
#include <stdio.h>
#include <string.h>
#include "postgres.h"
#include <ctype.h>
#include <sys/time.h>
#include <unistd.h>
#ifdef USE_LOCALE
#include <locale.h>
#endif
#include <math.h>
#include <float.h>
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/formatting.h"
#include "utils/int8.h"
#include "utils/numeric.h"
#include "utils/pg_locale.h"
/* ----------
......
......@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.93 2001/06/09 22:16:18 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.94 2001/06/25 21:11:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -93,6 +93,7 @@
#include "utils/date.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
/*
......@@ -117,6 +118,10 @@
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL 0.005
#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)
#define DEFAULT_BOOL_SEL 0.5
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
......@@ -933,6 +938,327 @@ icnlikesel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
/*
 *		booltestsel		- Selectivity of BooleanTest Node.
 *
 * Estimates the fraction of rows satisfying "arg IS [NOT] TRUE / FALSE /
 * UNKNOWN".  Three sources of information are tried, best first:
 *
 *	1. pg_statistic data for the column when the argument is a plain Var
 *	   (optionally under a RelabelType) of the relation being considered:
 *	   the null fraction, plus the first most-common-value entry when one
 *	   is available.
 *	2. clause_selectivity() on the argument when it is not such a Var
 *	   (NULLs are ignored in that case).
 *	3. The DEFAULT_UNK_SEL / DEFAULT_NOT_UNK_SEL / DEFAULT_BOOL_SEL
 *	   constants when no statistics tuple exists.
 *
 * root		- query being planned; its rtable is used to map varno to relid
 * clause	- the BooleanTest node to estimate
 * varRelid	- if nonzero, only Vars with this varno are looked up in stats
 *
 * Returns a selectivity; the statistics-based result is clamped to 0..1.
 */
Selectivity
booltestsel(Query *root, BooleanTest *clause, int varRelid)
{
	Var		   *var;
	Node	   *arg;
	Oid			relid;
	HeapTuple	statsTuple;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	double		selec;

	Assert(clause && IsA(clause, BooleanTest));

	arg = (Node *) clause->arg;

	/*
	 * Ignore any binary-compatible relabeling (probably unnecessary,
	 * but can't hurt)
	 */
	if (IsA(arg, RelabelType))
		arg = ((RelabelType *) arg)->arg;

	if (IsA(arg, Var) && (varRelid == 0 || varRelid == ((Var *) arg)->varno))
		var = (Var *) arg;
	else
	{
		/*
		 * If argument is not a Var, we can't get statistics for it, but
		 * perhaps clause_selectivity can do something with it.  We ignore
		 * the possibility of a NULL value when using clause_selectivity,
		 * and just assume the value is either TRUE or FALSE.
		 */
		switch (clause->booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_FALSE:
				selec = (double) clause_selectivity(root, arg, varRelid);
				break;
			case IS_FALSE:
			case IS_NOT_TRUE:
				selec = 1.0 - (double) clause_selectivity(root, arg, varRelid);
				break;
			default:
				elog(ERROR, "booltestsel: unexpected booltesttype %d",
					 (int) clause->booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
		return (Selectivity) selec;
	}

	/* get stats for the attribute, if available */
	relid = getrelid(var->varno, root->rtable);
	if (relid == InvalidOid)
		statsTuple = NULL;
	else
		statsTuple = SearchSysCache(STATRELATT,
									ObjectIdGetDatum(relid),
									Int16GetDatum(var->varattno),
									0, 0);

	if (HeapTupleIsValid(statsTuple))
	{
		Form_pg_statistic stats;
		double		freq_null;

		stats = (Form_pg_statistic) GETSTRUCT(statsTuple);

		freq_null = stats->stanullfrac;

		if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers)
			&& nnumbers > 0)
		{
			double		freq_true;
			double		freq_false;

			/*
			 * Get first MCV frequency and derive frequency for true.
			 * If the first MCV is FALSE, TRUE is whatever remains after
			 * removing that frequency and the NULLs.
			 */
			if (DatumGetBool(values[0]))
				freq_true = numbers[0];
			else
				freq_true = 1.0 - numbers[0] - freq_null;

			/*
			 * Next derive frequency for false.
			 * Then use these as appropriate to derive frequency for each case.
			 */
			freq_false = 1.0 - freq_true - freq_null;

			switch (clause->booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
					/* select only TRUE values */
					selec = freq_true;
					break;
				case IS_NOT_TRUE:
					/* select non-TRUE values (FALSE and NULL) */
					selec = 1.0 - freq_true;
					break;
				case IS_FALSE:
					/* select only FALSE values */
					selec = freq_false;
					break;
				case IS_NOT_FALSE:
					/* select non-FALSE values (TRUE and NULL) */
					selec = 1.0 - freq_false;
					break;
				default:
					elog(ERROR, "booltestsel: unexpected booltesttype %d",
						 (int) clause->booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}

			free_attstatsslot(var->vartype, values, nvalues,
							  numbers, nnumbers);
		}
		else
		{
			/*
			 * No most-common-value info available.
			 * Still have null fraction information,
			 * so use it for IS [NOT] UNKNOWN.
			 * Otherwise adjust for null fraction and
			 * assume an even split for boolean tests.
			 */
			switch (clause->booltesttype)
			{
				case IS_UNKNOWN:
					/*
					 * Use freq_null directly.
					 */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/*
					 * Select not unknown (not null) values.
					 * Calculate from freq_null.
					 */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
				case IS_FALSE:
					/* Assume we select half of the non-NULL values */
					selec = (1.0 - freq_null) / 2.0;
					break;
				case IS_NOT_TRUE:
				case IS_NOT_FALSE:
					/*
					 * These tests also accept NULLs, so we select the
					 * NULLs plus half of the non-NULL values:
					 * freq_null + (1.0 - freq_null) / 2.0.  This keeps
					 * the estimate consistent with the MCV case above,
					 * where IS NOT TRUE is computed as 1.0 - freq_true.
					 */
					selec = (freq_null + 1.0) / 2.0;
					break;
				default:
					elog(ERROR, "booltestsel: unexpected booltesttype %d",
						 (int) clause->booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}
		}

		ReleaseSysCache(statsTuple);
	}
	else
	{
		/*
		 * No VACUUM ANALYZE stats available, so use a default value.
		 * (Note: not much point in recursing to clause_selectivity here.)
		 */
		switch (clause->booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_TRUE:
			case IS_FALSE:
			case IS_NOT_FALSE:
				selec = DEFAULT_BOOL_SEL;
				break;
			default:
				elog(ERROR, "booltestsel: unexpected booltesttype %d",
					 (int) clause->booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
	}

	/* result should be in range, but make sure... */
	if (selec < 0.0)
		selec = 0.0;
	else if (selec > 1.0)
		selec = 1.0;

	return (Selectivity) selec;
}
/*
 *		nulltestsel		- Selectivity of NullTest Node.
 *
 * Estimates the fraction of rows satisfying "arg IS NULL" or
 * "arg IS NOT NULL".  When the argument is a Var (optionally under a
 * RelabelType) of the relation being considered and a pg_statistic tuple
 * exists for it, the stored null fraction is used; in every other case
 * the DEFAULT_UNK_SEL / DEFAULT_NOT_UNK_SEL constant is returned.
 *
 * root		- query being planned; its rtable is used to map varno to relid
 * clause	- the NullTest node to estimate
 * varRelid	- if nonzero, only Vars with this varno are looked up in stats
 */
Selectivity
nulltestsel(Query *root, NullTest *clause, int varRelid)
{
	Node	   *operand;
	Var		   *column;
	Oid			reloid;
	HeapTuple	tuple;
	double		fallback;
	double		null_frac;
	double		result;

	Assert(clause && IsA(clause, NullTest));

	/* Pick the no-statistics guess, rejecting unknown test types up front */
	if (clause->nulltesttype == IS_NULL)
		fallback = DEFAULT_UNK_SEL;
	else if (clause->nulltesttype == IS_NOT_NULL)
		fallback = DEFAULT_NOT_UNK_SEL;
	else
	{
		elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
			 (int) clause->nulltesttype);
		return (Selectivity) 0;	/* keep compiler quiet */
	}

	/* Look through any binary-compatible relabeling of the argument */
	operand = (Node *) clause->arg;
	if (IsA(operand, RelabelType))
		operand = ((RelabelType *) operand)->arg;

	/* Anything but a Var of the relation of interest: punt */
	if (!IsA(operand, Var))
		return (Selectivity) fallback;
	column = (Var *) operand;
	if (varRelid != 0 && varRelid != column->varno)
		return (Selectivity) fallback;

	reloid = getrelid(column->varno, root->rtable);
	if (reloid == InvalidOid)
		return (Selectivity) fallback;

	/* Fetch the statistics row for this attribute, if there is one */
	tuple = SearchSysCache(STATRELATT,
						   ObjectIdGetDatum(reloid),
						   Int16GetDatum(column->varattno),
						   0, 0);

	if (!HeapTupleIsValid(tuple))
	{
		/* No VACUUM ANALYZE stats available, so make a guess */
		result = fallback;
	}
	else
	{
		null_frac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
		ReleaseSysCache(tuple);

		/*
		 * The test type was validated above, so it is either IS_NULL
		 * (take the null fraction as is) or IS_NOT_NULL (its complement).
		 */
		if (clause->nulltesttype == IS_NULL)
			result = null_frac;
		else
			result = 1.0 - null_frac;
	}

	/* result should be in range, but make sure... */
	if (result < 0.0)
		result = 0.0;
	else if (result > 1.0)
		result = 1.0;

	return (Selectivity) result;
}
/*
* eqjoinsel - Join selectivity of "="
*/
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: builtins.h,v 1.155 2001/06/17 02:05:20 tgl Exp $
* $Id: builtins.h,v 1.156 2001/06/25 21:11:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -15,9 +15,8 @@
#define BUILTINS_H
#include "fmgr.h"
#include "nodes/relation.h" /* for amcostestimate parameters */
#include "storage/itemptr.h"
#include "utils/numeric.h"
#include "nodes/primnodes.h"
/*
* Defined in adt/
......@@ -342,57 +341,6 @@ extern char *deparse_expression(Node *expr, List *dpcontext,
bool forceprefix);
extern List *deparse_context_for(char *relname, Oid relid);
/* selfuncs.c */
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum rtcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
/* selfuncs.c supporting routines that are also used by optimizer code */
typedef enum
{
Pattern_Type_Like, Pattern_Type_Like_IC,
Pattern_Type_Regex, Pattern_Type_Regex_IC
} Pattern_Type;
typedef enum
{
Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
} Pattern_Prefix_Status;
extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,
Pattern_Type ptype,
char **prefix,
char **rest);
extern bool locale_is_like_safe(void);
extern char *make_greater_string(const char *str, Oid datatype);
/* tid.c */
extern Datum tidin(PG_FUNCTION_ARGS);
extern Datum tidout(PG_FUNCTION_ARGS);
......
/*-------------------------------------------------------------------------
*
* selfuncs.h
* Selectivity functions and index cost estimation functions for
* standard operators and index access methods.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: selfuncs.h,v 1.1 2001/06/25 21:11:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef SELFUNCS_H
#define SELFUNCS_H
#include "fmgr.h"
#include "nodes/parsenodes.h"
/*
 * Kind of pattern passed to pattern_fixed_prefix().
 * NOTE(review): the "_IC" variants presumably denote case-insensitive
 * matching -- confirm against selfuncs.c.
 */
typedef enum
{
	Pattern_Type_Like,
	Pattern_Type_Like_IC,
	Pattern_Type_Regex,
	Pattern_Type_Regex_IC
} Pattern_Type;
/*
 * Result code returned by pattern_fixed_prefix().
 * NOTE(review): presumably None = no fixed prefix found, Partial = a fixed
 * prefix followed by more pattern, Exact = the whole pattern is fixed --
 * confirm against selfuncs.c.
 */
typedef enum
{
	Pattern_Prefix_None,
	Pattern_Prefix_Partial,
	Pattern_Prefix_Exact
} Pattern_Prefix_Status;
/* selfuncs.c */
extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,
Pattern_Type ptype,
char **prefix,
char **rest);
extern bool locale_is_like_safe(void);
extern char *make_greater_string(const char *str, Oid datatype);
extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS);
extern Datum scalarltsel(PG_FUNCTION_ARGS);
extern Datum scalargtsel(PG_FUNCTION_ARGS);
extern Datum regexeqsel(PG_FUNCTION_ARGS);
extern Datum icregexeqsel(PG_FUNCTION_ARGS);
extern Datum likesel(PG_FUNCTION_ARGS);
extern Datum iclikesel(PG_FUNCTION_ARGS);
extern Datum regexnesel(PG_FUNCTION_ARGS);
extern Datum icregexnesel(PG_FUNCTION_ARGS);
extern Datum nlikesel(PG_FUNCTION_ARGS);
extern Datum icnlikesel(PG_FUNCTION_ARGS);
extern Datum eqjoinsel(PG_FUNCTION_ARGS);
extern Datum neqjoinsel(PG_FUNCTION_ARGS);
extern Datum scalarltjoinsel(PG_FUNCTION_ARGS);
extern Datum scalargtjoinsel(PG_FUNCTION_ARGS);
extern Datum regexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS);
extern Datum likejoinsel(PG_FUNCTION_ARGS);
extern Datum iclikejoinsel(PG_FUNCTION_ARGS);
extern Datum regexnejoinsel(PG_FUNCTION_ARGS);
extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
/*
 * Selectivity estimators for BooleanTest and NullTest nodes.  Unlike the
 * Datum-returning functions declared around them, these take planner
 * arguments directly and return a Selectivity.
 */
extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum rtcostestimate(PG_FUNCTION_ARGS);
extern Datum hashcostestimate(PG_FUNCTION_ARGS);
extern Datum gistcostestimate(PG_FUNCTION_ARGS);
#endif /* SELFUNCS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment