Commit 44878506 authored by Tom Lane

First step in fixing selectivity-estimation code.  eqsel and neqsel now
behave as per my suggestions in pghackers a few days ago.
Selectivity for < > <= >= should work OK for integral types as well, but
still needs work for nonintegral types.  Since these routines have never
actually executed before :-(, this may result in some significant changes
in the optimizer's choices of execution plans.  Let me know if you see
any serious misbehavior.
CAUTION: THESE CHANGES REQUIRE INITDB.  pg_statistic table has changed.
parent f851c6b0
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.115 1999/07/19 07:07:20 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.116 1999/08/01 04:54:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -78,7 +78,7 @@ static void vc_vacpage(Page page, VPageDescr vpd); ...@@ -78,7 +78,7 @@ static void vc_vacpage(Page page, VPageDescr vpd);
static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples); static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples);
static void vc_scanoneind(Relation indrel, int num_tuples); static void vc_scanoneind(Relation indrel, int num_tuples);
static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple); static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple);
static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len); static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats); static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
static void vc_delhilowstats(Oid relid, int attcnt, int *attnums); static void vc_delhilowstats(Oid relid, int attcnt, int *attnums);
static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl); static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl);
...@@ -473,9 +473,13 @@ vc_vacone(Oid relid, bool analyze, List *va_cols) ...@@ -473,9 +473,13 @@ vc_vacone(Oid relid, bool analyze, List *va_cols)
{ {
pgopform = (Form_pg_operator) GETSTRUCT(func_operator); pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
fmgr_info(pgopform->oprcode, &(stats->f_cmplt)); fmgr_info(pgopform->oprcode, &(stats->f_cmplt));
stats->op_cmplt = oprid(func_operator);
} }
else else
{
stats->f_cmplt.fn_addr = NULL; stats->f_cmplt.fn_addr = NULL;
stats->op_cmplt = InvalidOid;
}
func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true); func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true);
if (func_operator != NULL) if (func_operator != NULL)
...@@ -2200,8 +2204,8 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple) ...@@ -2200,8 +2204,8 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
{ {
swapDatum(stats->guess1, stats->guess2); swapDatum(stats->guess1, stats->guess2);
swapInt(stats->guess1_len, stats->guess2_len); swapInt(stats->guess1_len, stats->guess2_len);
stats->guess1_cnt = stats->guess2_hits;
swapLong(stats->guess1_hits, stats->guess2_hits); swapLong(stats->guess1_hits, stats->guess2_hits);
stats->guess1_cnt = stats->guess1_hits;
} }
if (stats->guess1_cnt > stats->best_cnt) if (stats->guess1_cnt > stats->best_cnt)
{ {
...@@ -2227,7 +2231,7 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple) ...@@ -2227,7 +2231,7 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
* *
*/ */
static void static void
vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len) vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
{ {
if (attr->attbyval && attr->attlen != -1) if (attr->attbyval && attr->attlen != -1)
*bucket = value; *bucket = value;
...@@ -2340,13 +2344,14 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2340,13 +2344,14 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
selratio = 0; selratio = 0;
else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
{ {
/* exact result when there are just 1 or 2 values... */
double min_cnt_d = stats->min_cnt, double min_cnt_d = stats->min_cnt,
max_cnt_d = stats->max_cnt, max_cnt_d = stats->max_cnt,
null_cnt_d = stats->null_cnt, null_cnt_d = stats->null_cnt,
nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */ nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) /
(nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d); (nonnull_cnt_d + null_cnt_d) / (nonnull_cnt_d + null_cnt_d);
} }
else else
{ {
...@@ -2359,7 +2364,9 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2359,7 +2364,9 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
*/ */
selratio = (most * most + 0.20 * most * (total - most)) / total / total; selratio = (most * most + 0.20 * most * (total - most)) / total / total;
} }
if (selratio > 1.0) if (selratio < 0.0)
selratio = 0.0;
else if (selratio > 1.0)
selratio = 1.0; selratio = 1.0;
attp->attdisbursion = selratio; attp->attdisbursion = selratio;
...@@ -2375,13 +2382,22 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2375,13 +2382,22 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
* doing system relations, especially pg_statistic is a * doing system relations, especially pg_statistic is a
* problem * problem
*/ */
if (VacAttrStatsLtGtValid(stats) && stats->initialized /* && if (VacAttrStatsLtGtValid(stats) && stats->initialized
* !IsSystemRelationName( /* && !IsSystemRelationName(pgcform->relname.data)
* */ )
pgcform->relname.data) */ )
{ {
float32data nullratio;
float32data bestratio;
FmgrInfo out_function; FmgrInfo out_function;
char *out_string; char *out_string;
double best_cnt_d = stats->best_cnt,
null_cnt_d = stats->null_cnt,
nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
fmgr_info(stats->outfunc, &out_function);
for (i = 0; i < Natts_pg_statistic; ++i) for (i = 0; i < Natts_pg_statistic; ++i)
nulls[i] = ' '; nulls[i] = ' ';
...@@ -2391,26 +2407,34 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2391,26 +2407,34 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
* ---------------- * ----------------
*/ */
i = 0; i = 0;
values[i++] = (Datum) relid; /* 1 */ values[i++] = (Datum) relid; /* starelid */
values[i++] = (Datum) attp->attnum; /* 2 */ values[i++] = (Datum) attp->attnum; /* staattnum */
values[i++] = (Datum) InvalidOid; /* 3 */ values[i++] = (Datum) stats->op_cmplt; /* staop */
fmgr_info(stats->outfunc, &out_function); /* hack: this code knows float4 is pass-by-ref */
out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid); values[i++] = PointerGetDatum(&nullratio); /* stanullfrac */
values[i++] = (Datum) fmgr(F_TEXTIN, out_string); values[i++] = PointerGetDatum(&bestratio); /* stacommonfrac */
out_string = (*fmgr_faddr(&out_function)) (stats->best, stats->attr->atttypid, stats->attr->atttypmod);
values[i++] = PointerGetDatum(textin(out_string)); /* stacommonval */
pfree(out_string); pfree(out_string);
out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid); out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid, stats->attr->atttypmod);
values[i++] = (Datum) fmgr(F_TEXTIN, out_string); values[i++] = PointerGetDatum(textin(out_string)); /* staloval */
pfree(out_string);
out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid, stats->attr->atttypmod);
values[i++] = PointerGetDatum(textin(out_string)); /* stahival */
pfree(out_string); pfree(out_string);
stup = heap_formtuple(sd->rd_att, values, nulls); stup = heap_formtuple(sd->rd_att, values, nulls);
/* ---------------- /* ----------------
* insert the tuple in the relation and get the tuple's oid. * insert the tuple in the relation.
* ---------------- * ----------------
*/ */
heap_insert(sd, stup); heap_insert(sd, stup);
pfree(DatumGetPointer(values[3]));
pfree(DatumGetPointer(values[4])); /* release allocated space */
pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval-1]));
pfree(DatumGetPointer(values[Anum_pg_statistic_staloval-1]));
pfree(DatumGetPointer(values[Anum_pg_statistic_stahival-1]));
pfree(stup); pfree(stup);
} }
} }
......
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: pg_statistic.h,v 1.6 1999/02/13 23:21:15 momjian Exp $ * $Id: pg_statistic.h,v 1.7 1999/08/01 04:54:21 tgl Exp $
* *
* NOTES * NOTES
* the genbki.sh script reads this file and generates .bki * the genbki.sh script reads this file and generates .bki
...@@ -32,11 +32,32 @@ ...@@ -32,11 +32,32 @@
*/ */
CATALOG(pg_statistic) CATALOG(pg_statistic)
{ {
Oid starelid; /* These fields form the unique key for the entry: */
int2 staattnum; Oid starelid; /* relation containing attribute */
Oid staop; int2 staattnum; /* attribute (column) stats are for */
text stalokey; /* VARIABLE LENGTH FIELD */ Oid staop; /* '<' comparison op used for lo/hi vals */
text stahikey; /* VARIABLE LENGTH FIELD */ /* Note: the current VACUUM code will never produce more than one entry
* per column, but in theory there could be multiple entries if a datatype
* has more than one useful ordering operator. Also, the current code
* will not write an entry unless it found at least one non-NULL value
* in the column; so the remaining fields will never be NULL.
*/
/* These fields contain the stats about the column indicated by the key */
float4 stanullfrac; /* the fraction of the entries that are NULL */
float4 stacommonfrac; /* the fraction that are the most common val */
/* THE REST OF THESE ARE VARIABLE LENGTH FIELDS.
* They cannot be accessed as C struct entries; you have to use the
* full field access machinery (heap_getattr) for them.
*
* All three of these are text representations of data values of the
* column's data type. To re-create the actual Datum, do
* datatypein(textout(givenvalue)).
*/
text stacommonval; /* most common non-null value in column */
text staloval; /* smallest non-null value in column */
text stahival; /* largest non-null value in column */
} FormData_pg_statistic; } FormData_pg_statistic;
/* ---------------- /* ----------------
...@@ -50,11 +71,14 @@ typedef FormData_pg_statistic *Form_pg_statistic; ...@@ -50,11 +71,14 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* compiler constants for pg_statistic * compiler constants for pg_statistic
* ---------------- * ----------------
*/ */
#define Natts_pg_statistic 5 #define Natts_pg_statistic 8
#define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_starelid 1
#define Anum_pg_statistic_staattnum 2 #define Anum_pg_statistic_staattnum 2
#define Anum_pg_statistic_staop 3 #define Anum_pg_statistic_staop 3
#define Anum_pg_statistic_stalokey 4 #define Anum_pg_statistic_stanullfrac 4
#define Anum_pg_statistic_stahikey 5 #define Anum_pg_statistic_stacommonfrac 5
#define Anum_pg_statistic_stacommonval 6
#define Anum_pg_statistic_staloval 7
#define Anum_pg_statistic_stahival 8
#endif /* PG_STATISTIC_H */ #endif /* PG_STATISTIC_H */
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: vacuum.h,v 1.22 1999/07/15 15:21:03 momjian Exp $ * $Id: vacuum.h,v 1.23 1999/08/01 04:54:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -67,22 +67,23 @@ typedef struct ...@@ -67,22 +67,23 @@ typedef struct
guess2, guess2,
max, max,
min; min;
int16 best_len, int best_len,
guess1_len, guess1_len,
guess2_len, guess2_len,
max_len, max_len,
min_len; min_len;
int32 best_cnt, long best_cnt,
guess1_cnt, guess1_cnt,
guess1_hits, guess1_hits,
guess2_hits, guess2_hits,
null_cnt, null_cnt,
nonnull_cnt; nonnull_cnt,
int32 max_cnt, max_cnt,
min_cnt; min_cnt;
FmgrInfo f_cmpeq, FmgrInfo f_cmpeq,
f_cmplt, f_cmplt,
f_cmpgt; f_cmpgt;
Oid op_cmplt;
regproc outfunc; regproc outfunc;
bool initialized; bool initialized;
} VacAttrStats; } VacAttrStats;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: builtins.h,v 1.84 1999/07/16 17:07:39 momjian Exp $ * $Id: builtins.h,v 1.85 1999/08/01 04:54:20 tgl Exp $
* *
* NOTES * NOTES
* This should normally only be included by fmgr.h. * This should normally only be included by fmgr.h.
...@@ -372,10 +372,10 @@ extern Oid regproctooid(RegProcedure rp); ...@@ -372,10 +372,10 @@ extern Oid regproctooid(RegProcedure rp);
#define RegprocToOid(rp) regproctooid(rp) #define RegprocToOid(rp) regproctooid(rp)
/* selfuncs.c */ /* selfuncs.c */
extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag); extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag); extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag); extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag); extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2); extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2); extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
extern float64 intltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2); extern float64 intltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment