Commit 14f84cd8 authored by Tom Lane

Store -1 in attdisbursion to signal 'no duplicates in column'.

Centralize att_disbursion readout logic.
parent 5af4b04f
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.117 1999/08/08 17:13:10 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.118 1999/08/09 03:16:47 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -2346,12 +2346,20 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2346,12 +2346,20 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
} }
else if (stats->null_cnt <= 1 && stats->best_cnt == 1) else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
{ {
/* looks like we have a unique-key attribute */ /* looks like we have a unique-key attribute ---
double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt); * flag this with special -1.0 flag value.
*
selratio = 1.0 / total; * The correct disbursion is 1.0/numberOfRows, but since
* the relation row count can get updated without
* recomputing disbursion, we want to store a "symbolic"
* value and figure 1.0/numberOfRows on the fly.
*/
selratio = -1;
} }
else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt) else
{
if (VacAttrStatsLtGtValid(stats) &&
stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
{ {
/* exact result when there are just 1 or 2 values... */ /* exact result when there are just 1 or 2 values... */
double min_cnt_d = stats->min_cnt, double min_cnt_d = stats->min_cnt,
...@@ -2372,10 +2380,12 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats * ...@@ -2372,10 +2380,12 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
*/ */
selratio = (most * most + 0.20 * most * (total - most)) / (total * total); selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
} }
/* Make sure calculated values are in-range */
if (selratio < 0.0) if (selratio < 0.0)
selratio = 0.0; selratio = 0.0;
else if (selratio > 1.0) else if (selratio > 1.0)
selratio = 1.0; selratio = 1.0;
}
attp->attdisbursion = selratio; attp->attdisbursion = selratio;
/* /*
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.43 1999/08/06 04:00:15 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.44 1999/08/09 03:16:43 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "optimizer/pathnode.h" #include "optimizer/pathnode.h"
#include "optimizer/paths.h" #include "optimizer/paths.h"
#include "parser/parsetree.h" #include "parser/parsetree.h"
#include "utils/syscache.h" #include "utils/lsyscache.h"
static Path *best_innerjoin(List *join_paths, List *outer_relid); static Path *best_innerjoin(List *join_paths, List *outer_relid);
static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
...@@ -586,7 +586,6 @@ hash_inner_and_outer(Query *root, ...@@ -586,7 +586,6 @@ hash_inner_and_outer(Query *root,
/* /*
* Estimate disbursion of the specified Var * Estimate disbursion of the specified Var
* Generate some kind of estimate, no matter what...
* *
* We use a default of 0.1 if we can't figure out anything better. * We use a default of 0.1 if we can't figure out anything better.
* This will typically discourage use of a hash rather strongly, * This will typically discourage use of a hash rather strongly,
...@@ -598,24 +597,11 @@ static Cost ...@@ -598,24 +597,11 @@ static Cost
estimate_disbursion(Query *root, Var *var) estimate_disbursion(Query *root, Var *var)
{ {
Oid relid; Oid relid;
HeapTuple atp;
double disbursion;
if (! IsA(var, Var)) if (! IsA(var, Var))
return 0.1; return 0.1;
relid = getrelid(var->varno, root->rtable); relid = getrelid(var->varno, root->rtable);
atp = SearchSysCacheTuple(ATTNUM, return (Cost) get_attdisbursion(relid, var->varattno, 0.1);
ObjectIdGetDatum(relid),
Int16GetDatum(var->varattno),
0, 0);
if (! HeapTupleIsValid(atp))
return 0.1;
disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
if (disbursion > 0.0)
return disbursion;
return 0.1;
} }
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.37 1999/08/02 02:05:41 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.38 1999/08/09 03:16:45 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -52,7 +52,6 @@ static bool getattstatistics(Oid relid, AttrNumber attnum, ...@@ -52,7 +52,6 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
Datum *commonval, Datum *commonval,
Datum *loval, Datum *loval,
Datum *hival); Datum *hival);
static double getattdisbursion(Oid relid, AttrNumber attnum);
/* /*
...@@ -172,7 +171,7 @@ eqsel(Oid opid, ...@@ -172,7 +171,7 @@ eqsel(Oid opid,
/* No VACUUM ANALYZE stats available, so make a guess using /* No VACUUM ANALYZE stats available, so make a guess using
* the disbursion stat (if we have that, which is unlikely...) * the disbursion stat (if we have that, which is unlikely...)
*/ */
selec = getattdisbursion(relid, attno); selec = get_attdisbursion(relid, attno, 0.01);
} }
*result = (float64data) selec; *result = (float64data) selec;
...@@ -374,8 +373,8 @@ eqjoinsel(Oid opid, ...@@ -374,8 +373,8 @@ eqjoinsel(Oid opid,
*result = 0.1; *result = 0.1;
else else
{ {
num1 = getattdisbursion(relid1, attno1); num1 = get_attdisbursion(relid1, attno1, 0.01);
num2 = getattdisbursion(relid2, attno2); num2 = get_attdisbursion(relid2, attno2, 0.01);
max = (num1 > num2) ? num1 : num2; max = (num1 > num2) ? num1 : num2;
if (max <= 0) if (max <= 0)
*result = 1.0; *result = 1.0;
...@@ -675,60 +674,6 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod, ...@@ -675,60 +674,6 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod,
return true; return true;
} }
/*
 * getattdisbursion
 *	  Fetch the disbursion statistic stored for attribute 'attnum' of
 *	  relation 'relid'.  A stored value is used only when it is greater
 *	  than zero; otherwise an estimate is made up from the relation's
 *	  recorded tuple count.
 *
 * Returns the stored statistic, or else the made-up estimate, which is
 * never allowed to be smaller than 0.01.
 */
static double
getattdisbursion(Oid relid, AttrNumber attnum)
{
	HeapTuple	tuple;
	double		result;
	int32		numrows;

	/* Look up the attribute's catalog entry */
	tuple = SearchSysCacheTuple(ATTNUM,
								ObjectIdGetDatum(relid),
								Int16GetDatum(attnum),
								0, 0);
	if (!HeapTupleIsValid(tuple))
	{
		/* not expected to occur; 0.1 is handed back if elog returns */
		elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
			 relid, attnum);
		return 0.1;
	}

	result = ((Form_pg_attribute) GETSTRUCT(tuple))->attdisbursion;

	if (!(result > 0.0))
	{
		/*
		 * No usable statistic has been stored (VACUUM ANALYZE has not
		 * supplied one), so fall back on 1/numtuples using the tuple
		 * count recorded for the relation.
		 */
		tuple = SearchSysCacheTuple(RELOID,
									ObjectIdGetDatum(relid),
									0, 0, 0);
		if (!HeapTupleIsValid(tuple))
		{
			/* not expected to occur; 0.1 is handed back if elog returns */
			elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
			return 0.1;
		}

		numrows = ((Form_pg_class) GETSTRUCT(tuple))->reltuples;
		if (numrows > 0)
			result = 1.0 / (double) numrows;

		/*
		 * 1/numtuples can be unreasonably small for a large table, so
		 * the estimate is floored at 0.01.
		 */
		return (result < 0.01) ? 0.01 : result;
	}

	return result;
}
float64 float64
btreesel(Oid operatorObjectId, btreesel(Oid operatorObjectId,
Oid indrelid, Oid indrelid,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment