Commit 8787bc8e authored by Tom Lane's avatar Tom Lane

After further thought about support for gathering stats on functional
indexes, it seems like we ought to put another layer of indirection
between the compute_stats functions and the actual data storage.  This
would allow us to compute the values on-the-fly, for example.
parent 1a465233
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -52,6 +52,7 @@ static double init_selection_state(int n);
static double select_next_random_record(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b);
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static bool std_typanalyze(VacAttrStats *stats);
......@@ -259,12 +260,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
old_context = MemoryContextSwitchTo(col_context);
for (i = 0; i < attr_cnt; i++)
{
(*vacattrstats[i]->compute_stats) (vacattrstats[i],
vacattrstats[i]->tupattnum,
onerel->rd_att,
totalrows,
rows,
numrows);
VacAttrStats *stats = vacattrstats[i];
stats->rows = rows;
stats->tupDesc = onerel->rd_att;
(*stats->compute_stats) (stats,
std_fetch_func,
numrows,
totalrows);
MemoryContextResetAndDeleteChildren(col_context);
}
MemoryContextSwitchTo(old_context);
......@@ -861,6 +864,22 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
heap_close(sd, RowExclusiveLock);
}
/*
 * std_fetch_func -- the standard AnalyzeAttrFetchFunc.
 *
 * compute_stats subroutines call this to obtain the analyzed column's value
 * from sample row number "rownum", which keeps them insulated from how the
 * sample data is actually stored.
 *
 * The row is looked up in stats->rows and decoded with stats->tupDesc,
 * using stats->tupattnum as the attribute number.  isNull is handed
 * straight through to heap_getattr, and the Datum it yields is returned
 * unchanged.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	HeapTuple	sample_row = stats->rows[rownum];
	TupleDesc	row_desc = stats->tupDesc;
	int			col = stats->tupattnum;

	return heap_getattr(sample_row, col, row_desc, isNull);
}
/*==========================================================================
*
......@@ -915,12 +934,14 @@ static SortFunctionKind datumCmpFnKind;
static int *datumCmpTupnoLink;
static void compute_minimal_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
static void compute_scalar_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
static void compute_minimal_stats(VacAttrStatsP stats,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b);
......@@ -1024,9 +1045,10 @@ std_typanalyze(VacAttrStats *stats)
* depend mainly on the length of the list we are willing to keep.
*/
static void
compute_minimal_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows)
compute_minimal_stats(VacAttrStatsP stats,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows)
{
int i;
int null_cnt = 0;
......@@ -1061,9 +1083,8 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
fmgr_info(mystats->eqfunc, &f_cmpeq);
for (i = 0; i < numrows; i++)
for (i = 0; i < samplerows; i++)
{
HeapTuple tuple = rows[i];
Datum value;
bool isnull;
bool match;
......@@ -1072,7 +1093,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */
if (isnull)
......@@ -1166,7 +1187,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows;
stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt;
else
......@@ -1222,10 +1243,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
denom,
stadistinct;
numer = (double) numrows *(double) d;
numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) +
(double) f1 *(double) numrows / totalrows;
denom = (double) (samplerows - f1) +
(double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
......@@ -1270,7 +1291,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0)
ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct;
avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */
mincount = avgcount * 1.25;
if (mincount < 2)
......@@ -1303,7 +1324,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval,
stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows;
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
......@@ -1333,9 +1354,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
* data values into order.
*/
static void
compute_scalar_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows)
compute_scalar_stats(VacAttrStatsP stats,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows)
{
int i;
int null_cnt = 0;
......@@ -1359,23 +1381,22 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
int num_bins = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(numrows * sizeof(int));
values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(samplerows * sizeof(int));
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
fmgr_info(cmpFn, &f_cmpfn);
/* Initial scan to find sortable values */
for (i = 0; i < numrows; i++)
for (i = 0; i < samplerows; i++)
{
HeapTuple tuple = rows[i];
Datum value;
bool isnull;
vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */
if (isnull)
......@@ -1505,7 +1526,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows;
stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt;
else
......@@ -1546,10 +1567,10 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
denom,
stadistinct;
numer = (double) numrows *(double) d;
numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) +
(double) f1 *(double) numrows / totalrows;
denom = (double) (samplerows - f1) +
(double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
......@@ -1599,13 +1620,13 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0)
ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct;
avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */
mincount = avgcount * 1.25;
if (mincount < 2)
mincount = 2;
/* don't let threshold exceed 1/K, however */
maxmincount = (double) numrows / (double) num_bins;
maxmincount = (double) samplerows / (double) num_bins;
if (mincount > maxmincount)
mincount = maxmincount;
if (num_mcv > track_cnt)
......@@ -1636,7 +1657,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval,
stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows;
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.49 2004/02/12 23:41:04 tgl Exp $
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -40,18 +40,18 @@
* and must return TRUE to continue analysis, FALSE to skip analysis of this
* column. In the TRUE case it must set the compute_stats and minrows fields,
* and can optionally set extra_data to pass additional info to compute_stats.
* minrows is its request for the minimum number of sample rows to be gathered
* (but note this request might not be honored, eg if there are fewer rows
* than that in the table).
*
* The compute_stats routine will be called after sample rows have been
* gathered. Aside from this struct, it is passed:
* attnum: attribute number within the supplied tuples
* tupDesc: tuple descriptor for the supplied tuples
* fetchfunc: a function for accessing the column values from the
* sample rows
* samplerows: the number of sample tuples
* totalrows: estimated total number of rows in relation
* rows: an array of the sample tuples
* numrows: the number of sample tuples
* Note that the passed attnum and tupDesc could possibly be different from
* what one would expect by looking at the pg_attribute row. It is important
* to use these values for extracting attribute values from the given rows
* (and not for any other purpose).
* The fetchfunc may be called with rownum running from 0 to samplerows-1.
* It returns a Datum and an isNull flag.
*
* compute_stats should set stats_valid TRUE if it is able to compute
* any useful statistics. If it does, the remainder of the struct holds
......@@ -60,6 +60,11 @@
* be CurrentMemoryContext when compute_stats is called.
*----------
*/
/*
 * VacAttrStatsP: pointer to a VacAttrStats struct.  It is declared ahead of
 * the struct definition so that the function-pointer types that take a
 * stats pointer can refer to it.
 */
typedef struct VacAttrStats *VacAttrStatsP;
/*
 * AnalyzeAttrFetchFunc: signature of the accessor that a compute_stats
 * routine calls to get a column value out of the sample rows.  It is given
 * the stats struct and a row number, and returns the value as a Datum plus
 * an isNull flag through the third argument.
 */
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
bool *isNull);
typedef struct VacAttrStats
{
/*
......@@ -74,9 +79,10 @@ typedef struct VacAttrStats
* These fields must be filled in by the typanalyze routine,
* unless it returns FALSE.
*/
void (*compute_stats) (struct VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
void (*compute_stats) (VacAttrStatsP stats,
AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
int minrows; /* Minimum # of rows wanted for stats */
void *extra_data; /* for extra type-specific data */
......@@ -100,6 +106,8 @@ typedef struct VacAttrStats
* be looked at by type-specific functions.
*/
int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for fetch function */
TupleDesc tupDesc;
} VacAttrStats;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment