Commit 8787bc8e authored by Tom Lane's avatar Tom Lane

After further thought about support for gathering stats on functional
indexes, it seems like we ought to put another layer of indirection
between the compute_stats functions and the actual data storage.  This
would allow us to compute the values on-the-fly, for example.
parent 1a465233
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -52,6 +52,7 @@ static double init_selection_state(int n); ...@@ -52,6 +52,7 @@ static double init_selection_state(int n);
static double select_next_random_record(double t, int n, double *stateptr); static double select_next_random_record(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b); static int compare_rows(const void *a, const void *b);
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats); static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static bool std_typanalyze(VacAttrStats *stats); static bool std_typanalyze(VacAttrStats *stats);
...@@ -259,12 +260,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) ...@@ -259,12 +260,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
old_context = MemoryContextSwitchTo(col_context); old_context = MemoryContextSwitchTo(col_context);
for (i = 0; i < attr_cnt; i++) for (i = 0; i < attr_cnt; i++)
{ {
(*vacattrstats[i]->compute_stats) (vacattrstats[i], VacAttrStats *stats = vacattrstats[i];
vacattrstats[i]->tupattnum,
onerel->rd_att, stats->rows = rows;
totalrows, stats->tupDesc = onerel->rd_att;
rows, (*stats->compute_stats) (stats,
numrows); std_fetch_func,
numrows,
totalrows);
MemoryContextResetAndDeleteChildren(col_context); MemoryContextResetAndDeleteChildren(col_context);
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
...@@ -861,6 +864,22 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) ...@@ -861,6 +864,22 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
heap_close(sd, RowExclusiveLock); heap_close(sd, RowExclusiveLock);
} }
/*
 * std_fetch_func -- default value accessor handed to compute_stats routines.
 *
 * The compute_stats routines only ask for "the value in sample row N";
 * this function is what knows where the sample actually lives.  It looks
 * up row number rownum in stats->rows and extracts the attribute numbered
 * stats->tupattnum using the descriptor in stats->tupDesc.
 *
 * The extracted Datum is the return value; the null indicator is passed
 * straight through to heap_getattr via isNull.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	HeapTuple	sampleRow = stats->rows[rownum];

	return heap_getattr(sampleRow, stats->tupattnum, stats->tupDesc, isNull);
}
/*========================================================================== /*==========================================================================
* *
...@@ -915,12 +934,14 @@ static SortFunctionKind datumCmpFnKind; ...@@ -915,12 +934,14 @@ static SortFunctionKind datumCmpFnKind;
static int *datumCmpTupnoLink; static int *datumCmpTupnoLink;
static void compute_minimal_stats(VacAttrStats *stats, int attnum, static void compute_minimal_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows); int samplerows,
static void compute_scalar_stats(VacAttrStats *stats, int attnum, double totalrows);
TupleDesc tupDesc, double totalrows, static void compute_scalar_stats(VacAttrStatsP stats,
HeapTuple *rows, int numrows); AnalyzeAttrFetchFunc fetchfunc,
int samplerows,
double totalrows);
static int compare_scalars(const void *a, const void *b); static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b); static int compare_mcvs(const void *a, const void *b);
...@@ -1024,9 +1045,10 @@ std_typanalyze(VacAttrStats *stats) ...@@ -1024,9 +1045,10 @@ std_typanalyze(VacAttrStats *stats)
* depend mainly on the length of the list we are willing to keep. * depend mainly on the length of the list we are willing to keep.
*/ */
static void static void
compute_minimal_stats(VacAttrStats *stats, int attnum, compute_minimal_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows) int samplerows,
double totalrows)
{ {
int i; int i;
int null_cnt = 0; int null_cnt = 0;
...@@ -1061,9 +1083,8 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1061,9 +1083,8 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
fmgr_info(mystats->eqfunc, &f_cmpeq); fmgr_info(mystats->eqfunc, &f_cmpeq);
for (i = 0; i < numrows; i++) for (i = 0; i < samplerows; i++)
{ {
HeapTuple tuple = rows[i];
Datum value; Datum value;
bool isnull; bool isnull;
bool match; bool match;
...@@ -1072,7 +1093,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1072,7 +1093,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
vacuum_delay_point(); vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull); value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */ /* Check for null/nonnull */
if (isnull) if (isnull)
...@@ -1166,7 +1187,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1166,7 +1187,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true; stats->stats_valid = true;
/* Do the simple null-frac and width stats */ /* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows; stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth) if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt; stats->stawidth = total_width / (double) nonnull_cnt;
else else
...@@ -1222,10 +1243,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1222,10 +1243,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
denom, denom,
stadistinct; stadistinct;
numer = (double) numrows *(double) d; numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) + denom = (double) (samplerows - f1) +
(double) f1 *(double) numrows / totalrows; (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom; stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */ /* Clamp to sane range in case of roundoff error */
...@@ -1270,7 +1291,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1270,7 +1291,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0) if (ndistinct < 0)
ndistinct = -ndistinct * totalrows; ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */ /* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct; avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */ /* set minimum threshold count to store a value */
mincount = avgcount * 1.25; mincount = avgcount * 1.25;
if (mincount < 2) if (mincount < 2)
...@@ -1303,7 +1324,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1303,7 +1324,7 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(track[i].value, mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval, stats->attr->attbyval,
stats->attr->attlen); stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
...@@ -1333,9 +1354,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum, ...@@ -1333,9 +1354,10 @@ compute_minimal_stats(VacAttrStats *stats, int attnum,
* data values into order. * data values into order.
*/ */
static void static void
compute_scalar_stats(VacAttrStats *stats, int attnum, compute_scalar_stats(VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows) int samplerows,
double totalrows)
{ {
int i; int i;
int null_cnt = 0; int null_cnt = 0;
...@@ -1359,23 +1381,22 @@ compute_scalar_stats(VacAttrStats *stats, int attnum, ...@@ -1359,23 +1381,22 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
int num_bins = stats->attr->attstattarget; int num_bins = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem)); values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(numrows * sizeof(int)); tupnoLink = (int *) palloc(samplerows * sizeof(int));
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind); SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
fmgr_info(cmpFn, &f_cmpfn); fmgr_info(cmpFn, &f_cmpfn);
/* Initial scan to find sortable values */ /* Initial scan to find sortable values */
for (i = 0; i < numrows; i++) for (i = 0; i < samplerows; i++)
{ {
HeapTuple tuple = rows[i];
Datum value; Datum value;
bool isnull; bool isnull;
vacuum_delay_point(); vacuum_delay_point();
value = heap_getattr(tuple, attnum, tupDesc, &isnull); value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */ /* Check for null/nonnull */
if (isnull) if (isnull)
...@@ -1505,7 +1526,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum, ...@@ -1505,7 +1526,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
stats->stats_valid = true; stats->stats_valid = true;
/* Do the simple null-frac and width stats */ /* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) numrows; stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth) if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt; stats->stawidth = total_width / (double) nonnull_cnt;
else else
...@@ -1546,10 +1567,10 @@ compute_scalar_stats(VacAttrStats *stats, int attnum, ...@@ -1546,10 +1567,10 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
denom, denom,
stadistinct; stadistinct;
numer = (double) numrows *(double) d; numer = (double) samplerows *(double) d;
denom = (double) (numrows - f1) + denom = (double) (samplerows - f1) +
(double) f1 *(double) numrows / totalrows; (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom; stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */ /* Clamp to sane range in case of roundoff error */
...@@ -1599,13 +1620,13 @@ compute_scalar_stats(VacAttrStats *stats, int attnum, ...@@ -1599,13 +1620,13 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
if (ndistinct < 0) if (ndistinct < 0)
ndistinct = -ndistinct * totalrows; ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */ /* estimate # of occurrences in sample of a typical value */
avgcount = (double) numrows / ndistinct; avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */ /* set minimum threshold count to store a value */
mincount = avgcount * 1.25; mincount = avgcount * 1.25;
if (mincount < 2) if (mincount < 2)
mincount = 2; mincount = 2;
/* don't let threshold exceed 1/K, however */ /* don't let threshold exceed 1/K, however */
maxmincount = (double) numrows / (double) num_bins; maxmincount = (double) samplerows / (double) num_bins;
if (mincount > maxmincount) if (mincount > maxmincount)
mincount = maxmincount; mincount = maxmincount;
if (num_mcv > track_cnt) if (num_mcv > track_cnt)
...@@ -1636,7 +1657,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum, ...@@ -1636,7 +1657,7 @@ compute_scalar_stats(VacAttrStats *stats, int attnum,
mcv_values[i] = datumCopy(values[track[i].first].value, mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval, stats->attr->attbyval,
stats->attr->attlen); stats->attr->attlen);
mcv_freqs[i] = (double) track[i].count / (double) numrows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.49 2004/02/12 23:41:04 tgl Exp $ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -40,18 +40,18 @@ ...@@ -40,18 +40,18 @@
* and must return TRUE to continue analysis, FALSE to skip analysis of this * and must return TRUE to continue analysis, FALSE to skip analysis of this
* column. In the TRUE case it must set the compute_stats and minrows fields, * column. In the TRUE case it must set the compute_stats and minrows fields,
* and can optionally set extra_data to pass additional info to compute_stats. * and can optionally set extra_data to pass additional info to compute_stats.
* minrows is its request for the minimum number of sample rows to be gathered
* (but note this request might not be honored, eg if there are fewer rows
* than that in the table).
* *
* The compute_stats routine will be called after sample rows have been * The compute_stats routine will be called after sample rows have been
* gathered. Aside from this struct, it is passed: * gathered. Aside from this struct, it is passed:
* attnum: attribute number within the supplied tuples * fetchfunc: a function for accessing the column values from the
* tupDesc: tuple descriptor for the supplied tuples * sample rows
* samplerows: the number of sample tuples
* totalrows: estimated total number of rows in relation * totalrows: estimated total number of rows in relation
* rows: an array of the sample tuples * The fetchfunc may be called with rownum running from 0 to samplerows-1.
* numrows: the number of sample tuples * It returns a Datum and an isNull flag.
* Note that the passed attnum and tupDesc could possibly be different from
* what one would expect by looking at the pg_attribute row. It is important
* to use these values for extracting attribute values from the given rows
* (and not for any other purpose).
* *
* compute_stats should set stats_valid TRUE if it is able to compute * compute_stats should set stats_valid TRUE if it is able to compute
* any useful statistics. If it does, the remainder of the struct holds * any useful statistics. If it does, the remainder of the struct holds
...@@ -60,6 +60,11 @@ ...@@ -60,6 +60,11 @@
* be CurrentMemoryContext when compute_stats is called. * be CurrentMemoryContext when compute_stats is called.
*---------- *----------
*/ */
/*
 * Pointer-to-VacAttrStats typedef, declared ahead of the struct itself so
 * that the function-pointer types below (and the compute_stats member of
 * the struct) can name it.
 */
typedef struct VacAttrStats *VacAttrStatsP;
/*
 * Signature of a fetch function: given the stats struct and a sample row
 * number, it returns the column value as a Datum and reports whether the
 * value is null through *isNull.
 */
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
bool *isNull);
typedef struct VacAttrStats typedef struct VacAttrStats
{ {
/* /*
...@@ -74,9 +79,10 @@ typedef struct VacAttrStats ...@@ -74,9 +79,10 @@ typedef struct VacAttrStats
* These fields must be filled in by the typanalyze routine, * These fields must be filled in by the typanalyze routine,
* unless it returns FALSE. * unless it returns FALSE.
*/ */
void (*compute_stats) (struct VacAttrStats *stats, int attnum, void (*compute_stats) (VacAttrStatsP stats,
TupleDesc tupDesc, double totalrows, AnalyzeAttrFetchFunc fetchfunc,
HeapTuple *rows, int numrows); int samplerows,
double totalrows);
int minrows; /* Minimum # of rows wanted for stats */ int minrows; /* Minimum # of rows wanted for stats */
void *extra_data; /* for extra type-specific data */ void *extra_data; /* for extra type-specific data */
...@@ -100,6 +106,8 @@ typedef struct VacAttrStats ...@@ -100,6 +106,8 @@ typedef struct VacAttrStats
* be looked at by type-specific functions. * be looked at by type-specific functions.
*/ */
int tupattnum; /* attribute number within tuples */ int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for fetch function */
TupleDesc tupDesc;
} VacAttrStats; } VacAttrStats;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment