Commit 67becf8d authored by Tom Lane's avatar Tom Lane

Fix ANALYZE's ancient deficiency of not trying to collect stats for expression

indexes when the index column type (the opclass opckeytype) is different from
the expression's datatype.  When coded, this limitation wasn't worth worrying
about because we had no intelligence to speak of in stats collection for the
datatypes used by such opclasses.  However, now that there's non-toy
estimation capability for tsvector queries, it amounts to a bug that ANALYZE
fails to do this.

The fix changes struct VacAttrStats, and therefore constitutes an API break
for custom typanalyze functions.  Therefore we can't back-patch it into
released branches, but it was agreed that 9.0 isn't yet frozen hard enough
to make such a change unacceptable.  Ergo, back-patch to 9.0 but no further.
The API break had better be mentioned in 9.0 release notes.
parent 97532f7c
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.152 2010/02/26 02:00:37 momjian Exp $ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.153 2010/08/01 22:38:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -92,7 +92,8 @@ static void compute_index_stats(Relation onerel, double totalrows, ...@@ -92,7 +92,8 @@ static void compute_index_stats(Relation onerel, double totalrows,
AnlIndexData *indexdata, int nindexes, AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows, HeapTuple *rows, int numrows,
MemoryContext col_context); MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum); static VacAttrStats *examine_attribute(Relation onerel, int attnum,
Node *index_expr);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows, static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows); int targrows, double *totalrows, double *totaldeadrows);
static double random_fract(void); static double random_fract(void);
...@@ -339,7 +340,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -339,7 +340,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
(errcode(ERRCODE_UNDEFINED_COLUMN), (errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist", errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel)))); col, RelationGetRelationName(onerel))));
vacattrstats[tcnt] = examine_attribute(onerel, i); vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL) if (vacattrstats[tcnt] != NULL)
tcnt++; tcnt++;
} }
...@@ -353,7 +354,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -353,7 +354,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
tcnt = 0; tcnt = 0;
for (i = 1; i <= attr_cnt; i++) for (i = 1; i <= attr_cnt; i++)
{ {
vacattrstats[tcnt] = examine_attribute(onerel, i); vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL) if (vacattrstats[tcnt] != NULL)
tcnt++; tcnt++;
} }
...@@ -407,21 +408,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -407,21 +408,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
elog(ERROR, "too few entries in indexprs list"); elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item); indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item); indexpr_item = lnext(indexpr_item);
/*
* Can't analyze if the opclass uses a storage type
* different from the expression result type. We'd get
* confused because the type shown in pg_attribute for
* the index column doesn't match what we are getting
* from the expression. Perhaps this can be fixed
* someday, but for now, punt.
*/
if (exprType(indexkey) !=
Irel[ind]->rd_att->attrs[i]->atttypid)
continue;
thisdata->vacattrstats[tcnt] = thisdata->vacattrstats[tcnt] =
examine_attribute(Irel[ind], i + 1); examine_attribute(Irel[ind], i + 1, indexkey);
if (thisdata->vacattrstats[tcnt] != NULL) if (thisdata->vacattrstats[tcnt] != NULL)
{ {
tcnt++; tcnt++;
...@@ -802,9 +790,12 @@ compute_index_stats(Relation onerel, double totalrows, ...@@ -802,9 +790,12 @@ compute_index_stats(Relation onerel, double totalrows,
* *
* Determine whether the column is analyzable; if so, create and initialize * Determine whether the column is analyzable; if so, create and initialize
* a VacAttrStats struct for it. If not, return NULL. * a VacAttrStats struct for it. If not, return NULL.
*
* If index_expr isn't NULL, then we're trying to analyze an expression index,
* and index_expr is the expression tree representing the column's data.
*/ */
static VacAttrStats * static VacAttrStats *
examine_attribute(Relation onerel, int attnum) examine_attribute(Relation onerel, int attnum, Node *index_expr)
{ {
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1]; Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple; HeapTuple typtuple;
...@@ -827,9 +818,30 @@ examine_attribute(Relation onerel, int attnum) ...@@ -827,9 +818,30 @@ examine_attribute(Relation onerel, int attnum)
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
/*
* When analyzing an expression index, believe the expression tree's type
* not the column datatype --- the latter might be the opckeytype storage
* type of the opclass, which is not interesting for our purposes. (Note:
* if we did anything with non-expression index columns, we'd need to
* figure out where to get the correct type info from, but for now that's
* not a problem.) It's not clear whether anyone will care about the
* typmod, but we store that too just in case.
*/
if (index_expr)
{
stats->attrtypid = exprType(index_expr);
stats->attrtypmod = exprTypmod(index_expr);
}
else
{
stats->attrtypid = attr->atttypid;
stats->attrtypmod = attr->atttypmod;
}
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
if (!HeapTupleIsValid(typtuple)) if (!HeapTupleIsValid(typtuple))
elog(ERROR, "cache lookup failed for type %u", attr->atttypid); elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type)); stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type)); memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
ReleaseSysCache(typtuple); ReleaseSysCache(typtuple);
...@@ -838,12 +850,12 @@ examine_attribute(Relation onerel, int attnum) ...@@ -838,12 +850,12 @@ examine_attribute(Relation onerel, int attnum)
/* /*
* The fields describing the stats->stavalues[n] element types default to * The fields describing the stats->stavalues[n] element types default to
* the type of the field being analyzed, but the type-specific typanalyze * the type of the data being analyzed, but the type-specific typanalyze
* function can change them if it wants to store something else. * function can change them if it wants to store something else.
*/ */
for (i = 0; i < STATISTIC_NUM_SLOTS; i++) for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{ {
stats->statypid[i] = stats->attr->atttypid; stats->statypid[i] = stats->attrtypid;
stats->statyplen[i] = stats->attrtype->typlen; stats->statyplen[i] = stats->attrtype->typlen;
stats->statypbyval[i] = stats->attrtype->typbyval; stats->statypbyval[i] = stats->attrtype->typbyval;
stats->statypalign[i] = stats->attrtype->typalign; stats->statypalign[i] = stats->attrtype->typalign;
...@@ -1780,7 +1792,7 @@ std_typanalyze(VacAttrStats *stats) ...@@ -1780,7 +1792,7 @@ std_typanalyze(VacAttrStats *stats)
attr->attstattarget = default_statistics_target; attr->attstattarget = default_statistics_target;
/* Look for default "<" and "=" operators for column's type */ /* Look for default "<" and "=" operators for column's type */
get_sort_group_operators(attr->atttypid, get_sort_group_operators(stats->attrtypid,
false, false, false, false, false, false,
&ltopr, &eqopr, NULL); &ltopr, &eqopr, NULL);
...@@ -1860,10 +1872,10 @@ compute_minimal_stats(VacAttrStatsP stats, ...@@ -1860,10 +1872,10 @@ compute_minimal_stats(VacAttrStatsP stats,
int nonnull_cnt = 0; int nonnull_cnt = 0;
int toowide_cnt = 0; int toowide_cnt = 0;
double total_width = 0; double total_width = 0;
bool is_varlena = (!stats->attr->attbyval && bool is_varlena = (!stats->attrtype->typbyval &&
stats->attr->attlen == -1); stats->attrtype->typlen == -1);
bool is_varwidth = (!stats->attr->attbyval && bool is_varwidth = (!stats->attrtype->typbyval &&
stats->attr->attlen < 0); stats->attrtype->typlen < 0);
FmgrInfo f_cmpeq; FmgrInfo f_cmpeq;
typedef struct typedef struct
{ {
...@@ -2126,8 +2138,8 @@ compute_minimal_stats(VacAttrStatsP stats, ...@@ -2126,8 +2138,8 @@ compute_minimal_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++) for (i = 0; i < num_mcv; i++)
{ {
mcv_values[i] = datumCopy(track[i].value, mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval, stats->attrtype->typbyval,
stats->attr->attlen); stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
...@@ -2184,10 +2196,10 @@ compute_scalar_stats(VacAttrStatsP stats, ...@@ -2184,10 +2196,10 @@ compute_scalar_stats(VacAttrStatsP stats,
int nonnull_cnt = 0; int nonnull_cnt = 0;
int toowide_cnt = 0; int toowide_cnt = 0;
double total_width = 0; double total_width = 0;
bool is_varlena = (!stats->attr->attbyval && bool is_varlena = (!stats->attrtype->typbyval &&
stats->attr->attlen == -1); stats->attrtype->typlen == -1);
bool is_varwidth = (!stats->attr->attbyval && bool is_varwidth = (!stats->attrtype->typbyval &&
stats->attr->attlen < 0); stats->attrtype->typlen < 0);
double corr_xysum; double corr_xysum;
Oid cmpFn; Oid cmpFn;
int cmpFlags; int cmpFlags;
...@@ -2476,8 +2488,8 @@ compute_scalar_stats(VacAttrStatsP stats, ...@@ -2476,8 +2488,8 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++) for (i = 0; i < num_mcv; i++)
{ {
mcv_values[i] = datumCopy(values[track[i].first].value, mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval, stats->attrtype->typbyval,
stats->attr->attlen); stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows; mcv_freqs[i] = (double) track[i].count / (double) samplerows;
} }
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
...@@ -2583,8 +2595,8 @@ compute_scalar_stats(VacAttrStatsP stats, ...@@ -2583,8 +2595,8 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_hist; i++) for (i = 0; i < num_hist; i++)
{ {
hist_values[i] = datumCopy(values[pos].value, hist_values[i] = datumCopy(values[pos].value,
stats->attr->attbyval, stats->attrtype->typbyval,
stats->attr->attlen); stats->attrtype->typlen);
pos += delta; pos += delta;
posfrac += deltafrac; posfrac += deltafrac;
if (posfrac >= (num_hist - 1)) if (posfrac >= (num_hist - 1))
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.89 2010/02/09 21:43:30 tgl Exp $ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.90 2010/08/01 22:38:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,9 +62,17 @@ typedef struct VacAttrStats ...@@ -62,9 +62,17 @@ typedef struct VacAttrStats
/* /*
* These fields are set up by the main ANALYZE code before invoking the * These fields are set up by the main ANALYZE code before invoking the
* type-specific typanalyze function. * type-specific typanalyze function.
*
* Note: do not assume that the data being analyzed has the same datatype
* shown in attr, ie do not trust attr->atttypid, attlen, etc. This is
* because some index opclasses store a different type than the underlying
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
* information about the datatype being fed to the typanalyze function.
*/ */
Form_pg_attribute attr; /* copy of pg_attribute row for column */ Form_pg_attribute attr; /* copy of pg_attribute row for column */
Form_pg_type attrtype; /* copy of pg_type row for column */ Oid attrtypid; /* type of data being analyzed */
int32 attrtypmod; /* typmod of data being analyzed */
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
MemoryContext anl_context; /* where to save long-lived data */ MemoryContext anl_context; /* where to save long-lived data */
/* /*
...@@ -95,10 +103,9 @@ typedef struct VacAttrStats ...@@ -95,10 +103,9 @@ typedef struct VacAttrStats
/* /*
* These fields describe the stavalues[n] element types. They will be * These fields describe the stavalues[n] element types. They will be
* initialized to be the same as the column's that's underlying the slot, * initialized to match attrtypid, but a custom typanalyze function might
* but a custom typanalyze function might want to store an array of * want to store an array of something other than the analyzed column's
* something other than the analyzed column's elements. It should then * elements. It should then overwrite these fields.
* overwrite these fields.
*/ */
Oid statypid[STATISTIC_NUM_SLOTS]; Oid statypid[STATISTIC_NUM_SLOTS];
int2 statyplen[STATISTIC_NUM_SLOTS]; int2 statyplen[STATISTIC_NUM_SLOTS];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment