Commit bf2a691e authored by Alvaro Herrera's avatar Alvaro Herrera

Fix extended statistics with partial analyzes

Either because of a previous ALTER TABLE .. SET STATISTICS 0 or because
of being invoked with a partial column list, ANALYZE could fail to
acquire sufficient data to build extended statistics.  Previously, this
would draw an ERROR and fail to collect any statistics at all (extended
and regular).  Change things so that we raise a WARNING instead, and
remove a hint that was wrong in half the cases.

Reported by: David Rowley
Discussion: https://postgr.es/m/CAKJS1f9Kk0NF6Fg7TA=JUXsjpS9kX6NVu27pb5QDCpOYAvb-Og@mail.gmail.com
parent 419a23b4
......@@ -23,11 +23,13 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "nodes/relation.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
......@@ -38,6 +40,8 @@
typedef struct StatExtEntry
{
Oid statOid; /* OID of pg_statistic_ext entry */
char *schema; /* statistics schema */
char *name; /* statistics name */
Bitmapset *columns; /* attribute numbers covered by the statistics */
List *types; /* 'char' list of enabled statistic kinds */
} StatExtEntry;
......@@ -45,7 +49,7 @@ typedef struct StatExtEntry
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int natts, VacAttrStats **vacattrstats);
int nvacatts, VacAttrStats **vacatts);
static void statext_store(Relation pg_stext, Oid relid,
MVNDistinct *ndistinct, MVDependencies *dependencies,
VacAttrStats **stats);
......@@ -66,6 +70,12 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
Relation pg_stext;
ListCell *lc;
List *stats;
MemoryContext cxt;
MemoryContext oldcxt;
cxt = AllocSetContextCreate(CurrentMemoryContext, "stats ext",
ALLOCSET_DEFAULT_SIZES);
oldcxt = MemoryContextSwitchTo(cxt);
pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
......@@ -78,9 +88,23 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
VacAttrStats **stats;
ListCell *lc2;
/* filter only the interesting vacattrstats records */
/*
* Check if we can build these stats based on the column analyzed.
* If not, report this fact (except in autovacuum) and move on.
*/
stats = lookup_var_attr_stats(onerel, stat->columns,
natts, vacattrstats);
if (!stats && !IsAutoVacuumWorkerProcess())
{
ereport(WARNING,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("extended statistics \"%s.%s\" could not be collected for relation %s.%s",
stat->schema, stat->name,
get_namespace_name(onerel->rd_rel->relnamespace),
RelationGetRelationName(onerel)),
errtable(onerel)));
continue;
}
/* check allowed number of dimensions */
Assert(bms_num_members(stat->columns) >= 2 &&
......@@ -104,6 +128,9 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
}
heap_close(pg_stext, RowExclusiveLock);
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(cxt);
}
/*
......@@ -168,6 +195,8 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
entry = palloc0(sizeof(StatExtEntry));
entry->statOid = HeapTupleGetOid(htup);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
entry->schema = get_namespace_name(staForm->stanamespace);
entry->name = pstrdup(NameStr(staForm->staname));
for (i = 0; i < staForm->stakeys.dim1; i++)
{
entry->columns = bms_add_member(entry->columns,
......@@ -200,18 +229,19 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
}
/*
* Using 'vacattrstats' of size 'natts' as input data, return a newly built
* VacAttrStats array which includes only the items corresponding to attributes
* indicated by 'attrs'.
* Using 'vacatts' of size 'nvacatts' as input data, return a newly built
* VacAttrStats array which includes only the items corresponding to
* attributes indicated by 'stakeys'. If we don't have all of the per column
* stats available to compute the extended stats, then we return NULL to indicate
* to the caller that the stats should not be built.
*/
static VacAttrStats **
lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
VacAttrStats **vacattrstats)
lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int nvacatts, VacAttrStats **vacatts)
{
int i = 0;
int x = -1;
VacAttrStats **stats;
Bitmapset *matched = NULL;
stats = (VacAttrStats **)
palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
......@@ -222,39 +252,34 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
int j;
stats[i] = NULL;
for (j = 0; j < natts; j++)
for (j = 0; j < nvacatts; j++)
{
if (x == vacattrstats[j]->tupattnum)
if (x == vacatts[j]->tupattnum)
{
stats[i] = vacattrstats[j];
stats[i] = vacatts[j];
break;
}
}
if (!stats[i])
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname),
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel)),
errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel),
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
{
/*
* Looks like stats were not gathered for one of the columns
* required. We'll be unable to build the extended stats without
* this column.
*/
pfree(stats);
return NULL;
}
/*
* Check that we found a non-dropped column and that the attnum
* matches.
*/
/*
* Sanity check that the column is not dropped - stats should have
* been removed in this case.
*/
Assert(!stats[i]->attr->attisdropped);
matched = bms_add_member(matched, stats[i]->tupattnum);
i++;
}
if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
elog(ERROR, "could not find all attributes in attribute stats array");
bms_free(matched);
return stats;
}
......
......@@ -40,9 +40,11 @@ ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ERROR: extended statistics could not be collected for column "a" of relation public.ab1
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
WARNING: extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
WARNING: extended statistics "public.ab1_a_b_stats" could not be collected for relation public.ab1
ANALYZE ab1;
DROP TABLE ab1;
-- n-distinct tests
......
......@@ -35,6 +35,8 @@ INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
ANALYZE ab1;
DROP TABLE ab1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment