Commit a63378a0 authored by Tomas Vondra

Use column collation for extended statistics

The current extended statistics code was a bit confused about which collation
to use.  When building the statistics, the collations defined as default
for the data types were used (since commit 5e092800).  The MCV code was
however using the column collations for MCV serialization, and then
DEFAULT_COLLATION_OID when computing estimates. So overall the code was
using all three possible options, inconsistently.

This uses the column collation everywhere - this makes it consistent with
what 5e092800 did for regular stats.  We however do not track the
collations in a catalog, because we can derive them from column-level
information.  This may need to change in the future, e.g. after allowing
statistics on expressions.

Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/8736jdhbhc.fsf%40ansel.ydns.eu
Backpatch-to: 12
parent e38a55ba
...@@ -485,6 +485,10 @@ RemoveStatisticsById(Oid statsOid) ...@@ -485,6 +485,10 @@ RemoveStatisticsById(Oid statsOid)
* *
* For MCV lists that's not the case, as those statistics store the datums * For MCV lists that's not the case, as those statistics store the datums
* internally. In this case we simply reset the statistics value to NULL. * internally. In this case we simply reset the statistics value to NULL.
*
* Note that "type change" includes collation change, which means we can rely
* on the MCV list being consistent with the collation info in pg_attribute
* during estimation.
*/ */
void void
UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum, UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
......
...@@ -273,7 +273,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, ...@@ -273,7 +273,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
colstat->attrtypid); colstat->attrtypid);
/* prepare the sort function for this dimension */ /* prepare the sort function for this dimension */
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
} }
/* /*
......
...@@ -366,7 +366,7 @@ build_mss(VacAttrStats **stats, int numattrs) ...@@ -366,7 +366,7 @@ build_mss(VacAttrStats **stats, int numattrs)
elog(ERROR, "cache lookup failed for ordering operator for type %u", elog(ERROR, "cache lookup failed for ordering operator for type %u",
colstat->attrtypid); colstat->attrtypid);
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
} }
return mss; return mss;
...@@ -686,7 +686,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) ...@@ -686,7 +686,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
/* sort and deduplicate the data */ /* sort and deduplicate the data */
ssup[dim].ssup_cxt = CurrentMemoryContext; ssup[dim].ssup_cxt = CurrentMemoryContext;
ssup[dim].ssup_collation = DEFAULT_COLLATION_OID; ssup[dim].ssup_collation = stats[dim]->attrcollid;
ssup[dim].ssup_nulls_first = false; ssup[dim].ssup_nulls_first = false;
PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
...@@ -1630,15 +1630,22 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, ...@@ -1630,15 +1630,22 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
* First check whether the constant is below the lower * First check whether the constant is below the lower
* boundary (in that case we can skip the bucket, because * boundary (in that case we can skip the bucket, because
* there's no overlap). * there's no overlap).
*
* We don't store collations used to build the statistics,
* but we can use the collation for the attribute itself,
* as stored in varcollid. We do reset the statistics after
* a type change (including collation change), so this is
* OK. We may need to relax this after allowing extended
* statistics on expressions.
*/ */
if (varonleft) if (varonleft)
match = DatumGetBool(FunctionCall2Coll(&opproc, match = DatumGetBool(FunctionCall2Coll(&opproc,
DEFAULT_COLLATION_OID, var->varcollid,
item->values[idx], item->values[idx],
cst->constvalue)); cst->constvalue));
else else
match = DatumGetBool(FunctionCall2Coll(&opproc, match = DatumGetBool(FunctionCall2Coll(&opproc,
DEFAULT_COLLATION_OID, var->varcollid,
cst->constvalue, cst->constvalue,
item->values[idx])); item->values[idx]));
......
...@@ -477,7 +477,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, ...@@ -477,7 +477,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
colstat->attrtypid); colstat->attrtypid);
/* prepare the sort function for this dimension */ /* prepare the sort function for this dimension */
multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
/* accumulate all the data for this dimension into the arrays */ /* accumulate all the data for this dimension into the arrays */
for (j = 0; j < numrows; j++) for (j = 0; j < numrows; j++)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment