Commit e2eed789 authored by Tom Lane

Remove useless "rough estimate" path from mcelem_array_contained_selec.

The code in this function that tried to cope with a missing count histogram
was quite ineffective for anything except a perfectly flat distribution.
Furthermore, since we were already punting for a missing MCELEM slot, it's
rather useless to sweat over a missing DECHIST slot: there are no cases where
ANALYZE will create the first but not the second.  So just simplify the
code by punting rather than pretending we can do something useful.
parent 4fb694ae
...@@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root, ...@@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root,
} }
/* /*
* arraycontsel -- restriction selectivity for "arraycolumn @> const", * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
* "arraycolumn && const" or "arraycolumn <@ const"
*/ */
Datum Datum
arraycontsel(PG_FUNCTION_ARGS) arraycontsel(PG_FUNCTION_ARGS)
...@@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS) ...@@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS)
} }
/* /*
* arraycontjoinsel -- join selectivity for "arraycolumn @> const", * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
* "arraycolumn && const" or "arraycolumn <@ const"
*/ */
Datum Datum
arraycontjoinsel(PG_FUNCTION_ARGS) arraycontjoinsel(PG_FUNCTION_ARGS)
...@@ -744,6 +742,10 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -744,6 +742,10 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
if (numbers == NULL || nnumbers != nmcelem + 3) if (numbers == NULL || nnumbers != nmcelem + 3)
return DEFAULT_CONTAIN_SEL; return DEFAULT_CONTAIN_SEL;
/* Can't do much without a count histogram, either */
if (hist == NULL || nhist < 3)
return DEFAULT_CONTAIN_SEL;
/* /*
* Grab some of the summary statistics that compute_array_stats() stores: * Grab some of the summary statistics that compute_array_stats() stores:
* lowest frequency, frequency of null elements, and average distinct * lowest frequency, frequency of null elements, and average distinct
...@@ -751,11 +753,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -751,11 +753,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
*/ */
minfreq = numbers[nmcelem]; minfreq = numbers[nmcelem];
nullelem_freq = numbers[nmcelem + 2]; nullelem_freq = numbers[nmcelem + 2];
if (hist && nhist > 0)
avg_count = hist[nhist - 1]; avg_count = hist[nhist - 1];
else
avg_count = 10.0f; /* default assumption */
/* /*
* "rest" will be the sum of the frequencies of all elements not * "rest" will be the sum of the frequencies of all elements not
...@@ -853,28 +851,23 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -853,28 +851,23 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
*/ */
mult *= exp(-rest); mult *= exp(-rest);
/* Check we have nonempty distinct element count histogram */
if (hist && nhist >= 3)
{
/*---------- /*----------
* Using the distinct element count histogram requires * Using the distinct element count histogram requires
* O(unique_nitems * (nmcelem + unique_nitems)) * O(unique_nitems * (nmcelem + unique_nitems))
* operations. Beyond a certain computational cost threshold, it's * operations. Beyond a certain computational cost threshold, it's
* reasonable to sacrifice accuracy for decreased planning time. * reasonable to sacrifice accuracy for decreased planning time. We limit
* We limit the number of operations to EFFORT * nmcelem; since * the number of operations to EFFORT * nmcelem; since nmcelem is limited
* nmcelem is limited by the column's statistics target, the work * by the column's statistics target, the work done is user-controllable.
* done is user-controllable.
* *
* If the number of operations would be too large, we can reduce it * If the number of operations would be too large, we can reduce it
* without losing all accuracy by reducing unique_nitems and * without losing all accuracy by reducing unique_nitems and considering
* considering only the most-common elements of the constant array. * only the most-common elements of the constant array. To make the
* To make the results exactly match what we would have gotten with * results exactly match what we would have gotten with only those
* only those elements to start with, we'd have to remove any * elements to start with, we'd have to remove any discarded elements'
* discarded elements' frequencies from "mult", but since this is only * frequencies from "mult", but since this is only an approximation
* an approximation anyway, we don't bother with that. Therefore it's * anyway, we don't bother with that. Therefore it's sufficient to qsort
* sufficient to qsort elem_selec[] and take the largest elements. * elem_selec[] and take the largest elements. (They will no longer match
* (They will no longer match up with the elements of array_data[], * up with the elements of array_data[], but we don't care.)
* but we don't care.)
*---------- *----------
*/ */
#define EFFORT 100 #define EFFORT 100
...@@ -883,8 +876,8 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -883,8 +876,8 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems)) unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
{ {
/* /*
* Use the quadratic formula to solve for largest allowable N; * Use the quadratic formula to solve for largest allowable N. We
* we have A = 1, B = nmcelem, C = - EFFORT * nmcelem. * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
*/ */
double b = (double) nmcelem; double b = (double) nmcelem;
int n; int n;
...@@ -905,7 +898,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -905,7 +898,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f); dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest); mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
/* ignore hist[nhist-1], which is the avg not a histogram member */ /* ignore hist[nhist-1], which is the average not a histogram member */
hist_part = calc_hist(hist, nhist - 1, unique_nitems); hist_part = calc_hist(hist, nhist - 1, unique_nitems);
selec = 0.0f; selec = 0.0f;
...@@ -923,13 +916,6 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, ...@@ -923,13 +916,6 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
pfree(dist); pfree(dist);
pfree(mcelem_dist); pfree(mcelem_dist);
pfree(hist_part); pfree(hist_part);
}
else
{
/* We don't have histogram. Use a rough estimate. */
selec = mult;
}
pfree(elem_selec); pfree(elem_selec);
/* Take into account occurrence of NULL element. */ /* Take into account occurrence of NULL element. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment