Commit 97930cf5 authored by Tom Lane's avatar Tom Lane

Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.

This patch fixes an oversight in my commit
7f3eba30 of 2008-10-23.  That patch
accounted for baserel restriction clauses that reduced the number of rows
coming out of a table (and hence the number of possibly-distinct values of
a join variable), but not for join restriction clauses that might have been
applied at a lower level of join.  To account for the latter, look up the
sizes of the min_lefthand and min_righthand inputs of the current join,
and clamp with those in the same way as for the base relations.

Noted while investigating a complaint from Ben Chobot, although this in
itself doesn't seem to explain his report.

Back-patch to 8.4; previous versions used different estimation methods
for which this heuristic isn't relevant.
parent 5cfe33fe
......@@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
FmgrInfo *opproc, bool isgt,
Datum constval, Oid consttype);
static double eqjoinsel_inner(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2);
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *rel1, RelOptInfo *rel2);
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2);
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *rel1, RelOptInfo *rel2);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
......@@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Datum *min, Datum *max);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
......@@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
VariableStatData vardata1;
VariableStatData vardata2;
bool join_is_reversed;
RelOptInfo *rel1;
RelOptInfo *rel2;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
/*
* Identify the join's direct input relations. We use the min lefthand
* and min righthand as the inputs, even though the join might actually
* get done with larger input relations. The min inputs are guaranteed to
* have been formed by now, though, and always using them ensures
* consistency of estimates.
*/
if (!join_is_reversed)
{
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
}
else
{
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
}
switch (sjinfo->jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_FULL:
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
rel1, rel2);
break;
case JOIN_SEMI:
case JOIN_ANTI:
if (!join_is_reversed)
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
rel1, rel2);
else
selec = eqjoinsel_semi(get_commutator(operator),
&vardata2, &vardata1);
&vardata2, &vardata1,
rel2, rel1);
break;
default:
/* other values not expected here */
......@@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
*/
static double
eqjoinsel_inner(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2)
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *rel1, RelOptInfo *rel2)
{
double selec;
double nd1;
......@@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator,
* be, providing a crude correction for the selectivity of restriction
* clauses on those relations. (We don't do that in the other path
* since there we are comparing the nd values to stats for the whole
* relations.)
* relations.) We can apply this clamp both with respect to the base
* relations from which the join variables come, and to the immediate
* input relations of the current join.
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
if (vardata1->rel)
nd1 = Min(nd1, vardata1->rel->rows);
nd1 = Min(nd1, rel1->rows);
if (vardata2->rel)
nd2 = Min(nd2, vardata2->rel->rows);
nd2 = Min(nd2, rel2->rows);
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
if (nd1 > nd2)
......@@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator,
*/
static double
eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2)
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *rel1, RelOptInfo *rel2)
{
double selec;
double nd1;
......@@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator,
{
if (vardata1->rel)
nd1 = Min(nd1, vardata1->rel->rows);
nd1 = Min(nd1, rel1->rows);
if (vardata2->rel)
nd2 = Min(nd2, vardata2->rel->rows);
nd2 = Min(nd2, rel2->rows);
if (nd1 <= nd2 || nd2 <= 0)
selec = 1.0 - nullfrac1;
......@@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
return have_data;
}
/*
* find_join_input_rel
* Look up the input relation for a join.
*
* We assume that the input relation's RelOptInfo must have been constructed
* already.
*/
static RelOptInfo *
find_join_input_rel(PlannerInfo *root, Relids relids)
{
RelOptInfo *rel = NULL;
switch (bms_membership(relids))
{
case BMS_EMPTY_SET:
/* should not happen */
break;
case BMS_SINGLETON:
rel = find_base_rel(root, bms_singleton_member(relids));
break;
case BMS_MULTIPLE:
rel = find_join_rel(root, relids);
break;
}
if (rel == NULL)
elog(ERROR, "could not find RelOptInfo for given relids");
return rel;
}
/*-------------------------------------------------------------------------
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment