Commit ad1c36b0 authored by Tom Lane

Fix foreign-key selectivity estimation in the presence of constants.

get_foreign_key_join_selectivity() looks for join clauses that equate
the two sides of the FK constraint.  However, if we have a query like
"WHERE fktab.a = pktab.a and fktab.a = 1", it won't find any such join
clause, because equivclass.c replaces the given clauses with "fktab.a
= 1 and pktab.a = 1", which can be enforced at the scan level, leaving
nothing to be done for column "a" at the join level.

We can fix that expectation without much trouble, but then a new problem
arises: applying the foreign-key-based selectivity rule produces a
rowcount underestimate, because we're effectively double-counting the
selectivity of the "fktab.a = 1" clause.  So we have to cancel that
selectivity out of the estimate.

To fix, refactor process_implied_equality() so that it can pass back the
new RestrictInfo to its callers in equivclass.c, allowing the generated
"fktab.a = 1" clause to be saved in the EquivalenceClass's ec_derives
list.  Then it's not much trouble to dig out the relevant RestrictInfo
when we need to adjust an FK selectivity estimate.  (While at it, we
can also remove the expensive use of initialize_mergeclause_eclasses()
to set up the new RestrictInfo's left_ec and right_ec pointers.
The equivclass.c code can set those basically for free.)

This seems like clearly a bug fix, but I'm hesitant to back-patch it,
first because there's some API/ABI risk for extensions and second because
we're usually loath to destabilize plan choices in stable branches.

Per report from Sigrid Ehrenreich.

Discussion: https://postgr.es/m/1019549.1603770457@sss.pgh.pa.us
Discussion: https://postgr.es/m/AM6PR02MB5287A0ADD936C1FA80973E72AB190@AM6PR02MB5287.eurprd02.prod.outlook.com
parent ce7f772c
...@@ -2352,6 +2352,7 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) ...@@ -2352,6 +2352,7 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
WRITE_ATTRNUMBER_ARRAY(confkey, node->nkeys); WRITE_ATTRNUMBER_ARRAY(confkey, node->nkeys);
WRITE_OID_ARRAY(conpfeqop, node->nkeys); WRITE_OID_ARRAY(conpfeqop, node->nkeys);
WRITE_INT_FIELD(nmatched_ec); WRITE_INT_FIELD(nmatched_ec);
WRITE_INT_FIELD(nconst_ec);
WRITE_INT_FIELD(nmatched_rcols); WRITE_INT_FIELD(nmatched_rcols);
WRITE_INT_FIELD(nmatched_ri); WRITE_INT_FIELD(nmatched_ri);
/* for compactness, just print the number of matches per column: */ /* for compactness, just print the number of matches per column: */
......
...@@ -5066,9 +5066,16 @@ get_foreign_key_join_selectivity(PlannerInfo *root, ...@@ -5066,9 +5066,16 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
* remove back into the worklist. * remove back into the worklist.
* *
* Since the matching clauses are known not outerjoin-delayed, they * Since the matching clauses are known not outerjoin-delayed, they
* should certainly have appeared in the initial joinclause list. If * would normally have appeared in the initial joinclause list. If we
* we didn't find them, they must have been matched to, and removed * didn't find them, there are two possibilities:
* by, some other FK in a previous iteration of this loop. (A likely *
* 1. If the FK match is based on an EC that is ec_has_const, it won't
* have generated any join clauses at all. We discount such ECs while
* checking to see if we have "all" the clauses. (Below, we'll adjust
* the selectivity estimate for this case.)
*
* 2. The clauses were matched to some other FK in a previous
* iteration of this loop, and thus removed from worklist. (A likely
* case is that two FKs are matched to the same EC; there will be only * case is that two FKs are matched to the same EC; there will be only
* one EC-derived clause in the initial list, so the first FK will * one EC-derived clause in the initial list, so the first FK will
* consume it.) Applying both FKs' selectivity independently risks * consume it.) Applying both FKs' selectivity independently risks
...@@ -5078,8 +5085,9 @@ get_foreign_key_join_selectivity(PlannerInfo *root, ...@@ -5078,8 +5085,9 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
* Later we might think of a reasonable way to combine the estimates, * Later we might think of a reasonable way to combine the estimates,
* but for now, just punt, since this is a fairly uncommon situation. * but for now, just punt, since this is a fairly uncommon situation.
*/ */
if (list_length(removedlist) != if (removedlist == NIL ||
(fkinfo->nmatched_ec + fkinfo->nmatched_ri)) list_length(removedlist) !=
(fkinfo->nmatched_ec - fkinfo->nconst_ec + fkinfo->nmatched_ri))
{ {
worklist = list_concat(worklist, removedlist); worklist = list_concat(worklist, removedlist);
continue; continue;
...@@ -5138,9 +5146,48 @@ get_foreign_key_join_selectivity(PlannerInfo *root, ...@@ -5138,9 +5146,48 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
fkselec *= 1.0 / ref_tuples; fkselec *= 1.0 / ref_tuples;
} }
/*
* If any of the FK columns participated in ec_has_const ECs, then
* equivclass.c will have generated "var = const" restrictions for
* each side of the join, thus reducing the sizes of both input
* relations. Taking the fkselec at face value would amount to
* double-counting the selectivity of the constant restriction for the
* referencing Var. Hence, look for the restriction clause(s) that
* were applied to the referencing Var(s), and divide out their
* selectivity to correct for this.
*/
if (fkinfo->nconst_ec > 0)
{
for (int i = 0; i < fkinfo->nkeys; i++)
{
EquivalenceClass *ec = fkinfo->eclass[i];
if (ec && ec->ec_has_const)
{
EquivalenceMember *em = fkinfo->fk_eclass_member[i];
RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec,
em);
if (rinfo)
{
Selectivity s0;
s0 = clause_selectivity(root,
(Node *) rinfo,
0,
jointype,
sjinfo);
if (s0 > 0)
fkselec /= s0;
}
}
}
}
} }
*restrictlist = worklist; *restrictlist = worklist;
CLAMP_PROBABILITY(fkselec);
return fkselec; return fkselec;
} }
......
...@@ -840,10 +840,8 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) ...@@ -840,10 +840,8 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
* scanning of the quals and before Path construction begins. * scanning of the quals and before Path construction begins.
* *
* We make no attempt to avoid generating duplicate RestrictInfos here: we * We make no attempt to avoid generating duplicate RestrictInfos here: we
* don't search ec_sources for matches, nor put the created RestrictInfos * don't search ec_sources or ec_derives for matches. It doesn't really
* into ec_derives. Doing so would require some slightly ugly changes in * seem worth the trouble to do so.
* initsplan.c's API, and there's no real advantage, because the clauses
* generated here can't duplicate anything we will generate for joins anyway.
*/ */
void void
generate_base_implied_equalities(PlannerInfo *root) generate_base_implied_equalities(PlannerInfo *root)
...@@ -969,6 +967,7 @@ generate_base_implied_equalities_const(PlannerInfo *root, ...@@ -969,6 +967,7 @@ generate_base_implied_equalities_const(PlannerInfo *root,
{ {
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
Oid eq_op; Oid eq_op;
RestrictInfo *rinfo;
Assert(!cur_em->em_is_child); /* no children yet */ Assert(!cur_em->em_is_child); /* no children yet */
if (cur_em == const_em) if (cur_em == const_em)
...@@ -982,14 +981,31 @@ generate_base_implied_equalities_const(PlannerInfo *root, ...@@ -982,14 +981,31 @@ generate_base_implied_equalities_const(PlannerInfo *root,
ec->ec_broken = true; ec->ec_broken = true;
break; break;
} }
process_implied_equality(root, eq_op, ec->ec_collation, rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
cur_em->em_expr, const_em->em_expr, cur_em->em_expr, const_em->em_expr,
bms_copy(ec->ec_relids), bms_copy(ec->ec_relids),
bms_union(cur_em->em_nullable_relids, bms_union(cur_em->em_nullable_relids,
const_em->em_nullable_relids), const_em->em_nullable_relids),
ec->ec_min_security, ec->ec_min_security,
ec->ec_below_outer_join, ec->ec_below_outer_join,
cur_em->em_is_const); cur_em->em_is_const);
/*
* If the clause didn't degenerate to a constant, fill in the correct
* markings for a mergejoinable clause, and save it in ec_derives. (We
* will not re-use such clauses directly, but selectivity estimation
* may consult the list later. Note that this use of ec_derives does
* not overlap with its use for join clauses, since we never generate
* join clauses from an ec_has_const eclass.)
*/
if (rinfo && rinfo->mergeopfamilies)
{
/* it's not redundant, so don't set parent_ec */
rinfo->left_ec = rinfo->right_ec = ec;
rinfo->left_em = cur_em;
rinfo->right_em = const_em;
ec->ec_derives = lappend(ec->ec_derives, rinfo);
}
} }
} }
...@@ -1028,6 +1044,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, ...@@ -1028,6 +1044,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
{ {
EquivalenceMember *prev_em = prev_ems[relid]; EquivalenceMember *prev_em = prev_ems[relid];
Oid eq_op; Oid eq_op;
RestrictInfo *rinfo;
eq_op = select_equality_operator(ec, eq_op = select_equality_operator(ec,
prev_em->em_datatype, prev_em->em_datatype,
...@@ -1038,14 +1055,29 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, ...@@ -1038,14 +1055,29 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
ec->ec_broken = true; ec->ec_broken = true;
break; break;
} }
process_implied_equality(root, eq_op, ec->ec_collation, rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
prev_em->em_expr, cur_em->em_expr, prev_em->em_expr, cur_em->em_expr,
bms_copy(ec->ec_relids), bms_copy(ec->ec_relids),
bms_union(prev_em->em_nullable_relids, bms_union(prev_em->em_nullable_relids,
cur_em->em_nullable_relids), cur_em->em_nullable_relids),
ec->ec_min_security, ec->ec_min_security,
ec->ec_below_outer_join, ec->ec_below_outer_join,
false); false);
/*
* If the clause didn't degenerate to a constant, fill in the
* correct markings for a mergejoinable clause. We don't put it
* in ec_derives however; we don't currently need to re-find such
* clauses, and we don't want to clutter that list with non-join
* clauses.
*/
if (rinfo && rinfo->mergeopfamilies)
{
/* it's not redundant, so don't set parent_ec */
rinfo->left_ec = rinfo->right_ec = ec;
rinfo->left_em = prev_em;
rinfo->right_em = cur_em;
}
} }
prev_ems[relid] = cur_em; prev_ems[relid] = cur_em;
} }
...@@ -2151,6 +2183,10 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) ...@@ -2151,6 +2183,10 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
* we ignore that fine point here.) This is much like exprs_known_equal, * we ignore that fine point here.) This is much like exprs_known_equal,
* except that we insist on the comparison operator matching the eclass, so * except that we insist on the comparison operator matching the eclass, so
* that the result is definite not approximate. * that the result is definite not approximate.
*
* On success, we also set fkinfo->eclass[colno] to the matching eclass,
* and set fkinfo->fk_eclass_member[colno] to the eclass member for the
* referencing Var.
*/ */
EquivalenceClass * EquivalenceClass *
match_eclasses_to_foreign_key_col(PlannerInfo *root, match_eclasses_to_foreign_key_col(PlannerInfo *root,
...@@ -2180,8 +2216,8 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, ...@@ -2180,8 +2216,8 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
{ {
EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes,
i); i);
bool item1member = false; EquivalenceMember *item1_em = NULL;
bool item2member = false; EquivalenceMember *item2_em = NULL;
ListCell *lc2; ListCell *lc2;
/* Never match to a volatile EC */ /* Never match to a volatile EC */
...@@ -2206,12 +2242,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, ...@@ -2206,12 +2242,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
/* Match? */ /* Match? */
if (var->varno == var1varno && var->varattno == var1attno) if (var->varno == var1varno && var->varattno == var1attno)
item1member = true; item1_em = em;
else if (var->varno == var2varno && var->varattno == var2attno) else if (var->varno == var2varno && var->varattno == var2attno)
item2member = true; item2_em = em;
/* Have we found both PK and FK column in this EC? */ /* Have we found both PK and FK column in this EC? */
if (item1member && item2member) if (item1_em && item2_em)
{ {
/* /*
* Succeed if eqop matches EC's opfamilies. We could test * Succeed if eqop matches EC's opfamilies. We could test
...@@ -2221,7 +2257,11 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, ...@@ -2221,7 +2257,11 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
if (opfamilies == NIL) /* compute if we didn't already */ if (opfamilies == NIL) /* compute if we didn't already */
opfamilies = get_mergejoin_opfamilies(eqop); opfamilies = get_mergejoin_opfamilies(eqop);
if (equal(opfamilies, ec->ec_opfamilies)) if (equal(opfamilies, ec->ec_opfamilies))
{
fkinfo->eclass[colno] = ec;
fkinfo->fk_eclass_member[colno] = item2_em;
return ec; return ec;
}
/* Otherwise, done with this EC, move on to the next */ /* Otherwise, done with this EC, move on to the next */
break; break;
} }
...@@ -2230,6 +2270,37 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, ...@@ -2230,6 +2270,37 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
return NULL; return NULL;
} }
/*
 * find_derived_clause_for_ec_member
 *	  Locate the already-derived clause that mentions a given EC member.
 *
 * 'ec' must be an EquivalenceClass with ec_has_const set, and 'em' must be
 * one of its non-constant members (both conditions are asserted).  Under
 * those conditions there should be only a single derived clause that
 * mentions the member, namely the one equating it to a constant.
 *
 * Returns the first RestrictInfo in ec->ec_derives whose left_em is 'em',
 * or NULL when the list holds no such clause.
 */
RestrictInfo *
find_derived_clause_for_ec_member(EquivalenceClass *ec,
								  EquivalenceMember *em)
{
	RestrictInfo *found = NULL;
	ListCell   *cell;

	Assert(ec->ec_has_const);
	Assert(!em->em_is_const);

	foreach(cell, ec->ec_derives)
	{
		RestrictInfo *candidate = (RestrictInfo *) lfirst(cell);

		/*
		 * Only the left side needs checking:
		 * generate_base_implied_equalities_const always places the
		 * non-const member on the left of the clauses it derives.
		 */
		if (candidate->left_em != em)
			continue;

		found = candidate;
		break;
	}

	return found;
}
/* /*
* add_child_rel_equivalences * add_child_rel_equivalences
......
...@@ -62,14 +62,12 @@ static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root, ...@@ -62,14 +62,12 @@ static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
JoinType jointype, List *clause); JoinType jointype, List *clause);
static void compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause); static void compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause);
static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
bool is_deduced,
bool below_outer_join, bool below_outer_join,
JoinType jointype, JoinType jointype,
Index security_level, Index security_level,
Relids qualscope, Relids qualscope,
Relids ojscope, Relids ojscope,
Relids outerjoin_nonnullable, Relids outerjoin_nonnullable,
Relids deduced_nullable_relids,
List **postponed_qual_list); List **postponed_qual_list);
static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
Relids *nullable_relids_p, bool is_pushed_down); Relids *nullable_relids_p, bool is_pushed_down);
...@@ -815,9 +813,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, ...@@ -815,9 +813,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
if (bms_is_subset(pq->relids, *qualscope)) if (bms_is_subset(pq->relids, *qualscope))
distribute_qual_to_rels(root, pq->qual, distribute_qual_to_rels(root, pq->qual,
false, below_outer_join, JOIN_INNER, below_outer_join, JOIN_INNER,
root->qual_security_level, root->qual_security_level,
*qualscope, NULL, NULL, NULL, *qualscope, NULL, NULL,
NULL); NULL);
else else
*postponed_qual_list = lappend(*postponed_qual_list, pq); *postponed_qual_list = lappend(*postponed_qual_list, pq);
...@@ -831,9 +829,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, ...@@ -831,9 +829,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
Node *qual = (Node *) lfirst(l); Node *qual = (Node *) lfirst(l);
distribute_qual_to_rels(root, qual, distribute_qual_to_rels(root, qual,
false, below_outer_join, JOIN_INNER, below_outer_join, JOIN_INNER,
root->qual_security_level, root->qual_security_level,
*qualscope, NULL, NULL, NULL, *qualscope, NULL, NULL,
postponed_qual_list); postponed_qual_list);
} }
} }
...@@ -1008,10 +1006,10 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, ...@@ -1008,10 +1006,10 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
Node *qual = (Node *) lfirst(l); Node *qual = (Node *) lfirst(l);
distribute_qual_to_rels(root, qual, distribute_qual_to_rels(root, qual,
false, below_outer_join, j->jointype, below_outer_join, j->jointype,
root->qual_security_level, root->qual_security_level,
*qualscope, *qualscope,
ojscope, nonnullable_rels, NULL, ojscope, nonnullable_rels,
postponed_qual_list); postponed_qual_list);
} }
...@@ -1110,14 +1108,12 @@ process_security_barrier_quals(PlannerInfo *root, ...@@ -1110,14 +1108,12 @@ process_security_barrier_quals(PlannerInfo *root,
* than being pushed up to top of tree, which we don't want. * than being pushed up to top of tree, which we don't want.
*/ */
distribute_qual_to_rels(root, qual, distribute_qual_to_rels(root, qual,
false,
below_outer_join, below_outer_join,
JOIN_INNER, JOIN_INNER,
security_level, security_level,
qualscope, qualscope,
qualscope, qualscope,
NULL, NULL,
NULL,
NULL); NULL);
} }
security_level++; security_level++;
...@@ -1581,7 +1577,6 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) ...@@ -1581,7 +1577,6 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause)
* as belonging to a higher join level, just add it to postponed_qual_list. * as belonging to a higher join level, just add it to postponed_qual_list.
* *
* 'clause': the qual clause to be distributed * 'clause': the qual clause to be distributed
* 'is_deduced': true if the qual came from implied-equality deduction
* 'below_outer_join': true if the qual is from a JOIN/ON that is below the * 'below_outer_join': true if the qual is from a JOIN/ON that is below the
* nullable side of a higher-level outer join * nullable side of a higher-level outer join
* 'jointype': type of join the qual is from (JOIN_INNER for a WHERE clause) * 'jointype': type of join the qual is from (JOIN_INNER for a WHERE clause)
...@@ -1593,8 +1588,6 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) ...@@ -1593,8 +1588,6 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause)
* baserels appearing on the outer (nonnullable) side of the join * baserels appearing on the outer (nonnullable) side of the join
* (for FULL JOIN this includes both sides of the join, and must in fact * (for FULL JOIN this includes both sides of the join, and must in fact
* equal qualscope) * equal qualscope)
* 'deduced_nullable_relids': if is_deduced is true, the nullable relids to
* impute to the clause; otherwise NULL
* 'postponed_qual_list': list of PostponedQual structs, which we can add * 'postponed_qual_list': list of PostponedQual structs, which we can add
* this qual to if it turns out to belong to a higher join level. * this qual to if it turns out to belong to a higher join level.
* Can be NULL if caller knows postponement is impossible. * Can be NULL if caller knows postponement is impossible.
...@@ -1603,23 +1596,17 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) ...@@ -1603,23 +1596,17 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause)
* 'ojscope' is needed if we decide to force the qual up to the outer-join * 'ojscope' is needed if we decide to force the qual up to the outer-join
* level, which will be ojscope not necessarily qualscope. * level, which will be ojscope not necessarily qualscope.
* *
* In normal use (when is_deduced is false), at the time this is called, * At the time this is called, root->join_info_list must contain entries for
* root->join_info_list must contain entries for all and only those special * all and only those special joins that are syntactically below this qual.
* joins that are syntactically below this qual. But when is_deduced is true,
* we are adding new deduced clauses after completion of deconstruct_jointree,
* so it cannot be assumed that root->join_info_list has anything to do with
* qual placement.
*/ */
static void static void
distribute_qual_to_rels(PlannerInfo *root, Node *clause, distribute_qual_to_rels(PlannerInfo *root, Node *clause,
bool is_deduced,
bool below_outer_join, bool below_outer_join,
JoinType jointype, JoinType jointype,
Index security_level, Index security_level,
Relids qualscope, Relids qualscope,
Relids ojscope, Relids ojscope,
Relids outerjoin_nonnullable, Relids outerjoin_nonnullable,
Relids deduced_nullable_relids,
List **postponed_qual_list) List **postponed_qual_list)
{ {
Relids relids; Relids relids;
...@@ -1653,7 +1640,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -1653,7 +1640,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */ Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */
Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */ Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */
Assert(!is_deduced); /* shouldn't be deduced, either */
pq->qual = clause; pq->qual = clause;
pq->relids = relids; pq->relids = relids;
*postponed_qual_list = lappend(*postponed_qual_list, pq); *postponed_qual_list = lappend(*postponed_qual_list, pq);
...@@ -1754,24 +1740,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -1754,24 +1740,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* This seems like another reason why it should perhaps be rethought. * This seems like another reason why it should perhaps be rethought.
*---------- *----------
*/ */
if (is_deduced) if (bms_overlap(relids, outerjoin_nonnullable))
{
/*
* If the qual came from implied-equality deduction, it should not be
* outerjoin-delayed, else deducer blew it. But we can't check this
* because the join_info_list may now contain OJs above where the qual
* belongs. For the same reason, we must rely on caller to supply the
* correct nullable_relids set.
*/
Assert(!ojscope);
is_pushed_down = true;
outerjoin_delayed = false;
nullable_relids = deduced_nullable_relids;
/* Don't feed it back for more deductions */
maybe_equivalence = false;
maybe_outer_join = false;
}
else if (bms_overlap(relids, outerjoin_nonnullable))
{ {
/* /*
* The qual is attached to an outer join and mentions (some of the) * The qual is attached to an outer join and mentions (some of the)
...@@ -2277,14 +2246,18 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, ...@@ -2277,14 +2246,18 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
* can produce constant TRUE or constant FALSE. (Otherwise it's not, * can produce constant TRUE or constant FALSE. (Otherwise it's not,
* because the expressions went through eval_const_expressions already.) * because the expressions went through eval_const_expressions already.)
* *
* Returns the generated RestrictInfo, if any. The result will be NULL
* if both_const is true and we successfully reduced the clause to
* constant TRUE.
*
* Note: this function will copy item1 and item2, but it is caller's * Note: this function will copy item1 and item2, but it is caller's
* responsibility to make sure that the Relids parameters are fresh copies * responsibility to make sure that the Relids parameters are fresh copies
* not shared with other uses. * not shared with other uses.
* *
* This is currently used only when an EquivalenceClass is found to * Note: we do not do initialize_mergeclause_eclasses() here. It is
* contain pseudoconstants. See path/pathkeys.c for more details. * caller's responsibility that left_ec/right_ec be set as necessary.
*/ */
void RestrictInfo *
process_implied_equality(PlannerInfo *root, process_implied_equality(PlannerInfo *root,
Oid opno, Oid opno,
Oid collation, Oid collation,
...@@ -2296,24 +2269,27 @@ process_implied_equality(PlannerInfo *root, ...@@ -2296,24 +2269,27 @@ process_implied_equality(PlannerInfo *root,
bool below_outer_join, bool below_outer_join,
bool both_const) bool both_const)
{ {
Expr *clause; RestrictInfo *restrictinfo;
Node *clause;
Relids relids;
bool pseudoconstant = false;
/* /*
* Build the new clause. Copy to ensure it shares no substructure with * Build the new clause. Copy to ensure it shares no substructure with
* original (this is necessary in case there are subselects in there...) * original (this is necessary in case there are subselects in there...)
*/ */
clause = make_opclause(opno, clause = (Node *) make_opclause(opno,
BOOLOID, /* opresulttype */ BOOLOID, /* opresulttype */
false, /* opretset */ false, /* opretset */
copyObject(item1), copyObject(item1),
copyObject(item2), copyObject(item2),
InvalidOid, InvalidOid,
collation); collation);
/* If both constant, try to reduce to a boolean constant. */ /* If both constant, try to reduce to a boolean constant. */
if (both_const) if (both_const)
{ {
clause = (Expr *) eval_const_expressions(root, (Node *) clause); clause = eval_const_expressions(root, clause);
/* If we produced const TRUE, just drop the clause */ /* If we produced const TRUE, just drop the clause */
if (clause && IsA(clause, Const)) if (clause && IsA(clause, Const))
...@@ -2322,25 +2298,106 @@ process_implied_equality(PlannerInfo *root, ...@@ -2322,25 +2298,106 @@ process_implied_equality(PlannerInfo *root,
Assert(cclause->consttype == BOOLOID); Assert(cclause->consttype == BOOLOID);
if (!cclause->constisnull && DatumGetBool(cclause->constvalue)) if (!cclause->constisnull && DatumGetBool(cclause->constvalue))
return; return NULL;
}
}
/*
* The rest of this is a very cut-down version of distribute_qual_to_rels.
* We can skip most of the work therein, but there are a couple of special
* cases we still have to handle.
*
* Retrieve all relids mentioned within the possibly-simplified clause.
*/
relids = pull_varnos(clause);
Assert(bms_is_subset(relids, qualscope));
/*
* If the clause is variable-free, our normal heuristic for pushing it
* down to just the mentioned rels doesn't work, because there are none.
* Apply at the given qualscope, or at the top of tree if it's nonvolatile
* (which it very likely is, but we'll check, just to be sure).
*/
if (bms_is_empty(relids))
{
/* eval at original syntactic level */
relids = bms_copy(qualscope);
if (!contain_volatile_functions(clause))
{
/* mark as gating qual */
pseudoconstant = true;
/* tell createplan.c to check for gating quals */
root->hasPseudoConstantQuals = true;
/* if not below outer join, push it to top of tree */
if (!below_outer_join)
{
relids =
get_relids_in_jointree((Node *) root->parse->jointree,
false);
}
} }
} }
/*
* Build the RestrictInfo node itself.
*/
restrictinfo = make_restrictinfo((Expr *) clause,
true, /* is_pushed_down */
false, /* outerjoin_delayed */
pseudoconstant,
security_level,
relids,
NULL, /* outer_relids */
nullable_relids);
/*
* If it's a join clause, add vars used in the clause to targetlists of
* their relations, so that they will be emitted by the plan nodes that
* scan those relations (else they won't be available at the join node!).
*
* Typically, we'd have already done this when the component expressions
* were first seen by distribute_qual_to_rels; but it is possible that
* some of the Vars could have missed having that done because they only
* appeared in single-relation clauses originally. So do it here for
* safety.
*/
if (bms_membership(relids) == BMS_MULTIPLE)
{
List *vars = pull_var_clause(clause,
PVC_RECURSE_AGGREGATES |
PVC_RECURSE_WINDOWFUNCS |
PVC_INCLUDE_PLACEHOLDERS);
add_vars_to_targetlist(root, vars, relids, false);
list_free(vars);
}
/*
* Check mergejoinability. This will usually succeed, since the op came
* from an EquivalenceClass; but we could have reduced the original clause
* to a constant.
*/
check_mergejoinable(restrictinfo);
/*
* Note we don't do initialize_mergeclause_eclasses(); the caller can
* handle that much more cheaply than we can. It's okay to call
* distribute_restrictinfo_to_rels() before that happens.
*/
/* /*
* Push the new clause into all the appropriate restrictinfo lists. * Push the new clause into all the appropriate restrictinfo lists.
*/ */
distribute_qual_to_rels(root, (Node *) clause, distribute_restrictinfo_to_rels(root, restrictinfo);
true, below_outer_join, JOIN_INNER,
security_level, return restrictinfo;
qualscope, NULL, NULL, nullable_relids,
NULL);
} }
/* /*
* build_implied_join_equality --- build a RestrictInfo for a derived equality * build_implied_join_equality --- build a RestrictInfo for a derived equality
* *
* This overlaps the functionality of process_implied_equality(), but we * This overlaps the functionality of process_implied_equality(), but we
* must return the RestrictInfo, not push it into the joininfo tree. * must not push the RestrictInfo into the joininfo tree.
* *
* Note: this function will copy item1 and item2, but it is caller's * Note: this function will copy item1 and item2, but it is caller's
* responsibility to make sure that the Relids parameters are fresh copies * responsibility to make sure that the Relids parameters are fresh copies
...@@ -2455,18 +2512,19 @@ match_foreign_keys_to_quals(PlannerInfo *root) ...@@ -2455,18 +2512,19 @@ match_foreign_keys_to_quals(PlannerInfo *root)
*/ */
for (colno = 0; colno < fkinfo->nkeys; colno++) for (colno = 0; colno < fkinfo->nkeys; colno++)
{ {
EquivalenceClass *ec;
AttrNumber con_attno, AttrNumber con_attno,
ref_attno; ref_attno;
Oid fpeqop; Oid fpeqop;
ListCell *lc2; ListCell *lc2;
fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root, ec = match_eclasses_to_foreign_key_col(root, fkinfo, colno);
fkinfo,
colno);
/* Don't bother looking for loose quals if we got an EC match */ /* Don't bother looking for loose quals if we got an EC match */
if (fkinfo->eclass[colno] != NULL) if (ec != NULL)
{ {
fkinfo->nmatched_ec++; fkinfo->nmatched_ec++;
if (ec->ec_has_const)
fkinfo->nconst_ec++;
continue; continue;
} }
......
...@@ -567,9 +567,11 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, ...@@ -567,9 +567,11 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop)); memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop));
/* zero out fields to be filled by match_foreign_keys_to_quals */ /* zero out fields to be filled by match_foreign_keys_to_quals */
info->nmatched_ec = 0; info->nmatched_ec = 0;
info->nconst_ec = 0;
info->nmatched_rcols = 0; info->nmatched_rcols = 0;
info->nmatched_ri = 0; info->nmatched_ri = 0;
memset(info->eclass, 0, sizeof(info->eclass)); memset(info->eclass, 0, sizeof(info->eclass));
memset(info->fk_eclass_member, 0, sizeof(info->fk_eclass_member));
memset(info->rinfos, 0, sizeof(info->rinfos)); memset(info->rinfos, 0, sizeof(info->rinfos));
root->fkey_list = lappend(root->fkey_list, info); root->fkey_list = lappend(root->fkey_list, info);
......
...@@ -889,10 +889,13 @@ typedef struct ForeignKeyOptInfo ...@@ -889,10 +889,13 @@ typedef struct ForeignKeyOptInfo
/* Derived info about whether FK's equality conditions match the query: */ /* Derived info about whether FK's equality conditions match the query: */
int nmatched_ec; /* # of FK cols matched by ECs */ int nmatched_ec; /* # of FK cols matched by ECs */
int nconst_ec; /* # of these ECs that are ec_has_const */
int nmatched_rcols; /* # of FK cols matched by non-EC rinfos */ int nmatched_rcols; /* # of FK cols matched by non-EC rinfos */
int nmatched_ri; /* total # of non-EC rinfos matched to FK */ int nmatched_ri; /* total # of non-EC rinfos matched to FK */
/* Pointer to eclass matching each column's condition, if there is one */ /* Pointer to eclass matching each column's condition, if there is one */
struct EquivalenceClass *eclass[INDEX_MAX_KEYS]; struct EquivalenceClass *eclass[INDEX_MAX_KEYS];
/* Pointer to eclass member for the referencing Var, if there is one */
struct EquivalenceMember *fk_eclass_member[INDEX_MAX_KEYS];
/* List of non-EC RestrictInfos matching each column's condition */ /* List of non-EC RestrictInfos matching each column's condition */
List *rinfos[INDEX_MAX_KEYS]; List *rinfos[INDEX_MAX_KEYS];
} ForeignKeyOptInfo; } ForeignKeyOptInfo;
......
...@@ -149,6 +149,8 @@ extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); ...@@ -149,6 +149,8 @@ extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2);
extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root, extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root,
ForeignKeyOptInfo *fkinfo, ForeignKeyOptInfo *fkinfo,
int colno); int colno);
extern RestrictInfo *find_derived_clause_for_ec_member(EquivalenceClass *ec,
EquivalenceMember *em);
extern void add_child_rel_equivalences(PlannerInfo *root, extern void add_child_rel_equivalences(PlannerInfo *root,
AppendRelInfo *appinfo, AppendRelInfo *appinfo,
RelOptInfo *parent_rel, RelOptInfo *parent_rel,
......
...@@ -77,16 +77,16 @@ extern void create_lateral_join_info(PlannerInfo *root); ...@@ -77,16 +77,16 @@ extern void create_lateral_join_info(PlannerInfo *root);
extern List *deconstruct_jointree(PlannerInfo *root); extern List *deconstruct_jointree(PlannerInfo *root);
extern void distribute_restrictinfo_to_rels(PlannerInfo *root, extern void distribute_restrictinfo_to_rels(PlannerInfo *root,
RestrictInfo *restrictinfo); RestrictInfo *restrictinfo);
extern void process_implied_equality(PlannerInfo *root, extern RestrictInfo *process_implied_equality(PlannerInfo *root,
Oid opno, Oid opno,
Oid collation, Oid collation,
Expr *item1, Expr *item1,
Expr *item2, Expr *item2,
Relids qualscope, Relids qualscope,
Relids nullable_relids, Relids nullable_relids,
Index security_level, Index security_level,
bool below_outer_join, bool below_outer_join,
bool both_const); bool both_const);
extern RestrictInfo *build_implied_join_equality(Oid opno, extern RestrictInfo *build_implied_join_equality(Oid opno,
Oid collation, Oid collation,
Expr *item1, Expr *item1,
......
...@@ -5843,6 +5843,56 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral ...@@ -5843,6 +5843,56 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral
drop table join_pt1; drop table join_pt1;
drop table join_ut1; drop table join_ut1;
-- --
-- test estimation behavior with multi-column foreign key and constant qual
--
begin;
create table fkest (x integer, x10 integer, x10b integer, x100 integer);
insert into fkest select x, x/10, x/10, x/100 from generate_series(1,1000) x;
create unique index on fkest(x, x10, x100);
analyze fkest;
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
QUERY PLAN
-----------------------------------------------------------
Nested Loop
-> Hash Join
Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10))
-> Seq Scan on fkest f2
Filter: (x100 = 2)
-> Hash
-> Seq Scan on fkest f1
Filter: (x100 = 2)
-> Index Scan using fkest_x_x10_x100_idx on fkest f3
Index Cond: (x = f1.x)
(10 rows)
alter table fkest add constraint fk
foreign key (x, x10b, x100) references fkest (x, x10, x100);
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
QUERY PLAN
-----------------------------------------------------
Hash Join
Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10))
-> Hash Join
Hash Cond: (f3.x = f2.x)
-> Seq Scan on fkest f3
-> Hash
-> Seq Scan on fkest f2
Filter: (x100 = 2)
-> Hash
-> Seq Scan on fkest f1
Filter: (x100 = 2)
(11 rows)
rollback;
--
-- test that foreign key join estimation performs sanely for outer joins -- test that foreign key join estimation performs sanely for outer joins
-- --
begin; begin;
......
...@@ -1975,6 +1975,35 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral ...@@ -1975,6 +1975,35 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral
drop table join_pt1; drop table join_pt1;
drop table join_ut1; drop table join_ut1;
--
-- test estimation behavior with multi-column foreign key and constant qual
--
begin;
create table fkest (x integer, x10 integer, x10b integer, x100 integer);
insert into fkest select x, x/10, x/10, x/100 from generate_series(1,1000) x;
create unique index on fkest(x, x10, x100);
analyze fkest;
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
alter table fkest add constraint fk
foreign key (x, x10b, x100) references fkest (x, x10, x100);
explain (costs off)
select * from fkest f1
join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100)
join fkest f3 on f1.x = f3.x
where f1.x100 = 2;
rollback;
-- --
-- test that foreign key join estimation performs sanely for outer joins -- test that foreign key join estimation performs sanely for outer joins
-- --
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment