Commit c473d923 authored by Tom Lane

Fix cost_mergejoin's failure to adjust for rescanning of non-unique merge join
keys when considering a semi or anti join.  This requires estimating the
selectivity of the merge qual as though it were a regular inner join condition.
To allow caching both that and the real outer-join-aware selectivity, split
RestrictInfo.this_selec into two fields.

This fixes one of the problems reported by Kevin Grittner.
parent c87c31f1
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.422 2009/02/02 19:31:39 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.423 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1606,7 +1606,8 @@ _copyRestrictInfo(RestrictInfo *from) ...@@ -1606,7 +1606,8 @@ _copyRestrictInfo(RestrictInfo *from)
/* EquivalenceClasses are never copied, so shallow-copy the pointers */ /* EquivalenceClasses are never copied, so shallow-copy the pointers */
COPY_SCALAR_FIELD(parent_ec); COPY_SCALAR_FIELD(parent_ec);
COPY_SCALAR_FIELD(eval_cost); COPY_SCALAR_FIELD(eval_cost);
COPY_SCALAR_FIELD(this_selec); COPY_SCALAR_FIELD(norm_selec);
COPY_SCALAR_FIELD(outer_selec);
COPY_NODE_FIELD(mergeopfamilies); COPY_NODE_FIELD(mergeopfamilies);
/* EquivalenceClasses are never copied, so shallow-copy the pointers */ /* EquivalenceClasses are never copied, so shallow-copy the pointers */
COPY_SCALAR_FIELD(left_ec); COPY_SCALAR_FIELD(left_ec);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.351 2009/02/02 19:31:39 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.352 2009/02/06 23:43:23 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
...@@ -1609,7 +1609,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node) ...@@ -1609,7 +1609,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
WRITE_BITMAPSET_FIELD(right_relids); WRITE_BITMAPSET_FIELD(right_relids);
WRITE_NODE_FIELD(orclause); WRITE_NODE_FIELD(orclause);
/* don't write parent_ec, leads to infinite recursion in plan tree dump */ /* don't write parent_ec, leads to infinite recursion in plan tree dump */
WRITE_FLOAT_FIELD(this_selec, "%.4f"); WRITE_FLOAT_FIELD(norm_selec, "%.4f");
WRITE_FLOAT_FIELD(outer_selec, "%.4f");
WRITE_NODE_FIELD(mergeopfamilies); WRITE_NODE_FIELD(mergeopfamilies);
/* don't write left_ec, leads to infinite recursion in plan tree dump */ /* don't write left_ec, leads to infinite recursion in plan tree dump */
/* don't write right_ec, leads to infinite recursion in plan tree dump */ /* don't write right_ec, leads to infinite recursion in plan tree dump */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.96 2009/01/01 17:23:43 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.97 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -516,21 +516,34 @@ clause_selectivity(PlannerInfo *root, ...@@ -516,21 +516,34 @@ clause_selectivity(PlannerInfo *root,
/* /*
* If the clause is marked redundant, always return 1.0. * If the clause is marked redundant, always return 1.0.
*/ */
if (rinfo->this_selec > 1) if (rinfo->norm_selec > 1)
return (Selectivity) 1.0; return (Selectivity) 1.0;
/* /*
* If possible, cache the result of the selectivity calculation for * If possible, cache the result of the selectivity calculation for
* the clause. We can cache if varRelid is zero or the clause * the clause. We can cache if varRelid is zero or the clause
* contains only vars of that relid --- otherwise varRelid will affect * contains only vars of that relid --- otherwise varRelid will affect
* the result, so mustn't cache. * the result, so mustn't cache. Outer join quals might be examined
* with either their join's actual jointype or JOIN_INNER, so we need
* two cache variables to remember both cases. Note: we assume the
* result won't change if we are switching the input relations or
* considering a unique-ified case, so we only need one cache variable
* for all non-JOIN_INNER cases.
*/ */
if (varRelid == 0 || if (varRelid == 0 ||
bms_is_subset_singleton(rinfo->clause_relids, varRelid)) bms_is_subset_singleton(rinfo->clause_relids, varRelid))
{ {
/* Cacheable --- do we already have the result? */ /* Cacheable --- do we already have the result? */
if (rinfo->this_selec >= 0) if (jointype == JOIN_INNER)
return rinfo->this_selec; {
if (rinfo->norm_selec >= 0)
return rinfo->norm_selec;
}
else
{
if (rinfo->outer_selec >= 0)
return rinfo->outer_selec;
}
cacheable = true; cacheable = true;
} }
...@@ -753,7 +766,12 @@ clause_selectivity(PlannerInfo *root, ...@@ -753,7 +766,12 @@ clause_selectivity(PlannerInfo *root,
/* Cache the result if possible */ /* Cache the result if possible */
if (cacheable) if (cacheable)
rinfo->this_selec = s1; {
if (jointype == JOIN_INNER)
rinfo->norm_selec = s1;
else
rinfo->outer_selec = s1;
}
#ifdef SELECTIVITY_DEBUG #ifdef SELECTIVITY_DEBUG
elog(DEBUG4, "clause_selectivity: s1 %f", s1); elog(DEBUG4, "clause_selectivity: s1 %f", s1);
......
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.203 2009/01/01 17:23:43 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.204 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -120,7 +120,7 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root, ...@@ -120,7 +120,7 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
PathKey *pathkey); PathKey *pathkey);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context); static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path, static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals, SpecialJoinInfo *sjinfo); List *quals);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width); static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width); static double page_size(double tuples, int width);
...@@ -1507,11 +1507,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) ...@@ -1507,11 +1507,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
/* /*
* Get approx # tuples passing the mergequals. We use approx_tuple_count * Get approx # tuples passing the mergequals. We use approx_tuple_count
* here for speed --- in most cases, any errors won't affect the result * here because we need an estimate done with JOIN_INNER semantics.
* much.
*/ */
mergejointuples = approx_tuple_count(root, &path->jpath, mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
mergeclauses, sjinfo);
/* /*
* When there are equal merge keys in the outer relation, the mergejoin * When there are equal merge keys in the outer relation, the mergejoin
...@@ -1539,16 +1537,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) ...@@ -1539,16 +1537,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
* when we should not. Can we do better without expensive selectivity * when we should not. Can we do better without expensive selectivity
* computations? * computations?
* *
* For SEMI and ANTI joins, only one inner tuple need be rescanned for * The whole issue is moot if we are working from a unique-ified outer
* each group of same-keyed outer tuples (assuming that all joinquals * input.
* are merge quals). This makes the effect small enough to ignore,
* so we just set rescannedtuples = 0. Likewise, the whole issue is
* moot if we are working from a unique-ified outer input.
*/ */
if (sjinfo->jointype == JOIN_SEMI || if (IsA(outer_path, UniquePath))
sjinfo->jointype == JOIN_ANTI)
rescannedtuples = 0;
else if (IsA(outer_path, UniquePath))
rescannedtuples = 0; rescannedtuples = 0;
else else
{ {
...@@ -1847,11 +1839,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) ...@@ -1847,11 +1839,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
/* /*
* Get approx # tuples passing the hashquals. We use approx_tuple_count * Get approx # tuples passing the hashquals. We use approx_tuple_count
* here for speed --- in most cases, any errors won't affect the result * here because we need an estimate done with JOIN_INNER semantics.
* much.
*/ */
hashjointuples = approx_tuple_count(root, &path->jpath, hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
hashclauses, sjinfo);
/* cost of source data */ /* cost of source data */
startup_cost += outer_path->startup_cost; startup_cost += outer_path->startup_cost;
...@@ -2324,6 +2314,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) ...@@ -2324,6 +2314,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
* The quals can be either an implicitly-ANDed list of boolean expressions, * The quals can be either an implicitly-ANDed list of boolean expressions,
* or a list of RestrictInfo nodes (typically the latter). * or a list of RestrictInfo nodes (typically the latter).
* *
* We intentionally compute the selectivity under JOIN_INNER rules, even
* if it's some type of outer join. This is appropriate because we are
* trying to figure out how many tuples pass the initial merge or hash
* join step.
*
* This is quick-and-dirty because we bypass clauselist_selectivity, and * This is quick-and-dirty because we bypass clauselist_selectivity, and
* simply multiply the independent clause selectivities together. Now * simply multiply the independent clause selectivities together. Now
* clauselist_selectivity often can't do any better than that anyhow, but * clauselist_selectivity often can't do any better than that anyhow, but
...@@ -2336,31 +2331,40 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) ...@@ -2336,31 +2331,40 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
* seems OK to live with the approximation. * seems OK to live with the approximation.
*/ */
static double static double
approx_tuple_count(PlannerInfo *root, JoinPath *path, approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
List *quals, SpecialJoinInfo *sjinfo)
{ {
double tuples; double tuples;
double outer_tuples = path->outerjoinpath->parent->rows; double outer_tuples = path->outerjoinpath->parent->rows;
double inner_tuples = path->innerjoinpath->parent->rows; double inner_tuples = path->innerjoinpath->parent->rows;
SpecialJoinInfo sjinfo;
Selectivity selec = 1.0; Selectivity selec = 1.0;
ListCell *l; ListCell *l;
/*
* Make up a SpecialJoinInfo for JOIN_INNER semantics.
*/
sjinfo.type = T_SpecialJoinInfo;
sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
sjinfo.min_righthand = path->innerjoinpath->parent->relids;
sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
sjinfo.jointype = JOIN_INNER;
/* we don't bother trying to make the remaining fields valid */
sjinfo.lhs_strict = false;
sjinfo.delay_upper_joins = false;
sjinfo.join_quals = NIL;
/* Get the approximate selectivity */ /* Get the approximate selectivity */
foreach(l, quals) foreach(l, quals)
{ {
Node *qual = (Node *) lfirst(l); Node *qual = (Node *) lfirst(l);
/* Note that clause_selectivity will be able to cache its result */ /* Note that clause_selectivity will be able to cache its result */
selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo); selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
} }
/* Apply it correctly using the input relation sizes */ /* Apply it to the input relation sizes */
if (sjinfo->jointype == JOIN_SEMI) tuples = selec * outer_tuples * inner_tuples;
tuples = selec * outer_tuples;
else if (sjinfo->jointype == JOIN_ANTI)
tuples = (1.0 - selec) * outer_tuples;
else
tuples = selec * outer_tuples * inner_tuples;
return clamp_row_est(tuples); return clamp_row_est(tuples);
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.16 2009/01/01 17:23:43 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.17 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1200,7 +1200,8 @@ reconsider_outer_join_clauses(PlannerInfo *root) ...@@ -1200,7 +1200,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
list_delete_cell(root->left_join_clauses, cell, prev); list_delete_cell(root->left_join_clauses, cell, prev);
/* we throw it back anyway (see notes above) */ /* we throw it back anyway (see notes above) */
/* but the thrown-back clause has no extra selectivity */ /* but the thrown-back clause has no extra selectivity */
rinfo->this_selec = 2.0; rinfo->norm_selec = 2.0;
rinfo->outer_selec = 1.0;
distribute_restrictinfo_to_rels(root, rinfo); distribute_restrictinfo_to_rels(root, rinfo);
} }
else else
...@@ -1222,7 +1223,8 @@ reconsider_outer_join_clauses(PlannerInfo *root) ...@@ -1222,7 +1223,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
list_delete_cell(root->right_join_clauses, cell, prev); list_delete_cell(root->right_join_clauses, cell, prev);
/* we throw it back anyway (see notes above) */ /* we throw it back anyway (see notes above) */
/* but the thrown-back clause has no extra selectivity */ /* but the thrown-back clause has no extra selectivity */
rinfo->this_selec = 2.0; rinfo->norm_selec = 2.0;
rinfo->outer_selec = 1.0;
distribute_restrictinfo_to_rels(root, rinfo); distribute_restrictinfo_to_rels(root, rinfo);
} }
else else
...@@ -1244,7 +1246,8 @@ reconsider_outer_join_clauses(PlannerInfo *root) ...@@ -1244,7 +1246,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
list_delete_cell(root->full_join_clauses, cell, prev); list_delete_cell(root->full_join_clauses, cell, prev);
/* we throw it back anyway (see notes above) */ /* we throw it back anyway (see notes above) */
/* but the thrown-back clause has no extra selectivity */ /* but the thrown-back clause has no extra selectivity */
rinfo->this_selec = 2.0; rinfo->norm_selec = 2.0;
rinfo->outer_selec = 1.0;
distribute_restrictinfo_to_rels(root, rinfo); distribute_restrictinfo_to_rels(root, rinfo);
} }
else else
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.86 2009/01/01 17:23:44 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.87 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -174,10 +174,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) ...@@ -174,10 +174,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
{ {
orig_selec = clause_selectivity(root, (Node *) bestrinfo, orig_selec = clause_selectivity(root, (Node *) bestrinfo,
0, JOIN_INNER, NULL); 0, JOIN_INNER, NULL);
bestrinfo->this_selec = orig_selec / or_selec; bestrinfo->norm_selec = orig_selec / or_selec;
/* clamp result to sane range */ /* clamp result to sane range */
if (bestrinfo->this_selec > 1) if (bestrinfo->norm_selec > 1)
bestrinfo->this_selec = 1; bestrinfo->norm_selec = 1;
/* It isn't an outer join clause, so no need to adjust outer_selec */
} }
/* Tell caller to recompute rel's rows estimate */ /* Tell caller to recompute rel's rows estimate */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.164 2009/01/01 17:23:44 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.165 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1662,7 +1662,8 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context) ...@@ -1662,7 +1662,8 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
* different values when considering the child relation. * different values when considering the child relation.
*/ */
newinfo->eval_cost.startup = -1; newinfo->eval_cost.startup = -1;
newinfo->this_selec = -1; newinfo->norm_selec = -1;
newinfo->outer_selec = -1;
newinfo->left_ec = NULL; newinfo->left_ec = NULL;
newinfo->right_ec = NULL; newinfo->right_ec = NULL;
newinfo->left_em = NULL; newinfo->left_em = NULL;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.56 2009/01/01 17:23:45 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.57 2009/02/06 23:43:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -338,7 +338,8 @@ make_restrictinfo_internal(Expr *clause, ...@@ -338,7 +338,8 @@ make_restrictinfo_internal(Expr *clause,
restrictinfo->parent_ec = NULL; restrictinfo->parent_ec = NULL;
restrictinfo->eval_cost.startup = -1; restrictinfo->eval_cost.startup = -1;
restrictinfo->this_selec = -1; restrictinfo->norm_selec = -1;
restrictinfo->outer_selec = -1;
restrictinfo->mergeopfamilies = NIL; restrictinfo->mergeopfamilies = NIL;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.167 2009/01/01 17:24:00 momjian Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.168 2009/02/06 23:43:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -992,8 +992,11 @@ typedef struct RestrictInfo ...@@ -992,8 +992,11 @@ typedef struct RestrictInfo
/* cache space for cost and selectivity */ /* cache space for cost and selectivity */
QualCost eval_cost; /* eval cost of clause; -1 if not yet set */ QualCost eval_cost; /* eval cost of clause; -1 if not yet set */
Selectivity this_selec; /* selectivity; -1 if not yet set; >1 means Selectivity norm_selec; /* selectivity for "normal" (JOIN_INNER)
* semantics; -1 if not yet set; >1 means
* a redundant clause */ * a redundant clause */
Selectivity outer_selec; /* selectivity for outer join semantics;
* -1 if not yet set */
/* valid if clause is mergejoinable, else NIL */ /* valid if clause is mergejoinable, else NIL */
List *mergeopfamilies; /* opfamilies containing clause operator */ List *mergeopfamilies; /* opfamilies containing clause operator */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment