Commit 6a652252 authored by Tom Lane's avatar Tom Lane

Fix some planner issues found while investigating Kevin Grittner's report

of poorer planning in 8.3 than 8.2:

1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.

2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join.  This is safe
as long as the condition is strict and we re-test it at the upper join.

3. Keep the outer-join clause even if we successfully push a constant
across it.  This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make.  Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.

4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.

5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause.  This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
parent 8d546c71
This diff is collapsed.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.114 2008/01/01 19:45:50 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.115 2008/01/09 20:42:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -34,7 +34,8 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, ...@@ -34,7 +34,8 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
List *restrictlist, JoinType jointype); List *restrictlist, JoinType jointype);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype); RelOptInfo *outer_rel, JoinType jointype);
static List *select_mergejoin_clauses(RelOptInfo *joinrel, static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, RelOptInfo *innerrel,
List *restrictlist, List *restrictlist,
...@@ -69,7 +70,8 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -69,7 +70,8 @@ add_paths_to_joinrel(PlannerInfo *root,
* disable if it's a full join. * disable if it's a full join.
*/ */
if (enable_mergejoin || jointype == JOIN_FULL) if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(joinrel, mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel, outerrel,
innerrel, innerrel,
restrictlist, restrictlist,
...@@ -883,7 +885,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, ...@@ -883,7 +885,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* currently of interest. * currently of interest.
*/ */
static List * static List *
select_mergejoin_clauses(RelOptInfo *joinrel, select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, RelOptInfo *innerrel,
List *restrictlist, List *restrictlist,
...@@ -937,6 +940,35 @@ select_mergejoin_clauses(RelOptInfo *joinrel, ...@@ -937,6 +940,35 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue; /* no good for these input relations */ continue; /* no good for these input relations */
} }
/*
* Insist that each side have a non-redundant eclass. This
* restriction is needed because various bits of the planner expect
* that each clause in a merge be associatable with some pathkey in a
* canonical pathkey list, but redundant eclasses can't appear in
* canonical sort orderings. (XXX it might be worth relaxing this,
* but not enough time to address it for 8.3.)
*
* Note: it would be bad if this condition failed for an otherwise
* mergejoinable FULL JOIN clause, since that would result in
* undesirable planner failure. I believe that is not possible
* however; a variable involved in a full join could only appear
* in below_outer_join eclasses, which aren't considered redundant.
*
* This case *can* happen for left/right join clauses: the
* outer-side variable could be equated to a constant. Because we
* will propagate that constant across the join clause, the loss of
* ability to do a mergejoin is not really all that big a deal, and
* so it's not clear that improving this is important.
*/
cache_mergeclause_eclasses(root, restrictinfo);
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
{
have_nonmergeable_joinclause = true;
continue; /* can't handle redundant eclasses */
}
result_list = lappend(result_list, restrictinfo); result_list = lappend(result_list, restrictinfo);
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.83 2008/01/01 19:45:50 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -90,16 +90,18 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) ...@@ -90,16 +90,18 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
/* /*
* Find potentially interesting OR joinclauses. Note we must ignore any * Find potentially interesting OR joinclauses. Note we must ignore any
* joinclauses that are marked outerjoin_delayed, because they cannot be * joinclauses that are marked outerjoin_delayed or !is_pushed_down,
* pushed down to the per-relation level due to outer-join rules. (XXX in * because they cannot be pushed down to the per-relation level due to
* some cases it might be possible to allow this, but it would require * outer-join rules. (XXX in some cases it might be possible to allow
* substantially more bookkeeping about where the clause came from.) * this, but it would require substantially more bookkeeping about where
* the clause came from.)
*/ */
foreach(i, rel->joininfo) foreach(i, rel->joininfo)
{ {
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
if (restriction_is_or_clause(rinfo) && if (restriction_is_or_clause(rinfo) &&
rinfo->is_pushed_down &&
!rinfo->outerjoin_delayed) !rinfo->outerjoin_delayed)
{ {
/* /*
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.92 2008/01/01 19:45:50 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,13 +30,6 @@ ...@@ -30,13 +30,6 @@
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
/*
* If an EC contains a const and isn't below-outer-join, any PathKey depending
* on it must be redundant, since there's only one possible value of the key.
*/
#define MUST_BE_REDUNDANT(eclass) \
((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first); int strategy, bool nulls_first);
static PathKey *make_canonical_pathkey(PlannerInfo *root, static PathKey *make_canonical_pathkey(PlannerInfo *root,
...@@ -164,7 +157,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) ...@@ -164,7 +157,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Assert(!new_ec->ec_merged); Assert(!new_ec->ec_merged);
/* Check for EC containing a constant --- unconditionally redundant */ /* Check for EC containing a constant --- unconditionally redundant */
if (MUST_BE_REDUNDANT(new_ec)) if (EC_MUST_BE_REDUNDANT(new_ec))
return true; return true;
/* If same EC already used in list, then redundant */ /* If same EC already used in list, then redundant */
...@@ -211,7 +204,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) ...@@ -211,7 +204,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
* pathkey_is_redundant would notice that, but we needn't even bother * pathkey_is_redundant would notice that, but we needn't even bother
* constructing the node... * constructing the node...
*/ */
if (MUST_BE_REDUNDANT(eclass)) if (EC_MUST_BE_REDUNDANT(eclass))
continue; continue;
/* OK, build a canonicalized PathKey struct */ /* OK, build a canonicalized PathKey struct */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.137 2008/01/01 19:45:50 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.138 2008/01/09 20:42:28 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -778,13 +778,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -778,13 +778,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
if (ojscope) if (ojscope)
{ {
/* clause is attached to outer join, eval it there */ /* clause is attached to outer join, eval it there */
relids = ojscope; relids = bms_copy(ojscope);
/* mustn't use as gating qual, so don't mark pseudoconstant */ /* mustn't use as gating qual, so don't mark pseudoconstant */
} }
else else
{ {
/* eval at original syntactic level */ /* eval at original syntactic level */
relids = qualscope; relids = bms_copy(qualscope);
if (!contain_volatile_functions(clause)) if (!contain_volatile_functions(clause))
{ {
/* mark as gating qual */ /* mark as gating qual */
...@@ -849,12 +849,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -849,12 +849,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* We can't use such a clause to deduce equivalence (the left and * We can't use such a clause to deduce equivalence (the left and
* right sides might be unequal above the join because one of them has * right sides might be unequal above the join because one of them has
* gone to NULL) ... but we might be able to use it for more limited * gone to NULL) ... but we might be able to use it for more limited
* deductions, if there are no lower outer joins that delay its * deductions, if it is mergejoinable. So consider adding it to the
* application. If so, consider adding it to the lists of set-aside * lists of set-aside outer-join clauses.
* clauses.
*/ */
is_pushed_down = false;
maybe_equivalence = false; maybe_equivalence = false;
maybe_outer_join = !check_outerjoin_delay(root, &relids, false); maybe_outer_join = true;
/* Check to see if must be delayed by lower outer join */
outerjoin_delayed = check_outerjoin_delay(root, &relids, false);
/* /*
* Now force the qual to be evaluated exactly at the level of joining * Now force the qual to be evaluated exactly at the level of joining
...@@ -868,8 +871,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -868,8 +871,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
*/ */
Assert(ojscope); Assert(ojscope);
relids = ojscope; relids = ojscope;
is_pushed_down = false;
outerjoin_delayed = true;
Assert(!pseudoconstant); Assert(!pseudoconstant);
} }
else else
...@@ -880,7 +881,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -880,7 +881,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
*/ */
is_pushed_down = true; is_pushed_down = true;
/* Check to see if must be delayed by outer join */ /* Check to see if must be delayed by lower outer join */
outerjoin_delayed = check_outerjoin_delay(root, &relids, true); outerjoin_delayed = check_outerjoin_delay(root, &relids, true);
if (outerjoin_delayed) if (outerjoin_delayed)
...@@ -1052,8 +1053,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, ...@@ -1052,8 +1053,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* mentioning only C cannot be applied below the join to A. * mentioning only C cannot be applied below the join to A.
* *
* For a non-pushed-down qual, this isn't going to determine where we place the * For a non-pushed-down qual, this isn't going to determine where we place the
* qual, but we need to determine outerjoin_delayed anyway so we can decide * qual, but we need to determine outerjoin_delayed anyway for possible use
* whether the qual is potentially useful for equivalence deductions. * in reconsider_outer_join_clauses().
* *
* Lastly, a pushed-down qual that references the nullable side of any current * Lastly, a pushed-down qual that references the nullable side of any current
* oj_info_list member and has to be evaluated above that OJ (because its * oj_info_list member and has to be evaluated above that OJ (because its
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.152 2008/01/01 19:45:58 momjian Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.153 2008/01/09 20:42:28 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -479,6 +479,13 @@ typedef struct EquivalenceClass ...@@ -479,6 +479,13 @@ typedef struct EquivalenceClass
struct EquivalenceClass *ec_merged; /* set if merged into another EC */ struct EquivalenceClass *ec_merged; /* set if merged into another EC */
} EquivalenceClass; } EquivalenceClass;
/*
* If an EC contains a const and isn't below-outer-join, any PathKey depending
* on it must be redundant, since there's only one possible value of the key.
*/
#define EC_MUST_BE_REDUNDANT(eclass) \
((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
/* /*
* EquivalenceMember - one member expression of an EquivalenceClass * EquivalenceMember - one member expression of an EquivalenceClass
* *
...@@ -856,17 +863,17 @@ typedef struct HashPath ...@@ -856,17 +863,17 @@ typedef struct HashPath
* *
* When dealing with outer joins we have to be very careful about pushing qual * When dealing with outer joins we have to be very careful about pushing qual
* clauses up and down the tree. An outer join's own JOIN/ON conditions must * clauses up and down the tree. An outer join's own JOIN/ON conditions must
* be evaluated exactly at that join node, and any quals appearing in WHERE or * be evaluated exactly at that join node, unless they are "degenerate"
* in a JOIN above the outer join cannot be pushed down below the outer join. * conditions that reference only Vars from the nullable side of the join.
* Otherwise the outer join will produce wrong results because it will see the * Quals appearing in WHERE or in a JOIN above the outer join cannot be pushed
* wrong sets of input rows. All quals are stored as RestrictInfo nodes * down below the outer join, if they reference any nullable Vars.
* during planning, but there's a flag to indicate whether a qual has been * RestrictInfo nodes contain a flag to indicate whether a qual has been
* pushed down to a lower level than its original syntactic placement in the * pushed down to a lower level than its original syntactic placement in the
* join tree would suggest. If an outer join prevents us from pushing a qual * join tree would suggest. If an outer join prevents us from pushing a qual
* down to its "natural" semantic level (the level associated with just the * down to its "natural" semantic level (the level associated with just the
* base rels used in the qual) then we mark the qual with a "required_relids" * base rels used in the qual) then we mark the qual with a "required_relids"
* value including more than just the base rels it actually uses. By * value including more than just the base rels it actually uses. By
* pretending that the qual references all the rels appearing in the outer * pretending that the qual references all the rels required to form the outer
* join, we prevent it from being evaluated below the outer join's joinrel. * join, we prevent it from being evaluated below the outer join's joinrel.
* When we do form the outer join's joinrel, we still need to distinguish * When we do form the outer join's joinrel, we still need to distinguish
* those quals that are actually in that join's JOIN/ON condition from those * those quals that are actually in that join's JOIN/ON condition from those
...@@ -878,11 +885,13 @@ typedef struct HashPath ...@@ -878,11 +885,13 @@ typedef struct HashPath
* It's possible for an OUTER JOIN clause to be marked is_pushed_down too, * It's possible for an OUTER JOIN clause to be marked is_pushed_down too,
* if we decide that it can be pushed down into the nullable side of the join. * if we decide that it can be pushed down into the nullable side of the join.
* In that case it acts as a plain filter qual for wherever it gets evaluated. * In that case it acts as a plain filter qual for wherever it gets evaluated.
* (In short, is_pushed_down is only false for non-degenerate outer join
* conditions. Possibly we should rename it to reflect that meaning?)
* *
* When application of a qual must be delayed by outer join, we also mark it * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true
* with outerjoin_delayed = true. This isn't redundant with required_relids * if the clause's applicability must be delayed due to any outer joins
* because that might equal clause_relids whether or not it's an outer-join * appearing below its own syntactic level (ie, it references any Vars from
* clause. * the nullable side of any lower outer join).
* *
* In general, the referenced clause might be arbitrarily complex. The * In general, the referenced clause might be arbitrarily complex. The
* kinds of clauses we can handle as indexscan quals, mergejoin clauses, * kinds of clauses we can handle as indexscan quals, mergejoin clauses,
...@@ -932,7 +941,7 @@ typedef struct RestrictInfo ...@@ -932,7 +941,7 @@ typedef struct RestrictInfo
bool is_pushed_down; /* TRUE if clause was pushed down in level */ bool is_pushed_down; /* TRUE if clause was pushed down in level */
bool outerjoin_delayed; /* TRUE if delayed by outer join */ bool outerjoin_delayed; /* TRUE if delayed by lower outer join */
bool can_join; /* see comment above */ bool can_join; /* see comment above */
......
...@@ -2290,3 +2290,34 @@ from yy ...@@ -2290,3 +2290,34 @@ from yy
301 | | | | 1 301 | | | | 1
(3 rows) (3 rows)
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1"
create temp table zt2 (f2 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2"
create temp table zt3 (f3 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3"
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1
----+----+----
53 | |
(1 row)
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1 | junk
----+----+----+------
53 | | |
(1 row)
...@@ -2290,3 +2290,34 @@ from yy ...@@ -2290,3 +2290,34 @@ from yy
301 | | | | 1 301 | | | | 1
(3 rows) (3 rows)
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1"
create temp table zt2 (f2 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2"
create temp table zt3 (f3 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3"
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1
----+----+----
53 | |
(1 row)
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1 | junk
----+----+----+------
53 | | |
(1 row)
...@@ -462,3 +462,26 @@ from yy ...@@ -462,3 +462,26 @@ from yy
left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy
left join xx xxa on yya.pkxx = xxa.pkxx left join xx xxa on yya.pkxx = xxa.pkxx
left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx; left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx;
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
create temp table zt2 (f2 int primary key);
create temp table zt3 (f3 int primary key);
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment