Commit 6a652252 authored by Tom Lane's avatar Tom Lane

Fix some planner issues found while investigating Kevin Grittner's report

of poorer planning in 8.3 than 8.2:

1. After pushing a constant across an outer join --- ie, given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.

2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join.  This is safe
as long as the condition is strict and we re-test it at the upper join.

3. Keep the outer-join clause even if we successfully push a constant
across it.  This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make.  Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.

4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.

5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause.  This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
parent 8d546c71
This diff is collapsed.
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.114 2008/01/01 19:45:50 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.115 2008/01/09 20:42:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -34,7 +34,8 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
List *restrictlist, JoinType jointype);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel, JoinType jointype);
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
......@@ -69,7 +70,8 @@ add_paths_to_joinrel(PlannerInfo *root,
* disable if it's a full join.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(joinrel,
mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
innerrel,
restrictlist,
......@@ -883,7 +885,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* currently of interest.
*/
static List *
select_mergejoin_clauses(RelOptInfo *joinrel,
select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
......@@ -937,6 +940,35 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue; /* no good for these input relations */
}
/*
* Insist that each side have a non-redundant eclass. This
* restriction is needed because various bits of the planner expect
* that each clause in a merge be associatable with some pathkey in a
* canonical pathkey list, but redundant eclasses can't appear in
* canonical sort orderings. (XXX it might be worth relaxing this,
* but not enough time to address it for 8.3.)
*
* Note: it would be bad if this condition failed for an otherwise
* mergejoinable FULL JOIN clause, since that would result in
* undesirable planner failure. I believe that is not possible
* however; a variable involved in a full join could only appear
* in below_outer_join eclasses, which aren't considered redundant.
*
* This case *can* happen for left/right join clauses: the
* outer-side variable could be equated to a constant. Because we
* will propagate that constant across the join clause, the loss of
* ability to do a mergejoin is not really all that big a deal, and
* so it's not clear that improving this is important.
*/
cache_mergeclause_eclasses(root, restrictinfo);
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
{
have_nonmergeable_joinclause = true;
continue; /* can't handle redundant eclasses */
}
result_list = lappend(result_list, restrictinfo);
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.83 2008/01/01 19:45:50 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -90,16 +90,18 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
/*
* Find potentially interesting OR joinclauses. Note we must ignore any
* joinclauses that are marked outerjoin_delayed, because they cannot be
* pushed down to the per-relation level due to outer-join rules. (XXX in
* some cases it might be possible to allow this, but it would require
* substantially more bookkeeping about where the clause came from.)
* joinclauses that are marked outerjoin_delayed or !is_pushed_down,
* because they cannot be pushed down to the per-relation level due to
* outer-join rules. (XXX in some cases it might be possible to allow
* this, but it would require substantially more bookkeeping about where
* the clause came from.)
*/
foreach(i, rel->joininfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
if (restriction_is_or_clause(rinfo) &&
rinfo->is_pushed_down &&
!rinfo->outerjoin_delayed)
{
/*
......
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.92 2008/01/01 19:45:50 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,13 +30,6 @@
#include "utils/lsyscache.h"
/*
* If an EC contains a const and isn't below-outer-join, any PathKey depending
* on it must be redundant, since there's only one possible value of the key.
*/
#define MUST_BE_REDUNDANT(eclass) \
((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first);
static PathKey *make_canonical_pathkey(PlannerInfo *root,
......@@ -164,7 +157,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Assert(!new_ec->ec_merged);
/* Check for EC containing a constant --- unconditionally redundant */
if (MUST_BE_REDUNDANT(new_ec))
if (EC_MUST_BE_REDUNDANT(new_ec))
return true;
/* If same EC already used in list, then redundant */
......@@ -211,7 +204,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
* pathkey_is_redundant would notice that, but we needn't even bother
* constructing the node...
*/
if (MUST_BE_REDUNDANT(eclass))
if (EC_MUST_BE_REDUNDANT(eclass))
continue;
/* OK, build a canonicalized PathKey struct */
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.137 2008/01/01 19:45:50 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.138 2008/01/09 20:42:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -778,13 +778,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
if (ojscope)
{
/* clause is attached to outer join, eval it there */
relids = ojscope;
relids = bms_copy(ojscope);
/* mustn't use as gating qual, so don't mark pseudoconstant */
}
else
{
/* eval at original syntactic level */
relids = qualscope;
relids = bms_copy(qualscope);
if (!contain_volatile_functions(clause))
{
/* mark as gating qual */
......@@ -849,12 +849,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* We can't use such a clause to deduce equivalence (the left and
* right sides might be unequal above the join because one of them has
* gone to NULL) ... but we might be able to use it for more limited
* deductions, if there are no lower outer joins that delay its
* application. If so, consider adding it to the lists of set-aside
* clauses.
* deductions, if it is mergejoinable. So consider adding it to the
* lists of set-aside outer-join clauses.
*/
is_pushed_down = false;
maybe_equivalence = false;
maybe_outer_join = !check_outerjoin_delay(root, &relids, false);
maybe_outer_join = true;
/* Check to see if must be delayed by lower outer join */
outerjoin_delayed = check_outerjoin_delay(root, &relids, false);
/*
* Now force the qual to be evaluated exactly at the level of joining
......@@ -868,8 +871,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
*/
Assert(ojscope);
relids = ojscope;
is_pushed_down = false;
outerjoin_delayed = true;
Assert(!pseudoconstant);
}
else
......@@ -880,7 +881,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
*/
is_pushed_down = true;
/* Check to see if must be delayed by outer join */
/* Check to see if must be delayed by lower outer join */
outerjoin_delayed = check_outerjoin_delay(root, &relids, true);
if (outerjoin_delayed)
......@@ -1052,8 +1053,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
* mentioning only C cannot be applied below the join to A.
*
* For a non-pushed-down qual, this isn't going to determine where we place the
* qual, but we need to determine outerjoin_delayed anyway so we can decide
* whether the qual is potentially useful for equivalence deductions.
* qual, but we need to determine outerjoin_delayed anyway for possible use
* in reconsider_outer_join_clauses().
*
* Lastly, a pushed-down qual that references the nullable side of any current
* oj_info_list member and has to be evaluated above that OJ (because its
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.152 2008/01/01 19:45:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.153 2008/01/09 20:42:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -479,6 +479,13 @@ typedef struct EquivalenceClass
struct EquivalenceClass *ec_merged; /* set if merged into another EC */
} EquivalenceClass;
/*
* If an EC contains a const and isn't below-outer-join, any PathKey depending
* on it must be redundant, since there's only one possible value of the key.
*/
#define EC_MUST_BE_REDUNDANT(eclass) \
((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
/*
* EquivalenceMember - one member expression of an EquivalenceClass
*
......@@ -856,17 +863,17 @@ typedef struct HashPath
*
* When dealing with outer joins we have to be very careful about pushing qual
* clauses up and down the tree. An outer join's own JOIN/ON conditions must
* be evaluated exactly at that join node, and any quals appearing in WHERE or
* in a JOIN above the outer join cannot be pushed down below the outer join.
* Otherwise the outer join will produce wrong results because it will see the
* wrong sets of input rows. All quals are stored as RestrictInfo nodes
* during planning, but there's a flag to indicate whether a qual has been
* be evaluated exactly at that join node, unless they are "degenerate"
* conditions that reference only Vars from the nullable side of the join.
* Quals appearing in WHERE or in a JOIN above the outer join cannot be pushed
* down below the outer join, if they reference any nullable Vars.
* RestrictInfo nodes contain a flag to indicate whether a qual has been
* pushed down to a lower level than its original syntactic placement in the
* join tree would suggest. If an outer join prevents us from pushing a qual
* down to its "natural" semantic level (the level associated with just the
* base rels used in the qual) then we mark the qual with a "required_relids"
* value including more than just the base rels it actually uses. By
* pretending that the qual references all the rels appearing in the outer
* pretending that the qual references all the rels required to form the outer
* join, we prevent it from being evaluated below the outer join's joinrel.
* When we do form the outer join's joinrel, we still need to distinguish
* those quals that are actually in that join's JOIN/ON condition from those
......@@ -878,11 +885,13 @@ typedef struct HashPath
* It's possible for an OUTER JOIN clause to be marked is_pushed_down too,
* if we decide that it can be pushed down into the nullable side of the join.
* In that case it acts as a plain filter qual for wherever it gets evaluated.
* (In short, is_pushed_down is only false for non-degenerate outer join
* conditions. Possibly we should rename it to reflect that meaning?)
*
* When application of a qual must be delayed by outer join, we also mark it
* with outerjoin_delayed = true. This isn't redundant with required_relids
* because that might equal clause_relids whether or not it's an outer-join
* clause.
* RestrictInfo nodes also contain an outerjoin_delayed flag, which is true
* if the clause's applicability must be delayed due to any outer joins
* appearing below its own syntactic level (ie, it references any Vars from
* the nullable side of any lower outer join).
*
* In general, the referenced clause might be arbitrarily complex. The
* kinds of clauses we can handle as indexscan quals, mergejoin clauses,
......@@ -932,7 +941,7 @@ typedef struct RestrictInfo
bool is_pushed_down; /* TRUE if clause was pushed down in level */
bool outerjoin_delayed; /* TRUE if delayed by outer join */
bool outerjoin_delayed; /* TRUE if delayed by lower outer join */
bool can_join; /* see comment above */
......
......@@ -2290,3 +2290,34 @@ from yy
301 | | | | 1
(3 rows)
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1"
create temp table zt2 (f2 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2"
create temp table zt3 (f3 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3"
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1
----+----+----
53 | |
(1 row)
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1 | junk
----+----+----+------
53 | | |
(1 row)
......@@ -2290,3 +2290,34 @@ from yy
301 | | | | 1
(3 rows)
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1"
create temp table zt2 (f2 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2"
create temp table zt3 (f3 int primary key);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3"
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1
----+----+----
53 | |
(1 row)
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
f2 | f3 | f1 | junk
----+----+----+------
53 | | |
(1 row)
......@@ -462,3 +462,26 @@ from yy
left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy
left join xx xxa on yya.pkxx = xxa.pkxx
left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx;
--
-- regression test for improper pushing of constants across outer-join clauses
-- (as seen in early 8.2.x releases)
--
create temp table zt1 (f1 int primary key);
create temp table zt2 (f2 int primary key);
create temp table zt3 (f3 int primary key);
insert into zt1 values(53);
insert into zt2 values(53);
select * from
zt2 left join zt3 on (f2 = f3)
left join zt1 on (f3 = f1)
where f2 = 53;
create temp view zv1 as select *,'dummy'::text AS junk from zt1;
select * from
zt2 left join zt3 on (f2 = f3)
left join zv1 on (f3 = f1)
where f2 = 53;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment