Commit b78f6264 authored by Tom Lane's avatar Tom Lane

Rework join-removal logic as per recent discussion. In particular this

fixes things so that it works for cases where nested removals are possible.
The overhead of the optimization should be significantly less, as well.
parent a760893d
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.383 2010/02/16 22:34:43 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.384 2010/03/28 22:59:32 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
......@@ -1478,16 +1478,6 @@ _outUniquePath(StringInfo str, UniquePath *node)
WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
_outNoOpPath(StringInfo str, NoOpPath *node)
{
WRITE_NODE_TYPE("NOOPPATH");
_outPathInfo(str, (Path *) node);
WRITE_NODE_FIELD(subpath);
}
static void
_outNestPath(StringInfo str, NestPath *node)
{
......@@ -2740,9 +2730,6 @@ _outNode(StringInfo str, void *obj)
case T_UniquePath:
_outUniquePath(str, obj);
break;
case T_NoOpPath:
_outNoOpPath(str, obj);
break;
case T_NestPath:
_outNestPath(str, obj);
break;
......
$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.52 2009/09/29 01:20:34 tgl Exp $
$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.53 2010/03/28 22:59:32 tgl Exp $
Optimizer
=========
......@@ -354,7 +354,6 @@ RelOptInfo - a relation or joined relations
NestPath - nested-loop joins
MergePath - merge joins
HashPath - hash joins
NoOpPath - same as its input path (used when a join is removed)
EquivalenceClass - a data structure representing a set of values known equal
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.193 2010/02/26 02:00:44 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.194 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1396,10 +1396,6 @@ print_path(PlannerInfo *root, Path *path, int indent)
ptype = "Unique";
subpath = ((UniquePath *) path)->subpath;
break;
case T_NoOpPath:
ptype = "NoOp";
subpath = ((NoOpPath *) path)->subpath;
break;
case T_NestPath:
ptype = "NestLoop";
join = true;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.131 2010/03/22 13:57:15 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.132 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -22,11 +22,6 @@
#include "optimizer/paths.h"
static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, JoinType jointype);
static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel);
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
......@@ -83,27 +78,11 @@ add_paths_to_joinrel(PlannerInfo *root,
{
List *mergeclause_list = NIL;
/*
* 0. Consider join removal. This is always the most efficient strategy,
* so if it works, there's no need to consider anything further.
*/
if (join_is_removable(root, joinrel, outerrel, innerrel,
restrictlist, jointype))
{
generate_outer_only(root, joinrel, outerrel);
return;
}
/*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin is currently our
* only way of implementing full outer joins, so override mergejoin
* disable if it's a full join.
*
* Note: do this after join_is_removable(), because this sets the
* outer_is_left flags in the mergejoin clauses, while join_is_removable
* uses those flags for its own purposes. Currently, they set the flags
* the same way anyway, but let's avoid unnecessary entanglement.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root,
......@@ -185,188 +164,6 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
return false; /* no good for these input relations */
}
/*
* join_is_removable
* Determine whether we need not perform the join at all, because
* it will just duplicate its left input.
*
* This is true for a left join for which the join condition cannot match
* more than one inner-side row. (There are other possibly interesting
* cases, but we don't have the infrastructure to prove them.) We also
* have to check that the inner side doesn't generate any variables needed
* above the join.
*
* Note: there is no need to consider the symmetrical case of duplicating the
* right input, because add_paths_to_joinrel() will be called with each rel
* on the outer side.
*/
static bool
join_is_removable(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype)
{
List *clause_list = NIL;
ListCell *l;
int attroff;
/*
* Currently, we only know how to remove left joins to a baserel with
* unique indexes. We can check most of these criteria pretty trivially
* to avoid doing useless extra work. But checking whether any of the
* indexes are unique would require iterating over the indexlist, so for
* now we just make sure there are indexes of some sort or other. If none
* of them are unique, join removal will still fail, just slightly later.
*/
if (jointype != JOIN_LEFT ||
innerrel->reloptkind == RELOPT_JOINREL ||
innerrel->rtekind != RTE_RELATION ||
innerrel->indexlist == NIL)
return false;
/*
* We can't remove the join if any inner-rel attributes are used above the
* join.
*
* Note that this test only detects use of inner-rel attributes in higher
* join conditions and the target list. There might be such attributes in
* pushed-down conditions at this join, too. We check that case below.
*
* As a micro-optimization, it seems better to start with max_attr and
* count down rather than starting with min_attr and counting up, on the
* theory that the system attributes are somewhat less likely to be wanted
* and should be tested last.
*/
for (attroff = innerrel->max_attr - innerrel->min_attr;
attroff >= 0;
attroff--)
{
if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
return false;
}
/*
* Similarly check that the inner rel doesn't produce any PlaceHolderVars
* that will be used above the join.
*/
foreach(l, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) &&
!bms_is_subset(phinfo->ph_needed, joinrel->relids))
return false;
}
/*
* Search for mergejoinable clauses that constrain the inner rel against
* either the outer rel or a pseudoconstant. If an operator is
* mergejoinable then it behaves like equality for some btree opclass, so
* it's what we want. The mergejoinability test also eliminates clauses
* containing volatile functions, which we couldn't depend on.
*/
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* If we find a pushed-down clause, it must have come from above the
* outer join and it must contain references to the inner rel. (If it
* had only outer-rel variables, it'd have been pushed down into the
* outer rel.) Therefore, we can conclude that join removal is unsafe
* without any examination of the clause contents.
*/
if (restrictinfo->is_pushed_down)
return false;
/* Ignore if it's not a mergejoinable clause */
if (!restrictinfo->can_join ||
restrictinfo->mergeopfamilies == NIL)
continue; /* not mergejoinable */
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
continue; /* no good for these input relations */
/* OK, add to list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Now examine the rel's restriction clauses for var = const clauses */
foreach(l, innerrel->baserestrictinfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* Note: can_join won't be set for a restriction clause, but
* mergeopfamilies will be if it has a mergejoinable operator and
* doesn't contain volatile functions.
*/
if (restrictinfo->mergeopfamilies == NIL)
continue; /* not mergejoinable */
/*
* The clause certainly doesn't refer to anything but the given rel.
* If either side is pseudoconstant then we can use it.
*/
if (bms_is_empty(restrictinfo->left_relids))
{
/* righthand side is inner */
restrictinfo->outer_is_left = true;
}
else if (bms_is_empty(restrictinfo->right_relids))
{
/* lefthand side is inner */
restrictinfo->outer_is_left = false;
}
else
continue;
/* OK, add to list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Now examine the indexes to see if we have a matching unique index */
if (relation_has_unique_index_for(root, innerrel, clause_list))
return true;
/*
* Some day it would be nice to check for other methods of establishing
* distinctness.
*/
return false;
}
/*
* generate_outer_only
* Generate "join" paths when we have found the join is removable.
*/
static void
generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel)
{
ListCell *lc;
/*
* For the moment, replicate all of the outerrel's paths as join paths.
* Some of them might not really be interesting above the join, if they
* have sort orderings that have no real use except to do a mergejoin for
* the join we've just found we don't need. But distinguishing that case
* probably isn't worth the extra code it would take.
*/
foreach(lc, outerrel->pathlist)
{
Path *outerpath = (Path *) lfirst(lc);
add_path(joinrel, (Path *)
create_noop_path(root, joinrel, outerpath));
}
}
/*
* sort_inner_and_outer
* Create mergejoin join paths by explicitly sorting both the outer and
......
......@@ -4,7 +4,7 @@
# Makefile for optimizer/plan
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.15 2008/02/19 10:30:07 petere Exp $
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.16 2010/03/28 22:59:32 tgl Exp $
#
#-------------------------------------------------------------------------
......@@ -12,7 +12,7 @@ subdir = src/backend/optimizer/plan
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \
setrefs.o subselect.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* analyzejoins.c
* Routines for simplifying joins after initial query analysis
*
* While we do a great deal of join simplification in prep/prepjointree.c,
* certain optimizations cannot be performed at that stage for lack of
* detailed information about the query. The routines here are invoked
* after initsplan.c has done its work, and can do additional join removal
* and simplification steps based on the information extracted. The penalty
* is that we have to work harder to clean up after ourselves when we modify
* the query, since the derived data structures have to be updated too.
*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.1 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
static void remove_rel_from_query(PlannerInfo *root, int relid);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
/*
* remove_useless_joins
* Check for relations that don't actually need to be joined at all,
* and remove them from the query.
*
* We are passed the current joinlist and return the updated list. Other
* data structures that have to be updated are accessible via "root".
*/
List *
remove_useless_joins(PlannerInfo *root, List *joinlist)
{
ListCell *lc;
/*
* We are only interested in relations that are left-joined to, so we
* can scan the join_info_list to find them easily.
*/
restart:
foreach(lc, root->join_info_list)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
int innerrelid;
int nremoved;
/* Skip if not removable */
if (!join_is_removable(root, sjinfo))
continue;
/*
* Currently, join_is_removable can only succeed when the sjinfo's
* righthand is a single baserel. Remove that rel from the query and
* joinlist.
*/
innerrelid = bms_singleton_member(sjinfo->min_righthand);
remove_rel_from_query(root, innerrelid);
/* We verify that exactly one reference gets removed from joinlist */
nremoved = 0;
joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
if (nremoved != 1)
elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
/*
* We can delete this SpecialJoinInfo from the list too, since it's no
* longer of interest.
*/
root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
/*
* Restart the scan. This is necessary to ensure we find all
* removable joins independently of ordering of the join_info_list
* (note that removal of attr_needed bits may make a join appear
* removable that did not before). Also, since we just deleted the
* current list cell, we'd have to have some kluge to continue the
* list scan anyway.
*/
goto restart;
}
return joinlist;
}
/*
* clause_sides_match_join
* Determine whether a join clause is of the right form to use in this join.
*
* We already know that the clause is a binary opclause referencing only the
* rels in the current join. The point here is to check whether it has the
* form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
* rather than mixing outer and inner vars on either side. If it matches,
* we set the transient flag outer_is_left to identify which side is which.
*/
static inline bool
clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
Relids innerrelids)
{
if (bms_is_subset(rinfo->left_relids, outerrelids) &&
bms_is_subset(rinfo->right_relids, innerrelids))
{
/* lefthand side is outer */
rinfo->outer_is_left = true;
return true;
}
else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
bms_is_subset(rinfo->right_relids, outerrelids))
{
/* righthand side is outer */
rinfo->outer_is_left = false;
return true;
}
return false; /* no good for these input relations */
}
/*
* join_is_removable
* Check whether we need not perform this special join at all, because
* it will just duplicate its left input.
*
* This is true for a left join for which the join condition cannot match
* more than one inner-side row. (There are other possibly interesting
* cases, but we don't have the infrastructure to prove them.) We also
* have to check that the inner side doesn't generate any variables needed
* above the join.
*/
static bool
join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
{
int innerrelid;
RelOptInfo *innerrel;
Relids joinrelids;
List *clause_list = NIL;
ListCell *l;
int attroff;
/*
* Currently, we only know how to remove left joins to a baserel with
* unique indexes. We can check most of these criteria pretty trivially
* to avoid doing useless extra work. But checking whether any of the
* indexes are unique would require iterating over the indexlist, so for
* now we just make sure there are indexes of some sort or other. If none
* of them are unique, join removal will still fail, just slightly later.
*/
if (sjinfo->jointype != JOIN_LEFT ||
sjinfo->delay_upper_joins ||
bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
return false;
innerrelid = bms_singleton_member(sjinfo->min_righthand);
innerrel = find_base_rel(root, innerrelid);
if (innerrel->reloptkind != RELOPT_BASEREL ||
innerrel->rtekind != RTE_RELATION ||
innerrel->indexlist == NIL)
return false;
/* Compute the relid set for the join we are considering */
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
/*
* We can't remove the join if any inner-rel attributes are used above the
* join.
*
* Note that this test only detects use of inner-rel attributes in higher
* join conditions and the target list. There might be such attributes in
* pushed-down conditions at this join, too. We check that case below.
*
* As a micro-optimization, it seems better to start with max_attr and
* count down rather than starting with min_attr and counting up, on the
* theory that the system attributes are somewhat less likely to be wanted
* and should be tested last.
*/
for (attroff = innerrel->max_attr - innerrel->min_attr;
attroff >= 0;
attroff--)
{
if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
return false;
}
/*
* Similarly check that the inner rel doesn't produce any PlaceHolderVars
* that will be used above the join.
*/
foreach(l, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) &&
!bms_is_subset(phinfo->ph_needed, joinrelids))
return false;
}
/*
* Search for mergejoinable clauses that constrain the inner rel against
* either the outer rel or a pseudoconstant. If an operator is
* mergejoinable then it behaves like equality for some btree opclass, so
* it's what we want. The mergejoinability test also eliminates clauses
* containing volatile functions, which we couldn't depend on.
*/
foreach(l, innerrel->joininfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/* Ignore clauses not pertinent to this join */
if (!bms_is_subset(restrictinfo->required_relids, joinrelids))
continue;
/*
* If we find a pushed-down clause, it must have come from above the
* outer join and it must contain references to the inner rel. (If it
* had only outer-rel variables, it'd have been pushed down into the
* outer rel.) Therefore, we can conclude that join removal is unsafe
* without any examination of the clause contents.
*/
if (restrictinfo->is_pushed_down)
return false;
/* Ignore if it's not a mergejoinable clause */
if (!restrictinfo->can_join ||
restrictinfo->mergeopfamilies == NIL)
continue; /* not mergejoinable */
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
innerrel->relids))
continue; /* no good for these input relations */
/* OK, add to list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Now examine the rel's restriction clauses for var = const clauses */
foreach(l, innerrel->baserestrictinfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* Note: can_join won't be set for a restriction clause, but
* mergeopfamilies will be if it has a mergejoinable operator and
* doesn't contain volatile functions.
*/
if (restrictinfo->mergeopfamilies == NIL)
continue; /* not mergejoinable */
/*
* The clause certainly doesn't refer to anything but the given rel.
* If either side is pseudoconstant then we can use it.
*/
if (bms_is_empty(restrictinfo->left_relids))
{
/* righthand side is inner */
restrictinfo->outer_is_left = true;
}
else if (bms_is_empty(restrictinfo->right_relids))
{
/* lefthand side is inner */
restrictinfo->outer_is_left = false;
}
else
continue;
/* OK, add to list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Now examine the indexes to see if we have a matching unique index */
if (relation_has_unique_index_for(root, innerrel, clause_list))
return true;
/*
* Some day it would be nice to check for other methods of establishing
* distinctness.
*/
return false;
}
/*
* Remove the target relid from the planner's data structures, having
* determined that there is no need to include it in the query.
*
* We are not terribly thorough here. We must make sure that the rel is
* no longer treated as a baserel, and that attributes of other baserels
* are no longer marked as being needed at joins involving this rel.
* In particular, we don't bother removing join quals involving the rel from
* the joininfo lists; they'll just get ignored, since we will never form a
* join relation at which they could be evaluated.
*/
static void
remove_rel_from_query(PlannerInfo *root, int relid)
{
RelOptInfo *rel = find_base_rel(root, relid);
Index rti;
ListCell *l;
/*
* Mark the rel as "dead" to show it is no longer part of the join tree.
* (Removing it from the baserel array altogether seems too risky.)
*/
rel->reloptkind = RELOPT_DEADREL;
/*
* Remove references to the rel from other baserels' attr_needed arrays.
*/
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *otherrel = root->simple_rel_array[rti];
int attroff;
/* there may be empty slots corresponding to non-baserel RTEs */
if (otherrel == NULL)
continue;
Assert(otherrel->relid == rti); /* sanity check on array */
/* no point in processing target rel itself */
if (otherrel == rel)
continue;
for (attroff = otherrel->max_attr - otherrel->min_attr;
attroff >= 0;
attroff--)
{
otherrel->attr_needed[attroff] =
bms_del_member(otherrel->attr_needed[attroff], relid);
}
}
/*
* Likewise remove references from PlaceHolderVar data structures.
*
* Here we have a special case: if a PHV's eval_at set is just the target
* relid, we want to leave it that way instead of reducing it to the empty
* set. An empty eval_at set would confuse later processing since it
* would match every possible eval placement.
*/
foreach(l, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
if (bms_is_empty(phinfo->ph_eval_at)) /* oops, belay that */
phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid);
phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
}
}
/*
* Remove any occurrences of the target relid from a joinlist structure.
*
* It's easiest to build a whole new list structure, so we handle it that
* way. Efficiency is not a big deal here.
*
* *nremoved is incremented by the number of occurrences removed (there
* should be exactly one, but the caller checks that).
*/
static List *
remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
{
List *result = NIL;
ListCell *jl;
foreach(jl, joinlist)
{
Node *jlnode = (Node *) lfirst(jl);
if (IsA(jlnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jlnode)->rtindex;
if (varno == relid)
(*nremoved)++;
else
result = lappend(result, jlnode);
}
else if (IsA(jlnode, List))
{
/* Recurse to handle subproblem */
List *sublist;
sublist = remove_rel_from_joinlist((List *) jlnode,
relid, nremoved);
/* Avoid including empty sub-lists in the result */
if (sublist)
result = lappend(result, sublist);
}
else
{
elog(ERROR, "unrecognized joinlist node type: %d",
(int) nodeTag(jlnode));
}
}
return result;
}
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.273 2010/02/26 02:00:45 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.274 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -164,11 +164,6 @@ create_plan(PlannerInfo *root, Path *best_path)
case T_WorkTableScan:
plan = create_scan_plan(root, best_path);
break;
case T_Join:
/* this is only used for no-op joins */
Assert(IsA(best_path, NoOpPath));
plan = create_plan(root, ((NoOpPath *) best_path)->subpath);
break;
case T_HashJoin:
case T_MergeJoin:
case T_NestLoop:
......
......@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.117 2010/01/02 16:57:47 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.118 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -179,31 +179,6 @@ query_planner(PlannerInfo *root, List *tlist,
*/
add_base_rels_to_query(root, (Node *) parse->jointree);
/*
* We should now have size estimates for every actual table involved in
* the query, so we can compute total_table_pages. Note that appendrels
* are not double-counted here, even though we don't bother to distinguish
* RelOptInfos for appendrel parents, because the parents will still have
* size zero.
*
* XXX if a table is self-joined, we will count it once per appearance,
* which perhaps is the wrong thing ... but that's not completely clear,
* and detecting self-joins here is difficult, so ignore it for now.
*/
total_pages = 0;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
total_pages += (double) brel->pages;
}
root->total_table_pages = total_pages;
/*
* Examine the targetlist and qualifications, adding entries to baserel
* targetlists for all referenced Vars. Restrict and join clauses are
......@@ -248,6 +223,49 @@ query_planner(PlannerInfo *root, List *tlist,
*/
fix_placeholder_eval_levels(root);
/*
* Remove any useless outer joins. Ideally this would be done during
* jointree preprocessing, but the necessary information isn't available
* until we've built baserel data structures and classified qual clauses.
*/
joinlist = remove_useless_joins(root, joinlist);
/*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a
* placeholder is evaluatable at a base rel.
*/
add_placeholders_to_base_rels(root);
/*
* We should now have size estimates for every actual table involved in
* the query, and we also know which if any have been deleted from the
* query by join removal; so we can compute total_table_pages.
*
* Note that appendrels are not double-counted here, even though we don't
* bother to distinguish RelOptInfos for appendrel parents, because the
* parents will still have size zero.
*
* XXX if a table is self-joined, we will count it once per appearance,
* which perhaps is the wrong thing ... but that's not completely clear,
* and detecting self-joins here is difficult, so ignore it for now.
*/
total_pages = 0;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
if (brel == NULL)
continue;
Assert(brel->relid == rti); /* sanity check on array */
if (brel->reloptkind == RELOPT_BASEREL ||
brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
total_pages += (double) brel->pages;
}
root->total_table_pages = total_pages;
/*
* Ready to do the primary planning.
*/
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.157 2010/02/26 02:00:47 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.158 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1214,26 +1214,6 @@ distinct_col_search(int colno, List *colnos, List *opids)
return InvalidOid;
}
/*
* create_noop_path
* Creates a path equivalent to the input subpath, but having a different
* parent rel. This is used when a join is found to be removable.
*/
NoOpPath *
create_noop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
{
NoOpPath *pathnode = makeNode(NoOpPath);
pathnode->path.pathtype = T_Join; /* by convention */
pathnode->path.parent = rel;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->subpath = subpath;
return pathnode;
}
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
......
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.6 2010/01/02 16:57:48 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.7 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -168,18 +168,27 @@ fix_placeholder_eval_levels(PlannerInfo *root)
list_free(vars);
}
}
}
/*
* Now, if any placeholder can be computed at a base rel and is needed
* above it, add it to that rel's targetlist. (This is essentially the
* same logic as in add_placeholders_to_joinrel, but we can't do that part
* until joinrels are formed.) We have to do this as a separate step
* because the ph_needed values aren't stable until the previous loop
* finishes.
*/
foreach(lc1, root->placeholder_list)
/*
* add_placeholders_to_base_rels
* Add any required PlaceHolderVars to base rels' targetlists.
*
* If any placeholder can be computed at a base rel and is needed above it,
* add it to that rel's targetlist. We have to do this separately from
* fix_placeholder_eval_levels() because join removal happens in between,
* and can change the ph_eval_at sets. There is essentially the same logic
* in add_placeholders_to_joinrel, but we can't do that part until joinrels
* are formed.
*/
void
add_placeholders_to_base_rels(PlannerInfo *root)
{
ListCell *lc;
foreach(lc, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc1);
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
Relids eval_at = phinfo->ph_eval_at;
if (bms_membership(eval_at) == BMS_SINGLETON)
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.233 2010/01/05 21:53:59 rhaas Exp $
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.234 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -217,7 +217,6 @@ typedef enum NodeTag
T_ResultPath,
T_MaterialPath,
T_UniquePath,
T_NoOpPath,
T_EquivalenceClass,
T_EquivalenceMember,
T_PathKey,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.184 2010/02/26 02:01:25 momjian Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.185 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -240,7 +240,9 @@ typedef struct PlannerInfo
*
* We also have "other rels", which are like base rels in that they refer to
* single RT indexes; but they are not part of the join tree, and are given
* a different RelOptKind to identify them.
* a different RelOptKind to identify them. Lastly, there is a RelOptKind
* for "dead" relations, which are base rels that we have proven we don't
* need to join after all.
*
* Currently the only kind of otherrels are those made for member relations
* of an "append relation", that is an inheritance set or UNION ALL subquery.
......@@ -346,7 +348,8 @@ typedef enum RelOptKind
{
RELOPT_BASEREL,
RELOPT_JOINREL,
RELOPT_OTHER_MEMBER_REL
RELOPT_OTHER_MEMBER_REL,
RELOPT_DEADREL
} RelOptKind;
typedef struct RelOptInfo
......@@ -801,22 +804,6 @@ typedef struct UniquePath
double rows; /* estimated number of result tuples */
} UniquePath;
/*
* NoOpPath represents exactly the same plan as its subpath. This is used
* when we have determined that a join can be eliminated. The difference
* between the NoOpPath and its subpath is just that the NoOpPath's parent
* is the whole join relation while the subpath is for one of the joined
* relations (and the other one isn't needed).
*
* Note: path.pathtype is always T_Join, but this won't actually give rise
* to a Join plan node.
*/
typedef struct NoOpPath
{
Path path;
Path *subpath;
} NoOpPath;
/*
* All join-type paths share these fields.
*/
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.83 2010/02/26 02:01:26 momjian Exp $
* $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.84 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -51,8 +51,6 @@ extern ResultPath *create_result_path(List *quals);
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath, SpecialJoinInfo *sjinfo);
extern NoOpPath *create_noop_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath);
extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel);
extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.4 2010/01/02 16:58:07 momjian Exp $
* $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.5 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -22,6 +22,7 @@ extern PlaceHolderVar *make_placeholder_expr(PlannerInfo *root, Expr *expr,
extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root,
PlaceHolderVar *phv);
extern void fix_placeholder_eval_levels(PlannerInfo *root);
extern void add_placeholders_to_base_rels(PlannerInfo *root);
extern void add_placeholders_to_joinrel(PlannerInfo *root,
RelOptInfo *joinrel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.126 2010/02/26 02:01:26 momjian Exp $
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.127 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -108,6 +108,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
Expr *item2,
Relids qualscope);
/*
* prototypes for plan/analyzejoins.c
*/
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
/*
* prototypes for plan/setrefs.c
*/
......
......@@ -2494,6 +2494,38 @@ select * from int4_tbl a full join int4_tbl b on false;
--
-- test join removal
--
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a"
CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b"
CREATE TEMP TABLE c (id int PRIMARY KEY);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "c_pkey" for table "c"
INSERT INTO a VALUES (0, 0), (1, NULL);
INSERT INTO b VALUES (0, 0), (1, NULL);
INSERT INTO c VALUES (0), (1);
-- all three cases should be optimizable into a simple seqscan
explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
QUERY PLAN
---------------
Seq Scan on a
(1 row)
explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
QUERY PLAN
---------------
Seq Scan on b
(1 row)
explain (costs off)
SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
ON (a.b_id = b.id);
QUERY PLAN
---------------
Seq Scan on a
(1 row)
rollback;
create temp table parent (k int primary key, pd int);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "parent_pkey" for table "parent"
create temp table child (k int unique, cd int);
......@@ -2540,3 +2572,24 @@ explain (costs off)
-> Seq Scan on child c
(5 rows)
-- bug 5255: this is not optimizable by join removal
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a"
CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b"
INSERT INTO a VALUES (0), (1);
INSERT INTO b VALUES (0, 0), (1, NULL);
SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
id | a_id | id
----+------+----
1 | |
(1 row)
SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
id | a_id
----+------
1 |
(1 row)
rollback;
......@@ -572,6 +572,24 @@ select * from int4_tbl a full join int4_tbl b on false;
-- test join removal
--
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
CREATE TEMP TABLE c (id int PRIMARY KEY);
INSERT INTO a VALUES (0, 0), (1, NULL);
INSERT INTO b VALUES (0, 0), (1, NULL);
INSERT INTO c VALUES (0), (1);
-- all three cases should be optimizable into a simple seqscan
explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
explain (costs off)
SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
ON (a.b_id = b.id);
rollback;
create temp table parent (k int primary key, pd int);
create temp table child (k int unique, cd int);
insert into parent values (1, 10), (2, 20), (3, 30);
......@@ -590,3 +608,16 @@ explain (costs off)
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
-- bug 5255: this is not optimizable by join removal
begin;
CREATE TEMP TABLE a (id int PRIMARY KEY);
CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
INSERT INTO a VALUES (0), (1);
INSERT INTO b VALUES (0, 0), (1, NULL);
SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
rollback;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment