Commit 59649c3f authored by Robert Haas's avatar Robert Haas

Refactor merge path generation code.

This shouldn't change the set of paths that get generated in any
way, but it is preparatory work for further changes to allow a
partial path to be merge-joined witih a non-partial path to produce
a partial join path.

Dilip Kumar, with cosmetic adjustments by me.
parent f3b421da
......@@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
static void generate_mergejoin_paths(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *innerrel,
Path *outerpath,
JoinType jointype,
JoinPathExtraData *extra,
bool useallclauses,
Path *inner_cheapest_total,
List *merge_pathkeys);
/*
......@@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
}
}
/*
* generate_mergejoin_paths
* Creates possible mergejoin paths for input outerpath.
*
* We generate mergejoins if mergejoin clauses are available. We have
* two ways to generate the inner path for a mergejoin: sort the cheapest
* inner path, or use an inner path that is already suitably ordered for the
* merge. If we have several mergeclauses, it could be that there is no inner
* path (or only a very expensive one) for the full list of mergeclauses, but
* better paths exist if we truncate the mergeclause list (thereby discarding
* some sort key requirements). So, we consider truncations of the
* mergeclause list as well as the full list. (Ideally we'd consider all
* subsets of the mergeclause list, but that seems way too expensive.)
*/
static void
generate_mergejoin_paths(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *innerrel,
Path *outerpath,
JoinType jointype,
JoinPathExtraData *extra,
bool useallclauses,
Path *inner_cheapest_total,
List *merge_pathkeys)
{
List *mergeclauses;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
JoinType save_jointype = jointype;
int num_sortkeys;
int sortkeycnt;
if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
jointype = JOIN_INNER;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
extra->mergeclause_list);
/*
* Done with this outer path if no chance for a mergejoin.
*
* Special corner case: for "x FULL JOIN y ON true", there will be no join
* clauses at all. Ordinarily we'd generate a clauseless nestloop path,
* but since mergejoin is our only join type that supports FULL JOIN
* without any join clauses, it's necessary to generate a clauseless
* mergejoin path instead.
*/
if (mergeclauses == NIL)
{
if (jointype == JOIN_FULL)
/* okay to try for mergejoin */ ;
else
return;
}
if (useallclauses &&
list_length(mergeclauses) != list_length(extra->mergeclause_list))
return;
/* Compute the required ordering of the inner path */
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner. Since
* a sort will be needed, only cheapest total cost matters. (But
* try_mergejoin_path will do the right thing if inner_cheapest_total is
* already correctly sorted.)
*/
try_mergejoin_path(root,
joinrel,
outerpath,
inner_cheapest_total,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys,
jointype,
extra);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
return;
/*
* Look for presorted inner paths that satisfy the innersortkey list ---
* or any truncation thereof, if we are allowed to build a mergejoin using
* a subset of the merge clauses. Here, we consider both cheap startup
* cost and cheap total cost.
*
* Currently we do not consider parameterized inner paths here. This
* interacts with decisions elsewhere that also discriminate against
* mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*
* As we shorten the sortkey list, we should consider only paths that are
* strictly cheaper than (in particular, not the same as) any path found
* in an earlier iteration. Otherwise we'd be intentionally using fewer
* merge keys than a given path allows (treating the rest as plain
* joinquals), which is unlikely to be a good idea. Also, eliminating
* paths here on the basis of compare_path_costs is a lot cheaper than
* building the mergejoin path only to throw it away.
*
* If inner_cheapest_total is well enough sorted to have not required a
* sort in the path made above, we shouldn't make a duplicate path with
* it, either. We handle that case with the same logic that handles the
* previous consideration, by initializing the variables that track
* cheapest-so-far properly. Note that we do NOT reject
* inner_cheapest_total if we find it matches some shorter set of
* pathkeys. That case corresponds to using fewer mergekeys to avoid
* sorting inner_cheapest_total, whereas we did sort it above, so the
* plans being considered are different.
*/
if (pathkeys_contained_in(innersortkeys,
inner_cheapest_total->pathkeys))
{
/* inner_cheapest_total didn't require a sort */
cheapest_startup_inner = inner_cheapest_total;
cheapest_total_inner = inner_cheapest_total;
}
else
{
/* it did require a sort, at least for the full set of keys */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
}
num_sortkeys = list_length(innersortkeys);
if (num_sortkeys > 1 && !useallclauses)
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
else
trialsortkeys = innersortkeys; /* won't really truncate */
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
* Look for an inner path ordered well enough for the first
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
* destructively, which is why we made a copy...
*/
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
/* Select the right mergeclauses, if we didn't already */
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
if (innerpath != cheapest_total_inner)
{
/*
* Avoid rebuilding clause list if we already made one; saves
* memory in big join trees...
*/
if (newclauses == NIL)
{
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
}
cheapest_startup_inner = innerpath;
}
/*
* Don't consider truncated sortkeys if we need all clauses.
*/
if (useallclauses)
break;
}
}
/*
* match_unsorted_outer
* Creates possible join paths for processing a single join relation
......@@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
* cheapest-total inner-indexscan path (if any), and one on the
* cheapest-startup inner-indexscan path (if different).
*
* We also consider mergejoins if mergejoin clauses are available. We have
* two ways to generate the inner path for a mergejoin: sort the cheapest
* inner path, or use an inner path that is already suitably ordered for the
* merge. If we have several mergeclauses, it could be that there is no inner
* path (or only a very expensive one) for the full list of mergeclauses, but
* better paths exist if we truncate the mergeclause list (thereby discarding
* some sort key requirements). So, we consider truncations of the
* mergeclause list as well as the full list. (Ideally we'd consider all
* subsets of the mergeclause list, but that seems way too expensive.)
* We also consider mergejoins if mergejoin clauses are available. See
* detailed comments in generate_mergejoin_paths.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
......@@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
int num_sortkeys;
int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
......@@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
if (inner_cheapest_total == NULL)
continue;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
extra->mergeclause_list);
/*
* Done with this outer path if no chance for a mergejoin.
*
* Special corner case: for "x FULL JOIN y ON true", there will be no
* join clauses at all. Ordinarily we'd generate a clauseless
* nestloop path, but since mergejoin is our only join type that
* supports FULL JOIN without any join clauses, it's necessary to
* generate a clauseless mergejoin path instead.
*/
if (mergeclauses == NIL)
{
if (jointype == JOIN_FULL)
/* okay to try for mergejoin */ ;
else
continue;
}
if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
continue;
/* Compute the required ordering of the inner path */
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
* try_mergejoin_path will do the right thing if inner_cheapest_total
* is already correctly sorted.)
*/
try_mergejoin_path(root,
joinrel,
outerpath,
inner_cheapest_total,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys,
jointype,
extra);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
continue;
/*
* Look for presorted inner paths that satisfy the innersortkey list
* --- or any truncation thereof, if we are allowed to build a
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
* Currently we do not consider parameterized inner paths here. This
* interacts with decisions elsewhere that also discriminate against
* mergejoins with parameterized inputs; see comments in
* src/backend/optimizer/README.
*
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
* using fewer merge keys than a given path allows (treating the rest
* as plain joinquals), which is unlikely to be a good idea. Also,
* eliminating paths here on the basis of compare_path_costs is a lot
* cheaper than building the mergejoin path only to throw it away.
*
* If inner_cheapest_total is well enough sorted to have not required
* a sort in the path made above, we shouldn't make a duplicate path
* with it, either. We handle that case with the same logic that
* handles the previous consideration, by initializing the variables
* that track cheapest-so-far properly. Note that we do NOT reject
* inner_cheapest_total if we find it matches some shorter set of
* pathkeys. That case corresponds to using fewer mergekeys to avoid
* sorting inner_cheapest_total, whereas we did sort it above, so the
* plans being considered are different.
*/
if (pathkeys_contained_in(innersortkeys,
inner_cheapest_total->pathkeys))
{
/* inner_cheapest_total didn't require a sort */
cheapest_startup_inner = inner_cheapest_total;
cheapest_total_inner = inner_cheapest_total;
}
else
{
/* it did require a sort, at least for the full set of keys */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
}
num_sortkeys = list_length(innersortkeys);
if (num_sortkeys > 1 && !useallclauses)
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
else
trialsortkeys = innersortkeys; /* won't really truncate */
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
* Look for an inner path ordered well enough for the first
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
* destructively, which is why we made a copy...
*/
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
/* Select the right mergeclauses, if we didn't already */
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
if (innerpath != cheapest_total_inner)
{
/*
* Avoid rebuilding clause list if we already made one;
* saves memory in big join trees...
*/
if (newclauses == NIL)
{
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
try_mergejoin_path(root,
joinrel,
outerpath,
innerpath,
merge_pathkeys,
newclauses,
NIL,
NIL,
jointype,
extra);
}
cheapest_startup_inner = innerpath;
}
/*
* Don't consider truncated sortkeys if we need all clauses.
*/
if (useallclauses)
break;
}
/* Generate merge join paths */
generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
save_jointype, extra, useallclauses,
inner_cheapest_total, merge_pathkeys);
}
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment