Commit 3bc7dafa authored by Robert Haas's avatar Robert Haas

Consider parallel merge joins.

Commit 45be99f8 took the position
that performing a merge join in parallel was not likely to work out
well, but this conclusion was greeted with skepticism even at the
time.  Whether it was true then or not, it's clearly not true any
more now that we have parallel index scan.

Dilip Kumar, reviewed by Amit Kapila and by me.

Discussion: http://postgr.es/m/CAFiTN-v3=cM6nyFwFGp0fmvY4=kk79Hq9Fgu0u8CSJ-EEq1Tiw@mail.gmail.com
parent ef266239
...@@ -28,6 +28,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; ...@@ -28,6 +28,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
#define PATH_PARAM_BY_REL(path, rel) \ #define PATH_PARAM_BY_REL(path, rel) \
((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
static void try_partial_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys,
JoinType jointype,
JoinPathExtraData *extra);
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra); JoinType jointype, JoinPathExtraData *extra);
...@@ -40,6 +50,13 @@ static void consider_parallel_nestloop(PlannerInfo *root, ...@@ -40,6 +50,13 @@ static void consider_parallel_nestloop(PlannerInfo *root,
RelOptInfo *innerrel, RelOptInfo *innerrel,
JoinType jointype, JoinType jointype,
JoinPathExtraData *extra); JoinPathExtraData *extra);
static void consider_parallel_mergejoin(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
JoinPathExtraData *extra,
Path *inner_cheapest_total);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra); JoinType jointype, JoinPathExtraData *extra);
...@@ -58,7 +75,8 @@ static void generate_mergejoin_paths(PlannerInfo *root, ...@@ -58,7 +75,8 @@ static void generate_mergejoin_paths(PlannerInfo *root,
JoinPathExtraData *extra, JoinPathExtraData *extra,
bool useallclauses, bool useallclauses,
Path *inner_cheapest_total, Path *inner_cheapest_total,
List *merge_pathkeys); List *merge_pathkeys,
bool is_partial);
/* /*
...@@ -416,11 +434,27 @@ try_mergejoin_path(PlannerInfo *root, ...@@ -416,11 +434,27 @@ try_mergejoin_path(PlannerInfo *root,
List *outersortkeys, List *outersortkeys,
List *innersortkeys, List *innersortkeys,
JoinType jointype, JoinType jointype,
JoinPathExtraData *extra) JoinPathExtraData *extra,
bool is_partial)
{ {
Relids required_outer; Relids required_outer;
JoinCostWorkspace workspace; JoinCostWorkspace workspace;
if (is_partial)
{
try_partial_mergejoin_path(root,
joinrel,
outer_path,
inner_path,
pathkeys,
mergeclauses,
outersortkeys,
innersortkeys,
jointype,
extra);
return;
}
/* /*
* Check to see if proposed path is still parameterized, and reject if the * Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible. * parameterization wouldn't be sensible.
...@@ -480,6 +514,76 @@ try_mergejoin_path(PlannerInfo *root, ...@@ -480,6 +514,76 @@ try_mergejoin_path(PlannerInfo *root,
} }
} }
/*
* try_partial_mergejoin_path
* Consider a partial merge join path; if it appears useful, push it into
* the joinrel's pathlist via add_partial_path().
*/
static void
try_partial_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
Path *inner_path,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys,
JoinType jointype,
JoinPathExtraData *extra)
{
JoinCostWorkspace workspace;
/*
* See comments in try_partial_hashjoin_path().
*/
Assert(bms_is_empty(joinrel->lateral_relids));
if (inner_path->param_info != NULL)
{
Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
if (!bms_is_empty(inner_paramrels))
return;
}
/*
* If the given paths are already well enough ordered, we can skip doing
* an explicit sort.
*/
if (outersortkeys &&
pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
outersortkeys = NIL;
if (innersortkeys &&
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
/*
* See comments in try_partial_nestloop_path().
*/
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
outer_path, inner_path,
outersortkeys, innersortkeys,
extra->sjinfo);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return;
/* Might be good enough to be worth trying, so let's try it. */
add_partial_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
&workspace,
extra->sjinfo,
outer_path,
inner_path,
extra->restrictlist,
pathkeys,
NULL,
mergeclauses,
outersortkeys,
innersortkeys));
}
/* /*
* try_hashjoin_path * try_hashjoin_path
* Consider a hash join path; if it appears useful, push it into * Consider a hash join path; if it appears useful, push it into
...@@ -649,8 +753,11 @@ sort_inner_and_outer(PlannerInfo *root, ...@@ -649,8 +753,11 @@ sort_inner_and_outer(PlannerInfo *root,
JoinType jointype, JoinType jointype,
JoinPathExtraData *extra) JoinPathExtraData *extra)
{ {
JoinType save_jointype = jointype;
Path *outer_path; Path *outer_path;
Path *inner_path; Path *inner_path;
Path *cheapest_partial_outer;
Path *cheapest_safe_inner = NULL;
List *all_pathkeys; List *all_pathkeys;
ListCell *l; ListCell *l;
...@@ -699,6 +806,30 @@ sort_inner_and_outer(PlannerInfo *root, ...@@ -699,6 +806,30 @@ sort_inner_and_outer(PlannerInfo *root,
jointype = JOIN_INNER; jointype = JOIN_INNER;
} }
/*
* If the joinrel is parallel-safe, we may be able to consider a partial
* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
* outer path will be partial, and therefore we won't be able to properly
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
* JOIN_RIGHT, because they can produce false null extended rows. Also,
* the resulting path must not be parameterized.
*/
if (joinrel->consider_parallel &&
save_jointype != JOIN_UNIQUE_OUTER &&
save_jointype != JOIN_FULL &&
save_jointype != JOIN_RIGHT &&
outerrel->partial_pathlist != NIL &&
bms_is_empty(joinrel->lateral_relids))
{
cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
if (inner_path->parallel_safe)
cheapest_safe_inner = inner_path;
else if (save_jointype != JOIN_UNIQUE_INNER)
cheapest_safe_inner =
get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
}
/* /*
* Each possible ordering of the available mergejoin clauses will generate * Each possible ordering of the available mergejoin clauses will generate
* a differently-sorted result path at essentially the same cost. We have * a differently-sorted result path at essentially the same cost. We have
...@@ -781,6 +912,23 @@ sort_inner_and_outer(PlannerInfo *root, ...@@ -781,6 +912,23 @@ sort_inner_and_outer(PlannerInfo *root,
outerkeys, outerkeys,
innerkeys, innerkeys,
jointype, jointype,
extra,
false);
/*
* If we have partial outer and parallel safe inner path then try
* partial mergejoin path.
*/
if (cheapest_partial_outer && cheapest_safe_inner)
try_partial_mergejoin_path(root,
joinrel,
cheapest_partial_outer,
cheapest_safe_inner,
merge_pathkeys,
cur_mergeclauses,
outerkeys,
innerkeys,
jointype,
extra); extra);
} }
} }
...@@ -808,7 +956,8 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -808,7 +956,8 @@ generate_mergejoin_paths(PlannerInfo *root,
JoinPathExtraData *extra, JoinPathExtraData *extra,
bool useallclauses, bool useallclauses,
Path *inner_cheapest_total, Path *inner_cheapest_total,
List *merge_pathkeys) List *merge_pathkeys,
bool is_partial)
{ {
List *mergeclauses; List *mergeclauses;
List *innersortkeys; List *innersortkeys;
...@@ -868,7 +1017,8 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -868,7 +1017,8 @@ generate_mergejoin_paths(PlannerInfo *root,
NIL, NIL,
innersortkeys, innersortkeys,
jointype, jointype,
extra); extra,
is_partial);
/* Can't do anything else if inner path needs to be unique'd */ /* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER) if (save_jointype == JOIN_UNIQUE_INNER)
...@@ -937,7 +1087,7 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -937,7 +1087,7 @@ generate_mergejoin_paths(PlannerInfo *root,
trialsortkeys, trialsortkeys,
NULL, NULL,
TOTAL_COST, TOTAL_COST,
false); is_partial);
if (innerpath != NULL && if (innerpath != NULL &&
(cheapest_total_inner == NULL || (cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner, compare_path_costs(innerpath, cheapest_total_inner,
...@@ -965,7 +1115,8 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -965,7 +1115,8 @@ generate_mergejoin_paths(PlannerInfo *root,
NIL, NIL,
NIL, NIL,
jointype, jointype,
extra); extra,
is_partial);
cheapest_total_inner = innerpath; cheapest_total_inner = innerpath;
} }
/* Same on the basis of cheapest startup cost ... */ /* Same on the basis of cheapest startup cost ... */
...@@ -973,7 +1124,7 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -973,7 +1124,7 @@ generate_mergejoin_paths(PlannerInfo *root,
trialsortkeys, trialsortkeys,
NULL, NULL,
STARTUP_COST, STARTUP_COST,
false); is_partial);
if (innerpath != NULL && if (innerpath != NULL &&
(cheapest_startup_inner == NULL || (cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner, compare_path_costs(innerpath, cheapest_startup_inner,
...@@ -1009,7 +1160,8 @@ generate_mergejoin_paths(PlannerInfo *root, ...@@ -1009,7 +1160,8 @@ generate_mergejoin_paths(PlannerInfo *root,
NIL, NIL,
NIL, NIL,
jointype, jointype,
extra); extra,
is_partial);
} }
cheapest_startup_inner = innerpath; cheapest_startup_inner = innerpath;
} }
...@@ -1221,22 +1373,91 @@ match_unsorted_outer(PlannerInfo *root, ...@@ -1221,22 +1373,91 @@ match_unsorted_outer(PlannerInfo *root,
/* Generate merge join paths */ /* Generate merge join paths */
generate_mergejoin_paths(root, joinrel, innerrel, outerpath, generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
save_jointype, extra, useallclauses, save_jointype, extra, useallclauses,
inner_cheapest_total, merge_pathkeys); inner_cheapest_total, merge_pathkeys,
false);
} }
/* /*
* If the joinrel is parallel-safe and the join type supports nested * Consider partial nestloop and mergejoin plan if outerrel has any
* loops, we may be able to consider a partial nestloop plan. However, we * partial path and the joinrel is parallel-safe. However, we can't
* can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial, * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
* and therefore we won't be able to properly guarantee uniqueness. Nor * therefore we won't be able to properly guarantee uniqueness. Nor can
* can we handle extra_lateral_rels, since partial paths must not be * we handle extra_lateral_rels, since partial paths must not be
* parameterized. * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
* because they can produce false null extended rows.
*/ */
if (joinrel->consider_parallel && nestjoinOK && if (joinrel->consider_parallel &&
save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_UNIQUE_OUTER &&
save_jointype != JOIN_FULL &&
save_jointype != JOIN_RIGHT &&
outerrel->partial_pathlist != NIL &&
bms_is_empty(joinrel->lateral_relids)) bms_is_empty(joinrel->lateral_relids))
{
if (nestjoinOK)
consider_parallel_nestloop(root, joinrel, outerrel, innerrel, consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
save_jointype, extra); save_jointype, extra);
/*
* If inner_cheapest_total is NULL or non parallel-safe then find the
* cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
* can't use any alternative inner path.
*/
if (inner_cheapest_total == NULL ||
!inner_cheapest_total->parallel_safe)
{
if (save_jointype == JOIN_UNIQUE_INNER)
return;
inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
innerrel->pathlist);
}
if (inner_cheapest_total)
consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
save_jointype, extra,
inner_cheapest_total);
}
}
/*
* consider_parallel_mergejoin
* Try to build partial paths for a joinrel by joining a partial path
* for the outer relation to a complete path for the inner relation.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'jointype' is the type of join to do
* 'extra' contains additional input values
* 'inner_cheapest_total' cheapest total path for innerrel
*/
static void
consider_parallel_mergejoin(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
JoinPathExtraData *extra,
Path *inner_cheapest_total)
{
ListCell *lc1;
/* generate merge join path for each partial outer path */
foreach(lc1, outerrel->partial_pathlist)
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
/*
* Figure out what useful ordering any paths we create will have.
*/
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
extra, false, inner_cheapest_total,
merge_pathkeys, true);
}
} }
/* /*
......
...@@ -169,6 +169,31 @@ select count(*) from tenk1 where thousand > 95; ...@@ -169,6 +169,31 @@ select count(*) from tenk1 where thousand > 95;
reset enable_seqscan; reset enable_seqscan;
reset enable_bitmapscan; reset enable_bitmapscan;
-- test parallel merge join path.
set enable_hashjoin to off;
set enable_nestloop to off;
explain (costs off)
select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1;
QUERY PLAN
-------------------------------------------------------------------------------
Finalize Aggregate
-> Gather
Workers Planned: 4
-> Partial Aggregate
-> Merge Join
Merge Cond: (tenk1.unique1 = tenk2.unique1)
-> Parallel Index Only Scan using tenk1_unique1 on tenk1
-> Index Only Scan using tenk2_unique1 on tenk2
(8 rows)
select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1;
count
-------
10000
(1 row)
reset enable_hashjoin;
reset enable_nestloop;
set force_parallel_mode=1; set force_parallel_mode=1;
explain (costs off) explain (costs off)
select stringu1::int2 from tenk1 where unique1 = 1; select stringu1::int2 from tenk1 where unique1 = 1;
......
...@@ -64,6 +64,16 @@ select count(*) from tenk1 where thousand > 95; ...@@ -64,6 +64,16 @@ select count(*) from tenk1 where thousand > 95;
reset enable_seqscan; reset enable_seqscan;
reset enable_bitmapscan; reset enable_bitmapscan;
-- test parallel merge join path.
set enable_hashjoin to off;
set enable_nestloop to off;
explain (costs off)
select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1;
select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1;
reset enable_hashjoin;
reset enable_nestloop;
set force_parallel_mode=1; set force_parallel_mode=1;
explain (costs off) explain (costs off)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment