Commit 9c7f5229 authored by Tom Lane

Optimize joins when the inner relation can be proven unique.

If there can certainly be no more than one matching inner row for a given
outer row, then the executor can move on to the next outer row as soon as
it's found one match; there's no need to continue scanning the inner
relation for this outer row.  This saves useless scanning in nestloop
and hash joins.  In merge joins, it offers the opportunity to skip
mark/restore processing, because we know we have not advanced past the
first possible match for the next outer row.

Of course, the devil is in the details: the proof of uniqueness must
depend only on joinquals (not otherquals), and if we want to skip
mergejoin mark/restore then it must depend only on merge clauses.
To avoid adding more planning overhead than absolutely necessary,
the present patch errs in the conservative direction: there are cases
where inner_unique or skip_mark_restore processing could be used, but
it will not do so because it's not sure that the uniqueness proof
depended only on "safe" clauses.  This could be improved later.

David Rowley, reviewed and rather heavily editorialized on by me

Discussion: https://postgr.es/m/CAApHDvqF6Sw-TK98bW48TdtFJ+3a7D2mFyZ7++=D-RyPsL76gw@mail.gmail.com
parent f13a9121
...@@ -2336,8 +2336,8 @@ SELECT * ...@@ -2336,8 +2336,8 @@ SELECT *
WHERE t.id IS NULL OR m.id IS NULL; WHERE t.id IS NULL OR m.id IS NULL;
id | name | id | name id | name | id | name
----+------+----+------ ----+------+----+------
| | 2 | Two
2 | two | | 2 | two | |
| | 2 | Two
(2 rows) (2 rows)
REFRESH MATERIALIZED VIEW CONCURRENTLY citext_matview; REFRESH MATERIALIZED VIEW CONCURRENTLY citext_matview;
......
...@@ -2336,8 +2336,8 @@ SELECT * ...@@ -2336,8 +2336,8 @@ SELECT *
WHERE t.id IS NULL OR m.id IS NULL; WHERE t.id IS NULL OR m.id IS NULL;
id | name | id | name id | name | id | name
----+------+----+------ ----+------+----+------
| | 2 | Two
2 | two | | 2 | two | |
| | 2 | Two
(2 rows) (2 rows)
REFRESH MATERIALIZED VIEW CONCURRENTLY citext_matview; REFRESH MATERIALIZED VIEW CONCURRENTLY citext_matview;
......
...@@ -1343,6 +1343,23 @@ ExplainNode(PlanState *planstate, List *ancestors, ...@@ -1343,6 +1343,23 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->verbose) if (es->verbose)
show_plan_tlist(planstate, ancestors, es); show_plan_tlist(planstate, ancestors, es);
/* unique join */
switch (nodeTag(plan))
{
case T_NestLoop:
case T_MergeJoin:
case T_HashJoin:
/* try not to be too chatty about this in text mode */
if (es->format != EXPLAIN_FORMAT_TEXT ||
(es->verbose && ((Join *) plan)->inner_unique))
ExplainPropertyBool("Inner Unique",
((Join *) plan)->inner_unique,
es);
break;
default:
break;
}
/* quals, sort keys, etc */ /* quals, sort keys, etc */
switch (nodeTag(plan)) switch (nodeTag(plan))
{ {
......
...@@ -288,10 +288,11 @@ ExecHashJoin(HashJoinState *node) ...@@ -288,10 +288,11 @@ ExecHashJoin(HashJoinState *node)
} }
/* /*
* In a semijoin, we'll consider returning the first * If we only need to join to the first matching inner
* match, but after that we're done with this outer tuple. * tuple, then consider returning this one, but after that
* continue with next outer tuple.
*/ */
if (node->js.jointype == JOIN_SEMI) if (node->js.single_match)
node->hj_JoinState = HJ_NEED_NEW_OUTER; node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (otherqual == NULL || ExecQual(otherqual, econtext)) if (otherqual == NULL || ExecQual(otherqual, econtext))
...@@ -435,6 +436,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ...@@ -435,6 +436,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
ExecInitResultTupleSlot(estate, &hjstate->js.ps); ExecInitResultTupleSlot(estate, &hjstate->js.ps);
hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate); hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
/*
* detect whether we need only consider the first matching inner tuple
*/
hjstate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */ /* set up null tuples for outer joins, if needed */
switch (node->join.jointype) switch (node->join.jointype)
{ {
......
...@@ -802,10 +802,11 @@ ExecMergeJoin(MergeJoinState *node) ...@@ -802,10 +802,11 @@ ExecMergeJoin(MergeJoinState *node)
} }
/* /*
* In a semijoin, we'll consider returning the first * If we only need to join to the first matching inner
* match, but after that we're done with this outer tuple. * tuple, then consider returning this one, but after that
* continue with next outer tuple.
*/ */
if (node->js.jointype == JOIN_SEMI) if (node->js.single_match)
node->mj_JoinState = EXEC_MJ_NEXTOUTER; node->mj_JoinState = EXEC_MJ_NEXTOUTER;
qualResult = (otherqual == NULL || qualResult = (otherqual == NULL ||
...@@ -1050,6 +1051,10 @@ ExecMergeJoin(MergeJoinState *node) ...@@ -1050,6 +1051,10 @@ ExecMergeJoin(MergeJoinState *node)
* scan position to the first mark, and go join that tuple * scan position to the first mark, and go join that tuple
* (and any following ones) to the new outer. * (and any following ones) to the new outer.
* *
* If we were able to determine mark and restore are not
* needed, then we don't have to back up; the current
* inner is already the first possible match.
*
* NOTE: we do not need to worry about the MatchedInner * NOTE: we do not need to worry about the MatchedInner
* state for the rescanned inner tuples. We know all of * state for the rescanned inner tuples. We know all of
* them will match this new outer tuple and therefore * them will match this new outer tuple and therefore
...@@ -1062,16 +1067,19 @@ ExecMergeJoin(MergeJoinState *node) ...@@ -1062,16 +1067,19 @@ ExecMergeJoin(MergeJoinState *node)
* forcing the merge clause to never match, so we never * forcing the merge clause to never match, so we never
* get here. * get here.
*/ */
if (!node->mj_SkipMarkRestore)
{
ExecRestrPos(innerPlan); ExecRestrPos(innerPlan);
/* /*
* ExecRestrPos probably should give us back a new Slot, * ExecRestrPos probably should give us back a new
* but since it doesn't, use the marked slot. (The * Slot, but since it doesn't, use the marked slot.
* previously returned mj_InnerTupleSlot cannot be assumed * (The previously returned mj_InnerTupleSlot cannot
* to hold the required tuple.) * be assumed to hold the required tuple.)
*/ */
node->mj_InnerTupleSlot = innerTupleSlot; node->mj_InnerTupleSlot = innerTupleSlot;
/* we need not do MJEvalInnerValues again */ /* we need not do MJEvalInnerValues again */
}
node->mj_JoinState = EXEC_MJ_JOINTUPLES; node->mj_JoinState = EXEC_MJ_JOINTUPLES;
} }
...@@ -1172,6 +1180,7 @@ ExecMergeJoin(MergeJoinState *node) ...@@ -1172,6 +1180,7 @@ ExecMergeJoin(MergeJoinState *node)
if (compareResult == 0) if (compareResult == 0)
{ {
if (!node->mj_SkipMarkRestore)
ExecMarkPos(innerPlan); ExecMarkPos(innerPlan);
MarkInnerTuple(node->mj_InnerTupleSlot, node); MarkInnerTuple(node->mj_InnerTupleSlot, node);
...@@ -1466,11 +1475,18 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) ...@@ -1466,11 +1475,18 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
/* /*
* initialize child nodes * initialize child nodes
* *
* inner child must support MARK/RESTORE. * inner child must support MARK/RESTORE, unless we have detected that we
* don't need that. Note that skip_mark_restore must never be set if
* there are non-mergeclause joinquals, since the logic wouldn't work.
*/ */
Assert(node->join.joinqual == NIL || !node->skip_mark_restore);
mergestate->mj_SkipMarkRestore = node->skip_mark_restore;
outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags); outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags);
innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate, innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
eflags | EXEC_FLAG_MARK); mergestate->mj_SkipMarkRestore ?
eflags :
(eflags | EXEC_FLAG_MARK));
/* /*
* For certain types of inner child nodes, it is advantageous to issue * For certain types of inner child nodes, it is advantageous to issue
...@@ -1483,7 +1499,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) ...@@ -1483,7 +1499,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
* only if eflags doesn't specify REWIND. * only if eflags doesn't specify REWIND.
*/ */
if (IsA(innerPlan(node), Material) && if (IsA(innerPlan(node), Material) &&
(eflags & EXEC_FLAG_REWIND) == 0) (eflags & EXEC_FLAG_REWIND) == 0 &&
!mergestate->mj_SkipMarkRestore)
mergestate->mj_ExtraMarks = true; mergestate->mj_ExtraMarks = true;
else else
mergestate->mj_ExtraMarks = false; mergestate->mj_ExtraMarks = false;
...@@ -1497,6 +1514,13 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) ...@@ -1497,6 +1514,13 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot, ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot,
ExecGetResultType(innerPlanState(mergestate))); ExecGetResultType(innerPlanState(mergestate)));
/*
* detect whether we need only consider the first matching inner tuple
*/
mergestate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype) switch (node->join.jointype)
{ {
case JOIN_INNER: case JOIN_INNER:
......
...@@ -219,10 +219,11 @@ ExecNestLoop(NestLoopState *node) ...@@ -219,10 +219,11 @@ ExecNestLoop(NestLoopState *node)
} }
/* /*
* In a semijoin, we'll consider returning the first match, but * If we only need to join to the first matching inner tuple, then
* after that we're done with this outer tuple. * consider returning this one, but after that continue with next
* outer tuple.
*/ */
if (node->js.jointype == JOIN_SEMI) if (node->js.single_match)
node->nl_NeedNewOuter = true; node->nl_NeedNewOuter = true;
if (otherqual == NULL || ExecQual(otherqual, econtext)) if (otherqual == NULL || ExecQual(otherqual, econtext))
...@@ -309,6 +310,13 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) ...@@ -309,6 +310,13 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
*/ */
ExecInitResultTupleSlot(estate, &nlstate->js.ps); ExecInitResultTupleSlot(estate, &nlstate->js.ps);
/*
* detect whether we need only consider the first matching inner tuple
*/
nlstate->js.single_match = (node->join.inner_unique ||
node->join.jointype == JOIN_SEMI);
/* set up null tuples for outer joins, if needed */
switch (node->join.jointype) switch (node->join.jointype)
{ {
case JOIN_INNER: case JOIN_INNER:
......
...@@ -797,6 +797,7 @@ CopyJoinFields(const Join *from, Join *newnode) ...@@ -797,6 +797,7 @@ CopyJoinFields(const Join *from, Join *newnode)
CopyPlanFields((const Plan *) from, (Plan *) newnode); CopyPlanFields((const Plan *) from, (Plan *) newnode);
COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(jointype);
COPY_SCALAR_FIELD(inner_unique);
COPY_NODE_FIELD(joinqual); COPY_NODE_FIELD(joinqual);
} }
...@@ -857,6 +858,7 @@ _copyMergeJoin(const MergeJoin *from) ...@@ -857,6 +858,7 @@ _copyMergeJoin(const MergeJoin *from)
/* /*
* copy remainder of node * copy remainder of node
*/ */
COPY_SCALAR_FIELD(skip_mark_restore);
COPY_NODE_FIELD(mergeclauses); COPY_NODE_FIELD(mergeclauses);
numCols = list_length(from->mergeclauses); numCols = list_length(from->mergeclauses);
if (numCols > 0) if (numCols > 0)
......
...@@ -305,6 +305,7 @@ _outJoinPlanInfo(StringInfo str, const Join *node) ...@@ -305,6 +305,7 @@ _outJoinPlanInfo(StringInfo str, const Join *node)
_outPlanInfo(str, (const Plan *) node); _outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(jointype, JoinType); WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(inner_unique);
WRITE_NODE_FIELD(joinqual); WRITE_NODE_FIELD(joinqual);
} }
...@@ -714,6 +715,7 @@ _outMergeJoin(StringInfo str, const MergeJoin *node) ...@@ -714,6 +715,7 @@ _outMergeJoin(StringInfo str, const MergeJoin *node)
_outJoinPlanInfo(str, (const Join *) node); _outJoinPlanInfo(str, (const Join *) node);
WRITE_BOOL_FIELD(skip_mark_restore);
WRITE_NODE_FIELD(mergeclauses); WRITE_NODE_FIELD(mergeclauses);
numCols = list_length(node->mergeclauses); numCols = list_length(node->mergeclauses);
...@@ -1707,6 +1709,7 @@ _outJoinPathInfo(StringInfo str, const JoinPath *node) ...@@ -1707,6 +1709,7 @@ _outJoinPathInfo(StringInfo str, const JoinPath *node)
_outPathInfo(str, (const Path *) node); _outPathInfo(str, (const Path *) node);
WRITE_ENUM_FIELD(jointype, JoinType); WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(inner_unique);
WRITE_NODE_FIELD(outerjoinpath); WRITE_NODE_FIELD(outerjoinpath);
WRITE_NODE_FIELD(innerjoinpath); WRITE_NODE_FIELD(innerjoinpath);
WRITE_NODE_FIELD(joinrestrictinfo); WRITE_NODE_FIELD(joinrestrictinfo);
...@@ -2114,6 +2117,7 @@ _outMergePath(StringInfo str, const MergePath *node) ...@@ -2114,6 +2117,7 @@ _outMergePath(StringInfo str, const MergePath *node)
WRITE_NODE_FIELD(path_mergeclauses); WRITE_NODE_FIELD(path_mergeclauses);
WRITE_NODE_FIELD(outersortkeys); WRITE_NODE_FIELD(outersortkeys);
WRITE_NODE_FIELD(innersortkeys); WRITE_NODE_FIELD(innersortkeys);
WRITE_BOOL_FIELD(skip_mark_restore);
WRITE_BOOL_FIELD(materialize_inner); WRITE_BOOL_FIELD(materialize_inner);
} }
...@@ -2246,6 +2250,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) ...@@ -2246,6 +2250,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_OID_FIELD(userid); WRITE_OID_FIELD(userid);
WRITE_BOOL_FIELD(useridiscurrent); WRITE_BOOL_FIELD(useridiscurrent);
/* we don't try to print fdwroutine or fdw_private */ /* we don't try to print fdwroutine or fdw_private */
/* can't print unique_for_rels/non_unique_for_rels; BMSes aren't Nodes */
WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(baserestrictinfo);
WRITE_UINT_FIELD(baserestrict_min_security); WRITE_UINT_FIELD(baserestrict_min_security);
WRITE_NODE_FIELD(joininfo); WRITE_NODE_FIELD(joininfo);
......
...@@ -1949,6 +1949,7 @@ ReadCommonJoin(Join *local_node) ...@@ -1949,6 +1949,7 @@ ReadCommonJoin(Join *local_node)
ReadCommonPlan(&local_node->plan); ReadCommonPlan(&local_node->plan);
READ_ENUM_FIELD(jointype, JoinType); READ_ENUM_FIELD(jointype, JoinType);
READ_BOOL_FIELD(inner_unique);
READ_NODE_FIELD(joinqual); READ_NODE_FIELD(joinqual);
} }
...@@ -1992,6 +1993,7 @@ _readMergeJoin(void) ...@@ -1992,6 +1993,7 @@ _readMergeJoin(void)
ReadCommonJoin(&local_node->join); ReadCommonJoin(&local_node->join);
READ_BOOL_FIELD(skip_mark_restore);
READ_NODE_FIELD(mergeclauses); READ_NODE_FIELD(mergeclauses);
numCols = list_length(local_node->mergeclauses); numCols = list_length(local_node->mergeclauses);
......
This diff is collapsed.
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "optimizer/cost.h" #include "optimizer/cost.h"
#include "optimizer/pathnode.h" #include "optimizer/pathnode.h"
#include "optimizer/paths.h" #include "optimizer/paths.h"
#include "optimizer/planmain.h"
/* Hook for plugins to get control in add_paths_to_joinrel() */ /* Hook for plugins to get control in add_paths_to_joinrel() */
set_join_pathlist_hook_type set_join_pathlist_hook = NULL; set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
...@@ -120,6 +121,35 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -120,6 +121,35 @@ add_paths_to_joinrel(PlannerInfo *root,
extra.sjinfo = sjinfo; extra.sjinfo = sjinfo;
extra.param_source_rels = NULL; extra.param_source_rels = NULL;
/*
* See if the inner relation is provably unique for this outer rel.
*
* We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
* matter since the executor can make the equivalent optimization anyway;
* we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
* know we're going to force uniqueness of the innerrel below. For
* JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
* this module.
*/
switch (jointype)
{
case JOIN_SEMI:
case JOIN_ANTI:
extra.inner_unique = false; /* well, unproven */
break;
case JOIN_UNIQUE_INNER:
extra.inner_unique = true;
break;
case JOIN_UNIQUE_OUTER:
extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
JOIN_INNER, restrictlist);
break;
default:
extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
jointype, restrictlist);
break;
}
/* /*
* Find potential mergejoin clauses. We can skip this if we are not * Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin may be our only * interested in doing a mergejoin. However, mergejoin may be our only
...@@ -136,10 +166,10 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -136,10 +166,10 @@ add_paths_to_joinrel(PlannerInfo *root,
&mergejoin_allowed); &mergejoin_allowed);
/* /*
* If it's SEMI or ANTI join, compute correction factors for cost * If it's SEMI, ANTI, or inner_unique join, compute correction factors
* estimation. These will be the same for all paths. * for cost estimation. These will be the same for all paths.
*/ */
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
compute_semi_anti_join_factors(root, outerrel, innerrel, compute_semi_anti_join_factors(root, outerrel, innerrel,
jointype, sjinfo, restrictlist, jointype, sjinfo, restrictlist,
&extra.semifactors); &extra.semifactors);
...@@ -336,8 +366,7 @@ try_nestloop_path(PlannerInfo *root, ...@@ -336,8 +366,7 @@ try_nestloop_path(PlannerInfo *root,
* methodology worthwhile. * methodology worthwhile.
*/ */
initial_cost_nestloop(root, &workspace, jointype, initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path, outer_path, inner_path, extra);
extra->sjinfo, &extra->semifactors);
if (add_path_precheck(joinrel, if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost, workspace.startup_cost, workspace.total_cost,
...@@ -348,8 +377,7 @@ try_nestloop_path(PlannerInfo *root, ...@@ -348,8 +377,7 @@ try_nestloop_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
&extra->semifactors,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
...@@ -399,8 +427,7 @@ try_partial_nestloop_path(PlannerInfo *root, ...@@ -399,8 +427,7 @@ try_partial_nestloop_path(PlannerInfo *root,
* cost. Bail out right away if it looks terrible. * cost. Bail out right away if it looks terrible.
*/ */
initial_cost_nestloop(root, &workspace, jointype, initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path, outer_path, inner_path, extra);
extra->sjinfo, &extra->semifactors);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return; return;
...@@ -410,8 +437,7 @@ try_partial_nestloop_path(PlannerInfo *root, ...@@ -410,8 +437,7 @@ try_partial_nestloop_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
&extra->semifactors,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
...@@ -486,7 +512,7 @@ try_mergejoin_path(PlannerInfo *root, ...@@ -486,7 +512,7 @@ try_mergejoin_path(PlannerInfo *root,
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
outer_path, inner_path, outer_path, inner_path,
outersortkeys, innersortkeys, outersortkeys, innersortkeys,
extra->sjinfo); extra);
if (add_path_precheck(joinrel, if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost, workspace.startup_cost, workspace.total_cost,
...@@ -497,7 +523,7 @@ try_mergejoin_path(PlannerInfo *root, ...@@ -497,7 +523,7 @@ try_mergejoin_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
...@@ -562,7 +588,7 @@ try_partial_mergejoin_path(PlannerInfo *root, ...@@ -562,7 +588,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
outer_path, inner_path, outer_path, inner_path,
outersortkeys, innersortkeys, outersortkeys, innersortkeys,
extra->sjinfo); extra);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return; return;
...@@ -573,7 +599,7 @@ try_partial_mergejoin_path(PlannerInfo *root, ...@@ -573,7 +599,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
...@@ -620,8 +646,7 @@ try_hashjoin_path(PlannerInfo *root, ...@@ -620,8 +646,7 @@ try_hashjoin_path(PlannerInfo *root,
* never have any output pathkeys, per comments in create_hashjoin_path. * never have any output pathkeys, per comments in create_hashjoin_path.
*/ */
initial_cost_hashjoin(root, &workspace, jointype, hashclauses, initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path, outer_path, inner_path, extra);
extra->sjinfo, &extra->semifactors);
if (add_path_precheck(joinrel, if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost, workspace.startup_cost, workspace.total_cost,
...@@ -632,8 +657,7 @@ try_hashjoin_path(PlannerInfo *root, ...@@ -632,8 +657,7 @@ try_hashjoin_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
&extra->semifactors,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
...@@ -683,8 +707,7 @@ try_partial_hashjoin_path(PlannerInfo *root, ...@@ -683,8 +707,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
* cost. Bail out right away if it looks terrible. * cost. Bail out right away if it looks terrible.
*/ */
initial_cost_hashjoin(root, &workspace, jointype, hashclauses, initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path, outer_path, inner_path, extra);
extra->sjinfo, &extra->semifactors);
if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL)) if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
return; return;
...@@ -694,8 +717,7 @@ try_partial_hashjoin_path(PlannerInfo *root, ...@@ -694,8 +717,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
joinrel, joinrel,
jointype, jointype,
&workspace, &workspace,
extra->sjinfo, extra,
&extra->semifactors,
outer_path, outer_path,
inner_path, inner_path,
extra->restrictlist, extra->restrictlist,
......
...@@ -41,6 +41,11 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); ...@@ -41,6 +41,11 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
List *clause_list); List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids); static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype,
List *restrictlist);
/* /*
...@@ -845,3 +850,171 @@ distinct_col_search(int colno, List *colnos, List *opids) ...@@ -845,3 +850,171 @@ distinct_col_search(int colno, List *colnos, List *opids)
} }
return InvalidOid; return InvalidOid;
} }
/*
 * innerrel_is_unique
 *	  Report whether it can be proven, from the join clauses in
 *	  'restrictlist', that no outerrel tuple has more than one matching
 *	  tuple in the innerrel.
 *
 * Only clauses that act as "joinquals" at execution may contribute to the
 * proof; "otherquals" must not.  The two categories coincide for an inner
 * join, but at an outer join the pushed-down quals become "otherquals" and
 * so have to be disregarded.  The result can therefore differ according to
 * IS_OUTER_JOIN(jointype).  Since results are cached with no record of that
 * distinction, callers must not invoke this with jointypes that
 * IS_OUTER_JOIN() would classify differently.
 *
 * This function is chiefly a caching layer; the proof itself is attempted by
 * is_innerrel_unique_for().
 */
bool
innerrel_is_unique(PlannerInfo *root,
				   RelOptInfo *outerrel,
				   RelOptInfo *innerrel,
				   JoinType jointype,
				   List *restrictlist)
{
	MemoryContext saved_cxt;
	ListCell   *cell;
	bool		proven;

	/*
	 * Fast exits: without any join clauses no proof is possible, and neither
	 * is one possible if the innerrel is of a kind for which we surely can't
	 * establish uniqueness.
	 */
	if (restrictlist == NIL || !rel_supports_distinctness(root, innerrel))
		return false;

	/*
	 * Look for a cached success.  An exact match is not required: it is
	 * enough that innerrel was proven unique for some subset of this
	 * outerrel, because additional outer rels cannot make the innerrel less
	 * unique (the restrictlist for a join to a superset outerrel must be a
	 * superset of the conditions that sufficed before).
	 */
	foreach(cell, innerrel->unique_for_rels)
	{
		Relids		proven_outer = (Relids) lfirst(cell);

		if (bms_is_subset(proven_outer, outerrel->relids))
			return true;
	}

	/*
	 * Look for a cached failure: if this outerrel, or some superset of it,
	 * was already found unable to prove uniqueness, it still can't.
	 */
	foreach(cell, innerrel->non_unique_for_rels)
	{
		Relids		failed_outer = (Relids) lfirst(cell);

		if (bms_is_subset(outerrel->relids, failed_outer))
			return false;
	}

	/* Nothing cached applies, so attempt the proof now. */
	proven = is_innerrel_unique_for(root, outerrel, innerrel,
									jointype, restrictlist);

	/*
	 * A failure tells us this outerrel and every subset of it can be rejected
	 * in future checks.  In normal planning, though, remembering that is
	 * pointless: joinrels are built from smaller to larger, so the entry
	 * would never be consulted.  It pays off in GEQO mode, where knowledge
	 * carries over between successive planning attempts, and probably with
	 * join-search plugins as well.  So negative results are cached only when
	 * join_search_private is set.  (A hack, but a reasonable one.)
	 */
	if (!proven && root->join_search_private == NULL)
		return false;

	/*
	 * Record the outcome, taking care that the cache entry lives in
	 * planner_cxt even if we are currently working in GEQO.
	 *
	 * For a positive result one could try to identify the minimal subset of
	 * outerrels that sufficed for the proof.  That isn't worth the trouble:
	 * the planner builds joinrels incrementally, so minimally sufficient
	 * outerrels are seen before any supersets of them anyway.
	 */
	saved_cxt = MemoryContextSwitchTo(root->planner_cxt);
	if (proven)
		innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
											bms_copy(outerrel->relids));
	else
		innerrel->non_unique_for_rels = lappend(innerrel->non_unique_for_rels,
												bms_copy(outerrel->relids));
	MemoryContextSwitchTo(saved_cxt);

	return proven;
}
/*
 * is_innerrel_unique_for
 *	  Attempt the actual proof that the innerrel holds at most one tuple
 *	  matching any given outerrel tuple, using the join clauses found in
 *	  'restrictlist'.
 */
static bool
is_innerrel_unique_for(PlannerInfo *root,
					   RelOptInfo *outerrel,
					   RelOptInfo *innerrel,
					   JoinType jointype,
					   List *restrictlist)
{
	List	   *usable_clauses = NIL;
	bool		at_outer_join = IS_OUTER_JOIN(jointype);
	ListCell   *cell;

	/*
	 * Collect the mergejoinable clauses that constrain the inner rel against
	 * the outer rel.  A mergejoinable operator behaves like equality for some
	 * btree opclass, which is the property we need.  Requiring
	 * mergejoinability also weeds out clauses containing volatile functions,
	 * which could not be relied on.
	 */
	foreach(cell, restrictlist)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

		/* A pushed-down clause is unusable when we're at an outer join. */
		if (at_outer_join && rinfo->is_pushed_down)
			continue;

		/*
		 * Keep the clause only if it is mergejoinable and has the form
		 * "outer op inner" or "inner op outer"; the sides check also marks
		 * which side is inner, and runs only for mergejoinable clauses.
		 */
		if (rinfo->can_join &&
			rinfo->mergeopfamilies != NIL &&
			clause_sides_match_join(rinfo, outerrel->relids,
									innerrel->relids))
			usable_clauses = lappend(usable_clauses, rinfo);
	}

	/* rel_is_distinct_for() does the hard work from here */
	return rel_is_distinct_for(root, innerrel, usable_clauses);
}
...@@ -215,12 +215,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); ...@@ -215,12 +215,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans);
static NestLoop *make_nestloop(List *tlist, static NestLoop *make_nestloop(List *tlist,
List *joinclauses, List *otherclauses, List *nestParams, List *joinclauses, List *otherclauses, List *nestParams,
Plan *lefttree, Plan *righttree, Plan *lefttree, Plan *righttree,
JoinType jointype); JoinType jointype, bool inner_unique);
static HashJoin *make_hashjoin(List *tlist, static HashJoin *make_hashjoin(List *tlist,
List *joinclauses, List *otherclauses, List *joinclauses, List *otherclauses,
List *hashclauses, List *hashclauses,
Plan *lefttree, Plan *righttree, Plan *lefttree, Plan *righttree,
JoinType jointype); JoinType jointype, bool inner_unique);
static Hash *make_hash(Plan *lefttree, static Hash *make_hash(Plan *lefttree,
Oid skewTable, Oid skewTable,
AttrNumber skewColumn, AttrNumber skewColumn,
...@@ -235,7 +235,8 @@ static MergeJoin *make_mergejoin(List *tlist, ...@@ -235,7 +235,8 @@ static MergeJoin *make_mergejoin(List *tlist,
int *mergestrategies, int *mergestrategies,
bool *mergenullsfirst, bool *mergenullsfirst,
Plan *lefttree, Plan *righttree, Plan *lefttree, Plan *righttree,
JoinType jointype); JoinType jointype, bool inner_unique,
bool skip_mark_restore);
static Sort *make_sort(Plan *lefttree, int numCols, static Sort *make_sort(Plan *lefttree, int numCols,
AttrNumber *sortColIdx, Oid *sortOperators, AttrNumber *sortColIdx, Oid *sortOperators,
Oid *collations, bool *nullsFirst); Oid *collations, bool *nullsFirst);
...@@ -3714,7 +3715,8 @@ create_nestloop_plan(PlannerInfo *root, ...@@ -3714,7 +3715,8 @@ create_nestloop_plan(PlannerInfo *root,
nestParams, nestParams,
outer_plan, outer_plan,
inner_plan, inner_plan,
best_path->jointype); best_path->jointype,
best_path->inner_unique);
copy_generic_path_info(&join_plan->join.plan, &best_path->path); copy_generic_path_info(&join_plan->join.plan, &best_path->path);
...@@ -4016,7 +4018,9 @@ create_mergejoin_plan(PlannerInfo *root, ...@@ -4016,7 +4018,9 @@ create_mergejoin_plan(PlannerInfo *root,
mergenullsfirst, mergenullsfirst,
outer_plan, outer_plan,
inner_plan, inner_plan,
best_path->jpath.jointype); best_path->jpath.jointype,
best_path->jpath.inner_unique,
best_path->skip_mark_restore);
/* Costs of sort and material steps are included in path cost already */ /* Costs of sort and material steps are included in path cost already */
copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
...@@ -4156,7 +4160,8 @@ create_hashjoin_plan(PlannerInfo *root, ...@@ -4156,7 +4160,8 @@ create_hashjoin_plan(PlannerInfo *root,
hashclauses, hashclauses,
outer_plan, outer_plan,
(Plan *) hash_plan, (Plan *) hash_plan,
best_path->jpath.jointype); best_path->jpath.jointype,
best_path->jpath.inner_unique);
copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
...@@ -5349,7 +5354,8 @@ make_nestloop(List *tlist, ...@@ -5349,7 +5354,8 @@ make_nestloop(List *tlist,
List *nestParams, List *nestParams,
Plan *lefttree, Plan *lefttree,
Plan *righttree, Plan *righttree,
JoinType jointype) JoinType jointype,
bool inner_unique)
{ {
NestLoop *node = makeNode(NestLoop); NestLoop *node = makeNode(NestLoop);
Plan *plan = &node->join.plan; Plan *plan = &node->join.plan;
...@@ -5359,6 +5365,7 @@ make_nestloop(List *tlist, ...@@ -5359,6 +5365,7 @@ make_nestloop(List *tlist,
plan->lefttree = lefttree; plan->lefttree = lefttree;
plan->righttree = righttree; plan->righttree = righttree;
node->join.jointype = jointype; node->join.jointype = jointype;
node->join.inner_unique = inner_unique;
node->join.joinqual = joinclauses; node->join.joinqual = joinclauses;
node->nestParams = nestParams; node->nestParams = nestParams;
...@@ -5372,7 +5379,8 @@ make_hashjoin(List *tlist, ...@@ -5372,7 +5379,8 @@ make_hashjoin(List *tlist,
List *hashclauses, List *hashclauses,
Plan *lefttree, Plan *lefttree,
Plan *righttree, Plan *righttree,
JoinType jointype) JoinType jointype,
bool inner_unique)
{ {
HashJoin *node = makeNode(HashJoin); HashJoin *node = makeNode(HashJoin);
Plan *plan = &node->join.plan; Plan *plan = &node->join.plan;
...@@ -5383,6 +5391,7 @@ make_hashjoin(List *tlist, ...@@ -5383,6 +5391,7 @@ make_hashjoin(List *tlist,
plan->righttree = righttree; plan->righttree = righttree;
node->hashclauses = hashclauses; node->hashclauses = hashclauses;
node->join.jointype = jointype; node->join.jointype = jointype;
node->join.inner_unique = inner_unique;
node->join.joinqual = joinclauses; node->join.joinqual = joinclauses;
return node; return node;
...@@ -5424,7 +5433,9 @@ make_mergejoin(List *tlist, ...@@ -5424,7 +5433,9 @@ make_mergejoin(List *tlist,
bool *mergenullsfirst, bool *mergenullsfirst,
Plan *lefttree, Plan *lefttree,
Plan *righttree, Plan *righttree,
JoinType jointype) JoinType jointype,
bool inner_unique,
bool skip_mark_restore)
{ {
MergeJoin *node = makeNode(MergeJoin); MergeJoin *node = makeNode(MergeJoin);
Plan *plan = &node->join.plan; Plan *plan = &node->join.plan;
...@@ -5433,12 +5444,14 @@ make_mergejoin(List *tlist, ...@@ -5433,12 +5444,14 @@ make_mergejoin(List *tlist,
plan->qual = otherclauses; plan->qual = otherclauses;
plan->lefttree = lefttree; plan->lefttree = lefttree;
plan->righttree = righttree; plan->righttree = righttree;
node->skip_mark_restore = skip_mark_restore;
node->mergeclauses = mergeclauses; node->mergeclauses = mergeclauses;
node->mergeFamilies = mergefamilies; node->mergeFamilies = mergefamilies;
node->mergeCollations = mergecollations; node->mergeCollations = mergecollations;
node->mergeStrategies = mergestrategies; node->mergeStrategies = mergestrategies;
node->mergeNullsFirst = mergenullsfirst; node->mergeNullsFirst = mergenullsfirst;
node->join.jointype = jointype; node->join.jointype = jointype;
node->join.inner_unique = inner_unique;
node->join.joinqual = joinclauses; node->join.joinqual = joinclauses;
return node; return node;
......
...@@ -2049,8 +2049,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) ...@@ -2049,8 +2049,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
* 'joinrel' is the join relation. * 'joinrel' is the join relation.
* 'jointype' is the type of join required * 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_nestloop * 'workspace' is the result from initial_cost_nestloop
* 'sjinfo' is extra info about the join for selectivity estimation * 'extra' contains various information about the join
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the outer path * 'outer_path' is the outer path
* 'inner_path' is the inner path * 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
...@@ -2064,8 +2063,7 @@ create_nestloop_path(PlannerInfo *root, ...@@ -2064,8 +2063,7 @@ create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
SemiAntiJoinFactors *semifactors,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
...@@ -2109,7 +2107,7 @@ create_nestloop_path(PlannerInfo *root, ...@@ -2109,7 +2107,7 @@ create_nestloop_path(PlannerInfo *root,
joinrel, joinrel,
outer_path, outer_path,
inner_path, inner_path,
sjinfo, extra->sjinfo,
required_outer, required_outer,
&restrict_clauses); &restrict_clauses);
pathnode->path.parallel_aware = false; pathnode->path.parallel_aware = false;
...@@ -2119,11 +2117,12 @@ create_nestloop_path(PlannerInfo *root, ...@@ -2119,11 +2117,12 @@ create_nestloop_path(PlannerInfo *root,
pathnode->path.parallel_workers = outer_path->parallel_workers; pathnode->path.parallel_workers = outer_path->parallel_workers;
pathnode->path.pathkeys = pathkeys; pathnode->path.pathkeys = pathkeys;
pathnode->jointype = jointype; pathnode->jointype = jointype;
pathnode->inner_unique = extra->inner_unique;
pathnode->outerjoinpath = outer_path; pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path; pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses; pathnode->joinrestrictinfo = restrict_clauses;
final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors); final_cost_nestloop(root, pathnode, workspace, extra);
return pathnode; return pathnode;
} }
...@@ -2136,7 +2135,7 @@ create_nestloop_path(PlannerInfo *root, ...@@ -2136,7 +2135,7 @@ create_nestloop_path(PlannerInfo *root,
* 'joinrel' is the join relation * 'joinrel' is the join relation
* 'jointype' is the type of join required * 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_mergejoin * 'workspace' is the result from initial_cost_mergejoin
* 'sjinfo' is extra info about the join for selectivity estimation * 'extra' contains various information about the join
* 'outer_path' is the outer path * 'outer_path' is the outer path
* 'inner_path' is the inner path * 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
...@@ -2152,7 +2151,7 @@ create_mergejoin_path(PlannerInfo *root, ...@@ -2152,7 +2151,7 @@ create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
...@@ -2172,7 +2171,7 @@ create_mergejoin_path(PlannerInfo *root, ...@@ -2172,7 +2171,7 @@ create_mergejoin_path(PlannerInfo *root,
joinrel, joinrel,
outer_path, outer_path,
inner_path, inner_path,
sjinfo, extra->sjinfo,
required_outer, required_outer,
&restrict_clauses); &restrict_clauses);
pathnode->jpath.path.parallel_aware = false; pathnode->jpath.path.parallel_aware = false;
...@@ -2182,15 +2181,17 @@ create_mergejoin_path(PlannerInfo *root, ...@@ -2182,15 +2181,17 @@ create_mergejoin_path(PlannerInfo *root,
pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
pathnode->jpath.path.pathkeys = pathkeys; pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.jointype = jointype; pathnode->jpath.jointype = jointype;
pathnode->jpath.inner_unique = extra->inner_unique;
pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_mergeclauses = mergeclauses; pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys; pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys; pathnode->innersortkeys = innersortkeys;
/* pathnode->skip_mark_restore will be set by final_cost_mergejoin */
/* pathnode->materialize_inner will be set by final_cost_mergejoin */ /* pathnode->materialize_inner will be set by final_cost_mergejoin */
final_cost_mergejoin(root, pathnode, workspace, sjinfo); final_cost_mergejoin(root, pathnode, workspace, extra);
return pathnode; return pathnode;
} }
...@@ -2202,8 +2203,7 @@ create_mergejoin_path(PlannerInfo *root, ...@@ -2202,8 +2203,7 @@ create_mergejoin_path(PlannerInfo *root,
* 'joinrel' is the join relation * 'joinrel' is the join relation
* 'jointype' is the type of join required * 'jointype' is the type of join required
* 'workspace' is the result from initial_cost_hashjoin * 'workspace' is the result from initial_cost_hashjoin
* 'sjinfo' is extra info about the join for selectivity estimation * 'extra' contains various information about the join
* 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the cheapest outer path * 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path * 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
...@@ -2216,8 +2216,7 @@ create_hashjoin_path(PlannerInfo *root, ...@@ -2216,8 +2216,7 @@ create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
SemiAntiJoinFactors *semifactors,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
...@@ -2234,7 +2233,7 @@ create_hashjoin_path(PlannerInfo *root, ...@@ -2234,7 +2233,7 @@ create_hashjoin_path(PlannerInfo *root,
joinrel, joinrel,
outer_path, outer_path,
inner_path, inner_path,
sjinfo, extra->sjinfo,
required_outer, required_outer,
&restrict_clauses); &restrict_clauses);
pathnode->jpath.path.parallel_aware = false; pathnode->jpath.path.parallel_aware = false;
...@@ -2256,13 +2255,14 @@ create_hashjoin_path(PlannerInfo *root, ...@@ -2256,13 +2255,14 @@ create_hashjoin_path(PlannerInfo *root,
*/ */
pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.path.pathkeys = NIL;
pathnode->jpath.jointype = jointype; pathnode->jpath.jointype = jointype;
pathnode->jpath.inner_unique = extra->inner_unique;
pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_hashclauses = hashclauses; pathnode->path_hashclauses = hashclauses;
/* final_cost_hashjoin will fill in pathnode->num_batches */ /* final_cost_hashjoin will fill in pathnode->num_batches */
final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors); final_cost_hashjoin(root, pathnode, workspace, extra);
return pathnode; return pathnode;
} }
......
...@@ -126,6 +126,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) ...@@ -126,6 +126,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->lateral_vars = NIL; rel->lateral_vars = NIL;
rel->lateral_referencers = NULL; rel->lateral_referencers = NULL;
rel->indexlist = NIL; rel->indexlist = NIL;
rel->statlist = NIL;
rel->pages = 0; rel->pages = 0;
rel->tuples = 0; rel->tuples = 0;
rel->allvisfrac = 0; rel->allvisfrac = 0;
...@@ -137,6 +138,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) ...@@ -137,6 +138,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->useridiscurrent = false; rel->useridiscurrent = false;
rel->fdwroutine = NULL; rel->fdwroutine = NULL;
rel->fdw_private = NULL; rel->fdw_private = NULL;
rel->unique_for_rels = NIL;
rel->non_unique_for_rels = NIL;
rel->baserestrictinfo = NIL; rel->baserestrictinfo = NIL;
rel->baserestrictcost.startup = 0; rel->baserestrictcost.startup = 0;
rel->baserestrictcost.per_tuple = 0; rel->baserestrictcost.per_tuple = 0;
...@@ -147,7 +150,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) ...@@ -147,7 +150,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
/* /*
* Pass top parent's relids down the inheritance hierarchy. If the parent * Pass top parent's relids down the inheritance hierarchy. If the parent
* has top_parent_relids set, it's a direct or an indirect child of the top * has top_parent_relids set, it's a direct or an indirect child of the top
* parent indicated by top_parent_relids. By extention this child is also * parent indicated by top_parent_relids. By extension this child is also
* an indirect child of that parent. * an indirect child of that parent.
*/ */
if (parent) if (parent)
...@@ -503,6 +506,7 @@ build_join_rel(PlannerInfo *root, ...@@ -503,6 +506,7 @@ build_join_rel(PlannerInfo *root,
joinrel->lateral_vars = NIL; joinrel->lateral_vars = NIL;
joinrel->lateral_referencers = NULL; joinrel->lateral_referencers = NULL;
joinrel->indexlist = NIL; joinrel->indexlist = NIL;
joinrel->statlist = NIL;
joinrel->pages = 0; joinrel->pages = 0;
joinrel->tuples = 0; joinrel->tuples = 0;
joinrel->allvisfrac = 0; joinrel->allvisfrac = 0;
...@@ -514,6 +518,8 @@ build_join_rel(PlannerInfo *root, ...@@ -514,6 +518,8 @@ build_join_rel(PlannerInfo *root,
joinrel->useridiscurrent = false; joinrel->useridiscurrent = false;
joinrel->fdwroutine = NULL; joinrel->fdwroutine = NULL;
joinrel->fdw_private = NULL; joinrel->fdw_private = NULL;
joinrel->unique_for_rels = NIL;
joinrel->non_unique_for_rels = NIL;
joinrel->baserestrictinfo = NIL; joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0; joinrel->baserestrictcost.per_tuple = 0;
......
...@@ -1536,6 +1536,8 @@ typedef struct JoinState ...@@ -1536,6 +1536,8 @@ typedef struct JoinState
{ {
PlanState ps; PlanState ps;
JoinType jointype; JoinType jointype;
bool single_match; /* True if we should skip to next outer tuple
* after finding one inner match */
ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */ ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */
} JoinState; } JoinState;
...@@ -1561,6 +1563,7 @@ typedef struct NestLoopState ...@@ -1561,6 +1563,7 @@ typedef struct NestLoopState
* NumClauses number of mergejoinable join clauses * NumClauses number of mergejoinable join clauses
* Clauses info for each mergejoinable clause * Clauses info for each mergejoinable clause
* JoinState current state of ExecMergeJoin state machine * JoinState current state of ExecMergeJoin state machine
* SkipMarkRestore true if we may skip Mark and Restore operations
* ExtraMarks true to issue extra Mark operations on inner scan * ExtraMarks true to issue extra Mark operations on inner scan
* ConstFalseJoin true if we have a constant-false joinqual * ConstFalseJoin true if we have a constant-false joinqual
* FillOuter true if should emit unjoined outer tuples anyway * FillOuter true if should emit unjoined outer tuples anyway
...@@ -1585,6 +1588,7 @@ typedef struct MergeJoinState ...@@ -1585,6 +1588,7 @@ typedef struct MergeJoinState
int mj_NumClauses; int mj_NumClauses;
MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */ MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
int mj_JoinState; int mj_JoinState;
bool mj_SkipMarkRestore;
bool mj_ExtraMarks; bool mj_ExtraMarks;
bool mj_ConstFalseJoin; bool mj_ConstFalseJoin;
bool mj_FillOuter; bool mj_FillOuter;
......
...@@ -632,6 +632,7 @@ typedef struct CustomScan ...@@ -632,6 +632,7 @@ typedef struct CustomScan
* Join node * Join node
* *
* jointype: rule for joining tuples from left and right subtrees * jointype: rule for joining tuples from left and right subtrees
* inner_unique: each outer tuple can match to no more than one inner tuple
* joinqual: qual conditions that came from JOIN/ON or JOIN/USING * joinqual: qual conditions that came from JOIN/ON or JOIN/USING
* (plan.qual contains conditions that came from WHERE) * (plan.qual contains conditions that came from WHERE)
* *
...@@ -642,12 +643,18 @@ typedef struct CustomScan ...@@ -642,12 +643,18 @@ typedef struct CustomScan
* (But plan.qual is still applied before actually returning a tuple.) * (But plan.qual is still applied before actually returning a tuple.)
* For an outer join, only joinquals are allowed to be used as the merge * For an outer join, only joinquals are allowed to be used as the merge
* or hash condition of a merge or hash join. * or hash condition of a merge or hash join.
*
* inner_unique is set if the joinquals are such that no more than one inner
* tuple could match any given outer tuple. This allows the executor to
* skip searching for additional matches. (This must be provable from just
* the joinquals, ignoring plan.qual, due to where the executor tests it.)
* ---------------- * ----------------
*/ */
typedef struct Join typedef struct Join
{ {
Plan plan; Plan plan;
JoinType jointype; JoinType jointype;
bool inner_unique;
List *joinqual; /* JOIN quals (in addition to plan.qual) */ List *joinqual; /* JOIN quals (in addition to plan.qual) */
} Join; } Join;
...@@ -689,6 +696,7 @@ typedef struct NestLoopParam ...@@ -689,6 +696,7 @@ typedef struct NestLoopParam
typedef struct MergeJoin typedef struct MergeJoin
{ {
Join join; Join join;
bool skip_mark_restore; /* Can we skip mark/restore calls? */
List *mergeclauses; /* mergeclauses as expression trees */ List *mergeclauses; /* mergeclauses as expression trees */
/* these are arrays, but have the same length as the mergeclauses list: */ /* these are arrays, but have the same length as the mergeclauses list: */
Oid *mergeFamilies; /* per-clause OIDs of btree opfamilies */ Oid *mergeFamilies; /* per-clause OIDs of btree opfamilies */
......
...@@ -442,6 +442,19 @@ typedef struct PlannerInfo ...@@ -442,6 +442,19 @@ typedef struct PlannerInfo
* fdwroutine - function hooks for FDW, if foreign table (else NULL) * fdwroutine - function hooks for FDW, if foreign table (else NULL)
* fdw_private - private state for FDW, if foreign table (else NULL) * fdw_private - private state for FDW, if foreign table (else NULL)
* *
* Two fields are used to cache knowledge acquired during the join search
* about whether this rel is provably unique when being joined to given other
* relation(s), ie, it can have at most one row matching any given row from
* that join relation. Currently we only attempt such proofs, and thus only
* populate these fields, for base rels; but someday they might be used for
* join rels too:
*
* unique_for_rels - list of Relid sets, each one being a set of other
* rels for which this one has been proven unique
* non_unique_for_rels - list of Relid sets, each one being a set of
* other rels for which we have tried and failed to prove
* this one unique
*
* The presence of the remaining fields depends on the restrictions * The presence of the remaining fields depends on the restrictions
* and joins that the relation participates in: * and joins that the relation participates in:
* *
...@@ -562,6 +575,10 @@ typedef struct RelOptInfo ...@@ -562,6 +575,10 @@ typedef struct RelOptInfo
struct FdwRoutine *fdwroutine; struct FdwRoutine *fdwroutine;
void *fdw_private; void *fdw_private;
/* cache space for remembering if we have proven this relation unique */
List *unique_for_rels; /* known unique for these other relid set(s) */
List *non_unique_for_rels; /* known not unique for these set(s) */
/* used by various scans and joins: */ /* used by various scans and joins: */
List *baserestrictinfo; /* RestrictInfo structures (if base List *baserestrictinfo; /* RestrictInfo structures (if base
* rel) */ * rel) */
...@@ -572,8 +589,8 @@ typedef struct RelOptInfo ...@@ -572,8 +589,8 @@ typedef struct RelOptInfo
* involving this rel */ * involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */ bool has_eclass_joins; /* T means joininfo is incomplete */
/* used by "other" relations. */ /* used by "other" relations */
Relids top_parent_relids; /* Relids of topmost parents. */ Relids top_parent_relids; /* Relids of topmost parents */
} RelOptInfo; } RelOptInfo;
/* /*
...@@ -1272,6 +1289,9 @@ typedef struct JoinPath ...@@ -1272,6 +1289,9 @@ typedef struct JoinPath
JoinType jointype; JoinType jointype;
bool inner_unique; /* each outer tuple provably matches no more
* than one inner tuple */
Path *outerjoinpath; /* path for the outer side of the join */ Path *outerjoinpath; /* path for the outer side of the join */
Path *innerjoinpath; /* path for the inner side of the join */ Path *innerjoinpath; /* path for the inner side of the join */
...@@ -1314,6 +1334,13 @@ typedef JoinPath NestPath; ...@@ -1314,6 +1334,13 @@ typedef JoinPath NestPath;
* mergejoin. If it is not NIL then it is a PathKeys list describing * mergejoin. If it is not NIL then it is a PathKeys list describing
* the ordering that must be created by an explicit Sort node. * the ordering that must be created by an explicit Sort node.
* *
* skip_mark_restore is TRUE if the executor need not do mark/restore calls.
* Mark/restore overhead is usually required, but can be skipped if we know
* that the executor need find only one match per outer tuple, and that the
* mergeclauses are sufficient to identify a match. In such cases the
* executor can immediately advance the outer relation after processing a
match, and therefore it need never back up the inner relation.
*
* materialize_inner is TRUE if a Material node should be placed atop the * materialize_inner is TRUE if a Material node should be placed atop the
* inner input. This may appear with or without an inner Sort step. * inner input. This may appear with or without an inner Sort step.
*/ */
...@@ -1324,6 +1351,7 @@ typedef struct MergePath ...@@ -1324,6 +1351,7 @@ typedef struct MergePath
List *path_mergeclauses; /* join clauses to be used for merge */ List *path_mergeclauses; /* join clauses to be used for merge */
List *outersortkeys; /* keys for explicit sort, if any */ List *outersortkeys; /* keys for explicit sort, if any */
List *innersortkeys; /* keys for explicit sort, if any */ List *innersortkeys; /* keys for explicit sort, if any */
bool skip_mark_restore; /* can executor skip mark/restore? */
bool materialize_inner; /* add Materialize to inner? */ bool materialize_inner; /* add Materialize to inner? */
} MergePath; } MergePath;
...@@ -2112,8 +2140,8 @@ typedef struct PlannerParamItem ...@@ -2112,8 +2140,8 @@ typedef struct PlannerParamItem
} PlannerParamItem; } PlannerParamItem;
/* /*
* When making cost estimates for a SEMI or ANTI join, there are some * When making cost estimates for a SEMI/ANTI/inner_unique join, there are
* correction factors that are needed in both nestloop and hash joins * some correction factors that are needed in both nestloop and hash joins
* to account for the fact that the executor can stop scanning inner rows * to account for the fact that the executor can stop scanning inner rows
* as soon as it finds a match to the current outer row. These numbers * as soon as it finds a match to the current outer row. These numbers
* depend only on the selected outer and inner join relations, not on the * depend only on the selected outer and inner join relations, not on the
...@@ -2140,14 +2168,17 @@ typedef struct SemiAntiJoinFactors ...@@ -2140,14 +2168,17 @@ typedef struct SemiAntiJoinFactors
* clauses that apply to this join * clauses that apply to this join
* mergeclause_list is a list of RestrictInfo nodes for available * mergeclause_list is a list of RestrictInfo nodes for available
* mergejoin clauses in this join * mergejoin clauses in this join
* inner_unique is true if each outer tuple provably matches no more
* than one inner tuple
* sjinfo is extra info about special joins for selectivity estimation * sjinfo is extra info about special joins for selectivity estimation
* semifactors is as shown above (only valid for SEMI or ANTI joins) * semifactors is as shown above (only valid for SEMI/ANTI/inner_unique joins)
* param_source_rels are OK targets for parameterization of result paths * param_source_rels are OK targets for parameterization of result paths
*/ */
typedef struct JoinPathExtraData typedef struct JoinPathExtraData
{ {
List *restrictlist; List *restrictlist;
List *mergeclause_list; List *mergeclause_list;
bool inner_unique;
SpecialJoinInfo *sjinfo; SpecialJoinInfo *sjinfo;
SemiAntiJoinFactors semifactors; SemiAntiJoinFactors semifactors;
Relids param_source_rels; Relids param_source_rels;
......
...@@ -129,33 +129,29 @@ extern void initial_cost_nestloop(PlannerInfo *root, ...@@ -129,33 +129,29 @@ extern void initial_cost_nestloop(PlannerInfo *root,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
JoinType jointype, JoinType jointype,
Path *outer_path, Path *inner_path, Path *outer_path, Path *inner_path,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra);
SemiAntiJoinFactors *semifactors);
extern void final_cost_nestloop(PlannerInfo *root, NestPath *path, extern void final_cost_nestloop(PlannerInfo *root, NestPath *path,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra);
SemiAntiJoinFactors *semifactors);
extern void initial_cost_mergejoin(PlannerInfo *root, extern void initial_cost_mergejoin(PlannerInfo *root,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
JoinType jointype, JoinType jointype,
List *mergeclauses, List *mergeclauses,
Path *outer_path, Path *inner_path, Path *outer_path, Path *inner_path,
List *outersortkeys, List *innersortkeys, List *outersortkeys, List *innersortkeys,
SpecialJoinInfo *sjinfo); JoinPathExtraData *extra);
extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path, extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo); JoinPathExtraData *extra);
extern void initial_cost_hashjoin(PlannerInfo *root, extern void initial_cost_hashjoin(PlannerInfo *root,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
JoinType jointype, JoinType jointype,
List *hashclauses, List *hashclauses,
Path *outer_path, Path *inner_path, Path *outer_path, Path *inner_path,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra);
SemiAntiJoinFactors *semifactors);
extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path, extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra);
SemiAntiJoinFactors *semifactors);
extern void cost_gather(GatherPath *path, PlannerInfo *root, extern void cost_gather(GatherPath *path, PlannerInfo *root,
RelOptInfo *baserel, ParamPathInfo *param_info, double *rows); RelOptInfo *baserel, ParamPathInfo *param_info, double *rows);
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
......
...@@ -119,8 +119,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root, ...@@ -119,8 +119,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
SemiAntiJoinFactors *semifactors,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
...@@ -131,7 +130,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, ...@@ -131,7 +130,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
...@@ -145,8 +144,7 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root, ...@@ -145,8 +144,7 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
JoinType jointype, JoinType jointype,
JoinCostWorkspace *workspace, JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo, JoinPathExtraData *extra,
SemiAntiJoinFactors *semifactors,
Path *outer_path, Path *outer_path,
Path *inner_path, Path *inner_path,
List *restrict_clauses, List *restrict_clauses,
......
...@@ -105,6 +105,9 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); ...@@ -105,6 +105,9 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
extern bool query_supports_distinctness(Query *query); extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist);
/* /*
* prototypes for plan/setrefs.c * prototypes for plan/setrefs.c
......
...@@ -983,28 +983,30 @@ explain (costs off) select * ...@@ -983,28 +983,30 @@ explain (costs off) select *
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
QUERY PLAN QUERY PLAN
------------------------------------------------------- ------------------------------------------------------
Group HashAggregate
Group Key: t1.a, t1.b, t2.x, t2.y Group Key: t1.a, t1.b, t2.x, t2.y
-> Merge Join -> Hash Join
Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Index Scan using t1_pkey on t1 -> Seq Scan on t2
-> Index Scan using t2_pkey on t2 -> Hash
(6 rows) -> Seq Scan on t1
(7 rows)
-- Test case where t1 can be optimized but not t2 -- Test case where t1 can be optimized but not t2
explain (costs off) select t1.*,t2.x,t2.z explain (costs off) select t1.*,t2.x,t2.z
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
QUERY PLAN QUERY PLAN
------------------------------------------------------- ------------------------------------------------------
HashAggregate HashAggregate
Group Key: t1.a, t1.b, t2.x, t2.z Group Key: t1.a, t1.b, t2.x, t2.z
-> Merge Join -> Hash Join
Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Index Scan using t1_pkey on t1 -> Seq Scan on t2
-> Index Scan using t2_pkey on t2 -> Hash
(6 rows) -> Seq Scan on t1
(7 rows)
-- Cannot optimize when PK is deferrable -- Cannot optimize when PK is deferrable
explain (costs off) select * from t3 group by a,b,c; explain (costs off) select * from t3 group by a,b,c;
......
...@@ -317,12 +317,11 @@ explain (costs off) ...@@ -317,12 +317,11 @@ explain (costs off)
-> Index Scan using ec1_expr2 on ec1 ec1_1 -> Index Scan using ec1_expr2 on ec1 ec1_1
-> Index Scan using ec1_expr3 on ec1 ec1_2 -> Index Scan using ec1_expr3 on ec1 ec1_2
-> Index Scan using ec1_expr4 on ec1 ec1_3 -> Index Scan using ec1_expr4 on ec1 ec1_3
-> Materialize
-> Sort -> Sort
Sort Key: ec1.f1 USING < Sort Key: ec1.f1 USING <
-> Index Scan using ec1_pkey on ec1 -> Index Scan using ec1_pkey on ec1
Index Cond: (ff = '42'::bigint) Index Cond: (ff = '42'::bigint)
(20 rows) (19 rows)
-- check partially indexed scan -- check partially indexed scan
set enable_nestloop = on; set enable_nestloop = on;
...@@ -374,12 +373,11 @@ explain (costs off) ...@@ -374,12 +373,11 @@ explain (costs off)
Sort Key: (((ec1_2.ff + 3) + 1)) Sort Key: (((ec1_2.ff + 3) + 1))
-> Seq Scan on ec1 ec1_2 -> Seq Scan on ec1 ec1_2
-> Index Scan using ec1_expr4 on ec1 ec1_3 -> Index Scan using ec1_expr4 on ec1 ec1_3
-> Materialize
-> Sort -> Sort
Sort Key: ec1.f1 USING < Sort Key: ec1.f1 USING <
-> Index Scan using ec1_pkey on ec1 -> Index Scan using ec1_pkey on ec1
Index Cond: (ff = '42'::bigint) Index Cond: (ff = '42'::bigint)
(14 rows) (13 rows)
-- check effects of row-level security -- check effects of row-level security
set enable_nestloop = on; set enable_nestloop = on;
......
...@@ -3979,7 +3979,7 @@ select id from a where id in ( ...@@ -3979,7 +3979,7 @@ select id from a where id in (
); );
QUERY PLAN QUERY PLAN
---------------------------- ----------------------------
Hash Semi Join Hash Join
Hash Cond: (a.id = b.id) Hash Cond: (a.id = b.id)
-> Seq Scan on a -> Seq Scan on a
-> Hash -> Hash
...@@ -5327,3 +5327,310 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" ...@@ -5327,3 +5327,310 @@ ERROR: invalid reference to FROM-clause entry for table "xx1"
LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
^ ^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
--
-- test planner's ability to mark joins as unique
--
create table j1 (id int primary key);
create table j2 (id int primary key);
create table j3 (id int);
insert into j1 values(1),(2),(3);
insert into j2 values(1),(2),(3);
insert into j3 values(1),(1);
analyze j1;
analyze j2;
analyze j3;
-- ensure join is properly marked as unique
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure join is not unique when not an equi-join
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id > j2.id;
QUERY PLAN
-----------------------------------
Nested Loop
Output: j1.id, j2.id
Join Filter: (j1.id > j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(9 rows)
-- ensure non-unique rel is not chosen as inner
explain (verbose, costs off)
select * from j1 inner join j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id, j3.id
Inner Unique: true
Hash Cond: (j3.id = j1.id)
-> Seq Scan on public.j3
Output: j3.id
-> Hash
Output: j1.id
-> Seq Scan on public.j1
Output: j1.id
(10 rows)
-- ensure left join is marked as unique
explain (verbose, costs off)
select * from j1 left join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Left Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure right join is marked as unique
explain (verbose, costs off)
select * from j1 right join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Left Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j2.id = j1.id)
-> Seq Scan on public.j2
Output: j2.id
-> Hash
Output: j1.id
-> Seq Scan on public.j1
Output: j1.id
(10 rows)
-- ensure full join is marked as unique
explain (verbose, costs off)
select * from j1 full join j2 on j1.id = j2.id;
QUERY PLAN
-----------------------------------
Hash Full Join
Output: j1.id, j2.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- a clauseless (cross) join can't be unique
explain (verbose, costs off)
select * from j1 cross join j2;
QUERY PLAN
-----------------------------------
Nested Loop
Output: j1.id, j2.id
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(8 rows)
-- ensure a natural join is marked as unique
explain (verbose, costs off)
select * from j1 natural join j2;
QUERY PLAN
-----------------------------------
Hash Join
Output: j1.id
Inner Unique: true
Hash Cond: (j1.id = j2.id)
-> Seq Scan on public.j1
Output: j1.id
-> Hash
Output: j2.id
-> Seq Scan on public.j2
Output: j2.id
(10 rows)
-- ensure a distinct clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j3.id
-> Unique
Output: j3.id
-> Sort
Output: j3.id
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
(15 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
-> Seq Scan on public.j1
Output: j1.id
-> Materialize
Output: j3.id
-> Group
Output: j3.id
Group Key: j3.id
-> Sort
Output: j3.id
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
(16 rows)
drop table j1;
drop table j2;
drop table j3;
-- test more complex permutations of unique joins
create table j1 (id1 int, id2 int, primary key(id1,id2));
create table j2 (id1 int, id2 int, primary key(id1,id2));
create table j3 (id1 int, id2 int, primary key(id1,id2));
insert into j1 values(1,1),(1,2);
insert into j2 values(1,1);
insert into j3 values(1,1);
analyze j1;
analyze j2;
analyze j3;
-- ensure there's no unique join when not all columns which are part of the
-- unique index are seen in the join clause
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1;
QUERY PLAN
------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
(7 rows)
-- ensure proper unique detection with multiple join quals
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
QUERY PLAN
----------------------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Inner Unique: true
Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
-> Materialize
Output: j2.id1, j2.id2
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
(10 rows)
-- ensure we don't detect the join to be unique when quals are not part of the
-- join condition
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
QUERY PLAN
------------------------------------------
Nested Loop
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
Filter: (j1.id2 = 1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
(8 rows)
-- as above, but for left joins.
explain (verbose, costs off)
select * from j1
left join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
QUERY PLAN
------------------------------------------
Nested Loop Left Join
Output: j1.id1, j1.id2, j2.id1, j2.id2
Join Filter: (j1.id1 = j2.id1)
-> Seq Scan on public.j1
Output: j1.id1, j1.id2
Filter: (j1.id2 = 1)
-> Seq Scan on public.j2
Output: j2.id1, j2.id2
(8 rows)
-- validate logic in merge joins which skips mark and restore.
-- it should only do this if all quals which were used to detect the unique
-- are present as join quals, and not plain quals.
set enable_nestloop to 0;
set enable_hashjoin to 0;
set enable_sort to 0;
-- create an index that will be preferred over the PK to perform the join
create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
explain (costs off) select * from j1 j1
inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
QUERY PLAN
--------------------------------------------
Merge Join
Merge Cond: (j1.id1 = j2.id1)
Join Filter: (j1.id2 = j2.id2)
-> Index Scan using j1_id1_idx on j1
-> Index Scan using j1_id1_idx on j1 j2
(5 rows)
select * from j1 j1
inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
id1 | id2 | id1 | id2
-----+-----+-----+-----
1 | 1 | 1 | 1
1 | 2 | 1 | 2
(2 rows)
reset enable_nestloop;
reset enable_hashjoin;
reset enable_sort;
drop table j1;
drop table j2;
drop table j3;
...@@ -1732,3 +1732,127 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) ...@@ -1732,3 +1732,127 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1)
delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
--
-- Test the planner's ability to mark joins as unique, i.e. to prove that
-- each outer row can match at most one inner row.  A join proven unique
-- is reported as "Inner Unique: true" in EXPLAIN (VERBOSE) output.
--
-- j1 and j2 each have a primary key on id; j3 has no unique constraint
-- and is loaded with duplicate ids.
create table j1 (id int primary key);
create table j2 (id int primary key);
create table j3 (id int);
insert into j1 values(1),(2),(3);
insert into j2 values(1),(2),(3);
insert into j3 values(1),(1);
analyze j1;
analyze j2;
analyze j3;
-- ensure an equi-join on the inner rel's primary key is marked as unique
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id = j2.id;
-- ensure the join is not marked unique when the join clause is not an
-- equality (a range comparison can match many inner rows)
explain (verbose, costs off)
select * from j1 inner join j2 on j1.id > j2.id;
-- ensure the non-unique rel (j3) is not chosen as the inner side of a
-- unique join; the join can only be unique with j1 as the inner rel
explain (verbose, costs off)
select * from j1 inner join j3 on j1.id = j3.id;
-- ensure left join is marked as unique
explain (verbose, costs off)
select * from j1 left join j2 on j1.id = j2.id;
-- ensure right join is marked as unique
explain (verbose, costs off)
select * from j1 right join j2 on j1.id = j2.id;
-- ensure full join is marked as unique
explain (verbose, costs off)
select * from j1 full join j2 on j1.id = j2.id;
-- a clauseless (cross) join can't be unique: there are no join clauses
-- from which uniqueness could be proven
explain (verbose, costs off)
select * from j1 cross join j2;
-- ensure a natural join is marked as unique (the only common column is id,
-- so this joins on j1.id = j2.id)
explain (verbose, costs off)
select * from j1 natural join j2;
-- ensure a distinct clause allows the inner to become unique, even though
-- the underlying table j3 has no unique index
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
-- ensure a group by clause allows the inner to become unique, even though
-- the underlying table j3 has no unique index
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-- clean up; the same table names are reused by the tests that follow
drop table j1;
drop table j2;
drop table j3;
-- test more complex permutations of unique joins, using tables whose
-- primary key spans two columns (id1, id2)
-- NOTE(review): j3 is created, populated and dropped in this section but
-- is not referenced by any of the queries below.
create table j1 (id1 int, id2 int, primary key(id1,id2));
create table j2 (id1 int, id2 int, primary key(id1,id2));
create table j3 (id1 int, id2 int, primary key(id1,id2));
insert into j1 values(1,1),(1,2);
insert into j2 values(1,1);
insert into j3 values(1,1);
analyze j1;
analyze j2;
analyze j3;
-- ensure there's no unique join when not all columns which are part of the
-- unique index are seen in the join clause
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1;
-- ensure proper unique detection with multiple join quals (both columns of
-- the primary key are matched by equality join clauses)
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
-- ensure we don't detect the join to be unique when quals are not part of the
-- join condition: here the join clause covers only id1, and the id2 qual is
-- a WHERE restriction on j1 rather than a join clause against j2
explain (verbose, costs off)
select * from j1
inner join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
-- as above, but for left joins.
explain (verbose, costs off)
select * from j1
left join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
-- validate the logic in merge joins which skips mark and restore.
-- it should only do this if all quals which were used to prove the inner
-- side unique are present as merge join quals, and not as plain quals.
-- disable nestloop and hash joins so that a merge join is planned
-- (presumably enable_sort is disabled to favor index-ordered inputs over
-- explicit sorts -- TODO confirm)
set enable_nestloop to 0;
set enable_hashjoin to 0;
set enable_sort to 0;
-- create an index that will be preferred over the PK to perform the join;
-- it is a partial index on id1 only, so id2 cannot be a merge clause when
-- this index supplies the sort order
create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
explain (costs off) select * from j1 j1
inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
-- both rows of j1 share id1 = 1, so the id1 comparison alone matches more
-- than one inner row per outer row; the self-join must still return both
-- the (1,1) and (1,2) pairs
select * from j1 j1
inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
-- restore the planner settings changed above
reset enable_nestloop;
reset enable_hashjoin;
reset enable_sort;
drop table j1;
drop table j2;
drop table j3;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment