Commit f49842d1 authored by Robert Haas's avatar Robert Haas

Basic partition-wise join functionality.

Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually.  This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels.  This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation.  In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do have that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side.  All the same, for now,
turn this feature off by default.

Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical.  It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.

Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me.  A few final adjustments by me.

Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
parent fe9ba28e
...@@ -7346,3 +7346,123 @@ AND ftoptions @> array['fetch_size=60000']; ...@@ -7346,3 +7346,123 @@ AND ftoptions @> array['fetch_size=60000'];
(1 row) (1 row)
ROLLBACK; ROLLBACK;
-- ===================================================================
-- test partition-wise-joins
-- ===================================================================
SET enable_partition_wise_join=on;
CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
CREATE TABLE fprt1_p1 (LIKE fprt1);
CREATE TABLE fprt1_p2 (LIKE fprt1);
INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
ANALYZE fprt1;
ANALYZE fprt1_p1;
ANALYZE fprt1_p2;
CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
CREATE TABLE fprt2_p1 (LIKE fprt2);
CREATE TABLE fprt2_p2 (LIKE fprt2);
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
ANALYZE fprt2_p1;
ANALYZE fprt2_p2;
-- inner join three tables
EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: t1.a, t3.c
-> Append
-> Foreign Scan
Relations: ((public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)) INNER JOIN (public.ftprt1_p1 t3)
-> Foreign Scan
Relations: ((public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)) INNER JOIN (public.ftprt1_p2 t3)
(7 rows)
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
a | b | c
-----+-----+------
0 | 0 | 0000
150 | 150 | 0003
250 | 250 | 0005
400 | 400 | 0008
(4 rows)
-- left outer join + nullable clasue
EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
QUERY PLAN
-----------------------------------------------------------------------------------
Sort
Sort Key: t1.a, ftprt2_p1.b, ftprt2_p1.c
-> Append
-> Foreign Scan
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
(5 rows)
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
a | b | c
---+---+------
0 | 0 | 0000
2 | |
4 | |
6 | 6 | 0000
8 | |
(5 rows)
-- with whole-row reference
EXPLAIN (COSTS OFF)
SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
QUERY PLAN
---------------------------------------------------------------------------------
Sort
Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
-> Append
-> Foreign Scan
Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
-> Foreign Scan
Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
(7 rows)
SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
t1 | t2
----------------+----------------
(0,0,0000) | (0,0,0000)
(150,150,0003) | (150,150,0003)
(250,250,0005) | (250,250,0005)
(400,400,0008) | (400,400,0008)
(4 rows)
-- join with lateral reference
EXPLAIN (COSTS OFF)
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
QUERY PLAN
---------------------------------------------------------------------------------
Sort
Sort Key: t1.a, t1.b
-> Append
-> Foreign Scan
Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
-> Foreign Scan
Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
(7 rows)
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
a | b
-----+-----
0 | 0
150 | 150
250 | 250
400 | 400
(4 rows)
RESET enable_partition_wise_join;
...@@ -1764,3 +1764,56 @@ WHERE ftrelid = 'table30000'::regclass ...@@ -1764,3 +1764,56 @@ WHERE ftrelid = 'table30000'::regclass
AND ftoptions @> array['fetch_size=60000']; AND ftoptions @> array['fetch_size=60000'];
ROLLBACK; ROLLBACK;
-- ===================================================================
-- test partition-wise-joins
-- ===================================================================
SET enable_partition_wise_join=on;
CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
CREATE TABLE fprt1_p1 (LIKE fprt1);
CREATE TABLE fprt1_p2 (LIKE fprt1);
INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
ANALYZE fprt1;
ANALYZE fprt1_p1;
ANALYZE fprt1_p2;
CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
CREATE TABLE fprt2_p1 (LIKE fprt2);
CREATE TABLE fprt2_p2 (LIKE fprt2);
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
ANALYZE fprt2_p1;
ANALYZE fprt2_p2;
-- inner join three tables
EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
-- left outer join + nullable clasue
EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
-- with whole-row reference
EXPLAIN (COSTS OFF)
SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
-- join with lateral reference
EXPLAIN (COSTS OFF)
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
RESET enable_partition_wise_join;
...@@ -3632,6 +3632,26 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" ...@@ -3632,6 +3632,26 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry id="guc-enable-partition-wise-join" xreflabel="enable_partition_wise_join">
<term><varname>enable_partition_wise_join</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>enable_partition_wise_join</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables or disables the query planner's use of partition-wise join,
which allows a join between partitioned tables to be performed by
joining the matching partitions. Partition-wise join currently applies
only when the join conditions include all the partition keys, which
must be of the same data type and have exactly matching sets of child
partitions. Because partition-wise join planning can use significantly
more CPU time and memory during planning, the default is
<literal>off</>.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan"> <varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
<term><varname>enable_seqscan</varname> (<type>boolean</type>) <term><varname>enable_seqscan</varname> (<type>boolean</type>)
<indexterm> <indexterm>
......
...@@ -1292,6 +1292,26 @@ ShutdownForeignScan(ForeignScanState *node); ...@@ -1292,6 +1292,26 @@ ShutdownForeignScan(ForeignScanState *node);
</para> </para>
</sect2> </sect2>
<sect2 id="fdw-callbacks-reparameterize-paths">
<title>FDW Routines For Reparameterization Of Paths</title>
<para>
<programlisting>
List *
ReparameterizeForeignPathByChild(PlannerInfo *root, List *fdw_private,
RelOptInfo *child_rel);
</programlisting>
This function is called while converting a path parameterized by the
top-most parent of the given child relation <literal>child_rel</> to be
parameterized by the child relation. The function is used to reparameterize
any paths or translate any expression nodes saved in the given
<literal>fdw_private</> member of a <structname>ForeignPath</>. The
callback may use <literal>reparameterize_path_by_child</>,
<literal>adjust_appendrel_attrs</> or
<literal>adjust_appendrel_attrs_multilevel</> as required.
</para>
</sect2>
</sect1> </sect1>
<sect1 id="fdw-helpers"> <sect1 id="fdw-helpers">
......
...@@ -1075,3 +1075,29 @@ be desirable to postpone the Gather stage until as near to the top of the ...@@ -1075,3 +1075,29 @@ be desirable to postpone the Gather stage until as near to the top of the
plan as possible. Expanding the range of cases in which more work can be plan as possible. Expanding the range of cases in which more work can be
pushed below the Gather (and costing them accurately) is likely to keep us pushed below the Gather (and costing them accurately) is likely to keep us
busy for a long time to come. busy for a long time to come.
Partition-wise joins
--------------------
A join between two similarly partitioned tables can be broken down into joins
between their matching partitions if there exists an equi-join condition
between the partition keys of the joining tables. The equi-join between
partition keys implies that all join partners for a given row in one
partitioned table must be in the corresponding partition of the other
partitioned table. Because of this, the join between partitioned tables can be
broken into joins between the matching partitions. The resultant join is
partitioned in the same way as the joining relations, thus allowing an N-way
join between similarly partitioned tables having equi-join conditions between
their partition keys to be broken down into N-way joins between their matching
partitions. This technique of breaking down a join between partitioned tables
into joins between their partitions is called partition-wise join. We will
use the term "partitioned relation" for either a partitioned table or a join
between compatibly partitioned tables.
The partitioning properties of a partitioned relation are stored in its
RelOptInfo. The information about the data types of the partition keys is
stored in the PartitionSchemeData structure. The planner maintains a list of
canonical partition schemes (distinct PartitionSchemeData objects) so that the
RelOptInfos of any two partitioned relations with the same partitioning scheme
point to the same PartitionSchemeData object. This reduces memory consumed by
PartitionSchemeData objects and makes it easy to compare the partition schemes
of joining relations.
...@@ -264,6 +264,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force) ...@@ -264,6 +264,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
/* Keep searching if join order is not valid */ /* Keep searching if join order is not valid */
if (joinrel) if (joinrel)
{ {
/* Create paths for partition-wise joins. */
generate_partition_wise_join_paths(root, joinrel);
/* Create GatherPaths for any useful partial paths for rel */ /* Create GatherPaths for any useful partial paths for rel */
generate_gather_paths(root, joinrel); generate_gather_paths(root, joinrel);
......
This diff is collapsed.
...@@ -127,6 +127,7 @@ bool enable_material = true; ...@@ -127,6 +127,7 @@ bool enable_material = true;
bool enable_mergejoin = true; bool enable_mergejoin = true;
bool enable_hashjoin = true; bool enable_hashjoin = true;
bool enable_gathermerge = true; bool enable_gathermerge = true;
bool enable_partition_wise_join = false;
typedef struct typedef struct
{ {
......
...@@ -26,9 +26,19 @@ ...@@ -26,9 +26,19 @@
/* Hook for plugins to get control in add_paths_to_joinrel() */ /* Hook for plugins to get control in add_paths_to_joinrel() */
set_join_pathlist_hook_type set_join_pathlist_hook = NULL; set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
#define PATH_PARAM_BY_REL(path, rel) \ /*
* Paths parameterized by the parent can be considered to be parameterized by
* any of its children.
*/
#define PATH_PARAM_BY_PARENT(path, rel) \
((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
(rel)->top_parent_relids))
#define PATH_PARAM_BY_REL_SELF(path, rel) \
((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
#define PATH_PARAM_BY_REL(path, rel) \
(PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
static void try_partial_mergejoin_path(PlannerInfo *root, static void try_partial_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel, RelOptInfo *joinrel,
Path *outer_path, Path *outer_path,
...@@ -115,6 +125,19 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -115,6 +125,19 @@ add_paths_to_joinrel(PlannerInfo *root,
JoinPathExtraData extra; JoinPathExtraData extra;
bool mergejoin_allowed = true; bool mergejoin_allowed = true;
ListCell *lc; ListCell *lc;
Relids joinrelids;
/*
* PlannerInfo doesn't contain the SpecialJoinInfos created for joins
* between child relations, even if there is a SpecialJoinInfo node for
* the join between the topmost parents. So, while calculating Relids set
* representing the restriction, consider relids of topmost parent of
* partitions.
*/
if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
joinrelids = joinrel->top_parent_relids;
else
joinrelids = joinrel->relids;
extra.restrictlist = restrictlist; extra.restrictlist = restrictlist;
extra.mergeclause_list = NIL; extra.mergeclause_list = NIL;
...@@ -211,16 +234,16 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -211,16 +234,16 @@ add_paths_to_joinrel(PlannerInfo *root,
* join has already been proven legal.) If the SJ is relevant, it * join has already been proven legal.) If the SJ is relevant, it
* presents constraints for joining to anything not in its RHS. * presents constraints for joining to anything not in its RHS.
*/ */
if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) && if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
!bms_overlap(joinrel->relids, sjinfo2->min_lefthand)) !bms_overlap(joinrelids, sjinfo2->min_lefthand))
extra.param_source_rels = bms_join(extra.param_source_rels, extra.param_source_rels = bms_join(extra.param_source_rels,
bms_difference(root->all_baserels, bms_difference(root->all_baserels,
sjinfo2->min_righthand)); sjinfo2->min_righthand));
/* full joins constrain both sides symmetrically */ /* full joins constrain both sides symmetrically */
if (sjinfo2->jointype == JOIN_FULL && if (sjinfo2->jointype == JOIN_FULL &&
bms_overlap(joinrel->relids, sjinfo2->min_lefthand) && bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
!bms_overlap(joinrel->relids, sjinfo2->min_righthand)) !bms_overlap(joinrelids, sjinfo2->min_righthand))
extra.param_source_rels = bms_join(extra.param_source_rels, extra.param_source_rels = bms_join(extra.param_source_rels,
bms_difference(root->all_baserels, bms_difference(root->all_baserels,
sjinfo2->min_lefthand)); sjinfo2->min_lefthand));
...@@ -347,11 +370,25 @@ try_nestloop_path(PlannerInfo *root, ...@@ -347,11 +370,25 @@ try_nestloop_path(PlannerInfo *root,
JoinCostWorkspace workspace; JoinCostWorkspace workspace;
RelOptInfo *innerrel = inner_path->parent; RelOptInfo *innerrel = inner_path->parent;
RelOptInfo *outerrel = outer_path->parent; RelOptInfo *outerrel = outer_path->parent;
Relids innerrelids = innerrel->relids; Relids innerrelids;
Relids outerrelids = outerrel->relids; Relids outerrelids;
Relids inner_paramrels = PATH_REQ_OUTER(inner_path); Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
Relids outer_paramrels = PATH_REQ_OUTER(outer_path); Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
/*
* Paths are parameterized by top-level parents, so run parameterization
* tests on the parent relids.
*/
if (innerrel->top_parent_relids)
innerrelids = innerrel->top_parent_relids;
else
innerrelids = innerrel->relids;
if (outerrel->top_parent_relids)
outerrelids = outerrel->top_parent_relids;
else
outerrelids = outerrel->relids;
/* /*
* Check to see if proposed path is still parameterized, and reject if the * Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible --- unless allow_star_schema_join * parameterization wouldn't be sensible --- unless allow_star_schema_join
...@@ -387,6 +424,27 @@ try_nestloop_path(PlannerInfo *root, ...@@ -387,6 +424,27 @@ try_nestloop_path(PlannerInfo *root,
workspace.startup_cost, workspace.total_cost, workspace.startup_cost, workspace.total_cost,
pathkeys, required_outer)) pathkeys, required_outer))
{ {
/*
* If the inner path is parameterized, it is parameterized by the
* topmost parent of the outer rel, not the outer rel itself. Fix
* that.
*/
if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
{
inner_path = reparameterize_path_by_child(root, inner_path,
outer_path->parent);
/*
* If we could not translate the path, we can't create nest loop
* path.
*/
if (!inner_path)
{
bms_free(required_outer);
return;
}
}
add_path(joinrel, (Path *) add_path(joinrel, (Path *)
create_nestloop_path(root, create_nestloop_path(root,
joinrel, joinrel,
...@@ -432,8 +490,20 @@ try_partial_nestloop_path(PlannerInfo *root, ...@@ -432,8 +490,20 @@ try_partial_nestloop_path(PlannerInfo *root,
if (inner_path->param_info != NULL) if (inner_path->param_info != NULL)
{ {
Relids inner_paramrels = inner_path->param_info->ppi_req_outer; Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
RelOptInfo *outerrel = outer_path->parent;
Relids outerrelids;
/*
* The inner and outer paths are parameterized, if at all, by the top
* level parents, not the child relations, so we must use those relids
* for our parameterization tests.
*/
if (outerrel->top_parent_relids)
outerrelids = outerrel->top_parent_relids;
else
outerrelids = outerrel->relids;
if (!bms_is_subset(inner_paramrels, outer_path->parent->relids)) if (!bms_is_subset(inner_paramrels, outerrelids))
return; return;
} }
...@@ -446,6 +516,22 @@ try_partial_nestloop_path(PlannerInfo *root, ...@@ -446,6 +516,22 @@ try_partial_nestloop_path(PlannerInfo *root,
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return; return;
/*
* If the inner path is parameterized, it is parameterized by the topmost
* parent of the outer rel, not the outer rel itself. Fix that.
*/
if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
{
inner_path = reparameterize_path_by_child(root, inner_path,
outer_path->parent);
/*
* If we could not translate the path, we can't create nest loop path.
*/
if (!inner_path)
return;
}
/* Might be good enough to be worth trying, so let's try it. */ /* Might be good enough to be worth trying, so let's try it. */
add_partial_path(joinrel, (Path *) add_partial_path(joinrel, (Path *)
create_nestloop_path(root, create_nestloop_path(root,
......
This diff is collapsed.
...@@ -250,7 +250,8 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, ...@@ -250,7 +250,8 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec, static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
TargetEntry *tle, TargetEntry *tle,
Relids relids); Relids relids);
static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys); static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
Relids relids);
static Sort *make_sort_from_groupcols(List *groupcls, static Sort *make_sort_from_groupcols(List *groupcls,
AttrNumber *grpColIdx, AttrNumber *grpColIdx,
Plan *lefttree); Plan *lefttree);
...@@ -1652,7 +1653,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) ...@@ -1652,7 +1653,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
subplan = create_plan_recurse(root, best_path->subpath, subplan = create_plan_recurse(root, best_path->subpath,
flags | CP_SMALL_TLIST); flags | CP_SMALL_TLIST);
plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys); plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL);
copy_generic_path_info(&plan->plan, (Path *) best_path); copy_generic_path_info(&plan->plan, (Path *) best_path);
...@@ -3771,6 +3772,8 @@ create_mergejoin_plan(PlannerInfo *root, ...@@ -3771,6 +3772,8 @@ create_mergejoin_plan(PlannerInfo *root,
ListCell *lc; ListCell *lc;
ListCell *lop; ListCell *lop;
ListCell *lip; ListCell *lip;
Path *outer_path = best_path->jpath.outerjoinpath;
Path *inner_path = best_path->jpath.innerjoinpath;
/* /*
* MergeJoin can project, so we don't have to demand exact tlists from the * MergeJoin can project, so we don't have to demand exact tlists from the
...@@ -3834,8 +3837,10 @@ create_mergejoin_plan(PlannerInfo *root, ...@@ -3834,8 +3837,10 @@ create_mergejoin_plan(PlannerInfo *root,
*/ */
if (best_path->outersortkeys) if (best_path->outersortkeys)
{ {
Relids outer_relids = outer_path->parent->relids;
Sort *sort = make_sort_from_pathkeys(outer_plan, Sort *sort = make_sort_from_pathkeys(outer_plan,
best_path->outersortkeys); best_path->outersortkeys,
outer_relids);
label_sort_with_costsize(root, sort, -1.0); label_sort_with_costsize(root, sort, -1.0);
outer_plan = (Plan *) sort; outer_plan = (Plan *) sort;
...@@ -3846,8 +3851,10 @@ create_mergejoin_plan(PlannerInfo *root, ...@@ -3846,8 +3851,10 @@ create_mergejoin_plan(PlannerInfo *root,
if (best_path->innersortkeys) if (best_path->innersortkeys)
{ {
Relids inner_relids = inner_path->parent->relids;
Sort *sort = make_sort_from_pathkeys(inner_plan, Sort *sort = make_sort_from_pathkeys(inner_plan,
best_path->innersortkeys); best_path->innersortkeys,
inner_relids);
label_sort_with_costsize(root, sort, -1.0); label_sort_with_costsize(root, sort, -1.0);
inner_plan = (Plan *) sort; inner_plan = (Plan *) sort;
...@@ -5525,8 +5532,9 @@ make_sort(Plan *lefttree, int numCols, ...@@ -5525,8 +5532,9 @@ make_sort(Plan *lefttree, int numCols,
* the output parameters *p_numsortkeys etc. * the output parameters *p_numsortkeys etc.
* *
* When looking for matches to an EquivalenceClass's members, we will only * When looking for matches to an EquivalenceClass's members, we will only
* consider child EC members if they match 'relids'. This protects against * consider child EC members if they belong to given 'relids'. This protects
* possible incorrect matches to child expressions that contain no Vars. * against possible incorrect matches to child expressions that contain no
* Vars.
* *
* If reqColIdx isn't NULL then it contains sort key column numbers that * If reqColIdx isn't NULL then it contains sort key column numbers that
* we should match. This is used when making child plans for a MergeAppend; * we should match. This is used when making child plans for a MergeAppend;
...@@ -5681,11 +5689,11 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, ...@@ -5681,11 +5689,11 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
continue; continue;
/* /*
* Ignore child members unless they match the rel being * Ignore child members unless they belong to the rel being
* sorted. * sorted.
*/ */
if (em->em_is_child && if (em->em_is_child &&
!bms_equal(em->em_relids, relids)) !bms_is_subset(em->em_relids, relids))
continue; continue;
sortexpr = em->em_expr; sortexpr = em->em_expr;
...@@ -5769,7 +5777,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, ...@@ -5769,7 +5777,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
* find_ec_member_for_tle * find_ec_member_for_tle
* Locate an EquivalenceClass member matching the given TLE, if any * Locate an EquivalenceClass member matching the given TLE, if any
* *
* Child EC members are ignored unless they match 'relids'. * Child EC members are ignored unless they belong to given 'relids'.
*/ */
static EquivalenceMember * static EquivalenceMember *
find_ec_member_for_tle(EquivalenceClass *ec, find_ec_member_for_tle(EquivalenceClass *ec,
...@@ -5797,10 +5805,10 @@ find_ec_member_for_tle(EquivalenceClass *ec, ...@@ -5797,10 +5805,10 @@ find_ec_member_for_tle(EquivalenceClass *ec,
continue; continue;
/* /*
* Ignore child members unless they match the rel being sorted. * Ignore child members unless they belong to the rel being sorted.
*/ */
if (em->em_is_child && if (em->em_is_child &&
!bms_equal(em->em_relids, relids)) !bms_is_subset(em->em_relids, relids))
continue; continue;
/* Match if same expression (after stripping relabel) */ /* Match if same expression (after stripping relabel) */
...@@ -5821,9 +5829,10 @@ find_ec_member_for_tle(EquivalenceClass *ec, ...@@ -5821,9 +5829,10 @@ find_ec_member_for_tle(EquivalenceClass *ec,
* *
* 'lefttree' is the node which yields input tuples * 'lefttree' is the node which yields input tuples
* 'pathkeys' is the list of pathkeys by which the result is to be sorted * 'pathkeys' is the list of pathkeys by which the result is to be sorted
* 'relids' is the set of relations required by prepare_sort_from_pathkeys()
*/ */
static Sort * static Sort *
make_sort_from_pathkeys(Plan *lefttree, List *pathkeys) make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids)
{ {
int numsortkeys; int numsortkeys;
AttrNumber *sortColIdx; AttrNumber *sortColIdx;
...@@ -5833,7 +5842,7 @@ make_sort_from_pathkeys(Plan *lefttree, List *pathkeys) ...@@ -5833,7 +5842,7 @@ make_sort_from_pathkeys(Plan *lefttree, List *pathkeys)
/* Compute sort column info, and adjust lefttree as needed */ /* Compute sort column info, and adjust lefttree as needed */
lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys, lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys,
NULL, relids,
NULL, NULL,
false, false,
&numsortkeys, &numsortkeys,
......
...@@ -6150,3 +6150,25 @@ get_partitioned_child_rels(PlannerInfo *root, Index rti) ...@@ -6150,3 +6150,25 @@ get_partitioned_child_rels(PlannerInfo *root, Index rti)
return result; return result;
} }
/*
 * get_partitioned_child_rels_for_join
 *		Collect the partitioned child rels belonging to the members of a join.
 *
 * Walks root->pcinfo_list and, for every PartitionedChildRelInfo whose
 * parent_relid is a member of 'join_relids', appends a copy of that entry's
 * child_rels list to the result.  The per-entry lists are copied before
 * being concatenated, so the lists stored in root->pcinfo_list are not
 * modified.  Returns NIL when no entry's parent is part of the join.
 */
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{
	List	   *child_rtis = NIL;
	ListCell   *lc;

	foreach(lc, root->pcinfo_list)
	{
		PartitionedChildRelInfo *pcinfo = (PartitionedChildRelInfo *) lfirst(lc);

		/* Skip partitioned parents that do not participate in this join. */
		if (!bms_is_member(pcinfo->parent_relid, join_relids))
			continue;

		child_rtis = list_concat(child_rtis, list_copy(pcinfo->child_rels));
	}

	return child_rtis;
}
...@@ -41,6 +41,9 @@ typedef struct ...@@ -41,6 +41,9 @@ typedef struct
int num_vars; /* number of plain Var tlist entries */ int num_vars; /* number of plain Var tlist entries */
bool has_ph_vars; /* are there PlaceHolderVar entries? */ bool has_ph_vars; /* are there PlaceHolderVar entries? */
bool has_non_vars; /* are there other entries? */ bool has_non_vars; /* are there other entries? */
bool has_conv_whole_rows; /* are there ConvertRowtypeExpr
* entries encapsulating a whole-row
* Var? */
tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */
} indexed_tlist; } indexed_tlist;
...@@ -139,6 +142,7 @@ static List *set_returning_clause_references(PlannerInfo *root, ...@@ -139,6 +142,7 @@ static List *set_returning_clause_references(PlannerInfo *root,
int rtoffset); int rtoffset);
static bool extract_query_dependencies_walker(Node *node, static bool extract_query_dependencies_walker(Node *node,
PlannerInfo *context); PlannerInfo *context);
static bool is_converted_whole_row_reference(Node *node);
/***************************************************************************** /*****************************************************************************
* *
...@@ -1944,6 +1948,7 @@ build_tlist_index(List *tlist) ...@@ -1944,6 +1948,7 @@ build_tlist_index(List *tlist)
itlist->tlist = tlist; itlist->tlist = tlist;
itlist->has_ph_vars = false; itlist->has_ph_vars = false;
itlist->has_non_vars = false; itlist->has_non_vars = false;
itlist->has_conv_whole_rows = false;
/* Find the Vars and fill in the index array */ /* Find the Vars and fill in the index array */
vinfo = itlist->vars; vinfo = itlist->vars;
...@@ -1962,6 +1967,8 @@ build_tlist_index(List *tlist) ...@@ -1962,6 +1967,8 @@ build_tlist_index(List *tlist)
} }
else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
itlist->has_ph_vars = true; itlist->has_ph_vars = true;
else if (is_converted_whole_row_reference((Node *) tle->expr))
itlist->has_conv_whole_rows = true;
else else
itlist->has_non_vars = true; itlist->has_non_vars = true;
} }
...@@ -1977,7 +1984,10 @@ build_tlist_index(List *tlist) ...@@ -1977,7 +1984,10 @@ build_tlist_index(List *tlist)
* This is like build_tlist_index, but we only index tlist entries that * This is like build_tlist_index, but we only index tlist entries that
* are Vars belonging to some rel other than the one specified. We will set * are Vars belonging to some rel other than the one specified. We will set
* has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars * has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars
* (so nothing other than Vars and PlaceHolderVars can be matched). * (so nothing other than Vars and PlaceHolderVars can be matched). In case of
* DML, where this function will be used, returning lists from child relations
* will be appended similar to a simple append relation. That does not require
* fixing ConvertRowtypeExpr references. So, those are not considered here.
*/ */
static indexed_tlist * static indexed_tlist *
build_tlist_index_other_vars(List *tlist, Index ignore_rel) build_tlist_index_other_vars(List *tlist, Index ignore_rel)
...@@ -1994,6 +2004,7 @@ build_tlist_index_other_vars(List *tlist, Index ignore_rel) ...@@ -1994,6 +2004,7 @@ build_tlist_index_other_vars(List *tlist, Index ignore_rel)
itlist->tlist = tlist; itlist->tlist = tlist;
itlist->has_ph_vars = false; itlist->has_ph_vars = false;
itlist->has_non_vars = false; itlist->has_non_vars = false;
itlist->has_conv_whole_rows = false;
/* Find the desired Vars and fill in the index array */ /* Find the desired Vars and fill in the index array */
vinfo = itlist->vars; vinfo = itlist->vars;
...@@ -2197,6 +2208,7 @@ static Node * ...@@ -2197,6 +2208,7 @@ static Node *
fix_join_expr_mutator(Node *node, fix_join_expr_context *context) fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{ {
Var *newvar; Var *newvar;
bool converted_whole_row;
if (node == NULL) if (node == NULL)
return NULL; return NULL;
...@@ -2266,8 +2278,12 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) ...@@ -2266,8 +2278,12 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
} }
if (IsA(node, Param)) if (IsA(node, Param))
return fix_param_node(context->root, (Param *) node); return fix_param_node(context->root, (Param *) node);
/* Try matching more complex expressions too, if tlists have any */ /* Try matching more complex expressions too, if tlists have any */
if (context->outer_itlist && context->outer_itlist->has_non_vars) converted_whole_row = is_converted_whole_row_reference(node);
if (context->outer_itlist &&
(context->outer_itlist->has_non_vars ||
(context->outer_itlist->has_conv_whole_rows && converted_whole_row)))
{ {
newvar = search_indexed_tlist_for_non_var((Expr *) node, newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->outer_itlist, context->outer_itlist,
...@@ -2275,7 +2291,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) ...@@ -2275,7 +2291,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
if (newvar) if (newvar)
return (Node *) newvar; return (Node *) newvar;
} }
if (context->inner_itlist && context->inner_itlist->has_non_vars) if (context->inner_itlist &&
(context->inner_itlist->has_non_vars ||
(context->inner_itlist->has_conv_whole_rows && converted_whole_row)))
{ {
newvar = search_indexed_tlist_for_non_var((Expr *) node, newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->inner_itlist, context->inner_itlist,
...@@ -2395,7 +2413,9 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) ...@@ -2395,7 +2413,9 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
/* If no match, just fall through to process it normally */ /* If no match, just fall through to process it normally */
} }
/* Try matching more complex expressions too, if tlist has any */ /* Try matching more complex expressions too, if tlist has any */
if (context->subplan_itlist->has_non_vars) if (context->subplan_itlist->has_non_vars ||
(context->subplan_itlist->has_conv_whole_rows &&
is_converted_whole_row_reference(node)))
{ {
newvar = search_indexed_tlist_for_non_var((Expr *) node, newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->subplan_itlist, context->subplan_itlist,
...@@ -2602,3 +2622,33 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context) ...@@ -2602,3 +2622,33 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
return expression_tree_walker(node, extract_query_dependencies_walker, return expression_tree_walker(node, extract_query_dependencies_walker,
(void *) context); (void *) context);
} }
/*
 * is_converted_whole_row_reference
 *		Report whether the node is a ConvertRowtypeExpr (possibly a stack of
 *		them) wrapped around a whole-row Var.
 *
 * Returns false for NULL and for any node that is not a ConvertRowtypeExpr.
 * Otherwise the function descends through nested ConvertRowtypeExpr nodes
 * for as long as the current level is an implicit cast, and then returns
 * true only if the argument reached is a Var with varattno == 0.
 */
static bool
is_converted_whole_row_reference(Node *node)
{
	ConvertRowtypeExpr *cre;

	if (node == NULL || !IsA(node, ConvertRowtypeExpr))
		return false;

	cre = castNode(ConvertRowtypeExpr, node);

	/* Peel off nested conversions while the current one is implicit. */
	for (;;)
	{
		if (cre->convertformat != COERCE_IMPLICIT_CAST)
			break;
		if (!IsA(cre->arg, ConvertRowtypeExpr))
			break;
		cre = castNode(ConvertRowtypeExpr, cre->arg);
	}

	/* Whatever we stopped at must wrap a Var ... */
	if (!IsA(cre->arg, Var))
		return false;

	/* ... and that Var must be a whole-row reference (attribute number 0). */
	return castNode(Var, cre->arg)->varattno == 0;
}
...@@ -2269,6 +2269,59 @@ adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos) ...@@ -2269,6 +2269,59 @@ adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
return relids; return relids;
} }
/*
 * adjust_child_relids_multilevel
 *		Substitute, in relids, every member of top_parent_relids with the
 *		matching member of child_relids.
 *
 * The children may sit several levels below the top parents.  The function
 * looks up the AppendRelInfos of child_relids to find their immediate
 * parents; if those are not yet the top parents it first recurses to
 * translate relids down to the immediate-parent level, and then applies the
 * last parent-to-child step with adjust_child_relids().
 *
 * If relids shares no member with top_parent_relids it is returned as is.
 */
Relids
adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
							   Relids child_relids, Relids top_parent_relids)
{
	AppendRelInfo **appinfos;
	int			nappinfos;
	Relids		immediate_parents = NULL;
	Relids		intermediate = NULL;
	Relids		translated;
	int			i;

	/* Nothing to translate when no top parent is referenced. */
	if (!bms_overlap(relids, top_parent_relids))
		return relids;

	appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);

	/* Gather the immediate parents of the given children. */
	for (i = 0; i < nappinfos; i++)
		immediate_parents = bms_add_member(immediate_parents,
										   appinfos[i]->parent_relid);

	if (bms_equal(immediate_parents, top_parent_relids))
	{
		/* Single level: translate straight from top parent to child. */
		translated = adjust_child_relids(relids, nappinfos, appinfos);
	}
	else
	{
		/*
		 * More levels above us: first bring relids down to the level of the
		 * immediate parents, then take the final step to the children.
		 */
		intermediate = adjust_child_relids_multilevel(root, relids,
													  immediate_parents,
													  top_parent_relids);
		translated = adjust_child_relids(intermediate, nappinfos, appinfos);
	}

	/* Release the working storage, including any intermediate result. */
	if (intermediate)
		bms_free(intermediate);
	bms_free(immediate_parents);
	pfree(appinfos);

	return translated;
}
/* /*
* Adjust the targetlist entries of an inherited UPDATE operation * Adjust the targetlist entries of an inherited UPDATE operation
* *
...@@ -2408,6 +2461,48 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, ...@@ -2408,6 +2461,48 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
return node; return node;
} }
/*
 * build_child_join_sjinfo
 *		Produce the SpecialJoinInfo for a child-join from the one describing
 *		the join between the parents.
 *
 * left_relids and right_relids are the relids of the left and right sides
 * of the child join.  The result starts out as a flat copy of
 * parent_sjinfo; the min/syn lefthand sets are then translated using the
 * left side's AppendRelInfos, and the min/syn righthand sets and
 * semi_rhs_exprs using the right side's.
 */
SpecialJoinInfo *
build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
						Relids left_relids, Relids right_relids)
{
	SpecialJoinInfo *child_sjinfo;
	AppendRelInfo **lhs_appinfos;
	AppendRelInfo **rhs_appinfos;
	int			lhs_count;
	int			rhs_count;

	/* Look up the parent-to-child translation data for each side. */
	lhs_appinfos = find_appinfos_by_relids(root, left_relids, &lhs_count);
	rhs_appinfos = find_appinfos_by_relids(root, right_relids, &rhs_count);

	/* Begin with a flat copy of the parent's node. */
	child_sjinfo = makeNode(SpecialJoinInfo);
	memcpy(child_sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));

	/* Left-hand relid sets are translated with the left side's appinfos. */
	child_sjinfo->min_lefthand =
		adjust_child_relids(child_sjinfo->min_lefthand,
							lhs_count, lhs_appinfos);
	child_sjinfo->syn_lefthand =
		adjust_child_relids(child_sjinfo->syn_lefthand,
							lhs_count, lhs_appinfos);

	/* Right-hand relid sets and semijoin RHS expressions use the right's. */
	child_sjinfo->min_righthand =
		adjust_child_relids(child_sjinfo->min_righthand,
							rhs_count, rhs_appinfos);
	child_sjinfo->syn_righthand =
		adjust_child_relids(child_sjinfo->syn_righthand,
							rhs_count, rhs_appinfos);
	child_sjinfo->semi_rhs_exprs = (List *)
		adjust_appendrel_attrs(root,
							   (Node *) child_sjinfo->semi_rhs_exprs,
							   rhs_count,
							   rhs_appinfos);

	pfree(lhs_appinfos);
	pfree(rhs_appinfos);

	return child_sjinfo;
}
/* /*
* find_appinfos_by_relids * find_appinfos_by_relids
* Find AppendRelInfo structures for all relations specified by relids. * Find AppendRelInfo structures for all relations specified by relids.
......
This diff is collapsed.
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "optimizer/pathnode.h" #include "optimizer/pathnode.h"
#include "optimizer/placeholder.h" #include "optimizer/placeholder.h"
#include "optimizer/planmain.h" #include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/var.h" #include "optimizer/var.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
...@@ -414,6 +415,10 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, ...@@ -414,6 +415,10 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
Relids relids = joinrel->relids; Relids relids = joinrel->relids;
ListCell *lc; ListCell *lc;
/* This function is called only on the parent relations. */
Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) &&
!IS_OTHER_REL(inner_rel));
foreach(lc, root->placeholder_list) foreach(lc, root->placeholder_list)
{ {
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
...@@ -459,3 +464,56 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, ...@@ -459,3 +464,56 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
} }
} }
} }
/*
 * add_placeholders_to_child_joinrel
 *		Append the PlaceHolderVars found in the parent join's targetlist to
 *		the child join's targetlist, translating them to refer to the child
 *		where needed.
 *
 * Only PlaceHolderVar entries of parentrel->reltarget->exprs are considered.
 * Afterwards the child's target startup cost, per-tuple cost and width are
 * overwritten with the parent's values.  Nothing is done (not even the
 * cost/width copy) when the query has no placeholders at all.
 */
void
add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel,
								  RelOptInfo *parentrel)
{
	AppendRelInfo **appinfos;
	int			nappinfos;
	ListCell   *cell;

	Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel));
	Assert(IS_OTHER_REL(childrel));

	/* Without any PHVs in the query there is nothing to add. */
	if (root->placeholder_list == NIL)
		return;

	appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos);

	foreach(cell, parentrel->reltarget->exprs)
	{
		PlaceHolderVar *phv = lfirst(cell);

		/* Non-PHV target entries are not handled here. */
		if (!IsA(phv, PlaceHolderVar))
			continue;

		/*
		 * A PHV that mentions any of the parent's relations has to be
		 * rewritten in terms of the corresponding child relations.
		 */
		if (childrel->reloptkind == RELOPT_OTHER_JOINREL &&
			bms_overlap(phv->phrels, parentrel->relids))
			phv = (PlaceHolderVar *) adjust_appendrel_attrs(root,
															(Node *) phv,
															nappinfos,
															appinfos);

		childrel->reltarget->exprs = lappend(childrel->reltarget->exprs,
											 phv);
	}

	/* The child's target takes over the parent's cost and width figures. */
	childrel->reltarget->width = parentrel->reltarget->width;
	childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
	childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;

	pfree(appinfos);
}
...@@ -71,7 +71,8 @@ static List *get_relation_statistics(RelOptInfo *rel, Relation relation); ...@@ -71,7 +71,8 @@ static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
Relation relation); Relation relation);
static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel); static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
static List **build_baserel_partition_key_exprs(Relation relation, Index varno); static void set_baserel_partition_key_exprs(Relation relation,
RelOptInfo *rel);
/* /*
* get_relation_info - * get_relation_info -
...@@ -1832,7 +1833,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, ...@@ -1832,7 +1833,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
Assert(partdesc != NULL && rel->part_scheme != NULL); Assert(partdesc != NULL && rel->part_scheme != NULL);
rel->boundinfo = partdesc->boundinfo; rel->boundinfo = partdesc->boundinfo;
rel->nparts = partdesc->nparts; rel->nparts = partdesc->nparts;
rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid); set_baserel_partition_key_exprs(relation, rel);
} }
/* /*
...@@ -1907,21 +1908,24 @@ find_partition_scheme(PlannerInfo *root, Relation relation) ...@@ -1907,21 +1908,24 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
} }
/* /*
* build_baserel_partition_key_exprs * set_baserel_partition_key_exprs
* *
* Collects partition key expressions for a given base relation. Any single * Builds partition key expressions for the given base relation and sets them
* column partition keys are converted to Var nodes. All Var nodes are set * in given RelOptInfo. Any single column partition keys are converted to Var
* to the given varno. The partition key expressions are returned as an array * nodes. All Var nodes are restamped with the relid of given relation.
* of single element lists to be stored in RelOptInfo of the base relation.
*/ */
static List ** static void
build_baserel_partition_key_exprs(Relation relation, Index varno) set_baserel_partition_key_exprs(Relation relation,
RelOptInfo *rel)
{ {
PartitionKey partkey = RelationGetPartitionKey(relation); PartitionKey partkey = RelationGetPartitionKey(relation);
int partnatts; int partnatts;
int cnt; int cnt;
List **partexprs; List **partexprs;
ListCell *lc; ListCell *lc;
Index varno = rel->relid;
Assert(IS_SIMPLE_REL(rel) && rel->relid > 0);
/* A partitioned table should have a partition key. */ /* A partitioned table should have a partition key. */
Assert(partkey != NULL); Assert(partkey != NULL);
...@@ -1959,5 +1963,13 @@ build_baserel_partition_key_exprs(Relation relation, Index varno) ...@@ -1959,5 +1963,13 @@ build_baserel_partition_key_exprs(Relation relation, Index varno)
partexprs[cnt] = list_make1(partexpr); partexprs[cnt] = list_make1(partexpr);
} }
return partexprs; rel->partexprs = partexprs;
/*
* A base relation can not have nullable partition key expressions. We
* still allocate array of empty expressions lists to keep partition key
* expression handling code simple. See build_joinrel_partition_info() and
* match_expr_to_partition_keys().
*/
rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts);
} }
This diff is collapsed.
...@@ -911,6 +911,15 @@ static struct config_bool ConfigureNamesBool[] = ...@@ -911,6 +911,15 @@ static struct config_bool ConfigureNamesBool[] =
true, true,
NULL, NULL, NULL NULL, NULL, NULL
}, },
{
{"enable_partition_wise_join", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables partition-wise join."),
NULL
},
&enable_partition_wise_join,
false,
NULL, NULL, NULL
},
{ {
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO, {"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
......
...@@ -299,6 +299,7 @@ ...@@ -299,6 +299,7 @@
#enable_seqscan = on #enable_seqscan = on
#enable_sort = on #enable_sort = on
#enable_tidscan = on #enable_tidscan = on
#enable_partition_wise_join = off
# - Planner Cost Constants - # - Planner Cost Constants -
......
...@@ -158,6 +158,9 @@ typedef void (*ShutdownForeignScan_function) (ForeignScanState *node); ...@@ -158,6 +158,9 @@ typedef void (*ShutdownForeignScan_function) (ForeignScanState *node);
typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root, typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root,
RelOptInfo *rel, RelOptInfo *rel,
RangeTblEntry *rte); RangeTblEntry *rte);
typedef List *(*ReparameterizeForeignPathByChild_function) (PlannerInfo *root,
List *fdw_private,
RelOptInfo *child_rel);
/* /*
* FdwRoutine is the struct returned by a foreign-data wrapper's handler * FdwRoutine is the struct returned by a foreign-data wrapper's handler
...@@ -230,6 +233,9 @@ typedef struct FdwRoutine ...@@ -230,6 +233,9 @@ typedef struct FdwRoutine
ReInitializeDSMForeignScan_function ReInitializeDSMForeignScan; ReInitializeDSMForeignScan_function ReInitializeDSMForeignScan;
InitializeWorkerForeignScan_function InitializeWorkerForeignScan; InitializeWorkerForeignScan_function InitializeWorkerForeignScan;
ShutdownForeignScan_function ShutdownForeignScan; ShutdownForeignScan_function ShutdownForeignScan;
/* Support functions for path reparameterization. */
ReparameterizeForeignPathByChild_function ReparameterizeForeignPathByChild;
} FdwRoutine; } FdwRoutine;
......
...@@ -96,6 +96,9 @@ typedef struct CustomPathMethods ...@@ -96,6 +96,9 @@ typedef struct CustomPathMethods
List *tlist, List *tlist,
List *clauses, List *clauses,
List *custom_plans); List *custom_plans);
struct List *(*ReparameterizeCustomPathByChild) (PlannerInfo *root,
List *custom_private,
RelOptInfo *child_rel);
} CustomPathMethods; } CustomPathMethods;
/* /*
......
...@@ -391,6 +391,11 @@ typedef struct PartitionSchemeData *PartitionScheme; ...@@ -391,6 +391,11 @@ typedef struct PartitionSchemeData *PartitionScheme;
* handling join alias Vars. Currently this is not needed because all join * handling join alias Vars. Currently this is not needed because all join
* alias Vars are expanded to non-aliased form during preprocess_expression. * alias Vars are expanded to non-aliased form during preprocess_expression.
* *
* We also have relations representing joins between child relations of
* different partitioned tables. These relations are not added to
* join_rel_level lists as they are not joined directly by the dynamic
* programming algorithm.
*
* There is also a RelOptKind for "upper" relations, which are RelOptInfos * There is also a RelOptKind for "upper" relations, which are RelOptInfos
* that describe post-scan/join processing steps, such as aggregation. * that describe post-scan/join processing steps, such as aggregation.
* Many of the fields in these RelOptInfos are meaningless, but their Path * Many of the fields in these RelOptInfos are meaningless, but their Path
...@@ -525,14 +530,18 @@ typedef struct PartitionSchemeData *PartitionScheme; ...@@ -525,14 +530,18 @@ typedef struct PartitionSchemeData *PartitionScheme;
* boundinfo - Partition bounds * boundinfo - Partition bounds
* nparts - Number of partitions * nparts - Number of partitions
* part_rels - RelOptInfos for each partition * part_rels - RelOptInfos for each partition
* partexprs - Partition key expressions * partexprs, nullable_partexprs - Partition key expressions
* *
* Note: A base relation always has only one set of partition keys, but a join * Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations * relation may have as many sets of partition keys as the number of relations
* being joined. partexprs is an array containing part_scheme->partnatts * being joined. partexprs and nullable_partexprs are arrays containing
* elements, each of which is a list of partition key expressions. For a base * part_scheme->partnatts elements each. Each of these elements is a list of
* relation each list contains only one expression, but for a join relation * partition key expressions. For a base relation each list in partexprs
* there can be one per baserel. * contains only one expression and nullable_partexprs is not populated. For a
* join relation, partexprs and nullable_partexprs contain partition key
* expressions from non-nullable and nullable relations resp. Lists at any
* given position in those arrays together contain as many elements as the
* number of joining relations.
*---------- *----------
*/ */
typedef enum RelOptKind typedef enum RelOptKind
...@@ -540,6 +549,7 @@ typedef enum RelOptKind ...@@ -540,6 +549,7 @@ typedef enum RelOptKind
RELOPT_BASEREL, RELOPT_BASEREL,
RELOPT_JOINREL, RELOPT_JOINREL,
RELOPT_OTHER_MEMBER_REL, RELOPT_OTHER_MEMBER_REL,
RELOPT_OTHER_JOINREL,
RELOPT_UPPER_REL, RELOPT_UPPER_REL,
RELOPT_DEADREL RELOPT_DEADREL
} RelOptKind; } RelOptKind;
...@@ -553,13 +563,17 @@ typedef enum RelOptKind ...@@ -553,13 +563,17 @@ typedef enum RelOptKind
(rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) (rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
/* Is the given relation a join relation? */ /* Is the given relation a join relation? */
#define IS_JOIN_REL(rel) ((rel)->reloptkind == RELOPT_JOINREL) #define IS_JOIN_REL(rel) \
((rel)->reloptkind == RELOPT_JOINREL || \
(rel)->reloptkind == RELOPT_OTHER_JOINREL)
/* Is the given relation an upper relation? */ /* Is the given relation an upper relation? */
#define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL) #define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL)
/* Is the given relation an "other" relation? */ /* Is the given relation an "other" relation? */
#define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) #define IS_OTHER_REL(rel) \
((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
(rel)->reloptkind == RELOPT_OTHER_JOINREL)
typedef struct RelOptInfo typedef struct RelOptInfo
{ {
...@@ -645,9 +659,29 @@ typedef struct RelOptInfo ...@@ -645,9 +659,29 @@ typedef struct RelOptInfo
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */ * stored in the same order of bounds */
List **partexprs; /* Partition key expressions. */ List **partexprs; /* Non-nullable partition key expressions. */
List **nullable_partexprs; /* Nullable partition key expressions. */
} RelOptInfo; } RelOptInfo;
/*
 * Is the given relation partitioned?
 *
 * Having a partitioning scheme is not sufficient: a join of two partitioned
 * relations that share a scheme but have no matching partitions has the
 * scheme set and yet contains no partitions.  Hence we insist on a scheme,
 * partition bounds, and a positive partition count.
 */
#define IS_PARTITIONED_REL(rel) \
	((rel)->part_scheme != NULL && \
	 (rel)->boundinfo != NULL && \
	 (rel)->nparts > 0)

/*
 * Convenience check that a partitioned relation has every
 * partitioning-related member filled in: everything IS_PARTITIONED_REL
 * requires, plus part_rels, partexprs and nullable_partexprs.
 */
#define REL_HAS_ALL_PART_PROPS(rel)	\
	(IS_PARTITIONED_REL(rel) && \
	 (rel)->part_rels != NULL && \
	 (rel)->partexprs != NULL && \
	 (rel)->nullable_partexprs != NULL)
/* /*
* IndexOptInfo * IndexOptInfo
* Per-index information for planning/optimization * Per-index information for planning/optimization
......
...@@ -67,6 +67,7 @@ extern bool enable_material; ...@@ -67,6 +67,7 @@ extern bool enable_material;
extern bool enable_mergejoin; extern bool enable_mergejoin;
extern bool enable_hashjoin; extern bool enable_hashjoin;
extern bool enable_gathermerge; extern bool enable_gathermerge;
extern bool enable_partition_wise_join;
extern int constraint_exclusion; extern int constraint_exclusion;
extern double clamp_row_est(double nrows); extern double clamp_row_est(double nrows);
......
...@@ -251,6 +251,8 @@ extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel, ...@@ -251,6 +251,8 @@ extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel,
extern Path *reparameterize_path(PlannerInfo *root, Path *path, extern Path *reparameterize_path(PlannerInfo *root, Path *path,
Relids required_outer, Relids required_outer,
double loop_count); double loop_count);
extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
RelOptInfo *child_rel);
/* /*
* prototypes for relnode.c * prototypes for relnode.c
...@@ -290,5 +292,9 @@ extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel, ...@@ -290,5 +292,9 @@ extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel,
Relids required_outer); Relids required_outer);
extern ParamPathInfo *find_param_path_info(RelOptInfo *rel, extern ParamPathInfo *find_param_path_info(RelOptInfo *rel,
Relids required_outer); Relids required_outer);
extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
RelOptInfo *parent_joinrel, List *restrictlist,
SpecialJoinInfo *sjinfo, JoinType jointype);
#endif /* PATHNODE_H */ #endif /* PATHNODE_H */
...@@ -58,6 +58,8 @@ extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, ...@@ -58,6 +58,8 @@ extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
double index_pages); double index_pages);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual); Path *bitmapqual);
extern void generate_partition_wise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
#ifdef OPTIMIZER_DEBUG #ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
...@@ -111,6 +113,9 @@ extern bool have_join_order_restriction(PlannerInfo *root, ...@@ -111,6 +113,9 @@ extern bool have_join_order_restriction(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2); RelOptInfo *rel1, RelOptInfo *rel2);
extern bool have_dangerous_phv(PlannerInfo *root, extern bool have_dangerous_phv(PlannerInfo *root,
Relids outer_relids, Relids inner_params); Relids outer_relids, Relids inner_params);
extern void mark_dummy_rel(RelOptInfo *rel);
extern bool have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype, List *restrictlist);
/* /*
* equivclass.c * equivclass.c
......
...@@ -28,5 +28,7 @@ extern void fix_placeholder_input_needed_levels(PlannerInfo *root); ...@@ -28,5 +28,7 @@ extern void fix_placeholder_input_needed_levels(PlannerInfo *root);
extern void add_placeholders_to_base_rels(PlannerInfo *root); extern void add_placeholders_to_base_rels(PlannerInfo *root);
extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel); RelOptInfo *outer_rel, RelOptInfo *inner_rel);
extern void add_placeholders_to_child_joinrel(PlannerInfo *root,
RelOptInfo *childrel, RelOptInfo *parentrel);
#endif /* PLACEHOLDER_H */ #endif /* PLACEHOLDER_H */
...@@ -58,5 +58,7 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr); ...@@ -58,5 +58,7 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid); extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti); extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti);
extern List *get_partitioned_child_rels_for_join(PlannerInfo *root,
Relids join_relids);
#endif /* PLANNER_H */ #endif /* PLANNER_H */
...@@ -62,4 +62,10 @@ extern Node *adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, ...@@ -62,4 +62,10 @@ extern Node *adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root, extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root,
Relids relids, int *nappinfos); Relids relids, int *nappinfos);
extern SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
SpecialJoinInfo *parent_sjinfo,
Relids left_relids, Relids right_relids);
extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
Relids child_relids, Relids top_parent_relids);
#endif /* PREP_H */ #endif /* PREP_H */
This diff is collapsed.
...@@ -70,21 +70,22 @@ select count(*) >= 0 as ok from pg_prepared_xacts; ...@@ -70,21 +70,22 @@ select count(*) >= 0 as ok from pg_prepared_xacts;
-- This is to record the prevailing planner enable_foo settings during -- This is to record the prevailing planner enable_foo settings during
-- a regression test run. -- a regression test run.
select name, setting from pg_settings where name like 'enable%'; select name, setting from pg_settings where name like 'enable%';
name | setting name | setting
----------------------+--------- ----------------------------+---------
enable_bitmapscan | on enable_bitmapscan | on
enable_gathermerge | on enable_gathermerge | on
enable_hashagg | on enable_hashagg | on
enable_hashjoin | on enable_hashjoin | on
enable_indexonlyscan | on enable_indexonlyscan | on
enable_indexscan | on enable_indexscan | on
enable_material | on enable_material | on
enable_mergejoin | on enable_mergejoin | on
enable_nestloop | on enable_nestloop | on
enable_seqscan | on enable_partition_wise_join | off
enable_sort | on enable_seqscan | on
enable_tidscan | on enable_sort | on
(12 rows) enable_tidscan | on
(13 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail -- more-or-less working. We can't test their contents in any great detail
......
...@@ -104,7 +104,8 @@ test: publication subscription ...@@ -104,7 +104,8 @@ test: publication subscription
# ---------- # ----------
# Another group of parallel tests # Another group of parallel tests
# ---------- # ----------
test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass partition_join
# ---------- # ----------
# Another group of parallel tests # Another group of parallel tests
# NB: temp.sql does a reconnect which transiently uses 2 connections, # NB: temp.sql does a reconnect which transiently uses 2 connections,
......
...@@ -180,3 +180,4 @@ test: with ...@@ -180,3 +180,4 @@ test: with
test: xml test: xml
test: event_trigger test: event_trigger
test: stats test: stats
test: partition_join
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment