Commit 0a480502 authored by Robert Haas

Expand partitioned table RTEs level by level, without flattening.

Flattening the partitioning hierarchy at this stage makes various
desirable optimizations difficult.  The original use case for this
patch was partition-wise join, which wants to match up the partitions
in one partitioning hierarchy with those in another such hierarchy.
However, it now seems that it will also be useful in making partition
pruning work using the PartitionDesc rather than constraint exclusion,
because with a flattened expansion, we have no easy way to figure out
which PartitionDescs apply to which leaf tables in a multi-level
partition hierarchy.

As it turns out, we end up creating both rte->inh and !rte->inh RTEs
for each intermediate partitioned table, just as we previously did for
the root table.  This seems unnecessary since the partitioned tables
have no storage and are not scanned.  We might want to go back and
rejigger things so that no partitioned tables (including the parent)
need !rte->inh RTEs, but that seems to require some adjustments not
related to the core purpose of this patch.

Ashutosh Bapat, reviewed by me and by Amit Langote.  Some final
adjustments by me.

Discussion: http://postgr.es/m/CAFjFpRd=1venqLL7oGU=C1dEkuvk2DJgvF+7uKbnPHaum1mvHQ@mail.gmail.com
parent 0c4b879b
......@@ -24,6 +24,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
......@@ -352,8 +353,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
{
/*
* A partitioned table without leaf partitions is marked
* as a dummy rel.
* A partitioned table without any partitions is marked as
* a dummy rel.
*/
set_dummy_rel_pathlist(rel);
}
......@@ -867,6 +868,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
int nattrs;
ListCell *l;
/* Guard against stack overflow due to overly deep inheritance tree. */
check_stack_depth();
Assert(IS_SIMPLE_REL(rel));
/*
......@@ -1290,25 +1294,23 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
bool build_partitioned_rels = false;
/*
* A plain relation will already have a PartitionedChildRelInfo if it is
* partitioned. For a subquery RTE, no PartitionedChildRelInfo exists; we
* collect all partitioned_rels associated with any child. (This assumes
* that we don't need to look through multiple levels of subquery RTEs; if
* we ever do, we could create a PartitionedChildRelInfo with the
* accumulated list of partitioned_rels which would then be found when
* populating our parent rel with paths. For the present, that appears to
* be unnecessary.)
* A root partition will already have a PartitionedChildRelInfo, and a
* non-root partitioned table doesn't need one, because its Append paths
* will get flattened into the parent anyway. For a subquery RTE, no
* PartitionedChildRelInfo exists; we collect all partitioned_rels
* associated with any child. (This assumes that we don't need to look
* through multiple levels of subquery RTEs; if we ever do, we could
* create a PartitionedChildRelInfo with the accumulated list of
* partitioned_rels which would then be found when populating our parent
* rel with paths. For the present, that appears to be unnecessary.)
*/
rte = planner_rt_fetch(rel->relid, root);
switch (rte->rtekind)
{
case RTE_RELATION:
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
{
partitioned_rels =
get_partitioned_child_rels(root, rel->relid);
Assert(list_length(partitioned_rels) >= 1);
}
break;
case RTE_SUBQUERY:
build_partitioned_rels = true;
......
......@@ -15,6 +15,7 @@
#include "postgres.h"
#include "catalog/pg_type.h"
#include "catalog/pg_class.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
......@@ -629,11 +630,28 @@ create_lateral_join_info(PlannerInfo *root)
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *brel = root->simple_rel_array[rti];
RangeTblEntry *brte = root->simple_rte_array[rti];
if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
if (brel == NULL)
continue;
/*
* In the case of table inheritance, the parent RTE is directly linked
* to every child table via an AppendRelInfo. In the case of table
* partitioning, the inheritance hierarchy is expanded one level at a
* time rather than flattened. Consequently, an "other member" rel that
* is a partitioned table may have children of its own, and must itself
* be marked with the appropriate lateral info so that those children
* eventually get marked as well.
*/
Assert(IS_SIMPLE_REL(brel));
Assert(brte);
if (brel->reloptkind == RELOPT_OTHER_MEMBER_REL &&
(brte->rtekind != RTE_RELATION ||
brte->relkind != RELKIND_PARTITIONED_TABLE))
continue;
if (root->simple_rte_array[rti]->inh)
if (brte->inh)
{
foreach(lc, root->append_rel_list)
{
......
......@@ -1038,7 +1038,7 @@ static void
inheritance_planner(PlannerInfo *root)
{
Query *parse = root->parse;
int parentRTindex = parse->resultRelation;
int top_parentRTindex = parse->resultRelation;
Bitmapset *subqueryRTindexes;
Bitmapset *modifiableARIindexes;
int nominalRelation = -1;
......@@ -1056,6 +1056,10 @@ inheritance_planner(PlannerInfo *root)
Index rti;
RangeTblEntry *parent_rte;
List *partitioned_rels = NIL;
PlannerInfo *parent_root;
Query *parent_parse;
Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
PlannerInfo **parent_roots = NULL;
Assert(parse->commandType != CMD_INSERT);
......@@ -1119,11 +1123,31 @@ inheritance_planner(PlannerInfo *root)
* (including the root parent) as child members of the inheritance set do
* not appear anywhere else in the plan. The situation is exactly the
* opposite in the case of non-partitioned inheritance parent as described
* below.
* below. For the same reason, collect the list of descendant partitioned
* tables to be saved in the ModifyTable node, so that the executor can lock
* those as well.
*/
parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
parent_rte = rt_fetch(top_parentRTindex, root->parse->rtable);
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
nominalRelation = parentRTindex;
{
nominalRelation = top_parentRTindex;
partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
}
/*
* The PlannerInfo for each child is obtained by translating the relevant
* members of the PlannerInfo for its immediate parent, which we find
* using the parent_relid in its AppendRelInfo. We save the PlannerInfo
* for each parent in an array indexed by relid for fast retrieval. Since
* the maximum number of parents is limited by the number of RTEs in the
* query, we use that number to allocate the array. An extra entry is
* needed since relids start from 1.
*/
parent_roots = (PlannerInfo **) palloc0((list_length(parse->rtable) + 1) *
sizeof(PlannerInfo *));
parent_roots[top_parentRTindex] = root;
/*
* And now we can get on with generating a plan for each child table.
......@@ -1137,15 +1161,24 @@ inheritance_planner(PlannerInfo *root)
Path *subpath;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
if (!bms_is_member(appinfo->parent_relid, parent_relids))
continue;
/*
* expand_inherited_rtentry() always processes a parent before any of
* that parent's children, so the parent_root for this relation should
* already be available.
*/
parent_root = parent_roots[appinfo->parent_relid];
Assert(parent_root != NULL);
parent_parse = parent_root->parse;
/*
* We need a working copy of the PlannerInfo so that we can control
* propagation of information back to the main copy.
*/
subroot = makeNode(PlannerInfo);
memcpy(subroot, root, sizeof(PlannerInfo));
memcpy(subroot, parent_root, sizeof(PlannerInfo));
/*
* Generate modified query with this rel as target. We first apply
......@@ -1154,15 +1187,15 @@ inheritance_planner(PlannerInfo *root)
* then fool around with subquery RTEs.
*/
subroot->parse = (Query *)
adjust_appendrel_attrs(root,
(Node *) parse,
adjust_appendrel_attrs(parent_root,
(Node *) parent_parse,
1, &appinfo);
/*
* If there are securityQuals attached to the parent, move them to the
* child rel (they've already been transformed properly for that).
*/
parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable);
parent_rte = rt_fetch(appinfo->parent_relid, subroot->parse->rtable);
child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
child_rte->securityQuals = parent_rte->securityQuals;
parent_rte->securityQuals = NIL;
......@@ -1173,7 +1206,7 @@ inheritance_planner(PlannerInfo *root)
* executor doesn't need to see the modified copies --- we can just
* pass it the original rowMarks list.)
*/
subroot->rowMarks = copyObject(root->rowMarks);
subroot->rowMarks = copyObject(parent_root->rowMarks);
/*
* The append_rel_list likewise might contain references to subquery
......@@ -1190,7 +1223,7 @@ inheritance_planner(PlannerInfo *root)
ListCell *lc2;
subroot->append_rel_list = NIL;
foreach(lc2, root->append_rel_list)
foreach(lc2, parent_root->append_rel_list)
{
AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
......@@ -1225,7 +1258,7 @@ inheritance_planner(PlannerInfo *root)
ListCell *lr;
rti = 1;
foreach(lr, parse->rtable)
foreach(lr, parent_parse->rtable)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lr);
......@@ -1272,6 +1305,22 @@ inheritance_planner(PlannerInfo *root)
/* hack to mark target relation as an inheritance partition */
subroot->hasInheritedTarget = true;
/*
* If the child is further partitioned, remember it as a parent. Since
* a partitioned table does not have any data, we don't need to create
* a plan for it. We do, however, need to remember the PlannerInfo for
* use when processing its children.
*/
if (child_rte->inh)
{
Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
parent_relids =
bms_add_member(parent_relids, appinfo->child_relid);
parent_roots[appinfo->child_relid] = subroot;
continue;
}
/* Generate Path(s) for accessing this result relation */
grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
......@@ -1368,13 +1417,6 @@ inheritance_planner(PlannerInfo *root)
Assert(!parse->onConflict);
}
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
{
partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
}
/* Result path must go into outer query's FINAL upperrel */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
......
This diff is collapsed.
......@@ -1935,10 +1935,10 @@ typedef struct SpecialJoinInfo
*
* When we expand an inheritable table or a UNION-ALL subselect into an
* "append relation" (essentially, a list of child RTEs), we build an
* AppendRelInfo for each non-partitioned child RTE. The list of
* AppendRelInfos indicates which child RTEs must be included when expanding
* the parent, and each node carries information needed to translate Vars
* referencing the parent into Vars referencing that child.
* AppendRelInfo for each child RTE. The list of AppendRelInfos indicates
* which child RTEs must be included when expanding the parent, and each node
* carries information needed to translate Vars referencing the parent into
* Vars referencing that child.
*
* These structs are kept in the PlannerInfo node's append_rel_list.
* Note that we just throw all the structs into one list, and scan the
......
......@@ -625,6 +625,28 @@ select tableoid::regclass::text as relname, parted_tab.* from parted_tab order b
(3 rows)
drop table parted_tab;
-- Check UPDATE with multi-level partitioned inherited target
create table mlparted_tab (a int, b char, c text) partition by list (a);
create table mlparted_tab_part1 partition of mlparted_tab for values in (1);
create table mlparted_tab_part2 partition of mlparted_tab for values in (2) partition by list (b);
create table mlparted_tab_part3 partition of mlparted_tab for values in (3);
create table mlparted_tab_part2a partition of mlparted_tab_part2 for values in ('a');
create table mlparted_tab_part2b partition of mlparted_tab_part2 for values in ('b');
insert into mlparted_tab values (1, 'a'), (2, 'a'), (2, 'b'), (3, 'a');
update mlparted_tab mlp set c = 'xxx'
from
(select a from some_tab union all select a+1 from some_tab) ss (a)
where (mlp.a = ss.a and mlp.b = 'b') or mlp.a = 3;
select tableoid::regclass::text as relname, mlparted_tab.* from mlparted_tab order by 1,2;
relname | a | b | c
---------------------+---+---+-----
mlparted_tab_part1 | 1 | a |
mlparted_tab_part2a | 2 | a |
mlparted_tab_part2b | 2 | b | xxx
mlparted_tab_part3 | 3 | a | xxx
(4 rows)
drop table mlparted_tab;
drop table some_tab cascade;
NOTICE: drop cascades to table some_tab_child
/* Test multiple inheritance of column defaults */
......
......@@ -5328,6 +5328,59 @@ LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
--
-- test LATERAL reference propagation down a multi-level inheritance hierarchy
-- produced for a multi-level partitioned table hierarchy.
--
create table pt1 (a int, b int, c varchar) partition by range(a);
create table pt1p1 partition of pt1 for values from (0) to (100) partition by range(b);
create table pt1p2 partition of pt1 for values from (100) to (200);
create table pt1p1p1 partition of pt1p1 for values from (0) to (100);
insert into pt1 values (1, 1, 'x'), (101, 101, 'y');
create table ut1 (a int, b int, c varchar);
insert into ut1 values (101, 101, 'y'), (2, 2, 'z');
explain (verbose, costs off)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
QUERY PLAN
-------------------------------------------------------------
Sort
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
Sort Key: t1.a
-> Nested Loop Left Join
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
-> Seq Scan on public.ut1 t1
Output: t1.a, t1.b, t1.c
-> Hash Join
Output: t2.a, LEAST(t1.a, t2.a, t3.a)
Hash Cond: (t3.b = t2.a)
-> Seq Scan on public.ut1 t3
Output: t3.a, t3.b, t3.c
-> Hash
Output: t2.a
-> Append
-> Seq Scan on public.pt1p1p1 t2
Output: t2.a
Filter: (t1.a = t2.a)
-> Seq Scan on public.pt1p2 t2_1
Output: t2_1.a
Filter: (t1.a = t2_1.a)
(21 rows)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
b | phv
-----+-----
2 |
101 | 101
(2 rows)
drop table pt1;
drop table ut1;
--
-- test that foreign key join estimation performs sanely for outer joins
--
begin;
......
......@@ -154,6 +154,23 @@ where parted_tab.a = ss.a;
select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
drop table parted_tab;
-- Check UPDATE with multi-level partitioned inherited target
create table mlparted_tab (a int, b char, c text) partition by list (a);
create table mlparted_tab_part1 partition of mlparted_tab for values in (1);
create table mlparted_tab_part2 partition of mlparted_tab for values in (2) partition by list (b);
create table mlparted_tab_part3 partition of mlparted_tab for values in (3);
create table mlparted_tab_part2a partition of mlparted_tab_part2 for values in ('a');
create table mlparted_tab_part2b partition of mlparted_tab_part2 for values in ('b');
insert into mlparted_tab values (1, 'a'), (2, 'a'), (2, 'b'), (3, 'a');
update mlparted_tab mlp set c = 'xxx'
from
(select a from some_tab union all select a+1 from some_tab) ss (a)
where (mlp.a = ss.a and mlp.b = 'b') or mlp.a = 3;
select tableoid::regclass::text as relname, mlparted_tab.* from mlparted_tab order by 1,2;
drop table mlparted_tab;
drop table some_tab cascade;
/* Test multiple inheritance of column defaults */
......
......@@ -1733,6 +1733,29 @@ delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
--
-- test LATERAL reference propagation down a multi-level inheritance hierarchy
-- produced for a multi-level partitioned table hierarchy.
--
create table pt1 (a int, b int, c varchar) partition by range(a);
create table pt1p1 partition of pt1 for values from (0) to (100) partition by range(b);
create table pt1p2 partition of pt1 for values from (100) to (200);
create table pt1p1p1 partition of pt1p1 for values from (0) to (100);
insert into pt1 values (1, 1, 'x'), (101, 101, 'y');
create table ut1 (a int, b int, c varchar);
insert into ut1 values (101, 101, 'y'), (2, 2, 'z');
explain (verbose, costs off)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
drop table pt1;
drop table ut1;
--
-- test that foreign key join estimation performs sanely for outer joins
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment