Commit 0a480502 authored by Robert Haas

Expand partitioned table RTEs level by level, without flattening.

Flattening the partitioning hierarchy at this stage makes various
desirable optimizations difficult.  The original use case for this
patch was partition-wise join, which wants to match up the partitions
in one partitioning hierarchy with those in another such hierarchy.
However, it now seems that it will also be useful in making partition
pruning work using the PartitionDesc rather than constraint exclusion,
because with a flattened expansion, we have no easy way to figure out
which PartitionDescs apply to which leaf tables in a multi-level
partition hierarchy.

As it turns out, we end up creating both rte->inh and !rte->inh RTEs
for each intermediate partitioned table, just as we previously did for
the root table.  This seems unnecessary since the partitioned tables
have no storage and are not scanned.  We might want to go back and
rejigger things so that no partitioned tables (including the parent)
need !rte->inh RTEs, but that seems to require some adjustments not
related to the core purpose of this patch.

Ashutosh Bapat, reviewed by me and by Amit Langote.  Some final
adjustments by me.

Discussion: http://postgr.es/m/CAFjFpRd=1venqLL7oGU=C1dEkuvk2DJgvF+7uKbnPHaum1mvHQ@mail.gmail.com
parent 0c4b879b
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "catalog/pg_operator.h" #include "catalog/pg_operator.h"
#include "catalog/pg_proc.h" #include "catalog/pg_proc.h"
#include "foreign/fdwapi.h" #include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG #ifdef OPTIMIZER_DEBUG
...@@ -352,8 +353,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, ...@@ -352,8 +353,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
else if (rte->relkind == RELKIND_PARTITIONED_TABLE) else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
{ {
/* /*
* A partitioned table without leaf partitions is marked * A partitioned table without any partitions is marked as
* as a dummy rel. * a dummy rel.
*/ */
set_dummy_rel_pathlist(rel); set_dummy_rel_pathlist(rel);
} }
...@@ -867,6 +868,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ...@@ -867,6 +868,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
int nattrs; int nattrs;
ListCell *l; ListCell *l;
/* Guard against stack overflow due to overly deep inheritance tree. */
check_stack_depth();
Assert(IS_SIMPLE_REL(rel)); Assert(IS_SIMPLE_REL(rel));
/* /*
...@@ -1290,25 +1294,23 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ...@@ -1290,25 +1294,23 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
bool build_partitioned_rels = false; bool build_partitioned_rels = false;
/* /*
* A plain relation will already have a PartitionedChildRelInfo if it is * A root partition will already have a PartitionedChildRelInfo, and a
* partitioned. For a subquery RTE, no PartitionedChildRelInfo exists; we * non-root partitioned table doesn't need one, because its Append paths
* collect all partitioned_rels associated with any child. (This assumes * will get flattened into the parent anyway. For a subquery RTE, no
* that we don't need to look through multiple levels of subquery RTEs; if * PartitionedChildRelInfo exists; we collect all partitioned_rels
* we ever do, we could create a PartitionedChildRelInfo with the * associated with any child. (This assumes that we don't need to look
* accumulated list of partitioned_rels which would then be found when * through multiple levels of subquery RTEs; if we ever do, we could
* populated our parent rel with paths. For the present, that appears to * create a PartitionedChildRelInfo with the accumulated list of
* be unnecessary.) * partitioned_rels which would then be found when populated our parent
* rel with paths. For the present, that appears to be unnecessary.)
*/ */
rte = planner_rt_fetch(rel->relid, root); rte = planner_rt_fetch(rel->relid, root);
switch (rte->rtekind) switch (rte->rtekind)
{ {
case RTE_RELATION: case RTE_RELATION:
if (rte->relkind == RELKIND_PARTITIONED_TABLE) if (rte->relkind == RELKIND_PARTITIONED_TABLE)
{
partitioned_rels = partitioned_rels =
get_partitioned_child_rels(root, rel->relid); get_partitioned_child_rels(root, rel->relid);
Assert(list_length(partitioned_rels) >= 1);
}
break; break;
case RTE_SUBQUERY: case RTE_SUBQUERY:
build_partitioned_rels = true; build_partitioned_rels = true;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "postgres.h" #include "postgres.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "catalog/pg_class.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
#include "optimizer/cost.h" #include "optimizer/cost.h"
...@@ -629,11 +630,28 @@ create_lateral_join_info(PlannerInfo *root) ...@@ -629,11 +630,28 @@ create_lateral_join_info(PlannerInfo *root)
for (rti = 1; rti < root->simple_rel_array_size; rti++) for (rti = 1; rti < root->simple_rel_array_size; rti++)
{ {
RelOptInfo *brel = root->simple_rel_array[rti]; RelOptInfo *brel = root->simple_rel_array[rti];
RangeTblEntry *brte = root->simple_rte_array[rti];
if (brel == NULL || brel->reloptkind != RELOPT_BASEREL) if (brel == NULL)
continue;
/*
* In the case of table inheritance, the parent RTE is directly linked
* to every child table via an AppendRelInfo. In the case of table
* partitioning, the inheritance hierarchy is expanded one level at a
* time rather than flattened. Therefore, an other member rel that is
* a partitioned table may have children of its own, and must
* therefore be marked with the appropriate lateral info so that those
* children eventually get marked also.
*/
Assert(IS_SIMPLE_REL(brel));
Assert(brte);
if (brel->reloptkind == RELOPT_OTHER_MEMBER_REL &&
(brte->rtekind != RTE_RELATION ||
brte->relkind != RELKIND_PARTITIONED_TABLE))
continue; continue;
if (root->simple_rte_array[rti]->inh) if (brte->inh)
{ {
foreach(lc, root->append_rel_list) foreach(lc, root->append_rel_list)
{ {
......
...@@ -1038,7 +1038,7 @@ static void ...@@ -1038,7 +1038,7 @@ static void
inheritance_planner(PlannerInfo *root) inheritance_planner(PlannerInfo *root)
{ {
Query *parse = root->parse; Query *parse = root->parse;
int parentRTindex = parse->resultRelation; int top_parentRTindex = parse->resultRelation;
Bitmapset *subqueryRTindexes; Bitmapset *subqueryRTindexes;
Bitmapset *modifiableARIindexes; Bitmapset *modifiableARIindexes;
int nominalRelation = -1; int nominalRelation = -1;
...@@ -1056,6 +1056,10 @@ inheritance_planner(PlannerInfo *root) ...@@ -1056,6 +1056,10 @@ inheritance_planner(PlannerInfo *root)
Index rti; Index rti;
RangeTblEntry *parent_rte; RangeTblEntry *parent_rte;
List *partitioned_rels = NIL; List *partitioned_rels = NIL;
PlannerInfo *parent_root;
Query *parent_parse;
Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
PlannerInfo **parent_roots = NULL;
Assert(parse->commandType != CMD_INSERT); Assert(parse->commandType != CMD_INSERT);
...@@ -1119,11 +1123,31 @@ inheritance_planner(PlannerInfo *root) ...@@ -1119,11 +1123,31 @@ inheritance_planner(PlannerInfo *root)
* (including the root parent) as child members of the inheritance set do * (including the root parent) as child members of the inheritance set do
* not appear anywhere else in the plan. The situation is exactly the * not appear anywhere else in the plan. The situation is exactly the
* opposite in the case of non-partitioned inheritance parent as described * opposite in the case of non-partitioned inheritance parent as described
* below. * below. For the same reason, collect the list of descendant partitioned
* tables to be saved in ModifyTable node, so that executor can lock those
* as well.
*/ */
parent_rte = rt_fetch(parentRTindex, root->parse->rtable); parent_rte = rt_fetch(top_parentRTindex, root->parse->rtable);
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE) if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
nominalRelation = parentRTindex; {
nominalRelation = top_parentRTindex;
partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
}
/*
* The PlannerInfo for each child is obtained by translating the relevant
* members of the PlannerInfo for its immediate parent, which we find
* using the parent_relid in its AppendRelInfo. We save the PlannerInfo
* for each parent in an array indexed by relid for fast retrieval. Since
* the maximum number of parents is limited by the number of RTEs in the
* query, we use that number to allocate the array. An extra entry is
* needed since relids start from 1.
*/
parent_roots = (PlannerInfo **) palloc0((list_length(parse->rtable) + 1) *
sizeof(PlannerInfo *));
parent_roots[top_parentRTindex] = root;
/* /*
* And now we can get on with generating a plan for each child table. * And now we can get on with generating a plan for each child table.
...@@ -1137,15 +1161,24 @@ inheritance_planner(PlannerInfo *root) ...@@ -1137,15 +1161,24 @@ inheritance_planner(PlannerInfo *root)
Path *subpath; Path *subpath;
/* append_rel_list contains all append rels; ignore others */ /* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex) if (!bms_is_member(appinfo->parent_relid, parent_relids))
continue; continue;
/*
* expand_inherited_rtentry() always processes a parent before any of
* that parent's children, so the parent_root for this relation should
* already be available.
*/
parent_root = parent_roots[appinfo->parent_relid];
Assert(parent_root != NULL);
parent_parse = parent_root->parse;
/* /*
* We need a working copy of the PlannerInfo so that we can control * We need a working copy of the PlannerInfo so that we can control
* propagation of information back to the main copy. * propagation of information back to the main copy.
*/ */
subroot = makeNode(PlannerInfo); subroot = makeNode(PlannerInfo);
memcpy(subroot, root, sizeof(PlannerInfo)); memcpy(subroot, parent_root, sizeof(PlannerInfo));
/* /*
* Generate modified query with this rel as target. We first apply * Generate modified query with this rel as target. We first apply
...@@ -1154,15 +1187,15 @@ inheritance_planner(PlannerInfo *root) ...@@ -1154,15 +1187,15 @@ inheritance_planner(PlannerInfo *root)
* then fool around with subquery RTEs. * then fool around with subquery RTEs.
*/ */
subroot->parse = (Query *) subroot->parse = (Query *)
adjust_appendrel_attrs(root, adjust_appendrel_attrs(parent_root,
(Node *) parse, (Node *) parent_parse,
1, &appinfo); 1, &appinfo);
/* /*
* If there are securityQuals attached to the parent, move them to the * If there are securityQuals attached to the parent, move them to the
* child rel (they've already been transformed properly for that). * child rel (they've already been transformed properly for that).
*/ */
parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable); parent_rte = rt_fetch(appinfo->parent_relid, subroot->parse->rtable);
child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable); child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
child_rte->securityQuals = parent_rte->securityQuals; child_rte->securityQuals = parent_rte->securityQuals;
parent_rte->securityQuals = NIL; parent_rte->securityQuals = NIL;
...@@ -1173,7 +1206,7 @@ inheritance_planner(PlannerInfo *root) ...@@ -1173,7 +1206,7 @@ inheritance_planner(PlannerInfo *root)
* executor doesn't need to see the modified copies --- we can just * executor doesn't need to see the modified copies --- we can just
* pass it the original rowMarks list.) * pass it the original rowMarks list.)
*/ */
subroot->rowMarks = copyObject(root->rowMarks); subroot->rowMarks = copyObject(parent_root->rowMarks);
/* /*
* The append_rel_list likewise might contain references to subquery * The append_rel_list likewise might contain references to subquery
...@@ -1190,7 +1223,7 @@ inheritance_planner(PlannerInfo *root) ...@@ -1190,7 +1223,7 @@ inheritance_planner(PlannerInfo *root)
ListCell *lc2; ListCell *lc2;
subroot->append_rel_list = NIL; subroot->append_rel_list = NIL;
foreach(lc2, root->append_rel_list) foreach(lc2, parent_root->append_rel_list)
{ {
AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2); AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
...@@ -1225,7 +1258,7 @@ inheritance_planner(PlannerInfo *root) ...@@ -1225,7 +1258,7 @@ inheritance_planner(PlannerInfo *root)
ListCell *lr; ListCell *lr;
rti = 1; rti = 1;
foreach(lr, parse->rtable) foreach(lr, parent_parse->rtable)
{ {
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lr); RangeTblEntry *rte = lfirst_node(RangeTblEntry, lr);
...@@ -1272,6 +1305,22 @@ inheritance_planner(PlannerInfo *root) ...@@ -1272,6 +1305,22 @@ inheritance_planner(PlannerInfo *root)
/* hack to mark target relation as an inheritance partition */ /* hack to mark target relation as an inheritance partition */
subroot->hasInheritedTarget = true; subroot->hasInheritedTarget = true;
/*
* If the child is further partitioned, remember it as a parent. Since
* a partitioned table does not have any data, we don't need to create
* a plan for it. We do, however, need to remember the PlannerInfo for
* use when processing its children.
*/
if (child_rte->inh)
{
Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
parent_relids =
bms_add_member(parent_relids, appinfo->child_relid);
parent_roots[appinfo->child_relid] = subroot;
continue;
}
/* Generate Path(s) for accessing this result relation */ /* Generate Path(s) for accessing this result relation */
grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
...@@ -1368,13 +1417,6 @@ inheritance_planner(PlannerInfo *root) ...@@ -1368,13 +1417,6 @@ inheritance_planner(PlannerInfo *root)
Assert(!parse->onConflict); Assert(!parse->onConflict);
} }
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
{
partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
}
/* Result path must go into outer query's FINAL upperrel */ /* Result path must go into outer query's FINAL upperrel */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
......
...@@ -104,16 +104,14 @@ static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, ...@@ -104,16 +104,14 @@ static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
static void expand_partitioned_rtentry(PlannerInfo *root, static void expand_partitioned_rtentry(PlannerInfo *root,
RangeTblEntry *parentrte, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, PartitionDesc partdesc, PlanRowMark *top_parentrc, LOCKMODE lockmode,
LOCKMODE lockmode, List **appinfos, List **partitioned_child_rels);
bool *has_child, List **appinfos,
List **partitioned_child_rels);
static void expand_single_inheritance_child(PlannerInfo *root, static void expand_single_inheritance_child(PlannerInfo *root,
RangeTblEntry *parentrte, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, Relation childrel, PlanRowMark *top_parentrc, Relation childrel,
bool *has_child, List **appinfos, List **appinfos, RangeTblEntry **childrte_p,
List **partitioned_child_rels); Index *childRTindex_p);
static void make_inh_translation_list(Relation oldrelation, static void make_inh_translation_list(Relation oldrelation,
Relation newrelation, Relation newrelation,
Index newvarno, Index newvarno,
...@@ -1348,9 +1346,9 @@ expand_inherited_tables(PlannerInfo *root) ...@@ -1348,9 +1346,9 @@ expand_inherited_tables(PlannerInfo *root)
ListCell *rl; ListCell *rl;
/* /*
* expand_inherited_rtentry may add RTEs to parse->rtable; there is no * expand_inherited_rtentry may add RTEs to parse->rtable. The function is
* need to scan them since they can't have inh=true. So just scan as far * expected to recursively handle any RTEs that it creates with inh=true.
* as the original end of the rtable list. * So just scan as far as the original end of the rtable list.
*/ */
nrtes = list_length(root->parse->rtable); nrtes = list_length(root->parse->rtable);
rl = list_head(root->parse->rtable); rl = list_head(root->parse->rtable);
...@@ -1392,11 +1390,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1392,11 +1390,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
Relation oldrelation; Relation oldrelation;
LOCKMODE lockmode; LOCKMODE lockmode;
List *inhOIDs; List *inhOIDs;
List *appinfos;
ListCell *l; ListCell *l;
bool has_child;
PartitionedChildRelInfo *pcinfo;
List *partitioned_child_rels = NIL;
/* Does RT entry allow inheritance? */ /* Does RT entry allow inheritance? */
if (!rte->inh) if (!rte->inh)
...@@ -1467,27 +1461,44 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1467,27 +1461,44 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
oldrelation = heap_open(parentOID, NoLock); oldrelation = heap_open(parentOID, NoLock);
/* Scan the inheritance set and expand it */ /* Scan the inheritance set and expand it */
appinfos = NIL;
has_child = false;
if (RelationGetPartitionDesc(oldrelation) != NULL) if (RelationGetPartitionDesc(oldrelation) != NULL)
{ {
List *partitioned_child_rels = NIL;
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
/* /*
* If this table has partitions, recursively expand them in the order * If this table has partitions, recursively expand them in the order
* in which they appear in the PartitionDesc. But first, expand the * in which they appear in the PartitionDesc.
* parent itself.
*/ */
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
oldrelation,
&has_child, &appinfos,
&partitioned_child_rels);
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc, expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
RelationGetPartitionDesc(oldrelation), lockmode, &root->append_rel_list,
lockmode, &partitioned_child_rels);
&has_child, &appinfos,
&partitioned_child_rels); /*
* We keep a list of objects in root, each of which maps a root
* partitioned parent RT index to the list of RT indexes of descendant
* partitioned child tables. When creating an Append or a ModifyTable
* path for the parent, we copy the child RT index list verbatim to
* the path so that it could be carried over to the executor so that
* the latter could identify the partitioned child tables.
*/
if (rte->inh && partitioned_child_rels != NIL)
{
PartitionedChildRelInfo *pcinfo;
pcinfo = makeNode(PartitionedChildRelInfo);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
}
} }
else else
{ {
List *appinfos = NIL;
RangeTblEntry *childrte;
Index childRTindex;
/* /*
* This table has no partitions. Expand any plain inheritance * This table has no partitions. Expand any plain inheritance
* children in the order the OIDs were returned by * children in the order the OIDs were returned by
...@@ -1518,51 +1529,30 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1518,51 +1529,30 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc, expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
newrelation, newrelation,
&has_child, &appinfos, &appinfos, &childrte,
&partitioned_child_rels); &childRTindex);
/* Close child relations, but keep locks */ /* Close child relations, but keep locks */
if (childOID != parentOID) if (childOID != parentOID)
heap_close(newrelation, NoLock); heap_close(newrelation, NoLock);
} }
}
heap_close(oldrelation, NoLock);
/* /*
* If all the children were temp tables or a partitioned parent did not * If all the children were temp tables, pretend it's a
* have any leaf partitions, pretend it's a non-inheritance situation; we * non-inheritance situation; we don't need Append node in that case.
* don't need Append node in that case. The duplicate RTE we added for * The duplicate RTE we added for the parent table is harmless, so we
* the parent table is harmless, so we don't bother to get rid of it; * don't bother to get rid of it; ditto for the useless PlanRowMark
* ditto for the useless PlanRowMark node. * node.
*/ */
if (!has_child) if (list_length(appinfos) < 2)
{ rte->inh = false;
/* Clear flag before returning */ else
rte->inh = false; root->append_rel_list = list_concat(root->append_rel_list,
return; appinfos);
}
/*
* We keep a list of objects in root, each of which maps a partitioned
* parent RT index to the list of RT indexes of its partitioned child
* tables. When creating an Append or a ModifyTable path for the parent,
* we copy the child RT index list verbatim to the path so that it could
* be carried over to the executor so that the latter could identify the
* partitioned child tables.
*/
if (partitioned_child_rels != NIL)
{
pcinfo = makeNode(PartitionedChildRelInfo);
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
} }
/* Otherwise, OK to add to root->append_rel_list */ heap_close(oldrelation, NoLock);
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
} }
/* /*
...@@ -1575,15 +1565,35 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1575,15 +1565,35 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
static void static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, PartitionDesc partdesc, PlanRowMark *top_parentrc, LOCKMODE lockmode,
LOCKMODE lockmode, List **appinfos, List **partitioned_child_rels)
bool *has_child, List **appinfos,
List **partitioned_child_rels)
{ {
int i; int i;
RangeTblEntry *childrte;
Index childRTindex;
bool has_child = false;
PartitionDesc partdesc = RelationGetPartitionDesc(parentrel);
check_stack_depth(); check_stack_depth();
/* A partitioned table should always have a partition descriptor. */
Assert(partdesc);
Assert(parentrte->inh);
/* First expand the partitioned table itself. */
expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel,
top_parentrc, parentrel,
appinfos, &childrte, &childRTindex);
/*
* The partitioned table does not have data for itself but still needs to
* be locked. Update the given list of partitioned children with the RTI of
* this partitioned relation.
*/
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex);
for (i = 0; i < partdesc->nparts; i++) for (i = 0; i < partdesc->nparts; i++)
{ {
Oid childOID = partdesc->oids[i]; Oid childOID = partdesc->oids[i];
...@@ -1599,23 +1609,30 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1599,23 +1609,30 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
continue; continue;
} }
/* We have a real partition. */
has_child = true;
expand_single_inheritance_child(root, parentrte, parentRTindex, expand_single_inheritance_child(root, parentrte, parentRTindex,
parentrel, parentrc, childrel, parentrel, top_parentrc, childrel,
has_child, appinfos, appinfos, &childrte, &childRTindex);
partitioned_child_rels);
/* If this child is itself partitioned, recurse */ /* If this child is itself partitioned, recurse */
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
expand_partitioned_rtentry(root, parentrte, parentRTindex, expand_partitioned_rtentry(root, childrte, childRTindex,
parentrel, parentrc, childrel, top_parentrc, lockmode,
RelationGetPartitionDesc(childrel), appinfos, partitioned_child_rels);
lockmode,
has_child, appinfos,
partitioned_child_rels);
/* Close child relation, but keep locks */ /* Close child relation, but keep locks */
heap_close(childrel, NoLock); heap_close(childrel, NoLock);
} }
/*
* If the partitioned table has no partitions or all the partitions are
* temporary tables from other backends, treat this as non-inheritance
* case.
*/
if (!has_child)
parentrte->inh = false;
} }
/* /*
...@@ -1623,16 +1640,31 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1623,16 +1640,31 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
* Expand a single inheritance child, if needed. * Expand a single inheritance child, if needed.
* *
* If this is a temp table of another backend, we'll return without doing * If this is a temp table of another backend, we'll return without doing
* anything at all. Otherwise, we'll set "has_child" to true, build a * anything at all. Otherwise, build a RangeTblEntry and an AppendRelInfo, if
* RangeTblEntry and either a PartitionedChildRelInfo or AppendRelInfo as
* appropriate, plus maybe a PlanRowMark. * appropriate, plus maybe a PlanRowMark.
*
* We now expand the partition hierarchy level by level, creating a
* corresponding hierarchy of AppendRelInfos and RelOptInfos, where each
* partitioned descendant acts as a parent of its immediate partitions.
* (This is a difference from what older versions of PostgreSQL did and what
* is still done in the case of table inheritance for unpartitioned tables,
* where the hierarchy is flattened during RTE expansion.)
*
* PlanRowMarks still carry the top-parent's RTI, and the top-parent's
* allMarkTypes field still accumulates values from all descendants.
*
* "parentrte" and "parentRTindex" are immediate parent's RTE and
* RTI. "top_parentrc" is top parent's PlanRowMark.
*
* The child RangeTblEntry and its RTI are returned in "childrte_p" and
* "childRTindex_p" resp.
*/ */
static void static void
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, Relation childrel, PlanRowMark *top_parentrc, Relation childrel,
bool *has_child, List **appinfos, List **appinfos, RangeTblEntry **childrte_p,
List **partitioned_child_rels) Index *childRTindex_p)
{ {
Query *parse = root->parse; Query *parse = root->parse;
Oid parentOID = RelationGetRelid(parentrel); Oid parentOID = RelationGetRelid(parentrel);
...@@ -1654,24 +1686,30 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1654,24 +1686,30 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
* restriction clauses, so we don't need to do it here. * restriction clauses, so we don't need to do it here.
*/ */
childrte = copyObject(parentrte); childrte = copyObject(parentrte);
*childrte_p = childrte;
childrte->relid = childOID; childrte->relid = childOID;
childrte->relkind = childrel->rd_rel->relkind; childrte->relkind = childrel->rd_rel->relkind;
childrte->inh = false; /* A partitioned child will need to be expanded further. */
if (childOID != parentOID &&
childrte->relkind == RELKIND_PARTITIONED_TABLE)
childrte->inh = true;
else
childrte->inh = false;
childrte->requiredPerms = 0; childrte->requiredPerms = 0;
childrte->securityQuals = NIL; childrte->securityQuals = NIL;
parse->rtable = lappend(parse->rtable, childrte); parse->rtable = lappend(parse->rtable, childrte);
childRTindex = list_length(parse->rtable); childRTindex = list_length(parse->rtable);
*childRTindex_p = childRTindex;
/* /*
* Build an AppendRelInfo for this parent and child, unless the child is a * We need an AppendRelInfo if paths will be built for the child RTE. If
* partitioned table. * childrte->inh is true, then we'll always need to generate append paths
* for it. If childrte->inh is false, we must scan it if it's not a
* partitioned table; but if it is a partitioned table, then it never has
* any data of its own and need not be scanned.
*/ */
if (childrte->relkind != RELKIND_PARTITIONED_TABLE) if (childrte->relkind != RELKIND_PARTITIONED_TABLE || childrte->inh)
{ {
/* Remember if we saw a real child. */
if (childOID != parentOID)
*has_child = true;
appinfo = makeNode(AppendRelInfo); appinfo = makeNode(AppendRelInfo);
appinfo->parent_relid = parentRTindex; appinfo->parent_relid = parentRTindex;
appinfo->child_relid = childRTindex; appinfo->child_relid = childRTindex;
...@@ -1701,25 +1739,23 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1701,25 +1739,23 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
appinfo->translated_vars); appinfo->translated_vars);
} }
} }
else
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex);
/* /*
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE. * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
*/ */
if (parentrc) if (top_parentrc)
{ {
PlanRowMark *childrc = makeNode(PlanRowMark); PlanRowMark *childrc = makeNode(PlanRowMark);
childrc->rti = childRTindex; childrc->rti = childRTindex;
childrc->prti = parentRTindex; childrc->prti = top_parentrc->rti;
childrc->rowmarkId = parentrc->rowmarkId; childrc->rowmarkId = top_parentrc->rowmarkId;
/* Reselect rowmark type, because relkind might not match parent */ /* Reselect rowmark type, because relkind might not match parent */
childrc->markType = select_rowmark_type(childrte, parentrc->strength); childrc->markType = select_rowmark_type(childrte,
top_parentrc->strength);
childrc->allMarkTypes = (1 << childrc->markType); childrc->allMarkTypes = (1 << childrc->markType);
childrc->strength = parentrc->strength; childrc->strength = top_parentrc->strength;
childrc->waitPolicy = parentrc->waitPolicy; childrc->waitPolicy = top_parentrc->waitPolicy;
/* /*
* We mark RowMarks for partitioned child tables as parent RowMarks so * We mark RowMarks for partitioned child tables as parent RowMarks so
...@@ -1728,8 +1764,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1728,8 +1764,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
*/ */
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE); childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
/* Include child's rowmark type in parent's allMarkTypes */ /* Include child's rowmark type in top parent's allMarkTypes */
parentrc->allMarkTypes |= childrc->allMarkTypes; top_parentrc->allMarkTypes |= childrc->allMarkTypes;
root->rowMarks = lappend(root->rowMarks, childrc); root->rowMarks = lappend(root->rowMarks, childrc);
} }
......
...@@ -1935,10 +1935,10 @@ typedef struct SpecialJoinInfo ...@@ -1935,10 +1935,10 @@ typedef struct SpecialJoinInfo
* *
* When we expand an inheritable table or a UNION-ALL subselect into an * When we expand an inheritable table or a UNION-ALL subselect into an
* "append relation" (essentially, a list of child RTEs), we build an * "append relation" (essentially, a list of child RTEs), we build an
* AppendRelInfo for each non-partitioned child RTE. The list of * AppendRelInfo for each child RTE. The list of AppendRelInfos indicates
* AppendRelInfos indicates which child RTEs must be included when expanding * which child RTEs must be included when expanding the parent, and each node
* the parent, and each node carries information needed to translate Vars * carries information needed to translate Vars referencing the parent into
* referencing the parent into Vars referencing that child. * Vars referencing that child.
* *
* These structs are kept in the PlannerInfo node's append_rel_list. * These structs are kept in the PlannerInfo node's append_rel_list.
* Note that we just throw all the structs into one list, and scan the * Note that we just throw all the structs into one list, and scan the
......
...@@ -625,6 +625,28 @@ select tableoid::regclass::text as relname, parted_tab.* from parted_tab order b ...@@ -625,6 +625,28 @@ select tableoid::regclass::text as relname, parted_tab.* from parted_tab order b
(3 rows) (3 rows)
drop table parted_tab; drop table parted_tab;
-- Check UPDATE with multi-level partitioned inherited target
create table mlparted_tab (a int, b char, c text) partition by list (a);
create table mlparted_tab_part1 partition of mlparted_tab for values in (1);
create table mlparted_tab_part2 partition of mlparted_tab for values in (2) partition by list (b);
create table mlparted_tab_part3 partition of mlparted_tab for values in (3);
create table mlparted_tab_part2a partition of mlparted_tab_part2 for values in ('a');
create table mlparted_tab_part2b partition of mlparted_tab_part2 for values in ('b');
insert into mlparted_tab values (1, 'a'), (2, 'a'), (2, 'b'), (3, 'a');
update mlparted_tab mlp set c = 'xxx'
from
(select a from some_tab union all select a+1 from some_tab) ss (a)
where (mlp.a = ss.a and mlp.b = 'b') or mlp.a = 3;
select tableoid::regclass::text as relname, mlparted_tab.* from mlparted_tab order by 1,2;
relname | a | b | c
---------------------+---+---+-----
mlparted_tab_part1 | 1 | a |
mlparted_tab_part2a | 2 | a |
mlparted_tab_part2b | 2 | b | xxx
mlparted_tab_part3 | 3 | a | xxx
(4 rows)
drop table mlparted_tab;
drop table some_tab cascade; drop table some_tab cascade;
NOTICE: drop cascades to table some_tab_child NOTICE: drop cascades to table some_tab_child
/* Test multiple inheritance of column defaults */ /* Test multiple inheritance of column defaults */
......
...@@ -5328,6 +5328,59 @@ LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ...@@ -5328,6 +5328,59 @@ LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
^ ^
HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query.
-- --
-- test LATERAL reference propagation down a multi-level inheritance hierarchy
-- produced for a multi-level partitioned table hierarchy.
--
create table pt1 (a int, b int, c varchar) partition by range(a);
create table pt1p1 partition of pt1 for values from (0) to (100) partition by range(b);
create table pt1p2 partition of pt1 for values from (100) to (200);
create table pt1p1p1 partition of pt1p1 for values from (0) to (100);
insert into pt1 values (1, 1, 'x'), (101, 101, 'y');
create table ut1 (a int, b int, c varchar);
insert into ut1 values (101, 101, 'y'), (2, 2, 'z');
explain (verbose, costs off)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
QUERY PLAN
-------------------------------------------------------------
Sort
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
Sort Key: t1.a
-> Nested Loop Left Join
Output: t1.b, (LEAST(t1.a, t2.a, t3.a)), t1.a
-> Seq Scan on public.ut1 t1
Output: t1.a, t1.b, t1.c
-> Hash Join
Output: t2.a, LEAST(t1.a, t2.a, t3.a)
Hash Cond: (t3.b = t2.a)
-> Seq Scan on public.ut1 t3
Output: t3.a, t3.b, t3.c
-> Hash
Output: t2.a
-> Append
-> Seq Scan on public.pt1p1p1 t2
Output: t2.a
Filter: (t1.a = t2.a)
-> Seq Scan on public.pt1p2 t2_1
Output: t2_1.a
Filter: (t1.a = t2_1.a)
(21 rows)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
b | phv
-----+-----
2 |
101 | 101
(2 rows)
drop table pt1;
drop table ut1;
--
-- test that foreign key join estimation performs sanely for outer joins
--
begin;
......
...@@ -154,6 +154,23 @@ where parted_tab.a = ss.a;
select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
drop table parted_tab;
-- Check UPDATE with multi-level partitioned inherited target
create table mlparted_tab (a int, b char, c text) partition by list (a);
create table mlparted_tab_part1 partition of mlparted_tab for values in (1);
create table mlparted_tab_part2 partition of mlparted_tab for values in (2) partition by list (b);
create table mlparted_tab_part3 partition of mlparted_tab for values in (3);
create table mlparted_tab_part2a partition of mlparted_tab_part2 for values in ('a');
create table mlparted_tab_part2b partition of mlparted_tab_part2 for values in ('b');
insert into mlparted_tab values (1, 'a'), (2, 'a'), (2, 'b'), (3, 'a');
update mlparted_tab mlp set c = 'xxx'
from
(select a from some_tab union all select a+1 from some_tab) ss (a)
where (mlp.a = ss.a and mlp.b = 'b') or mlp.a = 3;
select tableoid::regclass::text as relname, mlparted_tab.* from mlparted_tab order by 1,2;
drop table mlparted_tab;
drop table some_tab cascade;
/* Test multiple inheritance of column defaults */
......
...@@ -1733,6 +1733,29 @@ delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
--
-- test LATERAL reference propagation down a multi-level inheritance hierarchy
-- produced for a multi-level partitioned table hierarchy.
--
create table pt1 (a int, b int, c varchar) partition by range(a);
create table pt1p1 partition of pt1 for values from (0) to (100) partition by range(b);
create table pt1p2 partition of pt1 for values from (100) to (200);
create table pt1p1p1 partition of pt1p1 for values from (0) to (100);
insert into pt1 values (1, 1, 'x'), (101, 101, 'y');
create table ut1 (a int, b int, c varchar);
insert into ut1 values (101, 101, 'y'), (2, 2, 'z');
explain (verbose, costs off)
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
select t1.b, ss.phv from ut1 t1 left join lateral
(select t2.a as t2a, t3.a t3a, least(t1.a, t2.a, t3.a) phv
from pt1 t2 join ut1 t3 on t2.a = t3.b) ss
on t1.a = ss.t2a order by t1.a;
drop table pt1;
drop table ut1;
--
-- test that foreign key join estimation performs sanely for outer joins
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment