Commit 30833ba1 authored by Robert Haas's avatar Robert Haas

Expand partitioned tables in PartDesc order.

Previously, we expanded the inheritance hierarchy in the order in
which find_all_inheritors had locked the tables, but that turns out
to block quite a bit of useful optimization.  For example, a
partition-wise join can't count on two tables with matching bounds
to get expanded in the same order.

Where possible, this change results in expanding partitioned tables in
*bound* order.  Bound order isn't well-defined for a list-partitioned
table with a null-accepting partition or for a list-partitioned table
where the bounds for a single partition are interleaved with other
partitions.  However, when expansion in bound order is possible, it
opens up further opportunities for optimization, such as
strength-reducing MergeAppend to Append when the expansion order
matches the desired sort order.

Patch by me, with cosmetic revisions by Ashutosh Bapat.

Discussion: http://postgr.es/m/CA+TgmoZrKj7kEzcMSum3aXV4eyvvbh9WD=c6m=002WMheDyE3A@mail.gmail.com
parent 6708e447
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/sysattr.h" #include "access/sysattr.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits_fn.h" #include "catalog/pg_inherits_fn.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "miscadmin.h" #include "miscadmin.h"
...@@ -100,6 +101,19 @@ static List *generate_append_tlist(List *colTypes, List *colCollations, ...@@ -100,6 +101,19 @@ static List *generate_append_tlist(List *colTypes, List *colCollations,
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist); static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
Index rti); Index rti);
static void expand_partitioned_rtentry(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, PartitionDesc partdesc,
LOCKMODE lockmode,
bool *has_child, List **appinfos,
List **partitioned_child_rels);
static void expand_single_inheritance_child(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, Relation childrel,
bool *has_child, List **appinfos,
List **partitioned_child_rels);
static void make_inh_translation_list(Relation oldrelation, static void make_inh_translation_list(Relation oldrelation,
Relation newrelation, Relation newrelation,
Index newvarno, Index newvarno,
...@@ -1455,13 +1469,34 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1455,13 +1469,34 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* Scan the inheritance set and expand it */ /* Scan the inheritance set and expand it */
appinfos = NIL; appinfos = NIL;
has_child = false; has_child = false;
if (RelationGetPartitionDesc(oldrelation) != NULL)
{
/*
* If this table has partitions, recursively expand them in the order
* in which they appear in the PartitionDesc. But first, expand the
* parent itself.
*/
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
oldrelation,
&has_child, &appinfos,
&partitioned_child_rels);
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
RelationGetPartitionDesc(oldrelation),
lockmode,
&has_child, &appinfos,
&partitioned_child_rels);
}
else
{
/*
* This table has no partitions. Expand any plain inheritance
* children in the order the OIDs were returned by
* find_all_inheritors.
*/
foreach(l, inhOIDs) foreach(l, inhOIDs)
{ {
Oid childOID = lfirst_oid(l); Oid childOID = lfirst_oid(l);
Relation newrelation; Relation newrelation;
RangeTblEntry *childrte;
Index childRTindex;
AppendRelInfo *appinfo;
/* Open rel if needed; we already have required locks */ /* Open rel if needed; we already have required locks */
if (childOID != parentOID) if (childOID != parentOID)
...@@ -1472,8 +1507,8 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1472,8 +1507,8 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* /*
* It is possible that the parent table has children that are temp * It is possible that the parent table has children that are temp
* tables of other backends. We cannot safely access such tables * tables of other backends. We cannot safely access such tables
* (because of buffering issues), and the best thing to do seems to be * (because of buffering issues), and the best thing to do seems
* to silently ignore them. * to be to silently ignore them.
*/ */
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation)) if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
{ {
...@@ -1481,21 +1516,139 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1481,21 +1516,139 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
continue; continue;
} }
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
newrelation,
&has_child, &appinfos,
&partitioned_child_rels);
/* Close child relations, but keep locks */
if (childOID != parentOID)
heap_close(newrelation, NoLock);
}
}
heap_close(oldrelation, NoLock);
/* /*
* Build an RTE for the child, and attach to query's rangetable list. * If all the children were temp tables or a partitioned parent did not
* We copy most fields of the parent's RTE, but replace relation OID * have any leaf partitions, pretend it's a non-inheritance situation; we
* and relkind, and set inh = false. Also, set requiredPerms to zero * don't need Append node in that case. The duplicate RTE we added for
* since all required permissions checks are done on the original RTE. * the parent table is harmless, so we don't bother to get rid of it;
* Likewise, set the child's securityQuals to empty, because we only * ditto for the useless PlanRowMark node.
* want to apply the parent's RLS conditions regardless of what RLS
* properties individual children may have. (This is an intentional
* choice to make inherited RLS work like regular permissions checks.)
* The parent securityQuals will be propagated to children along with
* other base restriction clauses, so we don't need to do it here.
*/ */
childrte = copyObject(rte); if (!has_child)
{
/* Clear flag before returning */
rte->inh = false;
return;
}
/*
* We keep a list of objects in root, each of which maps a partitioned
* parent RT index to the list of RT indexes of its partitioned child
* tables. When creating an Append or a ModifyTable path for the parent,
* we copy the child RT index list verbatim to the path so that it could
* be carried over to the executor so that the latter could identify the
* partitioned child tables.
*/
if (partitioned_child_rels != NIL)
{
pcinfo = makeNode(PartitionedChildRelInfo);
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
}
/* Otherwise, OK to add to root->append_rel_list */
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
}
static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, PartitionDesc partdesc,
LOCKMODE lockmode,
bool *has_child, List **appinfos,
List **partitioned_child_rels)
{
int i;
check_stack_depth();
for (i = 0; i < partdesc->nparts; i++)
{
Oid childOID = partdesc->oids[i];
Relation childrel;
/* Open rel; we already have required locks */
childrel = heap_open(childOID, NoLock);
/* As in expand_inherited_rtentry, skip non-local temp tables */
if (RELATION_IS_OTHER_TEMP(childrel))
{
heap_close(childrel, lockmode);
continue;
}
expand_single_inheritance_child(root, parentrte, parentRTindex,
parentrel, parentrc, childrel,
has_child, appinfos,
partitioned_child_rels);
/* If this child is itself partitioned, recurse */
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
expand_partitioned_rtentry(root, parentrte, parentRTindex,
parentrel, parentrc,
RelationGetPartitionDesc(childrel),
lockmode,
has_child, appinfos,
partitioned_child_rels);
/* Close child relation, but keep locks */
heap_close(childrel, NoLock);
}
}
/*
* expand_single_inheritance_child
* Expand a single inheritance child, if needed.
*
* If this is a temp table of another backend, we'll return without doing
* anything at all. Otherwise, we'll set "has_child" to true, build a
* RangeTblEntry and either a PartitionedChildRelInfo or AppendRelInfo as
* appropriate, plus maybe a PlanRowMark.
*/
static void
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *parentrc, Relation childrel,
bool *has_child, List **appinfos,
List **partitioned_child_rels)
{
Query *parse = root->parse;
Oid parentOID = RelationGetRelid(parentrel);
Oid childOID = RelationGetRelid(childrel);
RangeTblEntry *childrte;
Index childRTindex;
AppendRelInfo *appinfo;
/*
* Build an RTE for the child, and attach to query's rangetable list. We
* copy most fields of the parent's RTE, but replace relation OID and
* relkind, and set inh = false. Also, set requiredPerms to zero since
* all required permissions checks are done on the original RTE. Likewise,
* set the child's securityQuals to empty, because we only want to apply
* the parent's RLS conditions regardless of what RLS properties
* individual children may have. (This is an intentional choice to make
* inherited RLS work like regular permissions checks.) The parent
* securityQuals will be propagated to children along with other base
* restriction clauses, so we don't need to do it here.
*/
childrte = copyObject(parentrte);
childrte->relid = childOID; childrte->relid = childOID;
childrte->relkind = newrelation->rd_rel->relkind; childrte->relkind = childrel->rd_rel->relkind;
childrte->inh = false; childrte->inh = false;
childrte->requiredPerms = 0; childrte->requiredPerms = 0;
childrte->securityQuals = NIL; childrte->securityQuals = NIL;
...@@ -1503,121 +1656,76 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1503,121 +1656,76 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
childRTindex = list_length(parse->rtable); childRTindex = list_length(parse->rtable);
/* /*
* Build an AppendRelInfo for this parent and child, unless the child * Build an AppendRelInfo for this parent and child, unless the child is a
* is a partitioned table. * partitioned table.
*/ */
if (childrte->relkind != RELKIND_PARTITIONED_TABLE) if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
{ {
/* Remember if we saw a real child. */ /* Remember if we saw a real child. */
if (childOID != parentOID) if (childOID != parentOID)
has_child = true; *has_child = true;
appinfo = makeNode(AppendRelInfo); appinfo = makeNode(AppendRelInfo);
appinfo->parent_relid = rti; appinfo->parent_relid = parentRTindex;
appinfo->child_relid = childRTindex; appinfo->child_relid = childRTindex;
appinfo->parent_reltype = oldrelation->rd_rel->reltype; appinfo->parent_reltype = parentrel->rd_rel->reltype;
appinfo->child_reltype = newrelation->rd_rel->reltype; appinfo->child_reltype = childrel->rd_rel->reltype;
make_inh_translation_list(oldrelation, newrelation, childRTindex, make_inh_translation_list(parentrel, childrel, childRTindex,
&appinfo->translated_vars); &appinfo->translated_vars);
appinfo->parent_reloid = parentOID; appinfo->parent_reloid = parentOID;
appinfos = lappend(appinfos, appinfo); *appinfos = lappend(*appinfos, appinfo);
/* /*
* Translate the column permissions bitmaps to the child's attnums * Translate the column permissions bitmaps to the child's attnums (we
* (we have to build the translated_vars list before we can do * have to build the translated_vars list before we can do this). But
* this). But if this is the parent table, leave copyObject's * if this is the parent table, leave copyObject's result alone.
* result alone.
* *
* Note: we need to do this even though the executor won't run any * Note: we need to do this even though the executor won't run any
* permissions checks on the child RTE. The * permissions checks on the child RTE. The insertedCols/updatedCols
* insertedCols/updatedCols bitmaps may be examined for * bitmaps may be examined for trigger-firing purposes.
* trigger-firing purposes.
*/ */
if (childOID != parentOID) if (childOID != parentOID)
{ {
childrte->selectedCols = translate_col_privs(rte->selectedCols, childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
appinfo->translated_vars); appinfo->translated_vars);
childrte->insertedCols = translate_col_privs(rte->insertedCols, childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
appinfo->translated_vars); appinfo->translated_vars);
childrte->updatedCols = translate_col_privs(rte->updatedCols, childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
appinfo->translated_vars); appinfo->translated_vars);
} }
} }
else else
partitioned_child_rels = lappend_int(partitioned_child_rels, *partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex); childRTindex);
/* /*
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE. * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
*/ */
if (oldrc) if (parentrc)
{ {
PlanRowMark *newrc = makeNode(PlanRowMark); PlanRowMark *childrc = makeNode(PlanRowMark);
newrc->rti = childRTindex; childrc->rti = childRTindex;
newrc->prti = rti; childrc->prti = parentRTindex;
newrc->rowmarkId = oldrc->rowmarkId; childrc->rowmarkId = parentrc->rowmarkId;
/* Reselect rowmark type, because relkind might not match parent */ /* Reselect rowmark type, because relkind might not match parent */
newrc->markType = select_rowmark_type(childrte, oldrc->strength); childrc->markType = select_rowmark_type(childrte, parentrc->strength);
newrc->allMarkTypes = (1 << newrc->markType); childrc->allMarkTypes = (1 << childrc->markType);
newrc->strength = oldrc->strength; childrc->strength = parentrc->strength;
newrc->waitPolicy = oldrc->waitPolicy; childrc->waitPolicy = parentrc->waitPolicy;
/* /*
* We mark RowMarks for partitioned child tables as parent * We mark RowMarks for partitioned child tables as parent RowMarks so
* RowMarks so that the executor ignores them (except their * that the executor ignores them (except their existence means that
* existence means that the child tables be locked using * the child tables be locked using appropriate mode).
* appropriate mode).
*/ */
newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE); childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
/* Include child's rowmark type in parent's allMarkTypes */ /* Include child's rowmark type in parent's allMarkTypes */
oldrc->allMarkTypes |= newrc->allMarkTypes; parentrc->allMarkTypes |= childrc->allMarkTypes;
root->rowMarks = lappend(root->rowMarks, newrc);
}
/* Close child relations, but keep locks */
if (childOID != parentOID)
heap_close(newrelation, NoLock);
}
heap_close(oldrelation, NoLock);
/*
* If all the children were temp tables or a partitioned parent did not
* have any leaf partitions, pretend it's a non-inheritance situation; we
* don't need Append node in that case. The duplicate RTE we added for
* the parent table is harmless, so we don't bother to get rid of it;
* ditto for the useless PlanRowMark node.
*/
if (!has_child)
{
/* Clear flag before returning */
rte->inh = false;
return;
}
/* root->rowMarks = lappend(root->rowMarks, childrc);
* We keep a list of objects in root, each of which maps a partitioned
* parent RT index to the list of RT indexes of its partitioned child
* tables. When creating an Append or a ModifyTable path for the parent,
* we copy the child RT index list verbatim to the path so that it could
* be carried over to the executor so that the latter could identify the
* partitioned child tables.
*/
if (partitioned_child_rels != NIL)
{
pcinfo = makeNode(PartitionedChildRelInfo);
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
} }
/* Otherwise, OK to add to root->append_rel_list */
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
} }
/* /*
......
...@@ -278,12 +278,12 @@ select tableoid::regclass, * from list_parted; ...@@ -278,12 +278,12 @@ select tableoid::regclass, * from list_parted;
-------------+----+---- -------------+----+----
part_aa_bb | aA | part_aa_bb | aA |
part_cc_dd | cC | 1 part_cc_dd | cC | 1
part_null | | 0
part_null | | 1
part_ee_ff1 | ff | 1 part_ee_ff1 | ff | 1
part_ee_ff1 | EE | 1 part_ee_ff1 | EE | 1
part_ee_ff2 | ff | 11 part_ee_ff2 | ff | 11
part_ee_ff2 | EE | 10 part_ee_ff2 | EE | 10
part_null | | 0
part_null | | 1
(8 rows) (8 rows)
-- some more tests to exercise tuple-routing with multi-level partitioning -- some more tests to exercise tuple-routing with multi-level partitioning
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment