Commit 1c2cb274 authored by Tom Lane

Fix run-time partition pruning for appends with multiple source rels.

The previous coding here supposed that if run-time partition pruning applied to
a particular Append/MergeAppend plan, then all child plans of that node
must be members of a single partitioning hierarchy.  This is totally wrong,
since an Append could be formed from a UNION ALL: we could have multiple
hierarchies sharing the same Append, or child plans that aren't part of any
hierarchy.

To fix, restructure the related plan-time and execution-time data
structures so that we can have a separate list or array for each
partitioning hierarchy.  Also track subplans that are not part of any
hierarchy, and make sure they don't get pruned.

Per reports from Phil Florent and others.  Back-patch to v11, since
the bug originated there.

David Rowley, with a lot of cosmetic adjustments by me; thanks also
to Amit Langote for review.

Discussion: https://postgr.es/m/HE1PR03MB17068BB27404C90B5B788BCABA7B0@HE1PR03MB1706.eurprd03.prod.outlook.com
parent c40489e4
This diff is collapsed.
......@@ -129,7 +129,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
/* If run-time partition pruning is enabled, then set that up now */
if (node->part_prune_infos != NIL)
if (node->part_prune_info != NULL)
{
PartitionPruneState *prunestate;
......@@ -138,7 +138,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
/* Create the working data structure for pruning. */
prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
node->part_prune_infos);
node->part_prune_info);
appendstate->as_prune_state = prunestate;
/* Perform an initial partition prune, if required. */
......
......@@ -90,7 +90,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
mergestate->ms_noopscan = false;
/* If run-time partition pruning is enabled, then set that up now */
if (node->part_prune_infos != NIL)
if (node->part_prune_info != NULL)
{
PartitionPruneState *prunestate;
......@@ -98,7 +98,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
ExecAssignExprContext(estate, &mergestate->ps);
prunestate = ExecCreatePartitionPruneState(&mergestate->ps,
node->part_prune_infos);
node->part_prune_info);
mergestate->ms_prune_state = prunestate;
/* Perform an initial partition prune, if required. */
......
......@@ -245,7 +245,7 @@ _copyAppend(const Append *from)
COPY_NODE_FIELD(appendplans);
COPY_SCALAR_FIELD(first_partial_plan);
COPY_NODE_FIELD(partitioned_rels);
COPY_NODE_FIELD(part_prune_infos);
COPY_NODE_FIELD(part_prune_info);
return newnode;
}
......@@ -273,7 +273,7 @@ _copyMergeAppend(const MergeAppend *from)
COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid));
COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid));
COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool));
COPY_NODE_FIELD(part_prune_infos);
COPY_NODE_FIELD(part_prune_info);
return newnode;
}
......@@ -1182,6 +1182,17 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from)
{
PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo);
COPY_NODE_FIELD(prune_infos);
COPY_BITMAPSET_FIELD(other_subplans);
return newnode;
}
static PartitionedRelPruneInfo *
_copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from)
{
PartitionedRelPruneInfo *newnode = makeNode(PartitionedRelPruneInfo);
COPY_SCALAR_FIELD(reloid);
COPY_NODE_FIELD(pruning_steps);
COPY_BITMAPSET_FIELD(present_parts);
......@@ -4908,6 +4919,9 @@ copyObjectImpl(const void *from)
case T_PartitionPruneInfo:
retval = _copyPartitionPruneInfo(from);
break;
case T_PartitionedRelPruneInfo:
retval = _copyPartitionedRelPruneInfo(from);
break;
case T_PartitionPruneStepOp:
retval = _copyPartitionPruneStepOp(from);
break;
......
......@@ -402,7 +402,7 @@ _outAppend(StringInfo str, const Append *node)
WRITE_NODE_FIELD(appendplans);
WRITE_INT_FIELD(first_partial_plan);
WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(part_prune_infos);
WRITE_NODE_FIELD(part_prune_info);
}
static void
......@@ -435,7 +435,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node)
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
WRITE_NODE_FIELD(part_prune_infos);
WRITE_NODE_FIELD(part_prune_info);
}
static void
......@@ -1014,10 +1014,19 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node)
/*
 * _outPartitionPruneInfo
 *		Emit the text form of a PartitionPruneInfo node.
 *
 * The node label written here is the same string that parseNodeString()
 * matches in order to dispatch to _readPartitionPruneInfo(), and the fields
 * are written in the same order in which that function reads them
 * (prune_infos, then other_subplans).
 */
static void
_outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
/* List of Lists of PartitionedRelPruneInfo, one sublist per hierarchy */
WRITE_NODE_FIELD(prune_infos);
/* indexes of subplans not covered by prune_infos */
WRITE_BITMAPSET_FIELD(other_subplans);
}
static void
_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node)
{
int i;
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO");
WRITE_OID_FIELD(reloid);
WRITE_NODE_FIELD(pruning_steps);
......@@ -3831,6 +3840,9 @@ outNode(StringInfo str, const void *obj)
case T_PartitionPruneInfo:
_outPartitionPruneInfo(str, obj);
break;
case T_PartitionedRelPruneInfo:
_outPartitionedRelPruneInfo(str, obj);
break;
case T_PartitionPruneStepOp:
_outPartitionPruneStepOp(str, obj);
break;
......
......@@ -1612,7 +1612,7 @@ _readAppend(void)
READ_NODE_FIELD(appendplans);
READ_INT_FIELD(first_partial_plan);
READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(part_prune_infos);
READ_NODE_FIELD(part_prune_info);
READ_DONE();
}
......@@ -1634,7 +1634,7 @@ _readMergeAppend(void)
READ_OID_ARRAY(sortOperators, local_node->numCols);
READ_OID_ARRAY(collations, local_node->numCols);
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
READ_NODE_FIELD(part_prune_infos);
READ_NODE_FIELD(part_prune_info);
READ_DONE();
}
......@@ -2329,6 +2329,17 @@ _readPartitionPruneInfo(void)
{
READ_LOCALS(PartitionPruneInfo);
READ_NODE_FIELD(prune_infos);
READ_BITMAPSET_FIELD(other_subplans);
READ_DONE();
}
static PartitionedRelPruneInfo *
_readPartitionedRelPruneInfo(void)
{
READ_LOCALS(PartitionedRelPruneInfo);
READ_OID_FIELD(reloid);
READ_NODE_FIELD(pruning_steps);
READ_BITMAPSET_FIELD(present_parts);
......@@ -2726,6 +2737,8 @@ parseNodeString(void)
return_value = _readPlanRowMark();
else if (MATCH("PARTITIONPRUNEINFO", 18))
return_value = _readPartitionPruneInfo();
else if (MATCH("PARTITIONEDRELPRUNEINFO", 23))
return_value = _readPartitionedRelPruneInfo();
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
return_value = _readPartitionPruneStepOp();
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))
......
......@@ -1388,7 +1388,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *all_child_outers = NIL;
ListCell *l;
List *partitioned_rels = NIL;
bool build_partitioned_rels = false;
double partial_rows = -1;
/* If appropriate, consider parallel append */
......@@ -1413,10 +1412,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (rel->part_scheme != NULL)
{
if (IS_SIMPLE_REL(rel))
partitioned_rels = rel->partitioned_child_rels;
partitioned_rels = list_make1(rel->partitioned_child_rels);
else if (IS_JOIN_REL(rel))
{
int relid = -1;
List *partrels = NIL;
/*
* For a partitioned joinrel, concatenate the component rels'
......@@ -1430,16 +1430,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
component = root->simple_rel_array[relid];
Assert(component->part_scheme != NULL);
Assert(list_length(component->partitioned_child_rels) >= 1);
partitioned_rels =
list_concat(partitioned_rels,
partrels =
list_concat(partrels,
list_copy(component->partitioned_child_rels));
}
partitioned_rels = list_make1(partrels);
}
Assert(list_length(partitioned_rels) >= 1);
}
else if (rel->rtekind == RTE_SUBQUERY)
build_partitioned_rels = true;
/*
* For every non-dummy child, remember the cheapest path. Also, identify
......@@ -1453,17 +1453,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
Path *cheapest_partial_path = NULL;
/*
* If we need to build partitioned_rels, accumulate the partitioned
* rels for this child. We must ensure that parents are always listed
* before their child partitioned tables.
* For UNION ALLs with non-empty partitioned_child_rels, accumulate
* the Lists of child relations.
*/
if (build_partitioned_rels)
{
List *cprels = childrel->partitioned_child_rels;
partitioned_rels = list_concat(partitioned_rels,
list_copy(cprels));
}
if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
partitioned_rels = lappend(partitioned_rels,
childrel->partitioned_child_rels);
/*
* If child has an unparameterized cheapest-total path, add that to
......
......@@ -124,6 +124,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
List **qual, List **indexqual, List **indexECs);
static void bitmap_subplan_mark_shared(Plan *plan);
static List *flatten_partitioned_rels(List *partitioned_rels);
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
List *tlist, List *scan_clauses);
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
......@@ -202,7 +203,8 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
Index scanrelid, int wtParam);
static Append *make_append(List *appendplans, int first_partial_plan,
List *tlist, List *partitioned_rels, List *partpruneinfos);
List *tlist, List *partitioned_rels,
PartitionPruneInfo *partpruneinfo);
static RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree,
Plan *righttree,
......@@ -1030,7 +1032,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
List *subplans = NIL;
ListCell *subpaths;
RelOptInfo *rel = best_path->path.parent;
List *partpruneinfos = NIL;
PartitionPruneInfo *partpruneinfo = NULL;
/*
* The subpaths list could be empty, if every child was proven empty by
......@@ -1070,8 +1072,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
/*
* If any quals exist, they may be useful to perform further partition
* pruning during execution. Gather information needed by the executor
* to do partition pruning.
* pruning during execution. Gather information needed by the executor to
* do partition pruning.
*/
if (enable_partition_pruning &&
rel->reloptkind == RELOPT_BASEREL &&
......@@ -1093,10 +1095,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
}
if (prunequal != NIL)
partpruneinfos =
make_partition_pruneinfo(root,
partpruneinfo =
make_partition_pruneinfo(root, rel,
best_path->subpaths,
best_path->partitioned_rels,
best_path->subpaths, prunequal);
prunequal);
}
/*
......@@ -1108,7 +1111,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
plan = make_append(subplans, best_path->first_partial_path,
tlist, best_path->partitioned_rels,
partpruneinfos);
partpruneinfo);
copy_generic_path_info(&plan->plan, (Path *) best_path);
......@@ -1132,7 +1135,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
List *subplans = NIL;
ListCell *subpaths;
RelOptInfo *rel = best_path->path.parent;
List *partpruneinfos = NIL;
PartitionPruneInfo *partpruneinfo = NULL;
/*
* We don't have the actual creation of the MergeAppend node split out
......@@ -1220,8 +1223,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
/*
* If any quals exist, they may be useful to perform further partition
* pruning during execution. Gather information needed by the executor
* to do partition pruning.
* pruning during execution. Gather information needed by the executor to
* do partition pruning.
*/
if (enable_partition_pruning &&
rel->reloptkind == RELOPT_BASEREL &&
......@@ -1244,14 +1247,16 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
}
if (prunequal != NIL)
partpruneinfos = make_partition_pruneinfo(root,
best_path->partitioned_rels,
best_path->subpaths, prunequal);
partpruneinfo = make_partition_pruneinfo(root, rel,
best_path->subpaths,
best_path->partitioned_rels,
prunequal);
}
node->partitioned_rels = best_path->partitioned_rels;
node->partitioned_rels =
flatten_partitioned_rels(best_path->partitioned_rels);
node->mergeplans = subplans;
node->part_prune_infos = partpruneinfos;
node->part_prune_info = partpruneinfo;
return (Plan *) node;
}
......@@ -5000,6 +5005,27 @@ bitmap_subplan_mark_shared(Plan *plan)
elog(ERROR, "unrecognized node type: %d", nodeTag(plan));
}
/*
 * flatten_partitioned_rels
 *		Collapse a List of sub-Lists into one flat List.
 *
 * Each element of 'partitioned_rels' is itself a List.  The result holds the
 * members of every sub-list, in the order the sub-lists appear.  Each
 * sub-list is copied before being concatenated, so the input sub-lists are
 * not passed to list_concat() directly.  An empty input yields NIL.
 */
static List *
flatten_partitioned_rels(List *partitioned_rels)
{
	List	   *flattened = NIL;
	ListCell   *cell;

	foreach(cell, partitioned_rels)
		flattened = list_concat(flattened,
								list_copy((List *) lfirst(cell)));

	return flattened;
}
/*****************************************************************************
*
* PLAN NODE BUILDING ROUTINES
......@@ -5343,7 +5369,7 @@ make_foreignscan(List *qptlist,
static Append *
make_append(List *appendplans, int first_partial_plan,
List *tlist, List *partitioned_rels,
List *partpruneinfos)
PartitionPruneInfo *partpruneinfo)
{
Append *node = makeNode(Append);
Plan *plan = &node->plan;
......@@ -5354,8 +5380,8 @@ make_append(List *appendplans, int first_partial_plan,
plan->righttree = NULL;
node->appendplans = appendplans;
node->first_partial_plan = first_partial_plan;
node->partitioned_rels = partitioned_rels;
node->part_prune_infos = partpruneinfos;
node->partitioned_rels = flatten_partitioned_rels(partitioned_rels);
node->part_prune_info = partpruneinfo;
return node;
}
......@@ -6512,7 +6538,7 @@ make_modifytable(PlannerInfo *root,
node->operation = operation;
node->canSetTag = canSetTag;
node->nominalRelation = nominalRelation;
node->partitioned_rels = partitioned_rels;
node->partitioned_rels = flatten_partitioned_rels(partitioned_rels);
node->partColsUpdated = partColsUpdated;
node->resultRelations = resultRelations;
node->resultRelIndex = -1; /* will be set correctly in setrefs.c */
......
......@@ -1616,6 +1616,7 @@ inheritance_planner(PlannerInfo *root)
* contain at least one member, that is, the root parent's index.
*/
Assert(list_length(partitioned_rels) >= 1);
partitioned_rels = list_make1(partitioned_rels);
}
/* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
......
This diff is collapsed.
......@@ -112,15 +112,14 @@ typedef struct PartitionTupleRouting
TupleTableSlot *root_tuple_slot;
} PartitionTupleRouting;
/*-----------------------
* PartitionPruningData - Per-partitioned-table data for run-time pruning
/*
* PartitionedRelPruningData - Per-partitioned-table data for run-time pruning
* of partitions. For a multilevel partitioned table, we have one of these
* for the topmost partition plus one for each non-leaf child partition,
* ordered such that parents appear before their children.
* for the topmost partition plus one for each non-leaf child partition.
*
* subplan_map[] and subpart_map[] have the same definitions as in
* PartitionPruneInfo (see plannodes.h); though note that here,
* subpart_map contains indexes into PartitionPruneState.partprunedata[].
* PartitionedRelPruneInfo (see plannodes.h); though note that here,
* subpart_map contains indexes into PartitionPruningData.partrelprunedata[].
*
* subplan_map Subplan index by partition index, or -1.
* subpart_map Subpart index by partition index, or -1.
......@@ -134,9 +133,8 @@ typedef struct PartitionTupleRouting
* executor startup (for this partitioning level).
* do_exec_prune true if pruning should be performed during
* executor run (for this partitioning level).
*-----------------------
*/
typedef struct PartitionPruningData
typedef struct PartitionedRelPruningData
{
int *subplan_map;
int *subpart_map;
......@@ -145,43 +143,59 @@ typedef struct PartitionPruningData
List *pruning_steps;
bool do_initial_prune;
bool do_exec_prune;
} PartitionedRelPruningData;
/*
* PartitionPruningData - Holds all the run-time pruning information for
* a single partitioning hierarchy containing one or more partitions.
* partrelprunedata[] is an array ordered such that parents appear before
* their children; in particular, the first entry is the topmost partition,
* which was actually named in the SQL query.
*/
typedef struct PartitionPruningData
{
int num_partrelprunedata; /* number of array entries */
/*
 * One entry per partitioned table in this hierarchy, ordered so that
 * parents appear before their children; entry 0 is the topmost partition.
 */
PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER];
} PartitionPruningData;
/*-----------------------
/*
* PartitionPruneState - State object required for plan nodes to perform
* run-time partition pruning.
*
* This struct can be attached to plan types which support arbitrary Lists of
* subplans containing partitions to allow subplans to be eliminated due to
* subplans containing partitions, to allow subplans to be eliminated due to
* the clauses being unable to match to any tuple that the subplan could
* possibly produce. Note that we currently support only one partitioned
* table per parent plan node, hence partprunedata[] need describe only one
* partitioning hierarchy.
* possibly produce.
*
* partprunedata Array of PartitionPruningData for the plan's
* partitioned relation, ordered such that parent tables
* appear before children (hence, topmost table is first).
* num_partprunedata Number of items in 'partprunedata' array.
* do_initial_prune true if pruning should be performed during executor
* startup (at any hierarchy level).
* do_exec_prune true if pruning should be performed during
* executor run (at any hierarchy level).
* execparamids Contains paramids of PARAM_EXEC Params found within
* any of the partprunedata structs. Pruning must be
* done again each time the value of one of these
* parameters changes.
* other_subplans Contains indexes of subplans that don't belong to any
* "partprunedata", e.g. UNION ALL children that are not
* partitioned tables, or a partitioned table for which the
* planner deemed run-time pruning to be useless.
* These must not be pruned.
* prune_context A short-lived memory context in which to execute the
* partition pruning functions.
*-----------------------
* do_initial_prune true if pruning should be performed during executor
* startup (at any hierarchy level).
* do_exec_prune true if pruning should be performed during
* executor run (at any hierarchy level).
* num_partprunedata Number of items in "partprunedata" array.
* partprunedata Array of PartitionPruningData pointers for the plan's
* partitioned relation(s), one for each partitioning
* hierarchy that requires run-time pruning.
*/
typedef struct PartitionPruneState
{
PartitionPruningData *partprunedata;
int num_partprunedata;
bool do_initial_prune;
bool do_exec_prune;
Bitmapset *execparamids;
Bitmapset *other_subplans;
MemoryContext prune_context;
bool do_initial_prune;
bool do_exec_prune;
int num_partprunedata;
PartitionPruningData *partprunedata[FLEXIBLE_ARRAY_MEMBER];
} PartitionPruneState;
extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
......@@ -210,7 +224,7 @@ extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map,
extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
PartitionTupleRouting *proute);
extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,
List *partitionpruneinfo);
PartitionPruneInfo *partitionpruneinfo);
extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate);
extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate,
......
......@@ -88,6 +88,7 @@ typedef enum NodeTag
T_NestLoopParam,
T_PlanRowMark,
T_PartitionPruneInfo,
T_PartitionedRelPruneInfo,
T_PartitionPruneStepOp,
T_PartitionPruneStepCombine,
T_PlanInvalItem,
......
......@@ -241,6 +241,8 @@ typedef struct ModifyTable
List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} ModifyTable;
struct PartitionPruneInfo; /* forward reference to struct below */
/* ----------------
* Append node -
* Generate the concatenation of the results of sub-plans.
......@@ -260,8 +262,8 @@ typedef struct Append
/* RT indexes of non-leaf tables in a partition tree */
List *partitioned_rels;
/* Info for run-time subplan pruning, one entry per partitioned_rels */
List *part_prune_infos; /* List of PartitionPruneInfo */
/* Info for run-time subplan pruning; NULL if we're not doing that */
struct PartitionPruneInfo *part_prune_info;
} Append;
/* ----------------
......@@ -281,9 +283,8 @@ typedef struct MergeAppend
Oid *sortOperators; /* OIDs of operators to sort them by */
Oid *collations; /* OIDs of collations */
bool *nullsFirst; /* NULLS FIRST/LAST directions */
/* Info for run-time subplan pruning, one entry per partitioned_rels */
List *part_prune_infos; /* List of PartitionPruneInfo */
/* Info for run-time subplan pruning; NULL if we're not doing that */
struct PartitionPruneInfo *part_prune_info;
} MergeAppend;
/* ----------------
......@@ -1063,12 +1064,32 @@ typedef struct PlanRowMark
* We also store various details to tell the executor when it should be
* performing partition pruning.
*
* Each PartitionPruneInfo describes the partitioning rules for a single
* partitioned table (a/k/a level of partitioning). For a multilevel
* partitioned table, we have a List of PartitionPruneInfos, where the
* first entry represents the topmost partitioned table and additional
* entries represent non-leaf child partitions, ordered such that parents
* appear before their children.
* Each PartitionedRelPruneInfo describes the partitioning rules for a single
* partitioned table (a/k/a level of partitioning). Since a partitioning
* hierarchy could contain multiple levels, we represent it by a List of
* PartitionedRelPruneInfos, where the first entry represents the topmost
* partitioned table and additional entries represent non-leaf child
* partitions, ordered such that parents appear before their children.
* Then, since an Append-type node could have multiple partitioning
* hierarchies among its children, we have an unordered List of those Lists.
*
* prune_infos List of Lists containing PartitionedRelPruneInfo nodes,
* one sublist per run-time-prunable partition hierarchy
* appearing in the parent plan node's subplans.
* other_subplans Indexes of any subplans that are not accounted for
* by any of the PartitionedRelPruneInfo nodes in
* "prune_infos". These subplans must not be pruned.
*/
typedef struct PartitionPruneInfo
{
NodeTag type;
/*
 * List of Lists of PartitionedRelPruneInfo nodes: one sublist per
 * run-time-prunable partition hierarchy among the parent plan node's
 * subplans.
 */
List *prune_infos;
/*
 * Indexes of subplans not accounted for by any PartitionedRelPruneInfo in
 * prune_infos; these subplans must not be pruned.
 */
Bitmapset *other_subplans;
} PartitionPruneInfo;
/*
* PartitionedRelPruneInfo - Details required to allow the executor to prune
* partitions for a single partitioned table.
*
* subplan_map[] and subpart_map[] are indexed by partition index (where
* zero is the topmost partition, and non-leaf partitions must come before
......@@ -1076,11 +1097,12 @@ typedef struct PlanRowMark
* zero-based index of the partition's subplan in the parent plan's subplan
* list; it is -1 if the partition is non-leaf or has been pruned. For a
* non-leaf partition p, subpart_map[p] contains the zero-based index of
* that sub-partition's PartitionPruneInfo in the plan's PartitionPruneInfo
* list; it is -1 if the partition is a leaf or has been pruned. All these
* indexes are global across the whole partitioned table and Append plan node.
* that sub-partition's PartitionedRelPruneInfo in the hierarchy's
* PartitionedRelPruneInfo list; it is -1 if the partition is a leaf or has
* been pruned. Note that subplan indexes are global across the parent plan
* node, but partition indexes are valid only within a particular hierarchy.
*/
typedef struct PartitionPruneInfo
typedef struct PartitionedRelPruneInfo
{
NodeTag type;
Oid reloid; /* OID of partition rel for this level */
......@@ -1098,7 +1120,7 @@ typedef struct PartitionPruneInfo
bool do_exec_prune; /* true if pruning should be performed during
* executor run. */
Bitmapset *execparamids; /* All PARAM_EXEC Param IDs in pruning_steps */
} PartitionPruneInfo;
} PartitionedRelPruneInfo;
/*
* Abstract Node type for partition pruning steps (there are no concrete
......
......@@ -74,9 +74,11 @@ typedef struct PartitionPruneContext
#define PruneCxtStateIdx(partnatts, step_id, keyno) \
((partnatts) * (step_id) + (keyno))
extern List *make_partition_pruneinfo(PlannerInfo *root,
extern PartitionPruneInfo *make_partition_pruneinfo(PlannerInfo *root,
RelOptInfo *parentrel,
List *subpaths,
List *partitioned_rels,
List *subpaths, List *prunequal);
List *prunequal);
extern Relids prune_append_rel_partitions(RelOptInfo *rel);
extern Bitmapset *get_matching_partitions(PartitionPruneContext *context,
List *pruning_steps);
......
......@@ -2382,6 +2382,96 @@ select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1
Index Cond: (a = $0)
(52 rows)
-- Test run-time partition pruning with UNION ALL parents
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1);
QUERY PLAN
-------------------------------------------------------------------------------
Append (actual rows=0 loops=1)
InitPlan 1 (returns $0)
-> Result (actual rows=1 loops=1)
-> Append (actual rows=0 loops=1)
-> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
Index Cond: (a = 1)
-> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b2_a_idx (never executed)
Index Cond: (a = 1)
-> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b3_a_idx (never executed)
Index Cond: (a = 1)
-> Seq Scan on ab_a1_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a1_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a1_b3 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a2_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a2_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a2_b3 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a3_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a3_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a3_b3 (never executed)
Filter: (b = $0)
(37 rows)
-- A case containing a UNION ALL with a non-partitioned child.
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1);
QUERY PLAN
-------------------------------------------------------------------------------
Append (actual rows=0 loops=1)
InitPlan 1 (returns $0)
-> Result (actual rows=1 loops=1)
-> Append (actual rows=0 loops=1)
-> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
Index Cond: (a = 1)
-> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b2_a_idx (never executed)
Index Cond: (a = 1)
-> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed)
Recheck Cond: (a = 1)
Filter: (b = $0)
-> Bitmap Index Scan on ab_a1_b3_a_idx (never executed)
Index Cond: (a = 1)
-> Result (actual rows=0 loops=1)
One-Time Filter: (5 = $0)
-> Seq Scan on ab_a1_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a1_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a1_b3 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a2_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a2_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a2_b3 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a3_b1 (actual rows=0 loops=1)
Filter: (b = $0)
-> Seq Scan on ab_a3_b2 (never executed)
Filter: (b = $0)
-> Seq Scan on ab_a3_b3 (never executed)
Filter: (b = $0)
(39 rows)
deallocate ab_q1;
deallocate ab_q2;
deallocate ab_q3;
......@@ -3318,3 +3408,86 @@ explain (costs off) select * from pp_temp_parent where a = 2;
(3 rows)
drop table pp_temp_parent;
-- Stress run-time partition pruning a bit more, per bug reports
create temp table p (a int, b int, c int) partition by list (a);
create temp table p1 partition of p for values in (1);
create temp table p2 partition of p for values in (2);
create temp table q (a int, b int, c int) partition by list (a);
create temp table q1 partition of q for values in (1) partition by list (b);
create temp table q11 partition of q1 for values in (1) partition by list (c);
create temp table q111 partition of q11 for values in (1);
create temp table q2 partition of q for values in (2) partition by list (b);
create temp table q21 partition of q2 for values in (1);
create temp table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
explain (costs off)
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
QUERY PLAN
----------------------------------------------------
Append
InitPlan 1 (returns $0)
-> Result
-> Seq Scan on p1
Filter: ((a = 1) AND (b = 1) AND (c = $0))
-> Seq Scan on q111
Filter: ((a = 1) AND (b = 1) AND (c = $0))
-> Result
One-Time Filter: (1 = $0)
(9 rows)
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
a | b | c
---+---+---
1 | 1 | 1
(1 row)
prepare q (int, int) as
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = $1 and s.b = $2 and s.c = (select 1);
set plan_cache_mode to force_generic_plan;
explain (costs off) execute q (1, 1);
QUERY PLAN
---------------------------------------------------------------
Append
InitPlan 1 (returns $0)
-> Result
Subplans Removed: 1
-> Seq Scan on p1
Filter: ((a = $1) AND (b = $2) AND (c = $0))
-> Seq Scan on q111
Filter: ((a = $1) AND (b = $2) AND (c = $0))
-> Result
One-Time Filter: ((1 = $1) AND (1 = $2) AND (1 = $0))
(10 rows)
execute q (1, 1);
a | b | c
---+---+---
1 | 1 | 1
(1 row)
reset plan_cache_mode;
drop table p, q;
......@@ -540,6 +540,14 @@ reset max_parallel_workers_per_gather;
explain (analyze, costs off, summary off, timing off)
select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a);
-- Test run-time partition pruning with UNION ALL parents
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1);
-- A case containing a UNION ALL with a non-partitioned child.
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1);
deallocate ab_q1;
deallocate ab_q2;
deallocate ab_q3;
......@@ -878,3 +886,57 @@ create temp table pp_temp_part_def partition of pp_temp_parent default;
explain (costs off) select * from pp_temp_parent where true;
explain (costs off) select * from pp_temp_parent where a = 2;
drop table pp_temp_parent;
-- Stress run-time partition pruning a bit more, per bug reports
create temp table p (a int, b int, c int) partition by list (a);
create temp table p1 partition of p for values in (1);
create temp table p2 partition of p for values in (2);
create temp table q (a int, b int, c int) partition by list (a);
create temp table q1 partition of q for values in (1) partition by list (b);
create temp table q11 partition of q1 for values in (1) partition by list (c);
create temp table q111 partition of q11 for values in (1);
create temp table q2 partition of q for values in (2) partition by list (b);
create temp table q21 partition of q2 for values in (1);
create temp table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
explain (costs off)
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
prepare q (int, int) as
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = $1 and s.b = $2 and s.c = (select 1);
set plan_cache_mode to force_generic_plan;
explain (costs off) execute q (1, 1);
execute q (1, 1);
reset plan_cache_mode;
drop table p, q;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment