Commit 5220bb75 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Expand run-time partition pruning to work with MergeAppend

This expands the support for the run-time partition pruning which was added
for Append in 499be013 to also allow unneeded subnodes of a MergeAppend
to be removed.

Author: David Rowley
Discussion: https://www.postgresql.org/message-id/CAKJS1f_F_V8D7Wu-HVdnH7zCUxhoGK8XhLLtd%3DCu85qDZzXrgg%40mail.gmail.com
parent b33ef397
......@@ -899,12 +899,12 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000
Generally, the <command>EXPLAIN</command> output will display details for
every plan node which was generated by the query planner. However, there
are cases where the executor is able to determine that certain nodes are
not required; currently, the only node type to support this is the
<literal>Append</literal> node. This node type has the ability to discard
subnodes which it is able to determine won't contain any records required
by the query. It is possible to determine that nodes have been removed in
this way by the presence of a "Subplans Removed" property in the
<command>EXPLAIN</command> output.
not required; currently, the only node types to support this are the
<literal>Append</literal> and <literal>MergeAppend</literal> nodes. These
node types have the ability to discard subnodes which they are able to
determine won't contain any records required by the query. It is possible
to determine that nodes have been removed in this way by the presence of a
"Subplans Removed" property in the <command>EXPLAIN</command> output.
</para>
</sect2>
......
......@@ -151,7 +151,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
/*
* The case where no subplans survive pruning must be handled
* specially. The problem here is that code in explain.c requires
* an Append to have at least one subplan in order for it to
* a MergeAppend to have at least one subplan in order for it to
* properly determine the Vars in that subplan's targetlist. We
* sidestep this issue by just initializing the first subplan and
* setting as_whichplan to NO_MATCHING_SUBPLANS to indicate that
......
......@@ -39,6 +39,7 @@
#include "postgres.h"
#include "executor/execdebug.h"
#include "executor/execPartition.h"
#include "executor/nodeMergeAppend.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
......@@ -65,8 +66,10 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
{
MergeAppendState *mergestate = makeNode(MergeAppendState);
PlanState **mergeplanstates;
Bitmapset *validsubplans;
int nplans;
int i;
int i,
j;
ListCell *lc;
/* check for unsupported flags */
......@@ -78,19 +81,80 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
*/
ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
/*
* Set up empty vector of subplan states
*/
nplans = list_length(node->mergeplans);
mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));
/*
* create new MergeAppendState for our node
*/
mergestate->ps.plan = (Plan *) node;
mergestate->ps.state = estate;
mergestate->ps.ExecProcNode = ExecMergeAppend;
mergestate->ms_noopscan = false;
/* If run-time partition pruning is enabled, then set that up now */
if (node->part_prune_infos != NIL)
{
PartitionPruneState *prunestate;
/* We may need an expression context to evaluate partition exprs */
ExecAssignExprContext(estate, &mergestate->ps);
prunestate = ExecCreatePartitionPruneState(&mergestate->ps,
node->part_prune_infos);
mergestate->ms_prune_state = prunestate;
/* Perform an initial partition prune, if required. */
if (prunestate->do_initial_prune)
{
/* Determine which subplans survive initial pruning */
validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
list_length(node->mergeplans));
/*
* The case where no subplans survive pruning must be handled
* specially. The problem here is that code in explain.c requires
* an Append to have at least one subplan in order for it to
* properly determine the Vars in that subplan's targetlist. We
* sidestep this issue by just initializing the first subplan and
* setting ms_noopscan to true to indicate that we don't really
* need to scan any subnodes.
*/
if (bms_is_empty(validsubplans))
{
mergestate->ms_noopscan = true;
/* Mark the first as valid so that it's initialized below */
validsubplans = bms_make_singleton(0);
}
nplans = bms_num_members(validsubplans);
}
else
{
/* We'll need to initialize all subplans */
nplans = list_length(node->mergeplans);
validsubplans = bms_add_range(NULL, 0, nplans - 1);
}
/*
* If no runtime pruning is required, we can fill ms_valid_subplans
* immediately, preventing later calls to ExecFindMatchingSubPlans.
*/
if (!prunestate->do_exec_prune)
mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
}
else
{
nplans = list_length(node->mergeplans);
/*
* When run-time partition pruning is not enabled we can just mark all
* subplans as valid; they must also all be initialized.
*/
mergestate->ms_valid_subplans = validsubplans =
bms_add_range(NULL, 0, nplans - 1);
mergestate->ms_prune_state = NULL;
}
mergeplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *));
mergestate->mergeplans = mergeplanstates;
mergestate->ms_nplans = nplans;
......@@ -101,26 +165,24 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
/*
* Miscellaneous initialization
*
* MergeAppend plans don't have expression contexts because they never
* call ExecQual or ExecProject.
*/
/*
* MergeAppend nodes do have Result slots, which hold pointers to tuples,
* so we have to initialize them.
*/
ExecInitResultTupleSlotTL(estate, &mergestate->ps);
/*
* call ExecInitNode on each of the plans to be executed and save the
* results into the array "mergeplans".
* call ExecInitNode on each of the valid plans to be executed and save
* the results into the mergeplanstates array.
*/
i = 0;
j = i = 0;
foreach(lc, node->mergeplans)
{
Plan *initNode = (Plan *) lfirst(lc);
if (bms_is_member(i, validsubplans))
{
Plan *initNode = (Plan *) lfirst(lc);
mergeplanstates[i] = ExecInitNode(initNode, estate, eflags);
mergeplanstates[j++] = ExecInitNode(initNode, estate, eflags);
}
i++;
}
......@@ -178,11 +240,25 @@ ExecMergeAppend(PlanState *pstate)
if (!node->ms_initialized)
{
/* Nothing to do if all subplans were pruned */
if (node->ms_noopscan)
return ExecClearTuple(node->ps.ps_ResultTupleSlot);
/*
* First time through: pull the first tuple from each subplan, and set
* up the heap.
* If we've yet to determine the valid subplans then do so now. If
* run-time pruning is disabled then the valid subplans will always be
* set to all subplans.
*/
for (i = 0; i < node->ms_nplans; i++)
if (node->ms_valid_subplans == NULL)
node->ms_valid_subplans =
ExecFindMatchingSubPlans(node->ms_prune_state);
/*
* First time through: pull the first tuple from each valid subplan,
* and set up the heap.
*/
i = -1;
while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0)
{
node->ms_slots[i] = ExecProcNode(node->mergeplans[i]);
if (!TupIsNull(node->ms_slots[i]))
......@@ -288,6 +364,12 @@ ExecEndMergeAppend(MergeAppendState *node)
*/
for (i = 0; i < nplans; i++)
ExecEndNode(mergeplans[i]);
/*
* release any resources associated with run-time pruning
*/
if (node->ms_prune_state)
ExecDestroyPartitionPruneState(node->ms_prune_state);
}
void
......@@ -295,6 +377,19 @@ ExecReScanMergeAppend(MergeAppendState *node)
{
int i;
/*
* If any PARAM_EXEC Params used in pruning expressions have changed, then
* we'd better unset the valid subplans so that they are reselected for
* the new parameter values.
*/
if (node->ms_prune_state &&
bms_overlap(node->ps.chgParam,
node->ms_prune_state->execparamids))
{
bms_free(node->ms_valid_subplans);
node->ms_valid_subplans = NULL;
}
for (i = 0; i < node->ms_nplans; i++)
{
PlanState *subnode = node->mergeplans[i];
......
......@@ -273,6 +273,7 @@ _copyMergeAppend(const MergeAppend *from)
COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid));
COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid));
COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool));
COPY_NODE_FIELD(part_prune_infos);
return newnode;
}
......
......@@ -434,6 +434,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node)
appendStringInfoString(str, " :nullsFirst");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
WRITE_NODE_FIELD(part_prune_infos);
}
static void
......
......@@ -1634,6 +1634,7 @@ _readMergeAppend(void)
READ_OID_ARRAY(sortOperators, local_node->numCols);
READ_OID_ARRAY(collations, local_node->numCols);
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
READ_NODE_FIELD(part_prune_infos);
READ_DONE();
}
......
......@@ -1068,6 +1068,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
subplans = lappend(subplans, subplan);
}
/*
* If any quals exist, they may be useful to perform further partition
* pruning during execution. Gather information needed by the executor
* to do partition pruning.
*/
if (enable_partition_pruning &&
rel->reloptkind == RELOPT_BASEREL &&
best_path->partitioned_rels != NIL)
......@@ -1078,7 +1083,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
if (best_path->path.param_info)
{
List *prmquals = best_path->path.param_info->ppi_clauses;
prmquals = extract_actual_clauses(prmquals, false);
......@@ -1088,12 +1092,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
prunequal = list_concat(prunequal, prmquals);
}
/*
* If any quals exist, they may be useful to perform further partition
* pruning during execution. Generate a PartitionPruneInfo for each
* partitioned rel to store these quals and allow translation of
* partition indexes into subpath indexes.
*/
if (prunequal != NIL)
partpruneinfos =
make_partition_pruneinfo(root,
......@@ -1133,6 +1131,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
List *pathkeys = best_path->path.pathkeys;
List *subplans = NIL;
ListCell *subpaths;
RelOptInfo *rel = best_path->path.parent;
List *partpruneinfos = NIL;
/*
* We don't have the actual creation of the MergeAppend node split out
......@@ -1218,8 +1218,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
subplans = lappend(subplans, subplan);
}
/*
* If any quals exist, they may be useful to perform further partition
* pruning during execution. Gather information needed by the executor
* to do partition pruning.
*/
if (enable_partition_pruning &&
rel->reloptkind == RELOPT_BASEREL &&
best_path->partitioned_rels != NIL)
{
List *prunequal;
prunequal = extract_actual_clauses(rel->baserestrictinfo, false);
if (best_path->path.param_info)
{
List *prmquals = best_path->path.param_info->ppi_clauses;
prmquals = extract_actual_clauses(prmquals, false);
prmquals = (List *) replace_nestloop_params(root,
(Node *) prmquals);
prunequal = list_concat(prunequal, prmquals);
}
if (prunequal != NIL)
partpruneinfos = make_partition_pruneinfo(root,
best_path->partitioned_rels,
best_path->subpaths, prunequal);
}
node->partitioned_rels = best_path->partitioned_rels;
node->mergeplans = subplans;
node->part_prune_infos = partpruneinfos;
return (Plan *) node;
}
......
......@@ -1106,6 +1106,12 @@ struct AppendState
* slots current output tuple of each subplan
* heap heap of active tuples
* initialized true if we have fetched first tuple from each subplan
* noopscan true if partition pruning proved that none of the
* mergeplans can contain a record to satisfy this query.
* prune_state details required to allow partitions to be
* eliminated from the scan, or NULL if not possible.
* valid_subplans for runtime pruning, valid mergeplans indexes to
* scan.
* ----------------
*/
typedef struct MergeAppendState
......@@ -1118,6 +1124,9 @@ typedef struct MergeAppendState
TupleTableSlot **ms_slots; /* array of length ms_nplans */
struct binaryheap *ms_heap; /* binary heap of slot indices */
bool ms_initialized; /* are subplans started? */
bool ms_noopscan;
struct PartitionPruneState *ms_prune_state;
Bitmapset *ms_valid_subplans;
} MergeAppendState;
/* ----------------
......
......@@ -281,6 +281,9 @@ typedef struct MergeAppend
Oid *sortOperators; /* OIDs of operators to sort them by */
Oid *collations; /* OIDs of collations */
bool *nullsFirst; /* NULLS FIRST/LAST directions */
/* Info for run-time subplan pruning, one entry per partitioned_rels */
List *part_prune_infos; /* List of PartitionPruneInfo */
} MergeAppend;
/* ----------------
......
......@@ -2824,6 +2824,143 @@ select * from boolp where a = (select value from boolvalues where not value);
(9 rows)
drop table boolp;
--
-- Test run-time pruning of MergeAppend subnodes
--
set enable_seqscan = off;
set enable_sort = off;
create table ma_test (a int) partition by range (a);
create table ma_test_p1 partition of ma_test for values from (0) to (10);
create table ma_test_p2 partition of ma_test for values from (10) to (20);
create table ma_test_p3 partition of ma_test for values from (20) to (30);
insert into ma_test select x from generate_series(0,29) t(x);
create index on ma_test (a);
analyze ma_test;
prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
-- Execute query 5 times to allow choose_custom_plan
-- to start considering a generic plan.
execute mt_q1(0);
a
----
5
15
25
(3 rows)
execute mt_q1(0);
a
----
5
15
25
(3 rows)
execute mt_q1(0);
a
----
5
15
25
(3 rows)
execute mt_q1(0);
a
----
5
15
25
(3 rows)
execute mt_q1(0);
a
----
5
15
25
(3 rows)
explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
QUERY PLAN
-------------------------------------------------------------------------------
Merge Append (actual rows=2 loops=1)
Sort Key: ma_test_p2.a
Subplans Removed: 1
-> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1)
Index Cond: (a >= $1)
Filter: ((a % 10) = 5)
Rows Removed by Filter: 4
-> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
Index Cond: (a >= $1)
Filter: ((a % 10) = 5)
Rows Removed by Filter: 9
(11 rows)
execute mt_q1(15);
a
----
15
25
(2 rows)
explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
QUERY PLAN
-------------------------------------------------------------------------------
Merge Append (actual rows=1 loops=1)
Sort Key: ma_test_p3.a
Subplans Removed: 2
-> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
Index Cond: (a >= $1)
Filter: ((a % 10) = 5)
Rows Removed by Filter: 4
(7 rows)
execute mt_q1(25);
a
----
25
(1 row)
-- Ensure MergeAppend behaves correctly when no subplans match
explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
QUERY PLAN
------------------------------------------------------------------------
Merge Append (actual rows=0 loops=1)
Sort Key: ma_test_p1.a
Subplans Removed: 2
-> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
Index Cond: (a >= $1)
Filter: ((a % 10) = 5)
(6 rows)
execute mt_q1(35);
a
---
(0 rows)
deallocate mt_q1;
-- ensure initplan params properly prune partitions
explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Merge Append (actual rows=20 loops=1)
Sort Key: ma_test_p1.a
InitPlan 2 (returns $1)
-> Result (actual rows=1 loops=1)
InitPlan 1 (returns $0)
-> Limit (actual rows=1 loops=1)
-> Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
Index Cond: (a IS NOT NULL)
-> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
Index Cond: (a >= $1)
-> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1)
Index Cond: (a >= $1)
-> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1)
Index Cond: (a >= $1)
(14 rows)
reset enable_seqscan;
reset enable_sort;
drop table ma_test;
reset enable_indexonlyscan;
--
-- check that pruning works properly when the partition key is of a
......
......@@ -714,6 +714,47 @@ select * from boolp where a = (select value from boolvalues where not value);
drop table boolp;
--
-- Test run-time pruning of MergeAppend subnodes
--
set enable_seqscan = off;
set enable_sort = off;
create table ma_test (a int) partition by range (a);
create table ma_test_p1 partition of ma_test for values from (0) to (10);
create table ma_test_p2 partition of ma_test for values from (10) to (20);
create table ma_test_p3 partition of ma_test for values from (20) to (30);
insert into ma_test select x from generate_series(0,29) t(x);
create index on ma_test (a);
analyze ma_test;
prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
-- Execute query 5 times to allow choose_custom_plan
-- to start considering a generic plan.
execute mt_q1(0);
execute mt_q1(0);
execute mt_q1(0);
execute mt_q1(0);
execute mt_q1(0);
explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
execute mt_q1(15);
explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
execute mt_q1(25);
-- Ensure MergeAppend behaves correctly when no subplans match
explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
execute mt_q1(35);
deallocate mt_q1;
-- ensure initplan params properly prune partitions
explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
reset enable_seqscan;
reset enable_sort;
drop table ma_test;
reset enable_indexonlyscan;
--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment