Commit c6f28af5 authored by Robert Haas's avatar Robert Haas

Avoid generating bogus paths with partitionwise aggregate.

Previously, if some or all partitions had no partially aggregated path,
we would still try to generate a partially aggregated path for the
parent, leading to assertion failures or wrong answers.

Report by Rajkumar Raghuwanshi.  Patch by Jeevan Chalke, reviewed
by Ashutosh Bapat.  A few changes by me.

Discussion: http://postgr.es/m/CAKcux6=q4+Mw8gOOX16ef6ZMFp9Cve7KWFstUsrDa4GiFaXGUQ@mail.gmail.com
parent 2448adf2
......@@ -7012,6 +7012,7 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
List *grouped_live_children = NIL;
List *partially_grouped_live_children = NIL;
PathTarget *target = grouped_rel->reltarget;
bool partial_grouping_valid = true;
Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
......@@ -7091,6 +7092,8 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
lappend(partially_grouped_live_children,
child_partially_grouped_rel);
}
else
partial_grouping_valid = false;
if (patype == PARTITIONWISE_AGGREGATE_FULL)
{
......@@ -7102,21 +7105,19 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
pfree(appinfos);
}
/*
* All children can't be dummy at this point. If they are, then the parent
* too marked as dummy.
*/
Assert(grouped_live_children != NIL ||
partially_grouped_live_children != NIL);
/*
* Try to create append paths for partially grouped children. For full
* partitionwise aggregation, we might have paths in the partial_pathlist
* if parallel aggregation is possible. For partial partitionwise
* aggregation, we may have paths in both pathlist and partial_pathlist.
*
* NB: We must have a partially grouped path for every child in order to
* generate a partially grouped path for this relation.
*/
if (partially_grouped_rel)
if (partially_grouped_rel && partial_grouping_valid)
{
Assert(partially_grouped_live_children != NIL);
add_paths_to_append_rel(root, partially_grouped_rel,
partially_grouped_live_children);
......@@ -7130,7 +7131,11 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
/* If possible, create append paths for fully grouped children. */
if (patype == PARTITIONWISE_AGGREGATE_FULL)
{
Assert(grouped_live_children != NIL);
add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
}
}
/*
......
......@@ -1394,3 +1394,108 @@ SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) <
11 | 16500 | 11.0000000000000000 | 1500
(4 rows)
-- Test when parent can produce parallel paths but not any (or some) of its children
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p3
-> Parallel Seq Scan on pagg_tab_para_p2
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Finalize GroupAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_para_p1.x
-> Partial HashAggregate
Group Key: pagg_tab_para_p1.x
-> Parallel Append
-> Seq Scan on pagg_tab_para_p1
-> Seq Scan on pagg_tab_para_p2
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y))
-> Append
-> HashAggregate
Group Key: pagg_tab_para_p1.x
Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p1
-> HashAggregate
Group Key: pagg_tab_para_p2.x
Filter: (avg(pagg_tab_para_p2.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p2
-> HashAggregate
Group Key: pagg_tab_para_p3.x
Filter: (avg(pagg_tab_para_p3.y) < '7'::numeric)
-> Seq Scan on pagg_tab_para_p3
(15 rows)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
x | sum | avg | count
----+------+--------------------+-------
0 | 5000 | 5.0000000000000000 | 1000
1 | 6000 | 6.0000000000000000 | 1000
10 | 5000 | 5.0000000000000000 | 1000
11 | 6000 | 6.0000000000000000 | 1000
20 | 5000 | 5.0000000000000000 | 1000
21 | 6000 | 6.0000000000000000 | 1000
(6 rows)
......@@ -294,3 +294,27 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-- Test when parent can produce parallel paths but not any (or some) of its children
ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0);
ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0);
ANALYZE pagg_tab_para;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
-- Reset parallelism parameters to get partitionwise aggregation plan.
RESET min_parallel_table_scan_size;
RESET parallel_setup_cost;
EXPLAIN (COSTS OFF)
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment