Commit 7351bfed authored by Jeff Davis

Fix costing for disk-based hash aggregation.

Report and suggestions from Richard Guo and Tomas Vondra.

Discussion: https://postgr.es/m/CAMbWs4_W8fYbAn8KxgidAaZHON_Oo08OYn9ze=7remJymLqo5g@mail.gmail.com
parent 4083f445
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1728,6 +1728,8 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 	/* if not expected to spill, use all of work_mem */
 	if (input_groups * hashentrysize < work_mem * 1024L)
 	{
+		if (num_partitions != NULL)
+			*num_partitions = 0;
 		*mem_limit = work_mem * 1024L;
 		*ngroups_limit = *mem_limit / hashentrysize;
 		return;
...
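The executor-side change above pairs with the costing change below: on the no-spill path, hash_agg_set_limits used to return without writing *num_partitions, while the rewritten cost_agg now reads that value unconditionally. A minimal caller sketch (hypothetical code, not part of the commit; a stub with plain stdint/stddef types stands in for the real function and PostgreSQL's Size, uint64, and work_mem GUC) of the two ways the out-parameter may be used:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stub mirroring the patched no-spill path (hypothetical, simplified;
 * work_mem here is an ordinary parameter in KB rather than a GUC). */
static void
set_limits(double hashentrysize, uint64_t input_groups, long work_mem,
           size_t *mem_limit, uint64_t *ngroups_limit, int *num_partitions)
{
    if (input_groups * hashentrysize < work_mem * 1024L)
    {
        if (num_partitions != NULL)  /* guard added by the commit */
            *num_partitions = 0;
        *mem_limit = work_mem * 1024L;
        *ngroups_limit = *mem_limit / hashentrysize;
        return;
    }
    /* ...the spill path would compute a real partition count here... */
}

int
main(void)
{
    size_t   mem_limit;
    uint64_t ngroups_limit;
    int      num_partitions = -1;

    /* A caller that only needs the limits can pass NULL safely. */
    set_limits(64.0, 1000, 4096, &mem_limit, &ngroups_limit, NULL);

    /* A costing-style caller now sees 0, not an uninitialized value,
     * when the input is expected to fit in memory. */
    set_limits(64.0, 1000, 4096, &mem_limit, &ngroups_limit, &num_partitions);
    printf("num_partitions = %d\n", num_partitions);  /* prints 0 */

    return 0;
}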
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2257,6 +2257,7 @@ cost_agg(Path *path, PlannerInfo *root,
 	 */
 	if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
 	{
+		double		pages;
 		double		pages_written = 0.0;
 		double		pages_read = 0.0;
 		double		hashentrysize;
@@ -2264,7 +2265,7 @@ cost_agg(Path *path, PlannerInfo *root,
 		Size		mem_limit;
 		uint64		ngroups_limit;
 		int			num_partitions;
+		int			depth;
 
 		/*
 		 * Estimate number of batches based on the computed limits. If less
@@ -2279,25 +2280,22 @@ cost_agg(Path *path, PlannerInfo *root,
 		nbatches = Max( (numGroups * hashentrysize) / mem_limit,
 						numGroups / ngroups_limit );
 
+		nbatches = Max(ceil(nbatches), 1.0);
+		num_partitions = Max(num_partitions, 2);
+
+		/*
+		 * The number of partitions can change at different levels of
+		 * recursion; but for the purposes of this calculation assume it stays
+		 * constant.
+		 */
+		depth = ceil( log(nbatches) / log(num_partitions) );
+
 		/*
 		 * Estimate number of pages read and written. For each level of
 		 * recursion, a tuple must be written and then later read.
 		 */
-		if (nbatches > 1.0)
-		{
-			double		depth;
-			double		pages;
-
-			pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
-
-			/*
-			 * The number of partitions can change at different levels of
-			 * recursion; but for the purposes of this calculation assume it
-			 * stays constant.
-			 */
-			depth = ceil( log(nbatches - 1) / log(num_partitions) );
-			pages_written = pages_read = pages * depth;
-		}
+		pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
+		pages_written = pages_read = pages * depth;
 
 		startup_cost += pages_written * random_page_cost;
 		total_cost += pages_written * random_page_cost;
...
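Taken in isolation, the new arithmetic can be sanity-checked. The sketch below (a standalone toy with assumed values, not code from the commit) reimplements just the fixed depth formula: clamping nbatches to at least 1.0 keeps log(nbatches) non-negative, so an in-memory aggregate naturally yields depth = 0 and zero spill I/O, which is why the removed if (nbatches > 1.0) guard is no longer needed; clamping num_partitions to at least 2 keeps the log() divisor positive. The old log(nbatches - 1) term, by contrast, is negative for any nbatches between 1 and 2, which could drive the estimated page counts, and therefore the disk costs, below zero.

#include <math.h>
#include <stdio.h>

#define Max(a, b) ((a) > (b) ? (a) : (b))

/* Fixed recursion-depth estimate, following the hunk above. */
static double
spill_depth(double nbatches, int num_partitions)
{
    nbatches = Max(ceil(nbatches), 1.0);     /* at least one batch */
    num_partitions = Max(num_partitions, 2); /* keep log() divisor > 0 */
    return ceil(log(nbatches) / log(num_partitions));
}

int
main(void)
{
    /* Fits in memory (nbatches <= 1, num_partitions reported as 0):
     * depth is 0, so pages_written = pages_read = 0. */
    printf("%g\n", spill_depth(0.5, 0));   /* 0 */

    /* Just over one batch: the old log(nbatches - 1) = log(0.3) < 0
     * gave a negative depth; the fix charges one level of spilling. */
    printf("%g\n", spill_depth(1.3, 4));   /* 1 */

    /* 100 batches across 4 partitions: ceil(log(100)/log(4)) = 4. */
    printf("%g\n", spill_depth(100.0, 4)); /* 4 */

    return 0;
}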