Commit f8f6e446 authored by Etsuro Fujita

postgres_fdw: Improve cost and size estimation for aggregate pushdown.

In commit 7012b132, which added aggregate
pushdown to postgres_fdw, we didn't account for the evaluation cost and the
selectivity of HAVING quals attached to ForeignPaths performing aggregate
pushdown, as core had never accounted for that for AggPaths and GroupPaths.
Nor did we set those values for the locally-checked quals (ie, fpinfo's
local_conds_cost and local_conds_sel), which were left initialized to zeros.
But since estimate_path_cost_size factors these in when estimating the result
size and the evaluation cost of such a ForeignPath with the
use_remote_estimate option enabled, this caused it to produce underestimated
results in that case.

Commit 7b6c0754 changed core so that
it accounts for the evaluation cost and the selectivity of HAVING quals in
aggregation paths, so change postgres_fdw's aggregate pushdown code to do
likewise.  This not only fixes the underestimation issue mentioned
above, but also improves the estimation using local statistics in that
function when that option is disabled.

This would be a bug fix rather than an improvement, but apply it to HEAD
only to avoid destabilizing existing plan choices.

Author: Etsuro Fujita
Discussion: https://postgr.es/m/5BFD3EAD.2060301%40lab.ntt.co.jp
parent afc4a78a
...@@ -3209,6 +3209,8 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 ...@@ -3209,6 +3209,8 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
(6 rows) (6 rows)
-- Update local stats on ft2
ANALYZE ft2;
-- Add into extension -- Add into extension
alter extension postgres_fdw add operator class my_op_class using btree; alter extension postgres_fdw add operator class my_op_class using btree;
alter extension postgres_fdw add function my_op_cmp(a int, b int); alter extension postgres_fdw add function my_op_cmp(a int, b int);
......
...@@ -2844,10 +2844,6 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2844,10 +2844,6 @@ estimate_path_cost_size(PlannerInfo *root,
* strategy will be considered at remote side, thus for * strategy will be considered at remote side, thus for
* simplicity, we put all startup related costs in startup_cost * simplicity, we put all startup related costs in startup_cost
* and all finalization and run cost are added in total_cost. * and all finalization and run cost are added in total_cost.
*
* Also, core does not care about costing HAVING expressions and
* adding that to the costs. So similarly, here too we are not
* considering remote and local conditions for costing.
*/ */
ofpinfo = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private; ofpinfo = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private;
...@@ -2880,10 +2876,26 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2880,10 +2876,26 @@ estimate_path_cost_size(PlannerInfo *root,
input_rows, NULL); input_rows, NULL);
/* /*
* Number of rows expected from foreign server will be same as * Get the retrieved_rows and rows estimates. If there are HAVING
* that of number of groups. * quals, account for their selectivity.
*/ */
if (root->parse->havingQual)
{
/* Factor in the selectivity of the remotely-checked quals */
retrieved_rows =
clamp_row_est(numGroups *
clauselist_selectivity(root,
fpinfo->remote_conds,
0,
JOIN_INNER,
NULL));
/* Factor in the selectivity of the locally-checked quals */
rows = clamp_row_est(retrieved_rows * fpinfo->local_conds_sel);
}
else
{
rows = retrieved_rows = numGroups; rows = retrieved_rows = numGroups;
}
/*----- /*-----
* Startup cost includes: * Startup cost includes:
...@@ -2909,6 +2921,20 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2909,6 +2921,20 @@ estimate_path_cost_size(PlannerInfo *root,
run_cost += aggcosts.finalCost * numGroups; run_cost += aggcosts.finalCost * numGroups;
run_cost += cpu_tuple_cost * numGroups; run_cost += cpu_tuple_cost * numGroups;
run_cost += ptarget->cost.per_tuple * numGroups; run_cost += ptarget->cost.per_tuple * numGroups;
/* Account for the eval cost of HAVING quals, if any */
if (root->parse->havingQual)
{
QualCost remote_cost;
/* Add in the eval cost of the remotely-checked quals */
cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
startup_cost += remote_cost.startup;
run_cost += remote_cost.per_tuple * numGroups;
/* Add in the eval cost of the locally-checked quals */
startup_cost += fpinfo->local_conds_cost.startup;
run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
}
} }
else else
{ {
...@@ -5496,6 +5522,22 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -5496,6 +5522,22 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
if (!foreign_grouping_ok(root, grouped_rel, extra->havingQual)) if (!foreign_grouping_ok(root, grouped_rel, extra->havingQual))
return; return;
/*
* Compute the selectivity and cost of the local_conds, so we don't have
* to do it over again for each path. (Currently we create just a single
* path here, but in future it would be possible that we build more paths
* such as pre-sorted paths as in postgresGetForeignPaths and
* postgresGetForeignJoinPaths.) The best we can do for these conditions
* is to estimate selectivity on the basis of local statistics.
*/
fpinfo->local_conds_sel = clauselist_selectivity(root,
fpinfo->local_conds,
0,
JOIN_INNER,
NULL);
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/* Estimate the cost of push down */ /* Estimate the cost of push down */
estimate_path_cost_size(root, grouped_rel, NIL, NIL, &rows, estimate_path_cost_size(root, grouped_rel, NIL, NIL, &rows,
&width, &startup_cost, &total_cost); &width, &startup_cost, &total_cost);
......
...@@ -807,6 +807,9 @@ create operator class my_op_class for type int using btree family my_op_family a ...@@ -807,6 +807,9 @@ create operator class my_op_class for type int using btree family my_op_family a
explain (verbose, costs off) explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-- Update local stats on ft2
ANALYZE ft2;
-- Add into extension -- Add into extension
alter extension postgres_fdw add operator class my_op_class using btree; alter extension postgres_fdw add operator class my_op_class using btree;
alter extension postgres_fdw add function my_op_cmp(a int, b int); alter extension postgres_fdw add function my_op_cmp(a int, b int);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment