Commit 08d2d58a authored by Etsuro Fujita's avatar Etsuro Fujita

postgres_fdw: Fix costing of pre-sorted foreign paths with local stats.

Commit aa09cd24 modified estimate_path_cost_size() so that it reuses
cached costs of a basic foreign path for a given foreign-base/join
relation when costing pre-sorted foreign paths for that relation, but it
incorrectly re-computed retrieved_rows, an estimated number of rows
fetched from the remote side, which is needed for costing both the basic
and pre-sorted foreign paths.  To fix, handle retrieved_rows the same way
as the cached costs: store in that relation's fpinfo the retrieved_rows
estimate computed for costing the basic foreign path, and reuse it when
costing the pre-sorted foreign paths.  Also, reuse the rows/width
estimates stored in that relation's fpinfo when costing the pre-sorted
foreign paths, to make the code consistent.

In commit ffab494a, to extend the costing mentioned above to the
foreign-grouping case, I made a change to add_foreign_grouping_paths() to
store in a given foreign-grouped relation's RelOptInfo the rows estimate
for that relation for reuse, but this patch makes that change unnecessary
since we already store the row estimate in that relation's fpinfo, which
this patch reuses when costing a foreign path for that relation with the
sortClause ordering; remove that change.

In passing, fix thinko in commit 7012b132: in estimate_path_cost_size(),
the width estimate for a given foreign-grouped relation to be stored in
that relation's fpinfo was reset incorrectly when costing a basic foreign
path for that relation with local stats.

Apply the patch to HEAD only to avoid destabilizing existing plan choices.

Author: Etsuro Fujita
Discussion: https://postgr.es/m/CAPmGK17jaJLPDEkgnP2VmkOg=5wT8YQ1CqssU8JRpZ_NSE+dqQ@mail.gmail.com
parent be2e024b
...@@ -661,10 +661,11 @@ postgresGetForeignRelSize(PlannerInfo *root, ...@@ -661,10 +661,11 @@ postgresGetForeignRelSize(PlannerInfo *root,
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/* /*
* Set cached relation costs to some negative value, so that we can detect * Set # of retrieved rows and cached relation costs to some negative
* when they are set to some sensible costs during one (usually the first) * value, so that we can detect when they are set to some sensible values,
* of the calls to estimate_path_cost_size(). * during one (usually the first) of the calls to estimate_path_cost_size.
*/ */
fpinfo->retrieved_rows = -1;
fpinfo->rel_startup_cost = -1; fpinfo->rel_startup_cost = -1;
fpinfo->rel_total_cost = -1; fpinfo->rel_total_cost = -1;
...@@ -2623,7 +2624,6 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2623,7 +2624,6 @@ estimate_path_cost_size(PlannerInfo *root,
int width; int width;
Cost startup_cost; Cost startup_cost;
Cost total_cost; Cost total_cost;
Cost cpu_per_tuple;
/* Make sure the core code has set up the relation's reltarget */ /* Make sure the core code has set up the relation's reltarget */
Assert(foreignrel->reltarget); Assert(foreignrel->reltarget);
...@@ -2736,26 +2736,20 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2736,26 +2736,20 @@ estimate_path_cost_size(PlannerInfo *root,
*/ */
Assert(param_join_conds == NIL); Assert(param_join_conds == NIL);
/*
* Use rows/width estimates made by set_baserel_size_estimates() for
* base foreign relations and set_joinrel_size_estimates() for join
* between foreign relations.
*/
rows = foreignrel->rows;
width = foreignrel->reltarget->width;
/* Back into an estimate of the number of retrieved rows. */
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
/* /*
* We will come here again and again with different set of pathkeys or * We will come here again and again with different set of pathkeys or
* additional post-scan/join-processing steps that caller wants to * additional post-scan/join-processing steps that caller wants to
* cost. We don't need to calculate the costs of the underlying scan, * cost. We don't need to calculate the cost/size estimates for the
* join, or grouping each time. Instead, use the costs if we have * underlying scan, join, or grouping each time. Instead, use those
* cached them already. * estimates if we have cached them already.
*/ */
if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0) if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
{ {
Assert(fpinfo->retrieved_rows >= 1);
rows = fpinfo->rows;
retrieved_rows = fpinfo->retrieved_rows;
width = fpinfo->width;
startup_cost = fpinfo->rel_startup_cost; startup_cost = fpinfo->rel_startup_cost;
run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost; run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
...@@ -2785,6 +2779,10 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2785,6 +2779,10 @@ estimate_path_cost_size(PlannerInfo *root,
QualCost remote_conds_cost; QualCost remote_conds_cost;
double nrows; double nrows;
/* Use rows/width estimates made by the core code. */
rows = foreignrel->rows;
width = foreignrel->reltarget->width;
/* For join we expect inner and outer relations set */ /* For join we expect inner and outer relations set */
Assert(fpinfo->innerrel && fpinfo->outerrel); Assert(fpinfo->innerrel && fpinfo->outerrel);
...@@ -2793,7 +2791,12 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2793,7 +2791,12 @@ estimate_path_cost_size(PlannerInfo *root,
/* Estimate of number of rows in cross product */ /* Estimate of number of rows in cross product */
nrows = fpinfo_i->rows * fpinfo_o->rows; nrows = fpinfo_i->rows * fpinfo_o->rows;
/* Clamp retrieved rows estimate to at most size of cross product */
/*
* Back into an estimate of the number of retrieved rows. Just in
* case this is nuts, clamp to at most nrow.
*/
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
retrieved_rows = Min(retrieved_rows, nrows); retrieved_rows = Min(retrieved_rows, nrows);
/* /*
...@@ -2871,9 +2874,8 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2871,9 +2874,8 @@ estimate_path_cost_size(PlannerInfo *root,
ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private; ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private;
/* Get rows and width from input rel */ /* Get rows from input rel */
input_rows = ofpinfo->rows; input_rows = ofpinfo->rows;
width = ofpinfo->width;
/* Collect statistics about aggregates for estimating costs. */ /* Collect statistics about aggregates for estimating costs. */
MemSet(&aggcosts, 0, sizeof(AggClauseCosts)); MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
...@@ -2920,6 +2922,9 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2920,6 +2922,9 @@ estimate_path_cost_size(PlannerInfo *root,
rows = retrieved_rows = numGroups; rows = retrieved_rows = numGroups;
} }
/* Use width estimate made by the core code. */
width = foreignrel->reltarget->width;
/*----- /*-----
* Startup cost includes: * Startup cost includes:
* 1. Startup cost for underneath input relation, adjusted for * 1. Startup cost for underneath input relation, adjusted for
...@@ -2966,7 +2971,17 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2966,7 +2971,17 @@ estimate_path_cost_size(PlannerInfo *root,
} }
else else
{ {
/* Clamp retrieved rows estimates to at most foreignrel->tuples. */ Cost cpu_per_tuple;
/* Use rows/width estimates made by set_baserel_size_estimates. */
rows = foreignrel->rows;
width = foreignrel->reltarget->width;
/*
* Back into an estimate of the number of retrieved rows. Just in
* case this is nuts, clamp to at most foreignrel->tuples.
*/
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
retrieved_rows = Min(retrieved_rows, foreignrel->tuples); retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
/* /*
...@@ -3043,18 +3058,20 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -3043,18 +3058,20 @@ estimate_path_cost_size(PlannerInfo *root,
} }
/* /*
* Cache the costs for scans, joins, or groupings without any * Cache the retrieved rows and cost estimates for scans, joins, or
* parameterization, pathkeys, or additional post-scan/join-processing * groupings without any parameterization, pathkeys, or additional
* steps, before adding the costs for transferring data from the foreign * post-scan/join-processing steps, before adding the costs for
* server. These costs are useful for costing remote joins involving this * transferring data from the foreign server. These estimates are useful
* relation or costing other remote operations for this relation such as * for costing remote joins involving this relation or costing other
* remote sorts and remote LIMIT restrictions, when the costs can not be * remote operations on this relation such as remote sorts and remote
* obtained from the foreign server. This function will be called at * LIMIT restrictions, when the costs can not be obtained from the foreign
* least once for every foreign relation without any parameterization, * server. This function will be called at least once for every foreign
* pathkeys, or additional post-scan/join-processing steps. * relation without any parameterization, pathkeys, or additional
* post-scan/join-processing steps.
*/ */
if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL) if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
{ {
fpinfo->retrieved_rows = retrieved_rows;
fpinfo->rel_startup_cost = startup_cost; fpinfo->rel_startup_cost = startup_cost;
fpinfo->rel_total_cost = total_cost; fpinfo->rel_total_cost = total_cost;
} }
...@@ -5157,10 +5174,11 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, ...@@ -5157,10 +5174,11 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
fpinfo->user = NULL; fpinfo->user = NULL;
/* /*
* Set cached relation costs to some negative value, so that we can detect * Set # of retrieved rows and cached relation costs to some negative
* when they are set to some sensible costs, during one (usually the * value, so that we can detect when they are set to some sensible values,
* first) of the calls to estimate_path_cost_size(). * during one (usually the first) of the calls to estimate_path_cost_size.
*/ */
fpinfo->retrieved_rows = -1;
fpinfo->rel_startup_cost = -1; fpinfo->rel_startup_cost = -1;
fpinfo->rel_total_cost = -1; fpinfo->rel_total_cost = -1;
...@@ -5708,10 +5726,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, ...@@ -5708,10 +5726,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
fpinfo->pushdown_safe = true; fpinfo->pushdown_safe = true;
/* /*
* Set cached relation costs to some negative value, so that we can detect * Set # of retrieved rows and cached relation costs to some negative
* when they are set to some sensible costs, during one (usually the * value, so that we can detect when they are set to some sensible values,
* first) of the calls to estimate_path_cost_size(). * during one (usually the first) of the calls to estimate_path_cost_size.
*/ */
fpinfo->retrieved_rows = -1;
fpinfo->rel_startup_cost = -1; fpinfo->rel_startup_cost = -1;
fpinfo->rel_total_cost = -1; fpinfo->rel_total_cost = -1;
...@@ -5853,8 +5872,6 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, ...@@ -5853,8 +5872,6 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
fpinfo->startup_cost = startup_cost; fpinfo->startup_cost = startup_cost;
fpinfo->total_cost = total_cost; fpinfo->total_cost = total_cost;
grouped_rel->rows = fpinfo->rows;
/* Create and add foreign path to the grouping relation. */ /* Create and add foreign path to the grouping relation. */
grouppath = create_foreign_upper_path(root, grouppath = create_foreign_upper_path(root,
grouped_rel, grouped_rel,
......
...@@ -59,12 +59,17 @@ typedef struct PgFdwRelationInfo ...@@ -59,12 +59,17 @@ typedef struct PgFdwRelationInfo
/* Selectivity of join conditions */ /* Selectivity of join conditions */
Selectivity joinclause_sel; Selectivity joinclause_sel;
/* Estimated size and cost for a scan or join. */ /* Estimated size and cost for a scan, join, or grouping/aggregation. */
double rows; double rows;
int width; int width;
Cost startup_cost; Cost startup_cost;
Cost total_cost; Cost total_cost;
/* Costs excluding costs for transferring data from the foreign server */ /*
* Estimated number of rows fetched from the foreign server, and costs
* excluding costs for transferring those rows from the foreign server.
* These are only used by estimate_path_cost_size().
*/
double retrieved_rows;
Cost rel_startup_cost; Cost rel_startup_cost;
Cost rel_total_cost; Cost rel_total_cost;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment