Commit 9fd88436 authored by Tom Lane

Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only trailing rows that need not be
scanned because there cannot be a match on the other side, but initial rows
that will be scanned without possibly having a match.  This allows a more
realistic estimate of startup cost to be made, per recent pgsql-performance
discussion.  In passing, fix a couple of bugs that had crept into
mergejoinscansel: it was not quite up to speed for the task of estimating
descending-order scans, which is a new requirement in 8.3.
parent 88216128
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
double outer_path_rows = PATH_ROWS(outer_path); double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path); double inner_path_rows = PATH_ROWS(inner_path);
double outer_rows, double outer_rows,
inner_rows; inner_rows,
outer_skip_rows,
inner_skip_rows;
double mergejointuples, double mergejointuples,
rescannedtuples; rescannedtuples;
double rescanratio; double rescanratio;
Selectivity outerscansel, Selectivity outerstartsel,
innerscansel; outerendsel,
innerstartsel,
innerendsel;
Selectivity joininfactor; Selectivity joininfactor;
Path sort_path; /* dummy for result of cost_sort */ Path sort_path; /* dummy for result of cost_sort */
...@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* A merge join will stop as soon as it exhausts either input stream * A merge join will stop as soon as it exhausts either input stream
* (unless it's an outer join, in which case the outer side has to be * (unless it's an outer join, in which case the outer side has to be
* scanned all the way anyway). Estimate fraction of the left and right * scanned all the way anyway). Estimate fraction of the left and right
* inputs that will actually need to be scanned. We use only the first * inputs that will actually need to be scanned. Likewise, we can
* (most significant) merge clause for this purpose. Since * estimate the number of rows that will be skipped before the first
* mergejoinscansel() is a fairly expensive computation, we cache the * join pair is found, which should be factored into startup cost.
* results in the merge clause RestrictInfo. * We use only the first (most significant) merge clause for this purpose.
* Since mergejoinscansel() is a fairly expensive computation, we cache
* the results in the merge clause RestrictInfo.
*/ */
if (mergeclauses && path->jpath.jointype != JOIN_FULL) if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{ {
...@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
outer_path->parent->relids)) outer_path->parent->relids))
{ {
/* left side of clause is outer */ /* left side of clause is outer */
outerscansel = cache->leftscansel; outerstartsel = cache->leftstartsel;
innerscansel = cache->rightscansel; outerendsel = cache->leftendsel;
innerstartsel = cache->rightstartsel;
innerendsel = cache->rightendsel;
} }
else else
{ {
/* left side of clause is inner */ /* left side of clause is inner */
outerscansel = cache->rightscansel; outerstartsel = cache->rightstartsel;
innerscansel = cache->leftscansel; outerendsel = cache->rightendsel;
innerstartsel = cache->leftstartsel;
innerendsel = cache->leftendsel;
} }
if (path->jpath.jointype == JOIN_LEFT) if (path->jpath.jointype == JOIN_LEFT)
outerscansel = 1.0; {
outerstartsel = 0.0;
outerendsel = 1.0;
}
else if (path->jpath.jointype == JOIN_RIGHT) else if (path->jpath.jointype == JOIN_RIGHT)
innerscansel = 1.0; {
innerstartsel = 0.0;
innerendsel = 1.0;
}
} }
else else
{ {
/* cope with clauseless or full mergejoin */ /* cope with clauseless or full mergejoin */
outerscansel = innerscansel = 1.0; outerstartsel = innerstartsel = 0.0;
outerendsel = innerendsel = 1.0;
} }
/* convert selectivity to row count; must scan at least one row */ /*
outer_rows = clamp_row_est(outer_path_rows * outerscansel); * Convert selectivities to row counts. We force outer_rows and
inner_rows = clamp_row_est(inner_path_rows * innerscansel); * inner_rows to be at least 1, but the skip_rows estimates can be zero.
*/
outer_skip_rows = rint(outer_path_rows * outerstartsel);
inner_skip_rows = rint(inner_path_rows * innerstartsel);
outer_rows = clamp_row_est(outer_path_rows * outerendsel);
inner_rows = clamp_row_est(inner_path_rows * innerendsel);
Assert(outer_skip_rows <= outer_rows);
Assert(inner_skip_rows <= inner_rows);
/* /*
* Readjust scan selectivities to account for above rounding. This is * Readjust scan selectivities to account for above rounding. This is
* normally an insignificant effect, but when there are only a few rows in * normally an insignificant effect, but when there are only a few rows in
* the inputs, failing to do this makes for a large percentage error. * the inputs, failing to do this makes for a large percentage error.
*/ */
outerscansel = outer_rows / outer_path_rows; outerstartsel = outer_skip_rows / outer_path_rows;
innerscansel = inner_rows / inner_path_rows; innerstartsel = inner_skip_rows / inner_path_rows;
outerendsel = outer_rows / outer_path_rows;
innerendsel = inner_rows / inner_path_rows;
Assert(outerstartsel <= outerendsel);
Assert(innerstartsel <= innerendsel);
/* cost of source data */ /* cost of source data */
...@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
outer_path->parent->width, outer_path->parent->width,
-1.0); -1.0);
startup_cost += sort_path.startup_cost; startup_cost += sort_path.startup_cost;
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
* outerstartsel;
run_cost += (sort_path.total_cost - sort_path.startup_cost) run_cost += (sort_path.total_cost - sort_path.startup_cost)
* outerscansel; * (outerendsel - outerstartsel);
} }
else else
{ {
startup_cost += outer_path->startup_cost; startup_cost += outer_path->startup_cost;
startup_cost += (outer_path->total_cost - outer_path->startup_cost)
* outerstartsel;
run_cost += (outer_path->total_cost - outer_path->startup_cost) run_cost += (outer_path->total_cost - outer_path->startup_cost)
* outerscansel; * (outerendsel - outerstartsel);
} }
if (innersortkeys) /* do we need to sort inner? */ if (innersortkeys) /* do we need to sort inner? */
...@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
inner_path->parent->width, inner_path->parent->width,
-1.0); -1.0);
startup_cost += sort_path.startup_cost; startup_cost += sort_path.startup_cost;
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
* innerstartsel * rescanratio;
run_cost += (sort_path.total_cost - sort_path.startup_cost) run_cost += (sort_path.total_cost - sort_path.startup_cost)
* innerscansel * rescanratio; * (innerendsel - innerstartsel) * rescanratio;
} }
else else
{ {
startup_cost += inner_path->startup_cost; startup_cost += inner_path->startup_cost;
startup_cost += (inner_path->total_cost - inner_path->startup_cost)
* innerstartsel * rescanratio;
run_cost += (inner_path->total_cost - inner_path->startup_cost) run_cost += (inner_path->total_cost - inner_path->startup_cost)
* innerscansel * rescanratio; * (innerendsel - innerstartsel) * rescanratio;
} }
/* CPU costs */ /* CPU costs */
...@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* joininfactor. * joininfactor.
*/ */
startup_cost += merge_qual_cost.startup; startup_cost += merge_qual_cost.startup;
startup_cost += merge_qual_cost.per_tuple *
(outer_skip_rows + inner_skip_rows * rescanratio);
run_cost += merge_qual_cost.per_tuple * run_cost += merge_qual_cost.per_tuple *
(outer_rows + inner_rows * rescanratio); ((outer_rows - outer_skip_rows) +
(inner_rows - inner_skip_rows) * rescanratio);
/* /*
* For each tuple that gets through the mergejoin proper, we charge * For each tuple that gets through the mergejoin proper, we charge
...@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) ...@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
{ {
MergeScanSelCache *cache; MergeScanSelCache *cache;
ListCell *lc; ListCell *lc;
Selectivity leftscansel, Selectivity leftstartsel,
rightscansel; leftendsel,
rightstartsel,
rightendsel;
MemoryContext oldcontext; MemoryContext oldcontext;
/* Do we have this result already? */ /* Do we have this result already? */
...@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) ...@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
pathkey->pk_opfamily, pathkey->pk_opfamily,
pathkey->pk_strategy, pathkey->pk_strategy,
pathkey->pk_nulls_first, pathkey->pk_nulls_first,
&leftscansel, &leftstartsel,
&rightscansel); &leftendsel,
&rightstartsel,
&rightendsel);
/* Cache the result in suitably long-lived workspace */ /* Cache the result in suitably long-lived workspace */
oldcontext = MemoryContextSwitchTo(root->planner_cxt); oldcontext = MemoryContextSwitchTo(root->planner_cxt);
...@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) ...@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
cache->opfamily = pathkey->pk_opfamily; cache->opfamily = pathkey->pk_opfamily;
cache->strategy = pathkey->pk_strategy; cache->strategy = pathkey->pk_strategy;
cache->nulls_first = pathkey->pk_nulls_first; cache->nulls_first = pathkey->pk_nulls_first;
cache->leftscansel = leftscansel; cache->leftstartsel = leftstartsel;
cache->rightscansel = rightscansel; cache->leftendsel = leftendsel;
cache->rightstartsel = rightstartsel;
cache->rightendsel = rightendsel;
rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache); rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
......
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.150 2007/11/15 22:25:17 momjian Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.151 2007/12/08 21:05:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -993,8 +993,10 @@ typedef struct MergeScanSelCache ...@@ -993,8 +993,10 @@ typedef struct MergeScanSelCache
int strategy; /* sort direction (ASC or DESC) */ int strategy; /* sort direction (ASC or DESC) */
bool nulls_first; /* do NULLs come before normal values? */ bool nulls_first; /* do NULLs come before normal values? */
/* Results */ /* Results */
Selectivity leftscansel; /* scan fraction for clause left side */ Selectivity leftstartsel; /* first-join fraction for clause left side */
Selectivity rightscansel; /* scan fraction for clause right side */ Selectivity leftendsel; /* last-join fraction for clause left side */
Selectivity rightstartsel; /* first-join fraction for clause right side */
Selectivity rightendsel; /* last-join fraction for clause right side */
} MergeScanSelCache; } MergeScanSelCache;
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.42 2007/12/08 21:05:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -161,8 +161,8 @@ extern Selectivity rowcomparesel(PlannerInfo *root, ...@@ -161,8 +161,8 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
extern void mergejoinscansel(PlannerInfo *root, Node *clause, extern void mergejoinscansel(PlannerInfo *root, Node *clause,
Oid opfamily, int strategy, bool nulls_first, Oid opfamily, int strategy, bool nulls_first,
Selectivity *leftscan, Selectivity *leftstart, Selectivity *leftend,
Selectivity *rightscan); Selectivity *rightstart, Selectivity *rightend);
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
double input_rows); double input_rows);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment