Commit 6217a8c7 authored by Tom Lane

Fix some bogosities in the code that deals with estimating the fraction
of tuples we are going to retrieve from a sub-SELECT.  Must have been
half asleep when I did this code the first time :-(
parent a1642089
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -687,8 +687,8 @@ cost_qual_eval_walker(Node *node, Cost *total) ...@@ -687,8 +687,8 @@ cost_qual_eval_walker(Node *node, Cost *total)
* (We assume that sub-selects that can be executed as * (We assume that sub-selects that can be executed as
* InitPlans have already been removed from the expression.) * InitPlans have already been removed from the expression.)
* *
* NOTE: this logic should agree with make_subplan in * NOTE: this logic should agree with the estimates used by
* subselect.c. * make_subplan() in plan/subselect.c.
*/ */
{ {
SubPlan *subplan = (SubPlan *) expr->oper; SubPlan *subplan = (SubPlan *) expr->oper;
...@@ -701,16 +701,18 @@ cost_qual_eval_walker(Node *node, Cost *total) ...@@ -701,16 +701,18 @@ cost_qual_eval_walker(Node *node, Cost *total)
subcost = plan->startup_cost + subcost = plan->startup_cost +
(plan->total_cost - plan->startup_cost) / plan->plan_rows; (plan->total_cost - plan->startup_cost) / plan->plan_rows;
} }
else if (subplan->sublink->subLinkType == EXPR_SUBLINK) else if (subplan->sublink->subLinkType == ALL_SUBLINK ||
{ subplan->sublink->subLinkType == ANY_SUBLINK)
/* assume we need all tuples */
subcost = plan->total_cost;
}
else
{ {
/* assume we need 50% of the tuples */ /* assume we need 50% of the tuples */
subcost = plan->startup_cost + subcost = plan->startup_cost +
0.50 * (plan->total_cost - plan->startup_cost); 0.50 * (plan->total_cost - plan->startup_cost);
/* XXX what if subplan has been materialized? */
}
else
{
/* assume we need all tuples */
subcost = plan->total_cost;
} }
*total += subcost; *total += subcost;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.76 2000/02/21 01:13:04 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -360,11 +360,14 @@ union_planner(Query *parse, ...@@ -360,11 +360,14 @@ union_planner(Query *parse,
* In GROUP BY mode, we have the little problem that we don't * In GROUP BY mode, we have the little problem that we don't
* really know how many input tuples will be needed to make a * really know how many input tuples will be needed to make a
* group, so we can't translate an output LIMIT count into an * group, so we can't translate an output LIMIT count into an
* input count. For lack of a better idea, assume 10% of the * input count. For lack of a better idea, assume 25% of the
* input data will be processed if there is any output limit. * input data will be processed if there is any output limit.
* However, if the caller gave us a fraction rather than an
* absolute count, we can keep using that fraction (which amounts
* to assuming that all the groups are about the same size).
*/ */
if (tuple_fraction > 0.0) if (tuple_fraction >= 1.0)
tuple_fraction = 0.10; tuple_fraction = 0.25;
/* /*
* If both GROUP BY and ORDER BY are specified, we will need * If both GROUP BY and ORDER BY are specified, we will need
* two levels of sort --- and, therefore, certainly need to * two levels of sort --- and, therefore, certainly need to
...@@ -386,11 +389,10 @@ union_planner(Query *parse, ...@@ -386,11 +389,10 @@ union_planner(Query *parse,
{ {
/* /*
* SELECT DISTINCT, like GROUP, will absorb an unpredictable * SELECT DISTINCT, like GROUP, will absorb an unpredictable
* number of input tuples per output tuple. So, fall back to * number of input tuples per output tuple. Handle the same way.
* our same old 10% default...
*/ */
if (tuple_fraction > 0.0) if (tuple_fraction >= 1.0)
tuple_fraction = 0.10; tuple_fraction = 0.25;
} }
/* Generate the (sub) plan */ /* Generate the (sub) plan */
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.30 2000/03/11 23:53:41 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.31 2000/03/14 02:23:15 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -134,25 +134,34 @@ make_subplan(SubLink *slink) ...@@ -134,25 +134,34 @@ make_subplan(SubLink *slink)
PlannerInitPlan = NULL; PlannerInitPlan = NULL;
PlannerQueryLevel++; /* we becomes child */ PlannerQueryLevel++; /* we become child */
/* /*
* For an EXISTS subplan, tell lower-level planner to expect that * For an EXISTS subplan, tell lower-level planner to expect that
* only the first tuple will be retrieved. For ALL, ANY, and MULTIEXPR * only the first tuple will be retrieved. For ALL and ANY subplans,
* subplans, we will be able to stop evaluating if the test condition * we will be able to stop evaluating if the test condition fails,
* fails, so very often not all the tuples will be retrieved; for lack * so very often not all the tuples will be retrieved; for lack of a
* of a better idea, specify 50% retrieval. For EXPR_SUBLINK use default * better idea, specify 50% retrieval. For EXPR and MULTIEXPR subplans,
* behavior. * use default behavior (we're only expecting one row out, anyway).
* *
* NOTE: if you change these numbers, also change cost_qual_eval_walker * NOTE: if you change these numbers, also change cost_qual_eval_walker()
* in costsize.c. * in path/costsize.c.
*
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to materialize
* its result below. In that case it would've been better to specify
* full retrieval. At present, however, we can only detect correlation
* or lack of it after we've made the subplan :-(. Perhaps detection
* of correlation should be done as a separate step. Meanwhile, we don't
* want to be too optimistic about the percentage of tuples retrieved,
* for fear of selecting a plan that's bad for the materialization case.
*/ */
if (slink->subLinkType == EXISTS_SUBLINK) if (slink->subLinkType == EXISTS_SUBLINK)
tuple_fraction = 1.0; /* just like a LIMIT 1 */ tuple_fraction = 1.0; /* just like a LIMIT 1 */
else if (slink->subLinkType == EXPR_SUBLINK) else if (slink->subLinkType == ALL_SUBLINK ||
tuple_fraction = -1.0; /* default behavior */ slink->subLinkType == ANY_SUBLINK)
else
tuple_fraction = 0.5; /* 50% */ tuple_fraction = 0.5; /* 50% */
else
tuple_fraction = -1.0; /* default behavior */
node->plan = plan = union_planner(subquery, tuple_fraction); node->plan = plan = union_planner(subquery, tuple_fraction);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment