Commit 4f06c688 authored by Tom Lane

Put back planner's ability to cache the results of mergejoinscansel(),

which I had removed in the first cut of the EquivalenceClass rewrite to
simplify that patch a little.  But it's still important --- in a four-way
join problem mergejoinscansel() was eating about 40% of the planning time
according to gprof.  Also, improve the EquivalenceClass code to re-use
join RestrictInfos rather than generating fresh ones for each join
considered.  This saves some memory space but more importantly improves
the effectiveness of caching planning info in RestrictInfos.
parent 45e07369
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.362 2007/01/20 20:45:38 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.363 2007/01/22 20:00:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1326,6 +1326,10 @@ _copyRestrictInfo(RestrictInfo *from) ...@@ -1326,6 +1326,10 @@ _copyRestrictInfo(RestrictInfo *from)
/* EquivalenceClasses are never copied, so shallow-copy the pointers */ /* EquivalenceClasses are never copied, so shallow-copy the pointers */
COPY_SCALAR_FIELD(left_ec); COPY_SCALAR_FIELD(left_ec);
COPY_SCALAR_FIELD(right_ec); COPY_SCALAR_FIELD(right_ec);
COPY_SCALAR_FIELD(left_em);
COPY_SCALAR_FIELD(right_em);
/* MergeScanSelCache isn't a Node, so hard to copy; just reset cache */
newnode->scansel_cache = NIL;
COPY_SCALAR_FIELD(outer_is_left); COPY_SCALAR_FIELD(outer_is_left);
COPY_SCALAR_FIELD(hashjoinoperator); COPY_SCALAR_FIELD(hashjoinoperator);
COPY_SCALAR_FIELD(left_bucketsize); COPY_SCALAR_FIELD(left_bucketsize);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.294 2007/01/20 20:45:38 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.295 2007/01/22 20:00:39 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
...@@ -1304,6 +1304,7 @@ _outEquivalenceClass(StringInfo str, EquivalenceClass *node) ...@@ -1304,6 +1304,7 @@ _outEquivalenceClass(StringInfo str, EquivalenceClass *node)
WRITE_NODE_FIELD(ec_opfamilies); WRITE_NODE_FIELD(ec_opfamilies);
WRITE_NODE_FIELD(ec_members); WRITE_NODE_FIELD(ec_members);
WRITE_NODE_FIELD(ec_sources); WRITE_NODE_FIELD(ec_sources);
WRITE_NODE_FIELD(ec_derives);
WRITE_BITMAPSET_FIELD(ec_relids); WRITE_BITMAPSET_FIELD(ec_relids);
WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_const);
WRITE_BOOL_FIELD(ec_has_volatile); WRITE_BOOL_FIELD(ec_has_volatile);
...@@ -1354,6 +1355,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node) ...@@ -1354,6 +1355,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
WRITE_NODE_FIELD(mergeopfamilies); WRITE_NODE_FIELD(mergeopfamilies);
WRITE_NODE_FIELD(left_ec); WRITE_NODE_FIELD(left_ec);
WRITE_NODE_FIELD(right_ec); WRITE_NODE_FIELD(right_ec);
WRITE_NODE_FIELD(left_em);
WRITE_NODE_FIELD(right_em);
WRITE_BOOL_FIELD(outer_is_left); WRITE_BOOL_FIELD(outer_is_left);
WRITE_OID_FIELD(hashjoinoperator); WRITE_OID_FIELD(hashjoinoperator);
} }
......
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.176 2007/01/22 01:35:20 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.177 2007/01/22 20:00:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -108,6 +108,9 @@ bool enable_mergejoin = true; ...@@ -108,6 +108,9 @@ bool enable_mergejoin = true;
bool enable_hashjoin = true; bool enable_hashjoin = true;
static MergeScanSelCache *cached_scansel(PlannerInfo *root,
RestrictInfo *rinfo,
PathKey *pathkey);
static bool cost_qual_eval_walker(Node *node, QualCost *total); static bool cost_qual_eval_walker(Node *node, QualCost *total);
static Selectivity approx_selectivity(PlannerInfo *root, List *quals, static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
JoinType jointype); JoinType jointype);
...@@ -1349,9 +1352,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1349,9 +1352,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* (unless it's an outer join, in which case the outer side has to be * (unless it's an outer join, in which case the outer side has to be
* scanned all the way anyway). Estimate fraction of the left and right * scanned all the way anyway). Estimate fraction of the left and right
* inputs that will actually need to be scanned. We use only the first * inputs that will actually need to be scanned. We use only the first
* (most significant) merge clause for this purpose. * (most significant) merge clause for this purpose. Since
* * mergejoinscansel() is a fairly expensive computation, we cache the
* XXX mergejoinscansel is a bit expensive, can we cache its results? * results in the merge clause RestrictInfo.
*/ */
if (mergeclauses && path->jpath.jointype != JOIN_FULL) if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{ {
...@@ -1360,8 +1363,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1360,8 +1363,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
List *ipathkeys; List *ipathkeys;
PathKey *opathkey; PathKey *opathkey;
PathKey *ipathkey; PathKey *ipathkey;
Selectivity leftscansel, MergeScanSelCache *cache;
rightscansel;
/* Get the input pathkeys to determine the sort-order details */ /* Get the input pathkeys to determine the sort-order details */
opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
...@@ -1376,22 +1378,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1376,22 +1378,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
opathkey->pk_nulls_first != ipathkey->pk_nulls_first) opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
elog(ERROR, "left and right pathkeys do not match in mergejoin"); elog(ERROR, "left and right pathkeys do not match in mergejoin");
mergejoinscansel(root, (Node *) firstclause->clause, /* Get the selectivity with caching */
opathkey->pk_opfamily, opathkey->pk_strategy, cache = cached_scansel(root, firstclause, opathkey);
&leftscansel, &rightscansel);
if (bms_is_subset(firstclause->left_relids, if (bms_is_subset(firstclause->left_relids,
outer_path->parent->relids)) outer_path->parent->relids))
{ {
/* left side of clause is outer */ /* left side of clause is outer */
outerscansel = leftscansel; outerscansel = cache->leftscansel;
innerscansel = rightscansel; innerscansel = cache->rightscansel;
} }
else else
{ {
/* left side of clause is inner */ /* left side of clause is inner */
outerscansel = rightscansel; outerscansel = cache->rightscansel;
innerscansel = leftscansel; innerscansel = cache->leftscansel;
} }
if (path->jpath.jointype == JOIN_LEFT) if (path->jpath.jointype == JOIN_LEFT)
outerscansel = 1.0; outerscansel = 1.0;
...@@ -1493,6 +1494,54 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) ...@@ -1493,6 +1494,54 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
path->jpath.path.total_cost = startup_cost + run_cost; path->jpath.path.total_cost = startup_cost + run_cost;
} }
/*
 * cached_scansel
 *	  Fetch the mergejoinscansel() estimates for a merge clause under a
 *	  given sort ordering, computing them only on a cache miss.
 *
 * The cache is the list rinfo->scansel_cache; entries are keyed by the
 * pathkey's opfamily, strategy, and nulls_first flag.  On a miss we call
 * mergejoinscansel() and append a new entry, so a later request for the
 * same ordering gets the stored entry back.
 *
 * Returns the matching (possibly newly created) cache entry.
 */
static MergeScanSelCache *
cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
{
	MergeScanSelCache *entry;
	ListCell   *cell;
	Selectivity lsel,
				rsel;
	MemoryContext saved_cxt;

	/* Scan the clause's existing entries for this exact sort ordering */
	foreach(cell, rinfo->scansel_cache)
	{
		entry = (MergeScanSelCache *) lfirst(cell);
		if (entry->nulls_first == pathkey->pk_nulls_first &&
			entry->strategy == pathkey->pk_strategy &&
			entry->opfamily == pathkey->pk_opfamily)
			return entry;
	}

	/* Cache miss: run the estimator before switching memory contexts */
	mergejoinscansel(root, (Node *) rinfo->clause,
					 pathkey->pk_opfamily, pathkey->pk_strategy,
					 pathkey->pk_nulls_first,
					 &lsel, &rsel);

	/*
	 * Build the new entry and extend the list while root->planner_cxt is
	 * current, so that both are allocated in that longer-lived workspace.
	 */
	saved_cxt = MemoryContextSwitchTo(root->planner_cxt);

	entry = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
	entry->leftscansel = lsel;
	entry->rightscansel = rsel;
	entry->nulls_first = pathkey->pk_nulls_first;
	entry->strategy = pathkey->pk_strategy;
	entry->opfamily = pathkey->pk_opfamily;

	rinfo->scansel_cache = lappend(rinfo->scansel_cache, entry);

	MemoryContextSwitchTo(saved_cxt);

	return entry;
}
/* /*
* cost_hashjoin * cost_hashjoin
* Determines and returns the cost of joining two relations using the * Determines and returns the cost of joining two relations using the
......
This diff is collapsed.
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.136 2007/01/20 20:45:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.137 2007/01/22 20:00:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1197,6 +1197,9 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context) ...@@ -1197,6 +1197,9 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
newinfo->this_selec = -1; newinfo->this_selec = -1;
newinfo->left_ec = NULL; newinfo->left_ec = NULL;
newinfo->right_ec = NULL; newinfo->right_ec = NULL;
newinfo->left_em = NULL;
newinfo->right_em = NULL;
newinfo->scansel_cache = NIL;
newinfo->left_bucketsize = -1; newinfo->left_bucketsize = -1;
newinfo->right_bucketsize = -1; newinfo->right_bucketsize = -1;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.52 2007/01/20 20:45:40 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.53 2007/01/22 20:00:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -344,6 +344,9 @@ make_restrictinfo_internal(Expr *clause, ...@@ -344,6 +344,9 @@ make_restrictinfo_internal(Expr *clause,
restrictinfo->left_ec = NULL; restrictinfo->left_ec = NULL;
restrictinfo->right_ec = NULL; restrictinfo->right_ec = NULL;
restrictinfo->left_em = NULL;
restrictinfo->right_em = NULL;
restrictinfo->scansel_cache = NIL;
restrictinfo->outer_is_left = false; restrictinfo->outer_is_left = false;
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.220 2007/01/20 20:45:40 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.221 2007/01/22 20:00:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -2112,8 +2112,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS) ...@@ -2112,8 +2112,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
* we can estimate how much of the input will actually be read. This * we can estimate how much of the input will actually be read. This
* can have a considerable impact on the cost when using indexscans. * can have a considerable impact on the cost when using indexscans.
* *
* clause should be a clause already known to be mergejoinable. opfamily and * clause should be a clause already known to be mergejoinable. opfamily,
* strategy specify the sort ordering being used. * strategy, and nulls_first specify the sort ordering being used.
* *
* *leftscan is set to the fraction of the left-hand variable expected * *leftscan is set to the fraction of the left-hand variable expected
* to be scanned (0 to 1), and similarly *rightscan for the right-hand * to be scanned (0 to 1), and similarly *rightscan for the right-hand
...@@ -2121,7 +2121,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS) ...@@ -2121,7 +2121,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
*/ */
void void
mergejoinscansel(PlannerInfo *root, Node *clause, mergejoinscansel(PlannerInfo *root, Node *clause,
Oid opfamily, int strategy, Oid opfamily, int strategy, bool nulls_first,
Selectivity *leftscan, Selectivity *leftscan,
Selectivity *rightscan) Selectivity *rightscan)
{ {
...@@ -2214,18 +2214,39 @@ mergejoinscansel(PlannerInfo *root, Node *clause, ...@@ -2214,18 +2214,39 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
/* /*
* Now, the fraction of the left variable that will be scanned is the * Now, the fraction of the left variable that will be scanned is the
* fraction that's <= the right-side maximum value. But only believe * fraction that's <= the right-side maximum value. But only believe
* non-default estimates, else stick with our 1.0. * non-default estimates, else stick with our 1.0. Also, if the sort
* order is nulls-first, we're going to have to read over any nulls too.
*/ */
selec = scalarineqsel(root, leop, false, &leftvar, selec = scalarineqsel(root, leop, false, &leftvar,
rightmax, op_righttype); rightmax, op_righttype);
if (selec != DEFAULT_INEQ_SEL) if (selec != DEFAULT_INEQ_SEL)
{
if (nulls_first && HeapTupleIsValid(leftvar.statsTuple))
{
Form_pg_statistic stats;
stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
selec += stats->stanullfrac;
CLAMP_PROBABILITY(selec);
}
*leftscan = selec; *leftscan = selec;
}
/* And similarly for the right variable. */ /* And similarly for the right variable. */
selec = scalarineqsel(root, revleop, false, &rightvar, selec = scalarineqsel(root, revleop, false, &rightvar,
leftmax, op_lefttype); leftmax, op_lefttype);
if (selec != DEFAULT_INEQ_SEL) if (selec != DEFAULT_INEQ_SEL)
{
if (nulls_first && HeapTupleIsValid(rightvar.statsTuple))
{
Form_pg_statistic stats;
stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
selec += stats->stanullfrac;
CLAMP_PROBABILITY(selec);
}
*rightscan = selec; *rightscan = selec;
}
/* /*
* Only one of the two fractions can really be less than 1.0; believe the * Only one of the two fractions can really be less than 1.0; believe the
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.133 2007/01/20 20:45:40 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.134 2007/01/22 20:00:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -397,6 +397,7 @@ typedef struct EquivalenceClass ...@@ -397,6 +397,7 @@ typedef struct EquivalenceClass
List *ec_opfamilies; /* btree operator family OIDs */ List *ec_opfamilies; /* btree operator family OIDs */
List *ec_members; /* list of EquivalenceMembers */ List *ec_members; /* list of EquivalenceMembers */
List *ec_sources; /* list of generating RestrictInfos */ List *ec_sources; /* list of generating RestrictInfos */
List *ec_derives; /* list of derived RestrictInfos */
Relids ec_relids; /* all relids appearing in ec_members */ Relids ec_relids; /* all relids appearing in ec_members */
bool ec_has_const; /* any pseudoconstants in ec_members? */ bool ec_has_const; /* any pseudoconstants in ec_members? */
bool ec_has_volatile; /* the (sole) member is a volatile expr */ bool ec_has_volatile; /* the (sole) member is a volatile expr */
...@@ -890,6 +891,9 @@ typedef struct RestrictInfo ...@@ -890,6 +891,9 @@ typedef struct RestrictInfo
/* cache space for mergeclause processing; NULL if not yet set */ /* cache space for mergeclause processing; NULL if not yet set */
EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */ EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */
EquivalenceClass *right_ec; /* EquivalenceClass containing righthand */ EquivalenceClass *right_ec; /* EquivalenceClass containing righthand */
EquivalenceMember *left_em; /* EquivalenceMember for lefthand */
EquivalenceMember *right_em; /* EquivalenceMember for righthand */
List *scansel_cache; /* list of MergeScanSelCache structs */
/* transient workspace for use while considering a specific join path */ /* transient workspace for use while considering a specific join path */
bool outer_is_left; /* T = outer var on left, F = on right */ bool outer_is_left; /* T = outer var on left, F = on right */
...@@ -902,6 +906,24 @@ typedef struct RestrictInfo ...@@ -902,6 +906,24 @@ typedef struct RestrictInfo
Selectivity right_bucketsize; /* avg bucketsize of right side */ Selectivity right_bucketsize; /* avg bucketsize of right side */
} RestrictInfo; } RestrictInfo;
/*
 * Since mergejoinscansel() is a relatively expensive function, and would
 * otherwise be invoked many times while planning a large join tree,
 * we go out of our way to cache its results. Each mergejoinable
 * RestrictInfo carries a list of the specific sort orderings that have
 * been considered for use with it, and the resulting selectivities.
 *
 * The list is RestrictInfo.scansel_cache. Entries are looked up and
 * created by cached_scansel() in costsize.c, which matches on all three
 * key fields below before reusing an entry.
 */
typedef struct MergeScanSelCache
{
/* Ordering details (cache lookup key) */
/* These are compared against a PathKey's pk_opfamily, pk_strategy, and */
/* pk_nulls_first respectively. */
Oid opfamily; /* btree opfamily defining the ordering */
int strategy; /* sort direction (ASC or DESC) */
bool nulls_first; /* do NULLs come before normal values? */
/* Results */
/* These hold the two outputs of mergejoinscansel() for the above ordering. */
Selectivity leftscansel; /* scan fraction for clause left side */
Selectivity rightscansel; /* scan fraction for clause right side */
} MergeScanSelCache;
/* /*
* Inner indexscan info. * Inner indexscan info.
* *
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.38 2007/01/05 22:19:59 momjian Exp $ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.39 2007/01/22 20:00:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -160,7 +160,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root, ...@@ -160,7 +160,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
int varRelid, JoinType jointype); int varRelid, JoinType jointype);
extern void mergejoinscansel(PlannerInfo *root, Node *clause, extern void mergejoinscansel(PlannerInfo *root, Node *clause,
Oid opfamily, int strategy, Oid opfamily, int strategy, bool nulls_first,
Selectivity *leftscan, Selectivity *leftscan,
Selectivity *rightscan); Selectivity *rightscan);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment