Commit f8c10952 authored by Tom Lane

Teach planner about the idea that a mergejoin won't necessarily read
both input streams to the end.  If one variable's range is much narrower
than the other's, an indexscan-based merge can win by not scanning all
of the other table.  Per example from Reinhard Max.
parent fdc60bd9
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.47 2001/10/28 06:25:43 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.48 2002/03/01 04:09:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -88,97 +88,62 @@ static bool MergeCompare(List *eqQual, List *compareQual, ExprContext *econtext)
/* ----------------------------------------------------------------
* MJFormSkipQual
* MJFormSkipQuals
*
* This takes the mergeclause which is a qualification of the
* form ((= expr expr) (= expr expr) ...) and forms a new
* qualification like ((> expr expr) (> expr expr) ...) which
* is used by ExecMergeJoin() in order to determine if we should
* skip tuples. The replacement operators are named either ">"
* or "<" according to the replaceopname parameter, and have the
* same operand data types as the "=" operators they replace.
* (We expect there to be such operators because the "=" operators
* form ((= expr expr) (= expr expr) ...) and forms new lists
* of the forms ((< expr expr) (< expr expr) ...) and
* ((> expr expr) (> expr expr) ...). These lists will be used
* by ExecMergeJoin() to determine if we should skip tuples.
* (We expect there to be suitable operators because the "=" operators
* were marked mergejoinable; however, there might be a different
* one needed in each qual clause.)
* ----------------------------------------------------------------
*/
static List *
MJFormSkipQual(List *qualList, char *replaceopname)
static void
MJFormSkipQuals(List *qualList, List **ltQuals, List **gtQuals)
{
List *qualCopy;
List *qualcdr;
Expr *qual;
Oper *op;
HeapTuple optup;
Form_pg_operator opform;
Oid oprleft,
oprright;
List *ltcdr,
*gtcdr;
/*
* qualList is a list: ((op .. ..) ...)
*
* first we make a copy of it. copyObject() makes a deep copy so let's
* use it instead of the old fashoned lispCopy()...
* Make modifiable copies of the qualList.
*/
qualCopy = (List *) copyObject((Node *) qualList);
*ltQuals = (List *) copyObject((Node *) qualList);
*gtQuals = (List *) copyObject((Node *) qualList);
foreach(qualcdr, qualCopy)
/*
* Scan both lists in parallel, so that we can update the operators
* with the minimum number of syscache searches.
*/
ltcdr = *ltQuals;
foreach(gtcdr, *gtQuals)
{
/*
* first get the current (op .. ..) list
*/
qual = lfirst(qualcdr);
Expr *ltqual = (Expr *) lfirst(ltcdr);
Expr *gtqual = (Expr *) lfirst(gtcdr);
Oper *ltop = (Oper *) ltqual->oper;
Oper *gtop = (Oper *) gtqual->oper;
/*
* now get at the op
* The two ops should be identical, so use either one for lookup.
*/
op = (Oper *) qual->oper;
if (!IsA(op, Oper))
elog(ERROR, "MJFormSkipQual: op not an Oper!");
if (!IsA(ltop, Oper))
elog(ERROR, "MJFormSkipQuals: op not an Oper!");
/*
* Get the declared left and right operand types of the operator.
* Note we do *not* use the actual operand types, since those
* might be different in scenarios with binary-compatible data
* types. There should be "<" and ">" operators matching a
* mergejoinable "=" operator's declared operand types, but we
* might not find them if we search with the actual operand types.
* Lookup the operators, and replace the data in the copied
* operator nodes.
*/
optup = SearchSysCache(OPEROID,
ObjectIdGetDatum(op->opno),
0, 0, 0);
if (!HeapTupleIsValid(optup)) /* shouldn't happen */
elog(ERROR, "MJFormSkipQual: operator %u not found", op->opno);
opform = (Form_pg_operator) GETSTRUCT(optup);
oprleft = opform->oprleft;
oprright = opform->oprright;
ReleaseSysCache(optup);
/*
* Now look up the matching "<" or ">" operator. If there isn't
* one, whoever marked the "=" operator mergejoinable was a loser.
*/
optup = SearchSysCache(OPERNAME,
PointerGetDatum(replaceopname),
ObjectIdGetDatum(oprleft),
ObjectIdGetDatum(oprright),
CharGetDatum('b'));
if (!HeapTupleIsValid(optup))
elog(ERROR,
"MJFormSkipQual: mergejoin operator %u has no matching %s op",
op->opno, replaceopname);
opform = (Form_pg_operator) GETSTRUCT(optup);
/*
* And replace the data in the copied operator node.
*/
op->opno = optup->t_data->t_oid;
op->opid = opform->oprcode;
op->op_fcache = NULL;
ReleaseSysCache(optup);
op_mergejoin_crossops(ltop->opno,
&ltop->opno,
&gtop->opno,
&ltop->opid,
&gtop->opid);
ltop->op_fcache = NULL;
gtop->op_fcache = NULL;
ltcdr = lnext(ltcdr);
}
return qualCopy;
}
/* ----------------------------------------------------------------
......@@ -1430,7 +1395,6 @@ bool
ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
{
MergeJoinState *mergestate;
List *joinclauses;
MJ1_printf("ExecInitMergeJoin: %s\n",
"initializing node");
......@@ -1522,9 +1486,9 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
/*
* form merge skip qualifications
*/
joinclauses = node->mergeclauses;
mergestate->mj_OuterSkipQual = MJFormSkipQual(joinclauses, "<");
mergestate->mj_InnerSkipQual = MJFormSkipQual(joinclauses, ">");
MJFormSkipQuals(node->mergeclauses,
&mergestate->mj_OuterSkipQual,
&mergestate->mj_InnerSkipQual);
MJ_printf("\nExecInitMergeJoin: OuterSkipQual is ");
MJ_nodeDisplay(mergestate->mj_OuterSkipQual);
......
......@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.79 2001/10/25 05:49:32 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.80 2002/03/01 04:09:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -58,6 +58,7 @@
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "parser/parsetree.h"
#include "utils/selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
......@@ -565,12 +566,29 @@ cost_mergejoin(Path *path, Query *root,
Cost startup_cost = 0;
Cost run_cost = 0;
Cost cpu_per_tuple;
double outer_rows,
inner_rows;
double ntuples;
Selectivity leftscan,
rightscan;
Path sort_path; /* dummy for result of cost_sort */
if (!enable_mergejoin)
startup_cost += disable_cost;
/*
* A merge join will stop as soon as it exhausts either input stream.
* Estimate fraction of the left and right inputs that will actually
* need to be scanned. We use only the first (most significant)
* merge clause for this purpose.
*/
mergejoinscansel(root,
(Node *) ((RestrictInfo *) lfirst(mergeclauses))->clause,
&leftscan, &rightscan);
outer_rows = outer_path->parent->rows * leftscan;
inner_rows = inner_path->parent->rows * rightscan;
/* cost of source data */
/*
......@@ -588,12 +606,14 @@ cost_mergejoin(Path *path, Query *root,
outer_path->parent->rows,
outer_path->parent->width);
startup_cost += sort_path.startup_cost;
run_cost += sort_path.total_cost - sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* leftscan;
}
else
{
startup_cost += outer_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
run_cost += (outer_path->total_cost - outer_path->startup_cost)
* leftscan;
}
if (innersortkeys) /* do we need to sort inner? */
......@@ -605,30 +625,33 @@ cost_mergejoin(Path *path, Query *root,
inner_path->parent->rows,
inner_path->parent->width);
startup_cost += sort_path.startup_cost;
run_cost += sort_path.total_cost - sort_path.startup_cost;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* rightscan;
}
else
{
startup_cost += inner_path->startup_cost;
run_cost += inner_path->total_cost - inner_path->startup_cost;
run_cost += (inner_path->total_cost - inner_path->startup_cost)
* rightscan;
}
/*
* The number of tuple comparisons needed depends drastically on the
* number of equal keys in the two source relations, which we have no
* good way of estimating. Somewhat arbitrarily, we charge one tuple
* good way of estimating. (XXX could the MCV statistics help?)
* Somewhat arbitrarily, we charge one tuple
* comparison (one cpu_operator_cost) for each tuple in the two source
* relations. This is probably a lower bound.
*/
run_cost += cpu_operator_cost *
(outer_path->parent->rows + inner_path->parent->rows);
run_cost += cpu_operator_cost * (outer_rows + inner_rows);
/*
* For each tuple that gets through the mergejoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join. It's OK to use an
* approximate selectivity here, since in most cases this is a minor
* component of the cost.
* component of the cost. NOTE: it's correct to use the unscaled rows
* counts here, not the scaled-down counts we obtained above.
*/
ntuples = approx_selectivity(root, mergeclauses) *
outer_path->parent->rows * inner_path->parent->rows;
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.59 2001/10/25 05:49:46 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.60 2002/03/01 04:09:26 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
......@@ -369,6 +369,76 @@ op_mergejoinable(Oid opno, Oid ltype, Oid rtype, Oid *leftOp, Oid *rightOp)
return result;
}
/*
 * op_mergejoin_crossops
 *
 *		Given an operator that was already found to be mergejoinable,
 *		locate the "<" and ">" operators that take the same declared
 *		left and right operand types.
 *
 * opno		the mergejoinable operator to start from
 * ltop		receives the OID of the matching "<" operator
 * gtop		receives the OID of the matching ">" operator
 * ltproc	if not NULL, receives the oprcode (regproc id) of the "<" operator
 * gtproc	if not NULL, receives the oprcode (regproc id) of the ">" operator
 *
 * An error is raised via elog(ERROR) when opno itself, or either of the
 * comparison operators, cannot be found in the syscache.  A missing
 * comparison operator means the operator should not have been marked
 * mergejoinable in the first place.
 */
void
op_mergejoin_crossops(Oid opno, Oid *ltop, Oid *gtop,
					  RegProcedure *ltproc, RegProcedure *gtproc)
{
	HeapTuple	opertup;
	Form_pg_operator operform;
	Oid			lefttype;
	Oid			righttype;

	/*
	 * Step 1: read the operator's own pg_operator entry to learn the
	 * operand types it was declared with.  Copy them out before the
	 * cache entry is released.
	 */
	opertup = SearchSysCache(OPEROID,
							 ObjectIdGetDatum(opno),
							 0, 0, 0);
	if (!HeapTupleIsValid(opertup))		/* not expected to occur */
		elog(ERROR, "op_mergejoin_crossops: operator %u not found", opno);
	operform = (Form_pg_operator) GETSTRUCT(opertup);
	lefttype = operform->oprleft;
	righttype = operform->oprright;
	ReleaseSysCache(opertup);

	/*
	 * Step 2: find the operator named "<" with those operand types.
	 * NOTE(review): the 'b' key presumably restricts the search to binary
	 * operators -- confirm against the OPERNAME cache definition.
	 */
	opertup = SearchSysCache(OPERNAME,
							 PointerGetDatum("<"),
							 ObjectIdGetDatum(lefttype),
							 ObjectIdGetDatum(righttype),
							 CharGetDatum('b'));
	if (!HeapTupleIsValid(opertup))
		elog(ERROR, "op_mergejoin_crossops: mergejoin operator %u has no matching < operator",
			 opno);
	operform = (Form_pg_operator) GETSTRUCT(opertup);
	*ltop = opertup->t_data->t_oid;
	if (ltproc != NULL)
		*ltproc = operform->oprcode;
	ReleaseSysCache(opertup);

	/*
	 * Step 3: repeat the lookup for the operator named ">".
	 */
	opertup = SearchSysCache(OPERNAME,
							 PointerGetDatum(">"),
							 ObjectIdGetDatum(lefttype),
							 ObjectIdGetDatum(righttype),
							 CharGetDatum('b'));
	if (!HeapTupleIsValid(opertup))
		elog(ERROR, "op_mergejoin_crossops: mergejoin operator %u has no matching > operator",
			 opno);
	operform = (Form_pg_operator) GETSTRUCT(opertup);
	*gtop = opertup->t_data->t_oid;
	if (gtproc != NULL)
		*gtproc = operform->oprcode;
	ReleaseSysCache(opertup);
}
/*
* op_hashjoinable
*
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: lsyscache.h,v 1.39 2001/11/05 17:46:36 momjian Exp $
* $Id: lsyscache.h,v 1.40 2002/03/01 04:09:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -29,6 +29,8 @@ extern RegProcedure get_opcode(Oid opno);
extern char *get_opname(Oid opno);
extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
Oid *leftOp, Oid *rightOp);
extern void op_mergejoin_crossops(Oid opno, Oid *ltop, Oid *gtop,
RegProcedure *ltproc, RegProcedure *gtproc);
extern Oid op_hashjoinable(Oid opno, Oid ltype, Oid rtype);
extern bool op_iscachable(Oid opno);
extern Oid get_commutator(Oid opno);
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: selfuncs.h,v 1.4 2001/11/05 17:46:36 momjian Exp $
* $Id: selfuncs.h,v 1.5 2002/03/01 04:09:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -66,8 +66,12 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
extern void mergejoinscansel(Query *root, Node *clause,
Selectivity *leftscan,
Selectivity *rightscan);
extern Datum btcostestimate(PG_FUNCTION_ARGS);
extern Datum rtcostestimate(PG_FUNCTION_ARGS);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment