Commit f38fbf31 authored by Tom Lane's avatar Tom Lane

If we expect a hash join to be performed in multiple batches, suppress

"physical tlist" optimization on the outer relation (ie, force a projection
step to occur in its scan).  This avoids storing useless column values when
the outer relation's tuples are written to temporary batch files.

Modified version of a patch by Michael Henderson and Ramon Lawrence.
parent ee4c187f
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.355 2009/03/21 00:04:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.356 2009/03/26 17:15:34 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
...@@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node) ...@@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node)
_outJoinPathInfo(str, (JoinPath *) node); _outJoinPathInfo(str, (JoinPath *) node);
WRITE_NODE_FIELD(path_hashclauses); WRITE_NODE_FIELD(path_hashclauses);
WRITE_INT_FIELD(num_batches);
} }
static void static void
......
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.206 2009/03/26 17:15:35 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) ...@@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
&numbatches, &numbatches,
&num_skew_mcvs); &num_skew_mcvs);
virtualbuckets = (double) numbuckets *(double) numbatches; virtualbuckets = (double) numbuckets *(double) numbatches;
/* mark the path with estimated # of batches */
path->num_batches = numbatches;
/* /*
* Determine bucketsize fraction for inner relation. We use the smallest * Determine bucketsize fraction for inner relation. We use the smallest
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.256 2009/03/21 00:04:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.257 2009/03/26 17:15:35 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root, ...@@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root,
/* We don't want any excess columns in the hashed tuples */ /* We don't want any excess columns in the hashed tuples */
disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath); disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
/* If we expect batching, suppress excess columns in outer tuples too */
if (best_path->num_batches > 1)
disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath);
/* /*
* If there is a single join clause and we can identify the outer * If there is a single join clause and we can identify the outer
* variable as a simple column reference, supply its identity for * variable as a simple column reference, supply its identity for
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.151 2009/03/26 17:15:35 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root, ...@@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root,
pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.joinrestrictinfo = restrict_clauses;
/* A hashjoin never has pathkeys, since its ordering is unpredictable */ /*
* A hashjoin never has pathkeys, since its output ordering is
* unpredictable due to possible batching. XXX If the inner relation is
* small enough, we could instruct the executor that it must not batch,
* and then we could assume that the output inherits the outer relation's
* ordering, which might save a sort step. However there is considerable
* downside if our estimate of the inner relation size is badly off.
* For the moment we don't risk it. (Note also that if we wanted to take
* this seriously, joinpath.c would have to consider many more paths for
* the outer rel than it does now.)
*/
pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses; pathnode->path_hashclauses = hashclauses;
/* cost_hashjoin will fill in pathnode->num_batches */
cost_hashjoin(pathnode, root, sjinfo); cost_hashjoin(pathnode, root, sjinfo);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.170 2009/03/05 23:06:45 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.171 2009/03/26 17:15:35 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -845,6 +845,7 @@ typedef struct HashPath ...@@ -845,6 +845,7 @@ typedef struct HashPath
{ {
JoinPath jpath; JoinPath jpath;
List *path_hashclauses; /* join clauses used for hashing */ List *path_hashclauses; /* join clauses used for hashing */
int num_batches; /* number of batches expected */
} HashPath; } HashPath;
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment