Change the implementation of hash join to attempt to avoid unnecessary

work if either of the join relations is empty. The logic is: (1) if the inner relation's startup cost is less than the outer relation's startup cost and this is not an outer join, read a single tuple from the inner relation via ExecHash() - if NULL, we're done; (2) read a single tuple from the outer relation - if NULL, we're done; (3) build the hash table on the inner relation - if the hash table is empty and this is not an outer join, we're done; (4) otherwise, do the hash join as usual. The implementation uses the new MultiExecProcNode API, per a suggestion from Tom: invoking ExecHash() now produces the first tuple from the Hash node's child node, whereas MultiExecHash() builds the hash table. I had to put in a bit of a kludge to get the row count returned for EXPLAIN ANALYZE to be correct: since ExecHash() is invoked to return a tuple, and then MultiExecHash() is invoked, we would return one too many tuples to EXPLAIN ANALYZE. I hacked around this by just manually detecting this situation and subtracting 1 from the EXPLAIN ANALYZE row count.

Change the implementation of hash join to attempt to avoid unnecessary
work if either of the join relations is empty. The logic is: (1) if the inner relation's startup cost is less than the outer relation's startup cost and this is not an outer join, read a single tuple from the inner relation via ExecHash() - if NULL, we're done; (2) read a single tuple from the outer relation - if NULL, we're done; (3) build the hash table on the inner relation - if the hash table is empty and this is not an outer join, we're done; (4) otherwise, do the hash join as usual. The implementation uses the new MultiExecProcNode API, per a suggestion from Tom: invoking ExecHash() now produces the first tuple from the Hash node's child node, whereas MultiExecHash() builds the hash table. I had to put in a bit of a kludge to get the row count returned for EXPLAIN ANALYZE to be correct: since ExecHash() is invoked to return a tuple, and then MultiExecHash() is invoked, we would return one too many tuples to EXPLAIN ANALYZE. I hacked around this by just manually detecting this situation and subtracting 1 from the EXPLAIN ANALYZE row count.
c119c5bd · Neil Conway · 4aaff553 · c119c5bd · c119c5bd · c119c5bd
Commit c119c5bd authored Jun 15, 2005 by Neil Conway
3 changed files
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.93 2005/04/16 20:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.94 2005/06/15 07:27:44 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -37,14 +37,22 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
 /* ----------------------------------------------------------------
 *		ExecHash
 *
- *		stub for pro forma compliance
+ *		produce the first tuple from our child node (and _only_ the
+ *		first tuple). This is of limited general use -- it does not
+ *		hash its output, and produces only a single tuple. It is
+ *		provided so that hash join can probe the inner hash input to
+ *		determine whether it is empty without needing to build the
+ *		entire hash table first, which is what MultiExecHash() would
+ *		do.
 * ----------------------------------------------------------------
 */
 TupleTableSlot *
 ExecHash(HashState *node)
 {
-	elog(ERROR, "Hash node does not support ExecProcNode call convention");
-	return NULL;
+	if (TupIsNull(node->firstTuple))
+		node->firstTuple = ExecProcNode(outerPlanState(node));
+
+	return node->firstTuple;
 }

 /* ----------------------------------------------------------------
@@ -63,6 +71,7 @@ MultiExecHash(HashState *node)
 	TupleTableSlot *slot;
 	ExprContext *econtext;
 	uint32		hashvalue;
+	bool		cleared_first_tuple = false;

 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
@@ -85,9 +94,19 @@ MultiExecHash(HashState *node)
 	 */
 	for (;;)
 	{
-		slot = ExecProcNode(outerNode);
-		if (TupIsNull(slot))
-			break;
+		/* use and clear the tuple produced by ExecHash(), if any */
+		if (!TupIsNull(node->firstTuple))
+		{
+			slot = node->firstTuple;
+			node->firstTuple = NULL;
+			cleared_first_tuple = true;
+		}
+		else
+		{
+			slot = ExecProcNode(outerNode);
+			if (TupIsNull(slot))
+				break;
+		}
 		hashtable->totalTuples += 1;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
@@ -97,7 +116,19 @@ MultiExecHash(HashState *node)

 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
-		InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
+	{
+		/*
+		 * XXX: kludge -- if ExecHash() was invoked, we've already
+		 * included the tuple that it produced in the row output count
+		 * for this node, so subtract 1 from the # of hashed tuples.
+		 */
+		if (cleared_first_tuple)
+			InstrStopNodeMulti(node->ps.instrument,
+							   hashtable->totalTuples - 1);
+		else
+			InstrStopNodeMulti(node->ps.instrument,
+							   hashtable->totalTuples);
+	}

 	/*
 	 * We do not return the hash table directly because it's not a subtype
@@ -130,6 +161,7 @@ ExecInitHash(Hash *node, EState *estate)
 	hashstate->ps.state = estate;
 	hashstate->hashtable = NULL;
 	hashstate->hashkeys = NIL;	/* will be set by parent HashJoin */
+	hashstate->firstTuple = NULL;

 	/*
 	 * Miscellaneous initialization
@@ -189,6 +221,8 @@ ExecEndHash(HashState *node)
 {
 	PlanState  *outerPlan;

+	node->firstTuple = NULL;
+
 	/*
 	 * free exprcontext
 	 */
@@ -830,6 +864,8 @@ ExecHashTableReset(HashJoinTable hashtable)
 void
 ExecReScanHash(HashState *node, ExprContext *exprCtxt)
 {
+	node->firstTuple = NULL;
+
 	/*
 	 * if chgParam of subnode is not null then plan will be re-scanned by
 	 * first ExecProcNode.

--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.71 2005/04/16 20:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.72 2005/06/15 07:27:44 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -31,7 +31,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 						  uint32 *hashvalue,
 						  TupleTableSlot *tupleSlot);
 static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
-
+static TupleTableSlot *ExecHashJoinReadOuterPlan(HashJoinState *hjstate);

 /* ----------------------------------------------------------------
 *		ExecHashJoin
@@ -57,8 +57,6 @@ ExecHashJoin(HashJoinState *node)
 	HashJoinTable hashtable;
 	HeapTuple	curtuple;
 	TupleTableSlot *outerTupleSlot;
-	uint32		hashvalue;
-	int			batchno;

 	/*
 	 * get information from HashJoin node
@@ -107,31 +105,68 @@ ExecHashJoin(HashJoinState *node)
 	 */
 	ResetExprContext(econtext);

-	/*
-	 * if this is the first call, build the hash table for inner relation
-	 */
 	if (hashtable == NULL)
 	{
 		/*
-		 * create the hash table
+		 * This is the first call to the node. When _either_ of the
+		 * hash join inputs are empty, we want to avoid doing
+		 * unnecessary work (e.g. building the hash table for the
+		 * inner join relation). We therefore read a single tuple from
+		 * both inputs before proceeding further. We choose which
+		 * input to probe first based on the startup cost of the plan
+		 * node.
+		 *
+		 * Note that if we're executing an outer join and the inner
+		 * relation is empty, we still have work to do.
+		 */
+
+		/* Consider probing the inner relation first */
+		if (hashNode->ps.plan->startup_cost <= outerNode->plan->startup_cost &&
+			node->js.jointype != JOIN_LEFT)
+		{
+			/*
+			 * ExecHash() lets us get a single tuple from the inner
+			 * relation without building the entire hash table
+			 */
+			TupleTableSlot *tup = ExecProcNode(&hashNode->ps);
+			if (TupIsNull(tup))
+				return NULL;
+		}
+
+		/*
+		 * Before we can check the outer relation, we need to build
+		 * the hash table. This is somewhat a waste of time if the
+		 * outer relation is empty, but it would be awkward to avoid.
 		 */
 		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
 										node->hj_HashOperators);
 		node->hj_HashTable = hashtable;
+		hashNode->hashtable = hashtable;
+
+		/* Now check the outer relation */
+		outerTupleSlot = ExecHashJoinReadOuterPlan(node);
+
+		if (TupIsNull(outerTupleSlot))
+		{
+			ExecHashTableDestroy(node->hj_HashTable);
+			node->hj_HashTable = NULL;
+			return NULL;
+		}

 		/*
-		 * execute the Hash node, to build the hash table
+		 * Okay, we can't avoid it, so execute the Hash node to build
+		 * the hash table
 		 */
-		hashNode->hashtable = hashtable;
 		(void) MultiExecProcNode((PlanState *) hashNode);

 		/*
-		 * If the inner relation is completely empty, and we're not doing
-		 * an outer join, we can quit without scanning the outer relation.
+		 * If the inner relation is empty but its startup cost was
+		 * greater than the outer relation's startup cost, we can arrive
+		 * here -- we're done unless this is an outer join
 		 */
 		if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
 		{
-			ExecHashTableDestroy(hashtable);
+			ExecHashTableDestroy(node->hj_HashTable);
 			node->hj_HashTable = NULL;
 			return NULL;
 		}
@@ -153,46 +188,9 @@ ExecHashJoin(HashJoinState *node)
 		 */
 		if (node->hj_NeedNewOuter)
 		{
-			outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
-													   node,
-													   &hashvalue);
+			outerTupleSlot = ExecHashJoinReadOuterPlan(node);
 			if (TupIsNull(outerTupleSlot))
-			{
-				/* end of join */
-				return NULL;
-			}
-
-			node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
-			econtext->ecxt_outertuple = outerTupleSlot;
-			node->hj_NeedNewOuter = false;
-			node->hj_MatchedOuter = false;
-
-			/*
-			 * now we have an outer tuple, find the corresponding bucket
-			 * for this tuple from the hash table
-			 */
-			node->hj_CurHashValue = hashvalue;
-			ExecHashGetBucketAndBatch(hashtable, hashvalue,
-									  &node->hj_CurBucketNo, &batchno);
-			node->hj_CurTuple = NULL;
-
-			/*
-			 * Now we've got an outer tuple and the corresponding hash
-			 * bucket, but this tuple may not belong to the current batch.
-			 */
-			if (batchno != hashtable->curbatch)
-			{
-				/*
-				 * Need to postpone this outer tuple to a later batch.
-				 * Save it in the corresponding outer-batch file.
-				 */
-				Assert(batchno > hashtable->curbatch);
-				ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
-									  hashvalue,
-									  &hashtable->outerBatchFile[batchno]);
-				node->hj_NeedNewOuter = true;
-				continue;	/* loop around for a new outer tuple */
-			}
+				return NULL; /* end of join */
 		}

 		/*
@@ -487,6 +485,79 @@ ExecEndHashJoin(HashJoinState *node)
 	ExecEndNode(innerPlanState(node));
 }

+/*
+ * ExecHashJoinReadOuterPlan
+ *
+ *		do all the work necessary to produce the next tuple from the
+ *		outer hash join relation that is in the current batch. Returns
+ *		NULL if there are no more tuples in the outer relation.
+ */
+static TupleTableSlot *
+ExecHashJoinReadOuterPlan(HashJoinState *hjstate)
+{
+	PlanState *outerNode;
+	ExprContext *econtext;
+	HashJoinTable hashtable;
+
+	outerNode = outerPlanState(hjstate);
+	econtext = hjstate->js.ps.ps_ExprContext;
+	hashtable = hjstate->hj_HashTable;
+
+	for (;;)
+	{
+		TupleTableSlot *result;
+		uint32		hashvalue;
+		int			batchno;
+
+		result = ExecHashJoinOuterGetTuple(outerNode,
+										   hjstate,
+										   &hashvalue);
+		if (TupIsNull(result))
+		{
+			/* end of join */
+			return NULL;
+		}
+
+		hjstate->js.ps.ps_OuterTupleSlot = result;
+		econtext->ecxt_outertuple = result;
+		hjstate->hj_NeedNewOuter = false;
+		hjstate->hj_MatchedOuter = false;
+
+		/*
+		 * now we have an outer tuple, find the corresponding bucket
+		 * for this tuple from the hash table
+		 */
+		hjstate->hj_CurHashValue = hashvalue;
+		ExecHashGetBucketAndBatch(hashtable, hashvalue,
+								  &hjstate->hj_CurBucketNo, &batchno);
+		hjstate->hj_CurTuple = NULL;
+
+		/*
+		 * Now we've got an outer tuple and the corresponding hash
+		 * bucket, but this tuple may not belong to the current batch.
+		 */
+		if (batchno != hashtable->curbatch)
+		{
+			/*
+			 * Need to postpone this outer tuple to a later batch.
+			 * Save it in the corresponding outer-batch file.
+			 */
+			Assert(batchno > hashtable->curbatch);
+			ExecHashJoinSaveTuple(ExecFetchSlotTuple(result),
+								  hashvalue,
+								  &hashtable->outerBatchFile[batchno]);
+			hjstate->hj_NeedNewOuter = true;
+			continue;	/* Get the next outer tuple */
+		}
+
+		/*
+		 * Otherwise, we have a tuple in the current batch, so we're
+		 * done
+		 */
+		return result;
+	}
+}
+
 /*
 * ExecHashJoinOuterGetTuple
 *
@@ -769,7 +840,6 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 	return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
 }

-
 void
 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
 {

--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.133 2005/05/14 21:29:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.134 2005/06/15 07:27:44 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1218,6 +1218,7 @@ typedef struct HashState
 	HashJoinTable hashtable;	/* hash table for the hashjoin */
 	List	   *hashkeys;		/* list of ExprState nodes */
 	/* hashkeys is same as parent's hj_InnerHashKeys */
+	TupleTableSlot *firstTuple;	/* tuple produced by ExecHash() */
 } HashState;

 /* ----------------