Commit f4e4b327 authored by Tom Lane's avatar Tom Lane

Support RIGHT and FULL OUTER JOIN in hash joins.

This is advantageous first because it allows us to hash the smaller table
regardless of the outer-join type, and second because hash join can be more
flexible than merge join in dealing with arbitrary join quals in a FULL
join.  For merge join all the join quals have to be mergejoinable, but hash
join will work so long as there's at least one hashjoinable qual --- the
others can be any condition.  (This is true essentially because we don't
keep per-inner-tuple match flags in merge join, while hash join can do so.)

To do this, we need a has-it-been-matched flag for each tuple in the
hashtable, not just one for the current outer tuple.  The key idea that
makes this practical is that we can store the match flag in the tuple's
infomask, since there are lots of bits there that are of no interest for a
MinimalTuple.  So we aren't increasing the size of the hashtable at all for
the feature.

To write this without turning the hash code into even more of a pile of
spaghetti than it already was, I rewrote ExecHashJoin in a state-machine
style, similar to ExecMergeJoin.  Other than that decision, it was pretty
straightforward.
parent 17cb9e8c
...@@ -105,7 +105,8 @@ MultiExecHash(HashState *node) ...@@ -105,7 +105,8 @@ MultiExecHash(HashState *node)
break; break;
/* We have to compute the hash value */ /* We have to compute the hash value */
econtext->ecxt_innertuple = slot; econtext->ecxt_innertuple = slot;
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false, if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
false, hashtable->keepNulls,
&hashvalue)) &hashvalue))
{ {
int bucketNumber; int bucketNumber;
...@@ -231,7 +232,7 @@ ExecEndHash(HashState *node) ...@@ -231,7 +232,7 @@ ExecEndHash(HashState *node)
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
HashJoinTable HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators) ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{ {
HashJoinTable hashtable; HashJoinTable hashtable;
Plan *outerNode; Plan *outerNode;
...@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
hashtable->nbuckets = nbuckets; hashtable->nbuckets = nbuckets;
hashtable->log2_nbuckets = log2_nbuckets; hashtable->log2_nbuckets = log2_nbuckets;
hashtable->buckets = NULL; hashtable->buckets = NULL;
hashtable->keepNulls = keepNulls;
hashtable->skewEnabled = false; hashtable->skewEnabled = false;
hashtable->skewBucket = NULL; hashtable->skewBucket = NULL;
hashtable->skewBucketLen = 0; hashtable->skewBucketLen = 0;
...@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable, ...@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
HashJoinTuple hashTuple; HashJoinTuple hashTuple;
int hashTupleSize; int hashTupleSize;
/* Create the HashJoinTuple */
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt, hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
hashTupleSize); hashTupleSize);
hashTuple->hashvalue = hashvalue; hashTuple->hashvalue = hashvalue;
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
/*
* We always reset the tuple-matched flag on insertion. This is okay
* even when reloading a tuple from a batch file, since the tuple
* could not possibly have been matched to an outer tuple before it
* went into the batch file.
*/
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
/* Push it onto the front of the bucket's list */
hashTuple->next = hashtable->buckets[bucketno]; hashTuple->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = hashTuple; hashtable->buckets[bucketno] = hashTuple;
/* Account for space used, and back off if we've used too much */
hashtable->spaceUsed += hashTupleSize; hashtable->spaceUsed += hashTupleSize;
if (hashtable->spaceUsed > hashtable->spacePeak) if (hashtable->spaceUsed > hashtable->spacePeak)
hashtable->spacePeak = hashtable->spaceUsed; hashtable->spacePeak = hashtable->spaceUsed;
...@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, ...@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
* scan a hash bucket for matches to the current outer tuple * scan a hash bucket for matches to the current outer tuple
* *
* The current outer tuple must be stored in econtext->ecxt_outertuple. * The current outer tuple must be stored in econtext->ecxt_outertuple.
*
* On success, the inner tuple is stored into hjstate->hj_CurTuple and
* econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
* for the latter.
*/ */
HashJoinTuple bool
ExecScanHashBucket(HashJoinState *hjstate, ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext) ExprContext *econtext)
{ {
...@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate, ...@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
if (ExecQual(hjclauses, econtext, false)) if (ExecQual(hjclauses, econtext, false))
{ {
hjstate->hj_CurTuple = hashTuple; hjstate->hj_CurTuple = hashTuple;
return hashTuple; return true;
} }
} }
...@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate, ...@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate,
/* /*
* no match * no match
*/ */
return NULL; return false;
}
/*
* ExecPrepHashTableForUnmatched
* set up for a series of ExecScanHashTableForUnmatched calls
*/
void
ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
{
/*
*----------
* During this scan we use the HashJoinState fields as follows:
*
* hj_CurBucketNo: next regular bucket to scan
* hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
* hj_CurTuple: last tuple returned, or NULL to start next bucket
*----------
*/
hjstate->hj_CurBucketNo = 0;
hjstate->hj_CurSkewBucketNo = 0;
hjstate->hj_CurTuple = NULL;
}
/*
* ExecScanHashTableForUnmatched
* scan the hash table for unmatched inner tuples
*
* On success, the inner tuple is stored into hjstate->hj_CurTuple and
* econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
* for the latter.
*/
bool
ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
{
HashJoinTable hashtable = hjstate->hj_HashTable;
HashJoinTuple hashTuple = hjstate->hj_CurTuple;
for (;;)
{
/*
* hj_CurTuple is the address of the tuple last returned from the
* current bucket, or NULL if it's time to start scanning a new
* bucket.
*/
if (hashTuple != NULL)
hashTuple = hashTuple->next;
else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
{
hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
hjstate->hj_CurBucketNo++;
}
else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
{
int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
hashTuple = hashtable->skewBucket[j]->tuples;
hjstate->hj_CurSkewBucketNo++;
}
else
break; /* finished all buckets */
while (hashTuple != NULL)
{
if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
{
TupleTableSlot *inntuple;
/* insert hashtable's tuple into exec slot */
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
hjstate->hj_HashTupleSlot,
false); /* do not pfree */
econtext->ecxt_innertuple = inntuple;
/*
* Reset temp memory each time; although this function doesn't
* do any qual eval, the caller will, so let's keep it
* parallel to ExecScanHashBucket.
*/
ResetExprContext(econtext);
hjstate->hj_CurTuple = hashTuple;
return true;
}
hashTuple = hashTuple->next;
}
}
/*
* no more unmatched tuples
*/
return false;
} }
/* /*
...@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable) ...@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable)
MemoryContextSwitchTo(oldcxt); MemoryContextSwitchTo(oldcxt);
} }
/*
* ExecHashTableResetMatchFlags
* Clear all the HeapTupleHeaderHasMatch flags in the table
*/
void
ExecHashTableResetMatchFlags(HashJoinTable hashtable)
{
HashJoinTuple tuple;
int i;
/* Reset all flags in the main table ... */
for (i = 0; i < hashtable->nbuckets; i++)
{
for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
}
/* ... and the same for the skew buckets, if any */
for (i = 0; i < hashtable->nSkewBuckets; i++)
{
int j = hashtable->skewBucketNums[i];
HashSkewBucket *skewBucket = hashtable->skewBucket[j];
for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
}
}
void void
ExecReScanHash(HashState *node) ExecReScanHash(HashState *node)
{ {
...@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable, ...@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
hashTupleSize); hashTupleSize);
hashTuple->hashvalue = hashvalue; hashTuple->hashvalue = hashvalue;
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
/* Push it onto the front of the skew bucket's list */ /* Push it onto the front of the skew bucket's list */
hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples; hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;
......
...@@ -22,8 +22,20 @@ ...@@ -22,8 +22,20 @@
#include "utils/memutils.h" #include "utils/memutils.h"
/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */ /*
#define HASHJOIN_IS_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL) * States of the ExecHashJoin state machine
*/
#define HJ_BUILD_HASHTABLE 1
#define HJ_NEED_NEW_OUTER 2
#define HJ_SCAN_BUCKET 3
#define HJ_FILL_OUTER_TUPLE 4
#define HJ_FILL_INNER_TUPLES 5
#define HJ_NEED_NEW_BATCH 6
/* Returns true if doing null-fill on outer relation */
#define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
/* Returns true if doing null-fill on inner relation */
#define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode, static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
HashJoinState *hjstate, HashJoinState *hjstate,
...@@ -32,7 +44,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate, ...@@ -32,7 +44,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
BufFile *file, BufFile *file,
uint32 *hashvalue, uint32 *hashvalue,
TupleTableSlot *tupleSlot); TupleTableSlot *tupleSlot);
static int ExecHashJoinNewBatch(HashJoinState *hjstate); static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
...@@ -52,11 +64,9 @@ ExecHashJoin(HashJoinState *node) ...@@ -52,11 +64,9 @@ ExecHashJoin(HashJoinState *node)
HashState *hashNode; HashState *hashNode;
List *joinqual; List *joinqual;
List *otherqual; List *otherqual;
TupleTableSlot *inntuple;
ExprContext *econtext; ExprContext *econtext;
ExprDoneCond isDone; ExprDoneCond isDone;
HashJoinTable hashtable; HashJoinTable hashtable;
HashJoinTuple curtuple;
TupleTableSlot *outerTupleSlot; TupleTableSlot *outerTupleSlot;
uint32 hashvalue; uint32 hashvalue;
int batchno; int batchno;
...@@ -69,10 +79,6 @@ ExecHashJoin(HashJoinState *node) ...@@ -69,10 +79,6 @@ ExecHashJoin(HashJoinState *node)
otherqual = node->js.ps.qual; otherqual = node->js.ps.qual;
hashNode = (HashState *) innerPlanState(node); hashNode = (HashState *) innerPlanState(node);
outerNode = outerPlanState(node); outerNode = outerPlanState(node);
/*
* get information from HashJoin state
*/
hashtable = node->hj_HashTable; hashtable = node->hj_HashTable;
econtext = node->js.ps.ps_ExprContext; econtext = node->js.ps.ps_ExprContext;
...@@ -100,31 +106,47 @@ ExecHashJoin(HashJoinState *node) ...@@ -100,31 +106,47 @@ ExecHashJoin(HashJoinState *node)
ResetExprContext(econtext); ResetExprContext(econtext);
/* /*
* if this is the first call, build the hash table for inner relation * run the hash join state machine
*/ */
if (hashtable == NULL) for (;;)
{ {
switch (node->hj_JoinState)
{
case HJ_BUILD_HASHTABLE:
/*
* First time through: build hash table for inner relation.
*/
Assert(hashtable == NULL);
/* /*
* If the outer relation is completely empty, we can quit without * If the outer relation is completely empty, and it's not
* building the hash table. However, for an inner join it is only a * right/full join, we can quit without building the hash
* win to check this when the outer relation's startup cost is less * table. However, for an inner join it is only a win to
* than the projected cost of building the hash table. Otherwise it's * check this when the outer relation's startup cost is less
* best to build the hash table first and see if the inner relation is * than the projected cost of building the hash
* empty. (When it's an outer join, we should always make this check, * table. Otherwise it's best to build the hash table first
* since we aren't going to be able to skip the join on the strength * and see if the inner relation is empty. (When it's a left
* of an empty inner relation anyway.) * join, we should always make this check, since we aren't
* going to be able to skip the join on the strength of an
* empty inner relation anyway.)
* *
* If we are rescanning the join, we make use of information gained on * If we are rescanning the join, we make use of information
* the previous scan: don't bother to try the prefetch if the previous * gained on the previous scan: don't bother to try the
* scan found the outer relation nonempty. This is not 100% reliable * prefetch if the previous scan found the outer relation
* since with new parameters the outer relation might yield different * nonempty. This is not 100% reliable since with new
* parameters the outer relation might yield different
* results, but it's a good heuristic. * results, but it's a good heuristic.
* *
* The only way to make the check is to try to fetch a tuple from the * The only way to make the check is to try to fetch a tuple
* outer plan node. If we succeed, we have to stash it away for later * from the outer plan node. If we succeed, we have to stash
* consumption by ExecHashJoinOuterGetTuple. * it away for later consumption by ExecHashJoinOuterGetTuple.
*/ */
if (HASHJOIN_IS_OUTER(node) || if (HJ_FILL_INNER(node))
{
/* no chance to not build the hash table */
node->hj_FirstOuterTupleSlot = NULL;
}
else if (HJ_FILL_OUTER(node) ||
(outerNode->plan->startup_cost < hashNode->ps.plan->total_cost && (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
!node->hj_OuterNotEmpty)) !node->hj_OuterNotEmpty))
{ {
...@@ -144,7 +166,8 @@ ExecHashJoin(HashJoinState *node) ...@@ -144,7 +166,8 @@ ExecHashJoin(HashJoinState *node)
* create the hash table * create the hash table
*/ */
hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan, hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
node->hj_HashOperators); node->hj_HashOperators,
HJ_FILL_INNER(node));
node->hj_HashTable = hashtable; node->hj_HashTable = hashtable;
/* /*
...@@ -154,10 +177,11 @@ ExecHashJoin(HashJoinState *node) ...@@ -154,10 +177,11 @@ ExecHashJoin(HashJoinState *node)
(void) MultiExecProcNode((PlanState *) hashNode); (void) MultiExecProcNode((PlanState *) hashNode);
/* /*
* If the inner relation is completely empty, and we're not doing an * If the inner relation is completely empty, and we're not
* outer join, we can quit without scanning the outer relation. * doing a left outer join, we can quit without scanning the
* outer relation.
*/ */
if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node)) if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
return NULL; return NULL;
/* /*
...@@ -167,39 +191,43 @@ ExecHashJoin(HashJoinState *node) ...@@ -167,39 +191,43 @@ ExecHashJoin(HashJoinState *node)
hashtable->nbatch_outstart = hashtable->nbatch; hashtable->nbatch_outstart = hashtable->nbatch;
/* /*
* Reset OuterNotEmpty for scan. (It's OK if we fetched a tuple * Reset OuterNotEmpty for scan. (It's OK if we fetched a
* above, because ExecHashJoinOuterGetTuple will immediately set it * tuple above, because ExecHashJoinOuterGetTuple will
* again.) * immediately set it again.)
*/ */
node->hj_OuterNotEmpty = false; node->hj_OuterNotEmpty = false;
}
node->hj_JoinState = HJ_NEED_NEW_OUTER;
/* FALL THRU */
case HJ_NEED_NEW_OUTER:
/* /*
* run the hash join process * We don't have an outer tuple, try to get the next one
*/
for (;;)
{
/*
* If we don't have an outer tuple, get the next one
*/ */
if (node->hj_NeedNewOuter)
{
outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode, outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
node, node,
&hashvalue); &hashvalue);
if (TupIsNull(outerTupleSlot)) if (TupIsNull(outerTupleSlot))
{ {
/* end of join */ /* end of batch, or maybe whole join */
return NULL; if (HJ_FILL_INNER(node))
{
/* set up to scan for unmatched inner tuples */
ExecPrepHashTableForUnmatched(node);
node->hj_JoinState = HJ_FILL_INNER_TUPLES;
}
else
node->hj_JoinState = HJ_NEED_NEW_BATCH;
continue;
} }
econtext->ecxt_outertuple = outerTupleSlot; econtext->ecxt_outertuple = outerTupleSlot;
node->hj_NeedNewOuter = false;
node->hj_MatchedOuter = false; node->hj_MatchedOuter = false;
/* /*
* Now we have an outer tuple; find the corresponding bucket for * Find the corresponding bucket for this tuple in the main
* this tuple in the main hash table or skew hash table. * hash table or skew hash table.
*/ */
node->hj_CurHashValue = hashvalue; node->hj_CurHashValue = hashvalue;
ExecHashGetBucketAndBatch(hashtable, hashvalue, ExecHashGetBucketAndBatch(hashtable, hashvalue,
...@@ -209,63 +237,62 @@ ExecHashJoin(HashJoinState *node) ...@@ -209,63 +237,62 @@ ExecHashJoin(HashJoinState *node)
node->hj_CurTuple = NULL; node->hj_CurTuple = NULL;
/* /*
* Now we've got an outer tuple and the corresponding hash bucket, * The tuple might not belong to the current batch (where
* but it might not belong to the current batch, or it might match * "current batch" includes the skew buckets if any).
* a skew bucket.
*/ */
if (batchno != hashtable->curbatch && if (batchno != hashtable->curbatch &&
node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO) node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
{ {
/* /*
* Need to postpone this outer tuple to a later batch. Save it * Need to postpone this outer tuple to a later batch.
* in the corresponding outer-batch file. * Save it in the corresponding outer-batch file.
*/ */
Assert(batchno > hashtable->curbatch); Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue, hashvalue,
&hashtable->outerBatchFile[batchno]); &hashtable->outerBatchFile[batchno]);
node->hj_NeedNewOuter = true; /* Loop around, staying in HJ_NEED_NEW_OUTER state */
continue; /* loop around for a new outer tuple */ continue;
}
} }
/* /* OK, let's scan the bucket for matches */
* OK, scan the selected hash bucket for matches node->hj_JoinState = HJ_SCAN_BUCKET;
*/
for (;;) /* FALL THRU */
{
curtuple = ExecScanHashBucket(node, econtext);
if (curtuple == NULL)
break; /* out of matches */
case HJ_SCAN_BUCKET:
/* /*
* we've got a match, but still need to test non-hashed quals * Scan the selected hash bucket for matches to current outer
*/ */
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple), if (!ExecScanHashBucket(node, econtext))
node->hj_HashTupleSlot, {
false); /* don't pfree */ /* out of matches; check for possible outer-join fill */
econtext->ecxt_innertuple = inntuple; node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
continue;
/* reset temp memory each time to avoid leaks from qual expr */ }
ResetExprContext(econtext);
/* /*
* if we pass the qual, then save state for next call and have * We've got a match, but still need to test non-hashed quals.
* ExecProject form the projection, store it in the tuple table, * ExecScanHashBucket already set up all the state needed to
* and return the slot. * call ExecQual.
*
* If we pass the qual, then save state for next call and have
* ExecProject form the projection, store it in the tuple
* table, and return the slot.
* *
* Only the joinquals determine MatchedOuter status, but all quals * Only the joinquals determine tuple match status, but all
* must pass to actually return the tuple. * quals must pass to actually return the tuple.
*/ */
if (joinqual == NIL || ExecQual(joinqual, econtext, false)) if (joinqual == NIL || ExecQual(joinqual, econtext, false))
{ {
node->hj_MatchedOuter = true; node->hj_MatchedOuter = true;
HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
/* In an antijoin, we never return a matched tuple */ /* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_ANTI) if (node->js.jointype == JOIN_ANTI)
{ {
node->hj_NeedNewOuter = true; node->hj_JoinState = HJ_NEED_NEW_OUTER;
break; /* out of loop over hash bucket */ continue;
} }
/* /*
...@@ -273,9 +300,10 @@ ExecHashJoin(HashJoinState *node) ...@@ -273,9 +300,10 @@ ExecHashJoin(HashJoinState *node)
* but after that we're done with this outer tuple. * but after that we're done with this outer tuple.
*/ */
if (node->js.jointype == JOIN_SEMI) if (node->js.jointype == JOIN_SEMI)
node->hj_NeedNewOuter = true; node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (otherqual == NIL || ExecQual(otherqual, econtext, false)) if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{ {
TupleTableSlot *result; TupleTableSlot *result;
...@@ -288,39 +316,65 @@ ExecHashJoin(HashJoinState *node) ...@@ -288,39 +316,65 @@ ExecHashJoin(HashJoinState *node)
return result; return result;
} }
} }
/*
* If semijoin and we didn't return the tuple, we're still
* done with this outer tuple.
*/
if (node->js.jointype == JOIN_SEMI)
break; /* out of loop over hash bucket */
}
} }
break;
case HJ_FILL_OUTER_TUPLE:
/* /*
* Now the current outer tuple has run out of matches, so check * The current outer tuple has run out of matches, so check
* whether to emit a dummy outer-join tuple. If not, loop around to * whether to emit a dummy outer-join tuple. Whether we
* get a new outer tuple. * emit one or not, the next state is NEED_NEW_OUTER.
*/ */
node->hj_NeedNewOuter = true; node->hj_JoinState = HJ_NEED_NEW_OUTER;
if (!node->hj_MatchedOuter && if (!node->hj_MatchedOuter &&
HASHJOIN_IS_OUTER(node)) HJ_FILL_OUTER(node))
{ {
/* /*
* We are doing an outer join and there were no join matches for * Generate a fake join tuple with nulls for the inner
* this outer tuple. Generate a fake join tuple with nulls for * tuple, and return it if it passes the non-join quals.
* the inner tuple, and return it if it passes the non-join quals.
*/ */
econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot; econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
if (otherqual == NIL || ExecQual(otherqual, econtext, false)) if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{
TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
if (isDone != ExprEndResult)
{
node->js.ps.ps_TupFromTlist =
(isDone == ExprMultipleResult);
return result;
}
}
}
break;
case HJ_FILL_INNER_TUPLES:
/*
* We have finished a batch, but we are doing right/full join,
* so any unmatched inner tuples in the hashtable have to be
* emitted before we continue to the next batch.
*/
if (!ExecScanHashTableForUnmatched(node, econtext))
{ {
/* no more unmatched tuples */
node->hj_JoinState = HJ_NEED_NEW_BATCH;
continue;
}
/* /*
* qualification was satisfied so we project and return the * Generate a fake join tuple with nulls for the outer tuple,
* slot containing the result tuple using ExecProject(). * and return it if it passes the non-join quals.
*/ */
econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
if (otherqual == NIL ||
ExecQual(otherqual, econtext, false))
{
TupleTableSlot *result; TupleTableSlot *result;
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone); result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
...@@ -332,6 +386,20 @@ ExecHashJoin(HashJoinState *node) ...@@ -332,6 +386,20 @@ ExecHashJoin(HashJoinState *node)
return result; return result;
} }
} }
break;
case HJ_NEED_NEW_BATCH:
/*
* Try to advance to next batch. Done if there are no more.
*/
if (!ExecHashJoinNewBatch(node))
return NULL; /* end of join */
node->hj_JoinState = HJ_NEED_NEW_OUTER;
break;
default:
elog(ERROR, "unrecognized hashjoin state: %d",
(int) node->hj_JoinState);
} }
} }
} }
...@@ -406,7 +474,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ...@@ -406,7 +474,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
ExecInitResultTupleSlot(estate, &hjstate->js.ps); ExecInitResultTupleSlot(estate, &hjstate->js.ps);
hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate); hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
/* note: HASHJOIN_IS_OUTER macro depends on this initialization */ /* set up null tuples for outer joins, if needed */
switch (node->join.jointype) switch (node->join.jointype)
{ {
case JOIN_INNER: case JOIN_INNER:
...@@ -418,6 +486,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ...@@ -418,6 +486,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
ExecInitNullTupleSlot(estate, ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(hjstate))); ExecGetResultType(innerPlanState(hjstate)));
break; break;
case JOIN_RIGHT:
hjstate->hj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(hjstate)));
break;
case JOIN_FULL:
hjstate->hj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(hjstate)));
hjstate->hj_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(hjstate)));
break;
default: default:
elog(ERROR, "unrecognized join type: %d", elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype); (int) node->join.jointype);
...@@ -425,7 +506,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ...@@ -425,7 +506,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
/* /*
* now for some voodoo. our temporary tuple slot is actually the result * now for some voodoo. our temporary tuple slot is actually the result
* tuple slot of the Hash node (which is our inner plan). we do this * tuple slot of the Hash node (which is our inner plan). we can do this
* because Hash nodes don't return tuples via ExecProcNode() -- instead * because Hash nodes don't return tuples via ExecProcNode() -- instead
* the hash join node uses ExecScanHashBucket() to get at the contents of * the hash join node uses ExecScanHashBucket() to get at the contents of
* the hash table. -cim 6/9/91 * the hash table. -cim 6/9/91
...@@ -485,7 +566,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ...@@ -485,7 +566,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
((HashState *) innerPlanState(hjstate))->hashkeys = rclauses; ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
hjstate->js.ps.ps_TupFromTlist = false; hjstate->js.ps.ps_TupFromTlist = false;
hjstate->hj_NeedNewOuter = true; hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
hjstate->hj_MatchedOuter = false; hjstate->hj_MatchedOuter = false;
hjstate->hj_OuterNotEmpty = false; hjstate->hj_OuterNotEmpty = false;
...@@ -533,12 +614,13 @@ ExecEndHashJoin(HashJoinState *node) ...@@ -533,12 +614,13 @@ ExecEndHashJoin(HashJoinState *node)
* ExecHashJoinOuterGetTuple * ExecHashJoinOuterGetTuple
* *
* get the next outer tuple for hashjoin: either by * get the next outer tuple for hashjoin: either by
* executing a plan node in the first pass, or from * executing the outer plan node in the first pass, or from
* the temp files for the hashjoin batches. * the temp files for the hashjoin batches.
* *
* Returns a null slot if no more outer tuples. On success, the tuple's * Returns a null slot if no more outer tuples (within the current batch).
* hash value is stored at *hashvalue --- this is either originally computed, *
* or re-read from the temp file. * On success, the tuple's hash value is stored at *hashvalue --- this is
* either originally computed, or re-read from the temp file.
*/ */
static TupleTableSlot * static TupleTableSlot *
ExecHashJoinOuterGetTuple(PlanState *outerNode, ExecHashJoinOuterGetTuple(PlanState *outerNode,
...@@ -572,7 +654,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, ...@@ -572,7 +654,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
if (ExecHashGetHashValue(hashtable, econtext, if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys, hjstate->hj_OuterHashKeys,
true, /* outer tuple */ true, /* outer tuple */
HASHJOIN_IS_OUTER(hjstate), HJ_FILL_OUTER(hjstate),
hashvalue)) hashvalue))
{ {
/* remember outer relation is not empty for possible rescan */ /* remember outer relation is not empty for possible rescan */
...@@ -587,31 +669,27 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, ...@@ -587,31 +669,27 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
*/ */
slot = ExecProcNode(outerNode); slot = ExecProcNode(outerNode);
} }
/*
* We have just reached the end of the first pass. Try to switch to a
* saved batch.
*/
curbatch = ExecHashJoinNewBatch(hjstate);
} }
else if (curbatch < hashtable->nbatch)
{
BufFile *file = hashtable->outerBatchFile[curbatch];
/* /*
* Try to read from a temp file. Loop allows us to advance to new batches * In outer-join cases, we could get here even though the batch file
* as needed. NOTE: nbatch could increase inside ExecHashJoinNewBatch, so * is empty.
* don't try to optimize this loop.
*/ */
while (curbatch < hashtable->nbatch) if (file == NULL)
{ return NULL;
slot = ExecHashJoinGetSavedTuple(hjstate, slot = ExecHashJoinGetSavedTuple(hjstate,
hashtable->outerBatchFile[curbatch], file,
hashvalue, hashvalue,
hjstate->hj_OuterTupleSlot); hjstate->hj_OuterTupleSlot);
if (!TupIsNull(slot)) if (!TupIsNull(slot))
return slot; return slot;
curbatch = ExecHashJoinNewBatch(hjstate);
} }
/* Out of batches... */ /* End of this batch */
return NULL; return NULL;
} }
...@@ -619,10 +697,9 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, ...@@ -619,10 +697,9 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
* ExecHashJoinNewBatch * ExecHashJoinNewBatch
* switch to a new hashjoin batch * switch to a new hashjoin batch
* *
* Returns the number of the new batch (1..nbatch-1), or nbatch if no more. * Returns true if successful, false if there are no more batches.
* We will never return a batch number that has an empty outer batch file.
*/ */
static int static bool
ExecHashJoinNewBatch(HashJoinState *hjstate) ExecHashJoinNewBatch(HashJoinState *hjstate)
{ {
HashJoinTable hashtable = hjstate->hj_HashTable; HashJoinTable hashtable = hjstate->hj_HashTable;
...@@ -632,7 +709,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate) ...@@ -632,7 +709,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
TupleTableSlot *slot; TupleTableSlot *slot;
uint32 hashvalue; uint32 hashvalue;
start_over:
nbatch = hashtable->nbatch; nbatch = hashtable->nbatch;
curbatch = hashtable->curbatch; curbatch = hashtable->curbatch;
...@@ -657,6 +733,7 @@ start_over: ...@@ -657,6 +733,7 @@ start_over:
hashtable->skewEnabled = false; hashtable->skewEnabled = false;
hashtable->skewBucket = NULL; hashtable->skewBucket = NULL;
hashtable->skewBucketNums = NULL; hashtable->skewBucketNums = NULL;
hashtable->nSkewBuckets = 0;
hashtable->spaceUsedSkew = 0; hashtable->spaceUsedSkew = 0;
} }
...@@ -665,8 +742,9 @@ start_over: ...@@ -665,8 +742,9 @@ start_over:
* sides. We can sometimes skip over batches that are empty on only one * sides. We can sometimes skip over batches that are empty on only one
* side, but there are exceptions: * side, but there are exceptions:
* *
* 1. In an outer join, we have to process outer batches even if the inner * 1. In a left/full outer join, we have to process outer batches even if
* batch is empty. * the inner batch is empty. Similarly, in a right/full outer join, we
* have to process inner batches even if the outer batch is empty.
* *
* 2. If we have increased nbatch since the initial estimate, we have to * 2. If we have increased nbatch since the initial estimate, we have to
* scan inner batches since they might contain tuples that need to be * scan inner batches since they might contain tuples that need to be
...@@ -682,7 +760,10 @@ start_over: ...@@ -682,7 +760,10 @@ start_over:
hashtable->innerBatchFile[curbatch] == NULL)) hashtable->innerBatchFile[curbatch] == NULL))
{ {
if (hashtable->outerBatchFile[curbatch] && if (hashtable->outerBatchFile[curbatch] &&
HASHJOIN_IS_OUTER(hjstate)) HJ_FILL_OUTER(hjstate))
break; /* must process due to rule 1 */
if (hashtable->innerBatchFile[curbatch] &&
HJ_FILL_INNER(hjstate))
break; /* must process due to rule 1 */ break; /* must process due to rule 1 */
if (hashtable->innerBatchFile[curbatch] && if (hashtable->innerBatchFile[curbatch] &&
nbatch != hashtable->nbatch_original) nbatch != hashtable->nbatch_original)
...@@ -702,7 +783,7 @@ start_over: ...@@ -702,7 +783,7 @@ start_over:
} }
if (curbatch >= nbatch) if (curbatch >= nbatch)
return curbatch; /* no more batches */ return false; /* no more batches */
hashtable->curbatch = curbatch; hashtable->curbatch = curbatch;
...@@ -741,20 +822,17 @@ start_over: ...@@ -741,20 +822,17 @@ start_over:
} }
/* /*
* If there's no outer batch file, advance to next batch. * Rewind outer batch file (if present), so that we can start reading it.
*/
if (hashtable->outerBatchFile[curbatch] == NULL)
goto start_over;
/*
* Rewind outer batch file, so that we can start reading it.
*/ */
if (hashtable->outerBatchFile[curbatch] != NULL)
{
if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET)) if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not rewind hash-join temporary file: %m"))); errmsg("could not rewind hash-join temporary file: %m")));
}
return curbatch; return true;
} }
/* /*
...@@ -857,9 +935,16 @@ ExecReScanHashJoin(HashJoinState *node) ...@@ -857,9 +935,16 @@ ExecReScanHashJoin(HashJoinState *node)
node->js.ps.righttree->chgParam == NULL) node->js.ps.righttree->chgParam == NULL)
{ {
/* /*
* okay to reuse the hash table; needn't rescan inner, either. * Okay to reuse the hash table; needn't rescan inner, either.
* *
* What we do need to do is reset our state about the emptiness of * However, if it's a right/full join, we'd better reset the
* inner-tuple match flags contained in the table.
*/
if (HJ_FILL_INNER(node))
ExecHashTableResetMatchFlags(node->hj_HashTable);
/*
* Also, we need to reset our state about the emptiness of
* the outer relation, so that the new scan of the outer will * the outer relation, so that the new scan of the outer will
* update it correctly if it turns out to be empty this time. * update it correctly if it turns out to be empty this time.
* (There's no harm in clearing it now because ExecHashJoin won't * (There's no harm in clearing it now because ExecHashJoin won't
...@@ -869,12 +954,16 @@ ExecReScanHashJoin(HashJoinState *node) ...@@ -869,12 +954,16 @@ ExecReScanHashJoin(HashJoinState *node)
* through.) * through.)
*/ */
node->hj_OuterNotEmpty = false; node->hj_OuterNotEmpty = false;
/* ExecHashJoin can skip the BUILD_HASHTABLE step */
node->hj_JoinState = HJ_NEED_NEW_OUTER;
} }
else else
{ {
/* must destroy and rebuild hash table */ /* must destroy and rebuild hash table */
ExecHashTableDestroy(node->hj_HashTable); ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL; node->hj_HashTable = NULL;
node->hj_JoinState = HJ_BUILD_HASHTABLE;
/* /*
* if chgParam of subnode is not null then plan will be re-scanned * if chgParam of subnode is not null then plan will be re-scanned
...@@ -892,7 +981,6 @@ ExecReScanHashJoin(HashJoinState *node) ...@@ -892,7 +981,6 @@ ExecReScanHashJoin(HashJoinState *node)
node->hj_CurTuple = NULL; node->hj_CurTuple = NULL;
node->js.ps.ps_TupFromTlist = false; node->js.ps.ps_TupFromTlist = false;
node->hj_NeedNewOuter = true;
node->hj_MatchedOuter = false; node->hj_MatchedOuter = false;
node->hj_FirstOuterTupleSlot = NULL; node->hj_FirstOuterTupleSlot = NULL;
......
...@@ -889,8 +889,8 @@ generate_join_implied_equalities_normal(PlannerInfo *root, ...@@ -889,8 +889,8 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
* combination for which an opfamily member operator exists. If we have * combination for which an opfamily member operator exists. If we have
* choices, we prefer simple Var members (possibly with RelabelType) since * choices, we prefer simple Var members (possibly with RelabelType) since
* these are (a) cheapest to compute at runtime and (b) most likely to * these are (a) cheapest to compute at runtime and (b) most likely to
* have useful statistics. Also, if enable_hashjoin is on, we prefer * have useful statistics. Also, prefer operators that are also
* operators that are also hashjoinable. * hashjoinable.
*/ */
if (outer_members && inner_members) if (outer_members && inner_members)
{ {
...@@ -925,8 +925,7 @@ generate_join_implied_equalities_normal(PlannerInfo *root, ...@@ -925,8 +925,7 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
(IsA(inner_em->em_expr, RelabelType) && (IsA(inner_em->em_expr, RelabelType) &&
IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
score++; score++;
if (!enable_hashjoin || if (op_hashjoinable(eq_op,
op_hashjoinable(eq_op,
exprType((Node *) outer_em->em_expr))) exprType((Node *) outer_em->em_expr)))
score++; score++;
if (score > best_score) if (score > best_score)
......
...@@ -41,7 +41,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root, ...@@ -41,7 +41,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *outerrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, RelOptInfo *innerrel,
List *restrictlist, List *restrictlist,
JoinType jointype); JoinType jointype,
bool *have_nonmergeable_clause);
/* /*
...@@ -77,12 +78,13 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -77,12 +78,13 @@ add_paths_to_joinrel(PlannerInfo *root,
List *restrictlist) List *restrictlist)
{ {
List *mergeclause_list = NIL; List *mergeclause_list = NIL;
bool have_nonmergeable_clause = false;
/* /*
* Find potential mergejoin clauses. We can skip this if we are not * Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin is currently our * interested in doing a mergejoin. However, mergejoin may be our only
* only way of implementing full outer joins, so override mergejoin * way of implementing a full outer join, so override enable_mergejoin if
* disable if it's a full join. * it's a full join.
*/ */
if (enable_mergejoin || jointype == JOIN_FULL) if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root, mergeclause_list = select_mergejoin_clauses(root,
...@@ -90,20 +92,25 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -90,20 +92,25 @@ add_paths_to_joinrel(PlannerInfo *root,
outerrel, outerrel,
innerrel, innerrel,
restrictlist, restrictlist,
jointype); jointype,
&have_nonmergeable_clause);
/* /*
* 1. Consider mergejoin paths where both relations must be explicitly * 1. Consider mergejoin paths where both relations must be explicitly
* sorted. * sorted. Skip this if we can't mergejoin.
*/ */
if (!have_nonmergeable_clause)
sort_inner_and_outer(root, joinrel, outerrel, innerrel, sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo); restrictlist, mergeclause_list, jointype, sjinfo);
/* /*
* 2. Consider paths where the outer relation need not be explicitly * 2. Consider paths where the outer relation need not be explicitly
* sorted. This includes both nestloops and mergejoins where the outer * sorted. This includes both nestloops and mergejoins where the outer
* path is already ordered. * path is already ordered. Again, skip this if we can't mergejoin.
* (That's okay because we know that nestloop can't handle right/full
* joins at all, so it wouldn't work in those cases either.)
*/ */
if (!have_nonmergeable_clause)
match_unsorted_outer(root, joinrel, outerrel, innerrel, match_unsorted_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo); restrictlist, mergeclause_list, jointype, sjinfo);
...@@ -120,15 +127,17 @@ add_paths_to_joinrel(PlannerInfo *root, ...@@ -120,15 +127,17 @@ add_paths_to_joinrel(PlannerInfo *root,
* those made by match_unsorted_outer when add_paths_to_joinrel() is * those made by match_unsorted_outer when add_paths_to_joinrel() is
* invoked with the two rels given in the other order. * invoked with the two rels given in the other order.
*/ */
if (!have_nonmergeable_clause)
match_unsorted_inner(root, joinrel, outerrel, innerrel, match_unsorted_inner(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype, sjinfo); restrictlist, mergeclause_list, jointype, sjinfo);
#endif #endif
/* /*
* 4. Consider paths where both outer and inner relations must be hashed * 4. Consider paths where both outer and inner relations must be hashed
* before being joined. * before being joined. As above, disregard enable_hashjoin for full
* joins, because there may be no other alternative.
*/ */
if (enable_hashjoin) if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel, hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype, sjinfo); restrictlist, jointype, sjinfo);
} }
...@@ -189,37 +198,11 @@ sort_inner_and_outer(PlannerInfo *root, ...@@ -189,37 +198,11 @@ sort_inner_and_outer(PlannerInfo *root,
JoinType jointype, JoinType jointype,
SpecialJoinInfo *sjinfo) SpecialJoinInfo *sjinfo)
{ {
bool useallclauses;
Path *outer_path; Path *outer_path;
Path *inner_path; Path *inner_path;
List *all_pathkeys; List *all_pathkeys;
ListCell *l; ListCell *l;
/*
* If we are doing a right or full join, we must use *all* the
* mergeclauses as join clauses, else we will not have a valid plan.
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_SEMI:
case JOIN_ANTI:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
useallclauses = false;
break;
case JOIN_RIGHT:
case JOIN_FULL:
useallclauses = true;
break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) jointype);
useallclauses = false; /* keep compiler quiet */
break;
}
/* /*
* We only consider the cheapest-total-cost input paths, since we are * We only consider the cheapest-total-cost input paths, since we are
* assuming here that a sort is required. We will consider * assuming here that a sort is required. We will consider
...@@ -390,9 +373,9 @@ match_unsorted_outer(PlannerInfo *root, ...@@ -390,9 +373,9 @@ match_unsorted_outer(PlannerInfo *root,
/* /*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we * Nestloop only supports inner, left, semi, and anti joins. Also, if we
* are doing a right or full join, we must use *all* the mergeclauses as * are doing a right or full mergejoin, we must use *all* the mergeclauses
* join clauses, else we will not have a valid plan. (Although these two * as join clauses, else we will not have a valid plan. (Although these
* flags are currently inverses, keep them separate for clarity and * two flags are currently inverses, keep them separate for clarity and
* possible future changes.) * possible future changes.)
*/ */
switch (jointype) switch (jointype)
...@@ -574,8 +557,8 @@ match_unsorted_outer(PlannerInfo *root, ...@@ -574,8 +557,8 @@ match_unsorted_outer(PlannerInfo *root,
* Special corner case: for "x FULL JOIN y ON true", there will be no * Special corner case: for "x FULL JOIN y ON true", there will be no
* join clauses at all. Ordinarily we'd generate a clauseless * join clauses at all. Ordinarily we'd generate a clauseless
* nestloop path, but since mergejoin is our only join type that * nestloop path, but since mergejoin is our only join type that
* supports FULL JOIN, it's necessary to generate a clauseless * supports FULL JOIN without any join clauses, it's necessary to
* mergejoin path instead. * generate a clauseless mergejoin path instead.
*/ */
if (mergeclauses == NIL) if (mergeclauses == NIL)
{ {
...@@ -781,29 +764,10 @@ hash_inner_and_outer(PlannerInfo *root, ...@@ -781,29 +764,10 @@ hash_inner_and_outer(PlannerInfo *root,
JoinType jointype, JoinType jointype,
SpecialJoinInfo *sjinfo) SpecialJoinInfo *sjinfo)
{ {
bool isouterjoin; bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses; List *hashclauses;
ListCell *l; ListCell *l;
/*
* Hashjoin only supports inner, left, semi, and anti joins.
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_SEMI:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
isouterjoin = false;
break;
case JOIN_LEFT:
case JOIN_ANTI:
isouterjoin = true;
break;
default:
return;
}
/* /*
* We need to build only one hashpath for any given pair of outer and * We need to build only one hashpath for any given pair of outer and
* inner relations; all of the hashable clauses will be used as keys. * inner relations; all of the hashable clauses will be used as keys.
...@@ -963,6 +927,11 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, ...@@ -963,6 +927,11 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* Select mergejoin clauses that are usable for a particular join. * Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses. * Returns a list of RestrictInfo nodes for those clauses.
* *
* *have_nonmergeable_clause is set TRUE if this is a right/full join and
* there are nonmergejoinable join clauses. The executor's mergejoin
* machinery cannot handle such cases, so we have to avoid generating a
* mergejoin plan.
*
* We also mark each selected RestrictInfo to show which side is currently * We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only * being considered as outer. These are transient markings that are only
* good for the duration of the current add_paths_to_joinrel() call! * good for the duration of the current add_paths_to_joinrel() call!
...@@ -977,13 +946,15 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -977,13 +946,15 @@ select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *outerrel, RelOptInfo *outerrel,
RelOptInfo *innerrel, RelOptInfo *innerrel,
List *restrictlist, List *restrictlist,
JoinType jointype) JoinType jointype,
bool *have_nonmergeable_clause)
{ {
List *result_list = NIL; List *result_list = NIL;
bool isouterjoin = IS_OUTER_JOIN(jointype); bool isouterjoin = IS_OUTER_JOIN(jointype);
bool have_nonmergeable_joinclause = false;
ListCell *l; ListCell *l;
*have_nonmergeable_clause = false;
foreach(l, restrictlist) foreach(l, restrictlist)
{ {
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
...@@ -991,7 +962,7 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -991,7 +962,7 @@ select_mergejoin_clauses(PlannerInfo *root,
/* /*
* If processing an outer join, only use its own join clauses in the * If processing an outer join, only use its own join clauses in the
* merge. For inner joins we can use pushed-down clauses too. (Note: * merge. For inner joins we can use pushed-down clauses too. (Note:
* we don't set have_nonmergeable_joinclause here because pushed-down * we don't set have_nonmergeable_clause here because pushed-down
* clauses will become otherquals not joinquals.) * clauses will become otherquals not joinquals.)
*/ */
if (isouterjoin && restrictinfo->is_pushed_down) if (isouterjoin && restrictinfo->is_pushed_down)
...@@ -1008,7 +979,7 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -1008,7 +979,7 @@ select_mergejoin_clauses(PlannerInfo *root,
* FALSE.) * FALSE.)
*/ */
if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const)) if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
have_nonmergeable_joinclause = true; *have_nonmergeable_clause = true;
continue; /* not mergejoinable */ continue; /* not mergejoinable */
} }
...@@ -1017,7 +988,7 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -1017,7 +988,7 @@ select_mergejoin_clauses(PlannerInfo *root,
*/ */
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
{ {
have_nonmergeable_joinclause = true; *have_nonmergeable_clause = true;
continue; /* no good for these input relations */ continue; /* no good for these input relations */
} }
...@@ -1046,7 +1017,7 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -1046,7 +1017,7 @@ select_mergejoin_clauses(PlannerInfo *root,
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
{ {
have_nonmergeable_joinclause = true; *have_nonmergeable_clause = true;
continue; /* can't handle redundant eclasses */ continue; /* can't handle redundant eclasses */
} }
...@@ -1054,28 +1025,20 @@ select_mergejoin_clauses(PlannerInfo *root, ...@@ -1054,28 +1025,20 @@ select_mergejoin_clauses(PlannerInfo *root,
} }
/* /*
* If it is a right/full join then *all* the explicit join clauses must be * If it is not a right/full join then we don't need to insist on all the
* mergejoinable, else the executor will fail. If we are asked for a right * joinclauses being mergejoinable, so reset the flag. This simplifies
* join then just return NIL to indicate no mergejoin is possible (we can * the logic in add_paths_to_joinrel.
* handle it as a left join instead). If we are asked for a full join then
* emit an error, because there is no fallback.
*/ */
if (have_nonmergeable_joinclause)
{
switch (jointype) switch (jointype)
{ {
case JOIN_RIGHT: case JOIN_RIGHT:
return NIL; /* not mergejoinable */
case JOIN_FULL: case JOIN_FULL:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
break; break;
default: default:
/* otherwise, it's OK to have nonmergeable join quals */ /* otherwise, it's OK to have nonmergeable join quals */
*have_nonmergeable_clause = false;
break; break;
} }
}
return result_list; return result_list;
} }
...@@ -658,6 +658,18 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) ...@@ -658,6 +658,18 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
add_paths_to_joinrel(root, joinrel, rel2, rel1, add_paths_to_joinrel(root, joinrel, rel2, rel1,
JOIN_FULL, sjinfo, JOIN_FULL, sjinfo,
restrictlist); restrictlist);
/*
* If there are join quals that aren't mergeable or hashable, we
* may not be able to build any valid plan. Complain here so that
* we can give a somewhat-useful error message. (Since we have no
* flexibility of planning for a full join, there's no chance of
* succeeding later with another pair of input rels.)
*/
if (joinrel->pathlist == NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
break; break;
case JOIN_SEMI: case JOIN_SEMI:
......
...@@ -1328,9 +1328,8 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, ...@@ -1328,9 +1328,8 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
/* /*
* Check for hashjoinable operators. (We don't bother setting the * Check for hashjoinable operators. (We don't bother setting the
* hashjoin info if we're not going to need it.) * hashjoin info except in true join clauses.)
*/ */
if (enable_hashjoin)
check_hashjoinable(restrictinfo); check_hashjoinable(restrictinfo);
/* /*
...@@ -1458,9 +1457,8 @@ build_implied_join_equality(Oid opno, ...@@ -1458,9 +1457,8 @@ build_implied_join_equality(Oid opno,
qualscope, /* required_relids */ qualscope, /* required_relids */
NULL); /* nullable_relids */ NULL); /* nullable_relids */
/* Set mergejoinability info always, and hashjoinability if enabled */ /* Set mergejoinability/hashjoinability flags */
check_mergejoinable(restrictinfo); check_mergejoinable(restrictinfo);
if (enable_hashjoin)
check_hashjoinable(restrictinfo); check_hashjoinable(restrictinfo);
return restrictinfo; return restrictinfo;
......
...@@ -195,6 +195,14 @@ typedef HeapTupleHeaderData *HeapTupleHeader; ...@@ -195,6 +195,14 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
#define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */ #define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */
/*
* HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is
* only used in tuples that are in the hash table, and those don't need
* any visibility information, so we can overlay it on a visibility flag
* instead of using up a dedicated bit.
*/
#define HEAP_TUPLE_HAS_MATCH HEAP_ONLY_TUPLE /* tuple has a join match */
/* /*
* HeapTupleHeader accessor macros * HeapTupleHeader accessor macros
* *
...@@ -343,6 +351,21 @@ do { \ ...@@ -343,6 +351,21 @@ do { \
(tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \ (tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \
) )
#define HeapTupleHeaderHasMatch(tup) \
( \
(tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH \
)
#define HeapTupleHeaderSetMatch(tup) \
( \
(tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \
)
#define HeapTupleHeaderClearMatch(tup) \
( \
(tup)->t_infomask2 &= ~HEAP_TUPLE_HAS_MATCH \
)
#define HeapTupleHeaderGetNatts(tup) \ #define HeapTupleHeaderGetNatts(tup) \
((tup)->t_infomask2 & HEAP_NATTS_MASK) ((tup)->t_infomask2 & HEAP_NATTS_MASK)
......
...@@ -113,6 +113,8 @@ typedef struct HashJoinTableData ...@@ -113,6 +113,8 @@ typedef struct HashJoinTableData
struct HashJoinTupleData **buckets; struct HashJoinTupleData **buckets;
/* buckets array is per-batch storage, as are all the tuples */ /* buckets array is per-batch storage, as are all the tuples */
bool keepNulls; /* true to store unmatchable NULL tuples */
bool skewEnabled; /* are we using skew optimization? */ bool skewEnabled; /* are we using skew optimization? */
HashSkewBucket **skewBucket; /* hashtable of skew buckets */ HashSkewBucket **skewBucket; /* hashtable of skew buckets */
int skewBucketLen; /* size of skewBucket array (a power of 2!) */ int skewBucketLen; /* size of skewBucket array (a power of 2!) */
......
...@@ -22,7 +22,8 @@ extern Node *MultiExecHash(HashState *node); ...@@ -22,7 +22,8 @@ extern Node *MultiExecHash(HashState *node);
extern void ExecEndHash(HashState *node); extern void ExecEndHash(HashState *node);
extern void ExecReScanHash(HashState *node); extern void ExecReScanHash(HashState *node);
extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators); extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators,
bool keepNulls);
extern void ExecHashTableDestroy(HashJoinTable hashtable); extern void ExecHashTableDestroy(HashJoinTable hashtable);
extern void ExecHashTableInsert(HashJoinTable hashtable, extern void ExecHashTableInsert(HashJoinTable hashtable,
TupleTableSlot *slot, TupleTableSlot *slot,
...@@ -37,9 +38,12 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, ...@@ -37,9 +38,12 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
uint32 hashvalue, uint32 hashvalue,
int *bucketno, int *bucketno,
int *batchno); int *batchno);
extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate, extern bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext);
extern void ExecPrepHashTableForUnmatched(HashJoinState *hjstate);
extern bool ExecScanHashTableForUnmatched(HashJoinState *hjstate,
ExprContext *econtext); ExprContext *econtext);
extern void ExecHashTableReset(HashJoinTable hashtable); extern void ExecHashTableReset(HashJoinTable hashtable);
extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable);
extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
int *numbuckets, int *numbuckets,
int *numbatches, int *numbatches,
......
...@@ -1468,6 +1468,10 @@ typedef struct MergeJoinState ...@@ -1468,6 +1468,10 @@ typedef struct MergeJoinState
/* ---------------- /* ----------------
* HashJoinState information * HashJoinState information
* *
* hashclauses original form of the hashjoin condition
* hj_OuterHashKeys the outer hash keys in the hashjoin condition
* hj_InnerHashKeys the inner hash keys in the hashjoin condition
* hj_HashOperators the join operators in the hashjoin condition
* hj_HashTable hash table for the hashjoin * hj_HashTable hash table for the hashjoin
* (NULL if table not built yet) * (NULL if table not built yet)
* hj_CurHashValue hash value for current outer tuple * hj_CurHashValue hash value for current outer tuple
...@@ -1477,14 +1481,12 @@ typedef struct MergeJoinState ...@@ -1477,14 +1481,12 @@ typedef struct MergeJoinState
* tuple, or NULL if starting search * tuple, or NULL if starting search
* (hj_CurXXX variables are undefined if * (hj_CurXXX variables are undefined if
* OuterTupleSlot is empty!) * OuterTupleSlot is empty!)
* hj_OuterHashKeys the outer hash keys in the hashjoin condition
* hj_InnerHashKeys the inner hash keys in the hashjoin condition
* hj_HashOperators the join operators in the hashjoin condition
* hj_OuterTupleSlot tuple slot for outer tuples * hj_OuterTupleSlot tuple slot for outer tuples
* hj_HashTupleSlot tuple slot for hashed tuples * hj_HashTupleSlot tuple slot for inner (hashed) tuples
* hj_NullInnerTupleSlot prepared null tuple for left outer joins * hj_NullOuterTupleSlot prepared null tuple for right/full outer joins
* hj_NullInnerTupleSlot prepared null tuple for left/full outer joins
* hj_FirstOuterTupleSlot first tuple retrieved from outer plan * hj_FirstOuterTupleSlot first tuple retrieved from outer plan
* hj_NeedNewOuter true if need new outer tuple on next call * hj_JoinState current state of ExecHashJoin state machine
* hj_MatchedOuter true if found a join match for current outer * hj_MatchedOuter true if found a join match for current outer
* hj_OuterNotEmpty true if outer relation known not empty * hj_OuterNotEmpty true if outer relation known not empty
* ---------------- * ----------------
...@@ -1498,19 +1500,20 @@ typedef struct HashJoinState ...@@ -1498,19 +1500,20 @@ typedef struct HashJoinState
{ {
JoinState js; /* its first field is NodeTag */ JoinState js; /* its first field is NodeTag */
List *hashclauses; /* list of ExprState nodes */ List *hashclauses; /* list of ExprState nodes */
List *hj_OuterHashKeys; /* list of ExprState nodes */
List *hj_InnerHashKeys; /* list of ExprState nodes */
List *hj_HashOperators; /* list of operator OIDs */
HashJoinTable hj_HashTable; HashJoinTable hj_HashTable;
uint32 hj_CurHashValue; uint32 hj_CurHashValue;
int hj_CurBucketNo; int hj_CurBucketNo;
int hj_CurSkewBucketNo; int hj_CurSkewBucketNo;
HashJoinTuple hj_CurTuple; HashJoinTuple hj_CurTuple;
List *hj_OuterHashKeys; /* list of ExprState nodes */
List *hj_InnerHashKeys; /* list of ExprState nodes */
List *hj_HashOperators; /* list of operator OIDs */
TupleTableSlot *hj_OuterTupleSlot; TupleTableSlot *hj_OuterTupleSlot;
TupleTableSlot *hj_HashTupleSlot; TupleTableSlot *hj_HashTupleSlot;
TupleTableSlot *hj_NullOuterTupleSlot;
TupleTableSlot *hj_NullInnerTupleSlot; TupleTableSlot *hj_NullInnerTupleSlot;
TupleTableSlot *hj_FirstOuterTupleSlot; TupleTableSlot *hj_FirstOuterTupleSlot;
bool hj_NeedNewOuter; int hj_JoinState;
bool hj_MatchedOuter; bool hj_MatchedOuter;
bool hj_OuterNotEmpty; bool hj_OuterNotEmpty;
} HashJoinState; } HashJoinState;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment