Commit 19b88633 authored by Tom Lane's avatar Tom Lane

First cut at implementing IN (and NOT IN) via hashtables. There is more
to be done yet, but this is a good start.
parent 3e54e26b
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,17 +23,14 @@
/*****************************************************************************
* Utility routines for grouping tuples together
*
* These routines actually implement SQL's notion of "distinct/not distinct".
* Two tuples match if they are not distinct in all the compared columns,
* i.e., the column values are either both null, or both non-null and equal.
*****************************************************************************/
/*
* execTuplesMatch
* Return true if two tuples match in all the indicated fields.
* This is used to detect group boundaries in nodeGroup and nodeAgg,
* and to decide whether two tuples are distinct or not in nodeUnique.
*
* This actually implements SQL's notion of "not distinct". Two nulls
* match, a null and a not-null don't match.
*
* tuple1, tuple2: the tuples to compare
* tupdesc: tuple descriptor applying to both tuples
......@@ -112,11 +109,88 @@ execTuplesMatch(HeapTuple tuple1,
return result;
}
/*
 * execTuplesUnequal
 *		Decide whether two tuples are provably different in the compared
 *		columns.
 *
 * A column in which either tuple holds a null proves nothing and is
 * skipped.  The result is true only when some compared column is non-null
 * in both tuples and that column's equality function reports "not equal";
 * in every other case (including numCols <= 0) the result is false.
 *
 * tuple1, tuple2: the tuples to compare
 * tupdesc: tuple descriptor used to fetch attributes from both tuples
 * numCols: number of entries in matchColIdx[] and eqfunctions[]
 * matchColIdx: attribute numbers of the columns to compare
 * eqfunctions: equality functions, indexed in parallel with matchColIdx
 * evalContext: memory context that is reset on entry and made current
 *		while the comparisons run; the caller's context is restored
 *		before returning
 *
 * The parameter list is the same as execTuplesMatch's.
 */
bool
execTuplesUnequal(HeapTuple tuple1,
				  HeapTuple tuple2,
				  TupleDesc tupdesc,
				  int numCols,
				  AttrNumber *matchColIdx,
				  FmgrInfo *eqfunctions,
				  MemoryContext evalContext)
{
	MemoryContext callerContext;
	bool		foundUnequal = false;
	int			col;

	/* Start from an empty scratch context and evaluate everything in it. */
	MemoryContextReset(evalContext);
	callerContext = MemoryContextSwitchTo(evalContext);

	/*
	 * Proving "unequal" takes just one differing column, so stop at the
	 * first one found.  Columns are visited from the last (least
	 * significant sort key) to the first, since with sorted input the
	 * trailing columns are the ones most likely to differ.
	 */
	for (col = numCols - 1; col >= 0 && !foundUnequal; col--)
	{
		AttrNumber	attno = matchColIdx[col];
		Datum		leftValue;
		Datum		rightValue;
		bool		leftIsNull;
		bool		rightIsNull;

		leftValue = heap_getattr(tuple1, attno, tupdesc, &leftIsNull);
		if (leftIsNull)
			continue;			/* a null proves nothing */

		rightValue = heap_getattr(tuple2, attno, tupdesc, &rightIsNull);
		if (rightIsNull)
			continue;			/* a null proves nothing */

		/* Both values are non-null: ask the type-specific equality function */
		if (!DatumGetBool(FunctionCall2(&eqfunctions[col],
										leftValue, rightValue)))
			foundUnequal = true;	/* definitely unequal; loop ends */
	}

	MemoryContextSwitchTo(callerContext);

	return foundUnequal;
}
/*
* execTuplesMatchPrepare
* Look up the equality functions needed for execTuplesMatch.
* The result is a palloc'd array.
* Look up the equality functions needed for execTuplesMatch or
* execTuplesUnequal.
*
* The result is a palloc'd array.
*/
FmgrInfo *
execTuplesMatchPrepare(TupleDesc tupdesc,
......@@ -266,8 +340,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
* Find or create a hashtable entry for the tuple group containing the
* given tuple.
*
* On return, *isnew is true if the entry is newly created, false if it
* existed already. Any extra space in a new entry has been zeroed.
* If isnew is NULL, we do not create new entries; we return NULL if no
* match is found.
*
* If isnew isn't NULL, then a new entry is created if no existing entry
* matches. On return, *isnew is true if the entry is newly created,
* false if it existed already. Any extra space in a new entry has been
* zeroed.
*/
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
......@@ -318,26 +397,30 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
hashtable->eqfunctions,
hashtable->tempcxt))
{
if (isnew)
*isnew = false;
MemoryContextSwitchTo(oldContext);
*isnew = false;
return entry;
}
}
/* Not there, so build a new one */
MemoryContextSwitchTo(hashtable->tablecxt);
/* Not there, so build a new one if requested */
if (isnew)
{
MemoryContextSwitchTo(hashtable->tablecxt);
entry = (TupleHashEntry) palloc0(hashtable->entrysize);
entry = (TupleHashEntry) palloc0(hashtable->entrysize);
entry->hashkey = hashkey;
entry->firstTuple = heap_copytuple(tuple);
entry->hashkey = hashkey;
entry->firstTuple = heap_copytuple(tuple);
entry->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = entry;
entry->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = entry;
MemoryContextSwitchTo(oldContext);
*isnew = true;
}
*isnew = true;
MemoryContextSwitchTo(oldContext);
return entry;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.122 2003/01/10 21:08:07 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.123 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -2324,8 +2324,13 @@ ExecCleanTargetListLength(List *targetlist)
/* ----------------------------------------------------------------
* ExecTargetList
*
* Evaluates a targetlist with respect to the current
* expression context and return a tuple.
* Evaluates a targetlist with respect to the given
* expression context and returns a tuple.
*
* The caller must pass workspace for the values and nulls arrays
* as well as the itemIsDone array. This convention saves palloc'ing
* workspace on each call, and some callers may find it useful to examine
* the values array directly.
*
* As with ExecEvalExpr, the caller should pass isDone = NULL if not
* prepared to deal with sets of result tuples. Otherwise, a return
......@@ -2335,21 +2340,15 @@ ExecCleanTargetListLength(List *targetlist)
*/
static HeapTuple
ExecTargetList(List *targetlist,
int nodomains,
TupleDesc targettype,
Datum *values,
ExprContext *econtext,
Datum *values,
char *nulls,
ExprDoneCond *itemIsDone,
ExprDoneCond *isDone)
{
MemoryContext oldContext;
#define NPREALLOCDOMAINS 64
char nullsArray[NPREALLOCDOMAINS];
ExprDoneCond itemIsDoneArray[NPREALLOCDOMAINS];
char *nulls;
ExprDoneCond *itemIsDone;
List *tl;
HeapTuple newTuple;
bool isNull;
bool haveDoneSets;
static struct tupleDesc NullTupleDesc; /* we assume this inits to
......@@ -2378,31 +2377,9 @@ ExecTargetList(List *targetlist,
if (targettype == NULL)
targettype = &NullTupleDesc;
/*
* allocate an array of char's to hold the "null" information only if
* we have a really large targetlist. otherwise we use the stack.
*
* We also allocate another array that holds the isDone status for each
* targetlist item. The isDone status is needed so that we can iterate,
* generating multiple tuples, when one or more tlist items return
* sets. (We expect the caller to call us again if we return
* isDone = ExprMultipleResult.)
*/
if (nodomains > NPREALLOCDOMAINS)
{
nulls = (char *) palloc(nodomains * sizeof(char));
itemIsDone = (ExprDoneCond *) palloc(nodomains * sizeof(ExprDoneCond));
}
else
{
nulls = nullsArray;
itemIsDone = itemIsDoneArray;
}
/*
* evaluate all the expressions in the target list
*/
if (isDone)
*isDone = ExprSingleResult; /* until proven otherwise */
......@@ -2451,8 +2428,7 @@ ExecTargetList(List *targetlist,
*/
*isDone = ExprEndResult;
MemoryContextSwitchTo(oldContext);
newTuple = NULL;
goto exit;
return NULL;
}
else
{
......@@ -2511,8 +2487,7 @@ ExecTargetList(List *targetlist,
}
MemoryContextSwitchTo(oldContext);
newTuple = NULL;
goto exit;
return NULL;
}
}
}
......@@ -2522,20 +2497,7 @@ ExecTargetList(List *targetlist,
*/
MemoryContextSwitchTo(oldContext);
newTuple = (HeapTuple) heap_formtuple(targettype, values, nulls);
exit:
/*
* free the status arrays if we palloc'd them
*/
if (nodomains > NPREALLOCDOMAINS)
{
pfree(nulls);
pfree(itemIsDone);
}
return newTuple;
return heap_formtuple(targettype, values, nulls);
}
/* ----------------------------------------------------------------
......@@ -2555,11 +2517,7 @@ TupleTableSlot *
ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
{
TupleTableSlot *slot;
List *targetlist;
int len;
TupleDesc tupType;
Datum *tupValue;
ExprContext *econtext;
HeapTuple newTuple;
/*
......@@ -2572,21 +2530,17 @@ ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
* get the projection info we want
*/
slot = projInfo->pi_slot;
targetlist = projInfo->pi_targetlist;
len = projInfo->pi_len;
tupType = slot->ttc_tupleDescriptor;
tupValue = projInfo->pi_tupValue;
econtext = projInfo->pi_exprContext;
/*
* form a new result tuple (if possible --- result can be NULL)
*/
newTuple = ExecTargetList(targetlist,
len,
newTuple = ExecTargetList(projInfo->pi_targetlist,
tupType,
tupValue,
econtext,
projInfo->pi_exprContext,
projInfo->pi_tupValues,
projInfo->pi_tupNulls,
projInfo->pi_itemIsDone,
isDone);
/*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.94 2002/12/18 00:14:47 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.95 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -476,28 +476,50 @@ ExecGetResultType(PlanState *planstate)
}
/* ----------------
* ExecAssignProjectionInfo
forms the projection information from the node's targetlist
* ExecBuildProjectionInfo
*
* Build a ProjectionInfo node for evaluating the given tlist in the given
* econtext, and storing the result into the tuple slot. (Caller must have
* ensured that tuple slot has a descriptor matching the tlist!) Note that
* the given tlist should be a list of ExprState nodes, not Expr nodes.
* ----------------
*/
void
ExecAssignProjectionInfo(PlanState *planstate)
ProjectionInfo *
ExecBuildProjectionInfo(List *targetList,
ExprContext *econtext,
TupleTableSlot *slot)
{
ProjectionInfo *projInfo;
List *targetList;
ProjectionInfo *projInfo = makeNode(ProjectionInfo);
int len;
targetList = planstate->targetlist;
len = ExecTargetListLength(targetList);
projInfo = makeNode(ProjectionInfo);
projInfo->pi_targetlist = targetList;
projInfo->pi_len = len;
projInfo->pi_tupValue = (len <= 0) ? NULL : (Datum *) palloc(sizeof(Datum) * len);
projInfo->pi_exprContext = planstate->ps_ExprContext;
projInfo->pi_slot = planstate->ps_ResultTupleSlot;
projInfo->pi_exprContext = econtext;
projInfo->pi_slot = slot;
if (len > 0)
{
projInfo->pi_tupValues = (Datum *) palloc(len * sizeof(Datum));
projInfo->pi_tupNulls = (char *) palloc(len * sizeof(char));
projInfo->pi_itemIsDone = (ExprDoneCond *) palloc(len * sizeof(ExprDoneCond));
}
return projInfo;
}
planstate->ps_ProjInfo = projInfo;
/* ----------------
 *		ExecAssignProjectionInfo
 *
 * Builds projection info from the node's own targetlist, expression
 * context and result tuple slot (via ExecBuildProjectionInfo), and
 * stores it in planstate->ps_ProjInfo.
 * ----------------
 */
void
ExecAssignProjectionInfo(PlanState *planstate)
{
	ProjectionInfo *nodeProjection;

	nodeProjection = ExecBuildProjectionInfo(planstate->targetlist,
											 planstate->ps_ExprContext,
											 planstate->ps_ResultTupleSlot);

	planstate->ps_ProjInfo = nodeProjection;
}
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.63 2003/01/10 21:08:11 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.64 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -197,9 +197,9 @@ make_subplan(SubLink *slink, List *lefthand)
* NOTE: if you change these numbers, also change cost_qual_eval_walker()
* in path/costsize.c.
*
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to
* materialize its result below. In that case it would've been better
* to specify full retrieval. At present, however, we can only detect
* XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
* materialize its result below. In that case it would've been better to
* specify full retrieval. At present, however, we can only detect
* correlation or lack of it after we've made the subplan :-(. Perhaps
* detection of correlation should be done as a separate step.
* Meanwhile, we don't want to be too optimistic about the percentage
......@@ -525,10 +525,17 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
if (subquery_size > SortMem * 1024L)
return false;
/*
* The combining operators must be hashable and strict. (Without
* strictness, behavior in the presence of nulls is too unpredictable.
* We actually must assume even more than plain strictness, see
* nodeSubplan.c for details.)
* The combining operators must be hashable, strict, and self-commutative.
* The need for hashability is obvious, since we want to use hashing.
* Without strictness, behavior in the presence of nulls is too
* unpredictable. (We actually must assume even more than plain
* strictness, see nodeSubplan.c for details.) And commutativity ensures
* that the left and right datatypes are the same; this allows us to
* assume that the combining operators are equality for the righthand
* datatype, so that they can be used to compare righthand tuples as
* well as comparing lefthand to righthand tuples. (This last restriction
* could be relaxed by using two different sets of operators with the
* hash table, but there is no obvious usefulness to that at present.)
*/
foreach(opids, slink->operOids)
{
......@@ -542,7 +549,8 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for operator %u", opid);
optup = (Form_pg_operator) GETSTRUCT(tup);
if (!optup->oprcanhash || !func_strict(optup->oprcode))
if (!optup->oprcanhash || optup->oprcom != opid ||
!func_strict(optup->oprcode))
{
ReleaseSysCache(tup);
return false;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: executor.h,v 1.86 2003/01/10 23:54:24 tgl Exp $
* $Id: executor.h,v 1.87 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -46,6 +46,13 @@ extern bool execTuplesMatch(HeapTuple tuple1,
AttrNumber *matchColIdx,
FmgrInfo *eqfunctions,
MemoryContext evalContext);
extern bool execTuplesUnequal(HeapTuple tuple1,
HeapTuple tuple2,
TupleDesc tupdesc,
int numCols,
AttrNumber *matchColIdx,
FmgrInfo *eqfunctions,
MemoryContext evalContext);
extern FmgrInfo *execTuplesMatchPrepare(TupleDesc tupdesc,
int numCols,
AttrNumber *matchColIdx);
......@@ -214,6 +221,9 @@ extern void ExecAssignResultType(PlanState *planstate,
extern void ExecAssignResultTypeFromOuterPlan(PlanState *planstate);
extern void ExecAssignResultTypeFromTL(PlanState *planstate);
extern TupleDesc ExecGetResultType(PlanState *planstate);
extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
ExprContext *econtext,
TupleTableSlot *slot);
extern void ExecAssignProjectionInfo(PlanState *planstate);
extern void ExecFreeExprContext(PlanState *planstate);
extern TupleDesc ExecGetScanType(ScanState *scanstate);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: execnodes.h,v 1.90 2003/01/10 23:54:24 tgl Exp $
* $Id: execnodes.h,v 1.91 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -170,27 +170,34 @@ typedef struct ReturnSetInfo
/* ----------------
* ProjectionInfo node information
*
* This is all the information needed to perform projections
* on a tuple. Nodes which need to do projections create one
* of these. In theory, when a node wants to perform a projection
* This is all the information needed to perform projections ---
* that is, form new tuples by evaluation of targetlist expressions.
* Nodes which need to do projections create one of these.
* In theory, when a node wants to perform a projection
* it should just update this information as necessary and then
* call ExecProject(). -cim 6/3/91
*
* ExecProject() evaluates the tlist, forms a tuple, and stores it
* in the given slot. As a side-effect, the actual datum values and
* null indicators are placed in the work arrays tupValues/tupNulls.
*
* targetlist target list for projection
* len length of target list
* tupValue array of pointers to projection results
* exprContext expression context for ExecTargetList
* exprContext expression context in which to evaluate targetlist
* slot slot to place projection result in
* tupValues array of computed values
* tupNulls array of null indicators
* itemIsDone workspace for ExecProject
* ----------------
*/
typedef struct ProjectionInfo
{
NodeTag type;
List *pi_targetlist;
int pi_len;
Datum *pi_tupValue;
ExprContext *pi_exprContext;
TupleTableSlot *pi_slot;
Datum *pi_tupValues;
char *pi_tupNulls;
ExprDoneCond *pi_itemIsDone;
} ProjectionInfo;
/* ----------------
......@@ -495,8 +502,16 @@ typedef struct SubPlanState
bool needShutdown; /* TRUE = need to shutdown subplan */
HeapTuple curTuple; /* copy of most recent tuple from subplan */
/* these are used when hashing the subselect's output: */
ProjectionInfo *projLeft; /* for projecting lefthand exprs */
ProjectionInfo *projRight; /* for projecting subselect output */
TupleHashTable hashtable; /* hash table for no-nulls subselect rows */
TupleHashTable hashnulls; /* hash table for rows with null(s) */
bool havehashrows; /* TRUE if hashtable is not empty */
bool havenullrows; /* TRUE if hashnulls is not empty */
MemoryContext tablecxt; /* memory context containing tables */
ExprContext *innerecontext; /* working context for comparisons */
AttrNumber *keyColIdx; /* control data for hash tables */
FmgrInfo *eqfunctions; /* comparison functions for hash tables */
} SubPlanState;
/* ----------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment