Commit 38e93482 authored by Tom Lane's avatar Tom Lane

Make a couple of small changes to the tuplestore API, for the benefit of the

upcoming window-functions patch.  First, tuplestore_trim is now an
exported function that must be explicitly invoked by callers at
appropriate times, rather than something that tuplestore tries to do
behind the scenes.  Second, a read pointer that is marked as allowing
backward scan no longer prevents truncation.  This means that a read pointer
marked as having BACKWARD but not REWIND capability can only safely read
backwards as far as the oldest other read pointer.  (The expected use pattern
for this involves having another read pointer that serves as the truncation
fencepost.)
parent c8b69ed6
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.63 2008/10/01 19:51:49 tgl Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.64 2008/12/27 17:39:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,11 +66,11 @@ ExecMaterial(MaterialState *node) ...@@ -66,11 +66,11 @@ ExecMaterial(MaterialState *node)
* Allocate a second read pointer to serve as the mark. * Allocate a second read pointer to serve as the mark.
* We know it must have index 1, so needn't store that. * We know it must have index 1, so needn't store that.
*/ */
int ptrn; int ptrno;
ptrn = tuplestore_alloc_read_pointer(tuplestorestate, ptrno = tuplestore_alloc_read_pointer(tuplestorestate,
node->eflags); node->eflags);
Assert(ptrn == 1); Assert(ptrno == 1);
} }
node->tuplestorestate = tuplestorestate; node->tuplestorestate = tuplestorestate;
} }
...@@ -182,6 +182,16 @@ ExecInitMaterial(Material *node, EState *estate, int eflags) ...@@ -182,6 +182,16 @@ ExecInitMaterial(Material *node, EState *estate, int eflags)
EXEC_FLAG_BACKWARD | EXEC_FLAG_BACKWARD |
EXEC_FLAG_MARK)); EXEC_FLAG_MARK));
/*
* Tuplestore's interpretation of the flag bits is subtly different from
* the general executor meaning: it doesn't think BACKWARD necessarily
* means "backwards all the way to start". If told to support BACKWARD we
* must include REWIND in the tuplestore eflags, else tuplestore_trim
* might throw away too much.
*/
if (eflags & EXEC_FLAG_BACKWARD)
matstate->eflags |= EXEC_FLAG_REWIND;
matstate->eof_underlying = false; matstate->eof_underlying = false;
matstate->tuplestorestate = NULL; matstate->tuplestorestate = NULL;
...@@ -278,6 +288,11 @@ ExecMaterialMarkPos(MaterialState *node) ...@@ -278,6 +288,11 @@ ExecMaterialMarkPos(MaterialState *node)
* copy the active read pointer to the mark. * copy the active read pointer to the mark.
*/ */
tuplestore_copy_read_pointer(node->tuplestorestate, 0, 1); tuplestore_copy_read_pointer(node->tuplestorestate, 0, 1);
/*
* since we may have advanced the mark, try to truncate the tuplestore.
*/
tuplestore_trim(node->tuplestorestate);
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
......
...@@ -30,9 +30,10 @@ ...@@ -30,9 +30,10 @@
* in a format that allows either forward or backward scan. Otherwise, only * in a format that allows either forward or backward scan. Otherwise, only
* forward scan is allowed. A request for backward scan must be made before * forward scan is allowed. A request for backward scan must be made before
* putting any tuples into the tuplestore. Rewind is normally allowed but * putting any tuples into the tuplestore. Rewind is normally allowed but
* can be turned off via tuplestore_set_eflags; turning off both backward * can be turned off via tuplestore_set_eflags; turning off rewind for all
* scan and rewind for all read pointers enables truncation of the tuplestore * read pointers enables truncation of the tuplestore at the oldest read point
* at the oldest read point for minimal memory usage. * for minimal memory usage. (The caller must explicitly call tuplestore_trim
* at appropriate times for truncation to actually happen.)
* *
* Note: in TSS_WRITEFILE state, the temp file's seek position is the * Note: in TSS_WRITEFILE state, the temp file's seek position is the
* current write position, and the write-position variables in the tuplestore * current write position, and the write-position variables in the tuplestore
...@@ -46,7 +47,7 @@ ...@@ -46,7 +47,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.43 2008/10/28 15:51:03 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.44 2008/12/27 17:39:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -101,6 +102,7 @@ struct Tuplestorestate ...@@ -101,6 +102,7 @@ struct Tuplestorestate
int eflags; /* capability flags (OR of pointers' flags) */ int eflags; /* capability flags (OR of pointers' flags) */
bool backward; /* store extra length words in file? */ bool backward; /* store extra length words in file? */
bool interXact; /* keep open through transactions? */ bool interXact; /* keep open through transactions? */
bool truncated; /* tuplestore_trim has removed tuples? */
long availMem; /* remaining memory available, in bytes */ long availMem; /* remaining memory available, in bytes */
BufFile *myfile; /* underlying file, or NULL if none */ BufFile *myfile; /* underlying file, or NULL if none */
...@@ -220,7 +222,6 @@ static Tuplestorestate *tuplestore_begin_common(int eflags, ...@@ -220,7 +222,6 @@ static Tuplestorestate *tuplestore_begin_common(int eflags,
int maxKBytes); int maxKBytes);
static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple); static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
static void dumptuples(Tuplestorestate *state); static void dumptuples(Tuplestorestate *state);
static void tuplestore_trim(Tuplestorestate *state);
static unsigned int getlen(Tuplestorestate *state, bool eofOK); static unsigned int getlen(Tuplestorestate *state, bool eofOK);
static void *copytup_heap(Tuplestorestate *state, void *tup); static void *copytup_heap(Tuplestorestate *state, void *tup);
static void writetup_heap(Tuplestorestate *state, void *tup); static void writetup_heap(Tuplestorestate *state, void *tup);
...@@ -242,6 +243,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes) ...@@ -242,6 +243,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
state->status = TSS_INMEM; state->status = TSS_INMEM;
state->eflags = eflags; state->eflags = eflags;
state->interXact = interXact; state->interXact = interXact;
state->truncated = false;
state->availMem = maxKBytes * 1024L; state->availMem = maxKBytes * 1024L;
state->myfile = NULL; state->myfile = NULL;
...@@ -319,6 +321,10 @@ tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes) ...@@ -319,6 +321,10 @@ tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
* EXEC_FLAG_BACKWARD need backward fetch * EXEC_FLAG_BACKWARD need backward fetch
* If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD * If tuplestore_set_eflags is not called, REWIND is allowed, and BACKWARD
* is set per "randomAccess" in the tuplestore_begin_xxx call. * is set per "randomAccess" in the tuplestore_begin_xxx call.
*
* NOTE: setting BACKWARD without REWIND means the pointer can read backwards,
* but not further than the truncation point (the furthest-back read pointer
* position at the time of the last tuplestore_trim call).
*/ */
void void
tuplestore_set_eflags(Tuplestorestate *state, int eflags) tuplestore_set_eflags(Tuplestorestate *state, int eflags)
...@@ -397,6 +403,7 @@ tuplestore_clear(Tuplestorestate *state) ...@@ -397,6 +403,7 @@ tuplestore_clear(Tuplestorestate *state)
} }
} }
state->status = TSS_INMEM; state->status = TSS_INMEM;
state->truncated = false;
state->memtupcount = 0; state->memtupcount = 0;
readptr = state->readptrs; readptr = state->readptrs;
for (i = 0; i < state->readptrcount; readptr++, i++) for (i = 0; i < state->readptrcount; readptr++, i++)
...@@ -723,12 +730,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -723,12 +730,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
return NULL; return NULL;
if (readptr->current < state->memtupcount) if (readptr->current < state->memtupcount)
{ {
/* /* We have another tuple, so return it */
* We have another tuple, so return it. Note: in
* principle we could try tuplestore_trim() here after
* advancing current, but this would cost cycles with
* little chance of success, so we don't bother.
*/
return state->memtuples[readptr->current++]; return state->memtuples[readptr->current++];
} }
readptr->eof_reached = true; readptr->eof_reached = true;
...@@ -738,7 +740,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -738,7 +740,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
{ {
/* /*
* if all tuples are fetched already then we return last * if all tuples are fetched already then we return last
* tuple, else - tuple before last returned. * tuple, else tuple before last returned.
*/ */
if (readptr->eof_reached) if (readptr->eof_reached)
{ {
...@@ -748,11 +750,17 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -748,11 +750,17 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
else else
{ {
if (readptr->current <= 0) if (readptr->current <= 0)
{
Assert(!state->truncated);
return NULL; return NULL;
}
readptr->current--; /* last returned tuple */ readptr->current--; /* last returned tuple */
} }
if (readptr->current <= 0) if (readptr->current <= 0)
{
Assert(!state->truncated);
return NULL; return NULL;
}
return state->memtuples[readptr->current - 1]; return state->memtuples[readptr->current - 1];
} }
break; break;
...@@ -795,7 +803,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -795,7 +803,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
* Backward. * Backward.
* *
* if all tuples are fetched already then we return last tuple, * if all tuples are fetched already then we return last tuple,
* else - tuple before last returned. * else tuple before last returned.
* *
* Back up to fetch previously-returned tuple's ending length * Back up to fetch previously-returned tuple's ending length
* word. If seek fails, assume we are at start of file. * word. If seek fails, assume we are at start of file.
...@@ -805,6 +813,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -805,6 +813,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
{ {
/* even a failed backwards fetch gets you out of eof state */ /* even a failed backwards fetch gets you out of eof state */
readptr->eof_reached = false; readptr->eof_reached = false;
Assert(!state->truncated);
return NULL; return NULL;
} }
tuplen = getlen(state, false); tuplen = getlen(state, false);
...@@ -833,6 +842,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward, ...@@ -833,6 +842,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
-(long) (tuplen + sizeof(unsigned int)), -(long) (tuplen + sizeof(unsigned int)),
SEEK_CUR) != 0) SEEK_CUR) != 0)
elog(ERROR, "bogus tuple length in backward scan"); elog(ERROR, "bogus tuple length in backward scan");
Assert(!state->truncated);
return NULL; return NULL;
} }
tuplen = getlen(state, false); tuplen = getlen(state, false);
...@@ -887,7 +897,8 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward, ...@@ -887,7 +897,8 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
* tuplestore_advance - exported function to adjust position without fetching * tuplestore_advance - exported function to adjust position without fetching
* *
* We could optimize this case to avoid palloc/pfree overhead, but for the * We could optimize this case to avoid palloc/pfree overhead, but for the
* moment it doesn't seem worthwhile. * moment it doesn't seem worthwhile. (XXX this probably needs to be
* reconsidered given the needs of window functions.)
*/ */
bool bool
tuplestore_advance(Tuplestorestate *state, bool forward) tuplestore_advance(Tuplestorestate *state, bool forward)
...@@ -948,6 +959,7 @@ tuplestore_rescan(Tuplestorestate *state) ...@@ -948,6 +959,7 @@ tuplestore_rescan(Tuplestorestate *state)
TSReadPointer *readptr = &state->readptrs[state->activeptr]; TSReadPointer *readptr = &state->readptrs[state->activeptr];
Assert(readptr->eflags & EXEC_FLAG_REWIND); Assert(readptr->eflags & EXEC_FLAG_REWIND);
Assert(!state->truncated);
switch (state->status) switch (state->status)
{ {
...@@ -1006,10 +1018,8 @@ tuplestore_copy_read_pointer(Tuplestorestate *state, ...@@ -1006,10 +1018,8 @@ tuplestore_copy_read_pointer(Tuplestorestate *state,
switch (state->status) switch (state->status)
{ {
case TSS_INMEM: case TSS_INMEM:
/* We might be able to truncate the tuplestore */
tuplestore_trim(state);
break;
case TSS_WRITEFILE: case TSS_WRITEFILE:
/* no work */
break; break;
case TSS_READFILE: case TSS_READFILE:
/* /*
...@@ -1053,8 +1063,17 @@ tuplestore_copy_read_pointer(Tuplestorestate *state, ...@@ -1053,8 +1063,17 @@ tuplestore_copy_read_pointer(Tuplestorestate *state,
/* /*
* tuplestore_trim - remove all no-longer-needed tuples * tuplestore_trim - remove all no-longer-needed tuples
*
* Calling this function authorizes the tuplestore to delete all tuples
* before the oldest read pointer, if no read pointer is marked as requiring
* REWIND capability.
*
* Note: this is obviously safe if no pointer has BACKWARD capability either.
* If a pointer is marked as BACKWARD but not REWIND capable, it means that
* the pointer can be moved backward but not before the oldest other read
* pointer.
*/ */
static void void
tuplestore_trim(Tuplestorestate *state) tuplestore_trim(Tuplestorestate *state)
{ {
int oldest; int oldest;
...@@ -1062,10 +1081,9 @@ tuplestore_trim(Tuplestorestate *state) ...@@ -1062,10 +1081,9 @@ tuplestore_trim(Tuplestorestate *state)
int i; int i;
/* /*
* We can truncate the tuplestore if neither backward scan nor * Truncation is disallowed if any read pointer requires rewind capability.
* rewind capability are required by any read pointer.
*/ */
if (state->eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_REWIND)) if (state->eflags & EXEC_FLAG_REWIND)
return; return;
/* /*
...@@ -1125,6 +1143,9 @@ tuplestore_trim(Tuplestorestate *state) ...@@ -1125,6 +1143,9 @@ tuplestore_trim(Tuplestorestate *state)
if (!state->readptrs[i].eof_reached) if (!state->readptrs[i].eof_reached)
state->readptrs[i].current -= nremove; state->readptrs[i].current -= nremove;
} }
/* mark tuplestore as truncated (used for Assert crosschecks only) */
state->truncated = true;
} }
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.25 2008/10/04 21:56:55 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/tuplestore.h,v 1.26 2008/12/27 17:39:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,6 +66,8 @@ extern void tuplestore_select_read_pointer(Tuplestorestate *state, int ptr); ...@@ -66,6 +66,8 @@ extern void tuplestore_select_read_pointer(Tuplestorestate *state, int ptr);
extern void tuplestore_copy_read_pointer(Tuplestorestate *state, extern void tuplestore_copy_read_pointer(Tuplestorestate *state,
int srcptr, int destptr); int srcptr, int destptr);
extern void tuplestore_trim(Tuplestorestate *state);
extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
TupleTableSlot *slot); TupleTableSlot *slot);
extern bool tuplestore_advance(Tuplestorestate *state, bool forward); extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment