Commit 01ec2563 authored by Heikki Linnakangas

Simplify tape block format.

No more indirect blocks. The blocks form a linked list instead.

This saves some memory, because we don't need to have a buffer in memory to
hold the indirect block (or blocks). To reflect that, TAPE_BUFFER_OVERHEAD
is reduced from 3 to 1 buffer, which allows using more memory for building
the initial runs.

Reviewed by Peter Geoghegan and Robert Haas.

Discussion: https://www.postgresql.org/message-id/34678beb-938e-646e-db9f-a7def5c44ada%40iki.fi
parent b86515da
This diff is collapsed.
...@@ -240,16 +240,16 @@ typedef enum ...@@ -240,16 +240,16 @@ typedef enum
* Parameters for calculation of number of tapes to use --- see inittapes() * Parameters for calculation of number of tapes to use --- see inittapes()
* and tuplesort_merge_order(). * and tuplesort_merge_order().
* *
* In this calculation we assume that each tape will cost us about 3 blocks * In this calculation we assume that each tape will cost us about 1 block
* worth of buffer space (which is an underestimate for very large data * worth of buffer space. This ignores the overhead of all the other data
* volumes, but it's probably close enough --- see logtape.c). * structures needed for each tape, but it's probably close enough.
* *
* MERGE_BUFFER_SIZE is how much data we'd like to read from each input * MERGE_BUFFER_SIZE is how much data we'd like to read from each input
* tape during a preread cycle (see discussion at top of file). * tape during a preread cycle (see discussion at top of file).
*/ */
#define MINORDER 6 /* minimum merge order */ #define MINORDER 6 /* minimum merge order */
#define MAXORDER 500 /* maximum merge order */ #define MAXORDER 500 /* maximum merge order */
#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3) #define TAPE_BUFFER_OVERHEAD BLCKSZ
#define MERGE_BUFFER_SIZE (BLCKSZ * 32) #define MERGE_BUFFER_SIZE (BLCKSZ * 32)
/* /*
...@@ -1849,6 +1849,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, ...@@ -1849,6 +1849,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
SortTuple *stup) SortTuple *stup)
{ {
unsigned int tuplen; unsigned int tuplen;
size_t nmoved;
switch (state->status) switch (state->status)
{ {
...@@ -1948,10 +1949,13 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, ...@@ -1948,10 +1949,13 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* end of file; back up to fetch last tuple's ending length * end of file; back up to fetch last tuple's ending length
* word. If seek fails we must have a completely empty file. * word. If seek fails we must have a completely empty file.
*/ */
if (!LogicalTapeBackspace(state->tapeset, nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape, state->result_tape,
2 * sizeof(unsigned int))) 2 * sizeof(unsigned int));
if (nmoved == 0)
return false; return false;
else if (nmoved != 2 * sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
state->eof_reached = false; state->eof_reached = false;
} }
else else
...@@ -1960,31 +1964,34 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, ...@@ -1960,31 +1964,34 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Back up and fetch previously-returned tuple's ending length * Back up and fetch previously-returned tuple's ending length
* word. If seek fails, assume we are at start of file. * word. If seek fails, assume we are at start of file.
*/ */
if (!LogicalTapeBackspace(state->tapeset, nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape, state->result_tape,
sizeof(unsigned int))) sizeof(unsigned int));
if (nmoved == 0)
return false; return false;
else if (nmoved != sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
tuplen = getlen(state, state->result_tape, false); tuplen = getlen(state, state->result_tape, false);
/* /*
* Back up to get ending length word of tuple before it. * Back up to get ending length word of tuple before it.
*/ */
if (!LogicalTapeBackspace(state->tapeset, nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape, state->result_tape,
tuplen + 2 * sizeof(unsigned int))) tuplen + 2 * sizeof(unsigned int));
if (nmoved == tuplen + sizeof(unsigned int))
{ {
/* /*
* If that fails, presumably the prev tuple is the first * We backed up over the previous tuple, but there was no
* in the file. Back up so that it becomes next to read * ending length word before it. That means that the prev
* in forward direction (not obviously right, but that is * tuple is the first tuple in the file. It is now the
* what in-memory case does). * next to read in forward direction (not obviously right,
* but that is what in-memory case does).
*/ */
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + sizeof(unsigned int)))
elog(ERROR, "bogus tuple length in backward scan");
return false; return false;
} }
else if (nmoved != tuplen + 2 * sizeof(unsigned int))
elog(ERROR, "bogus tuple length in backward scan");
} }
tuplen = getlen(state, state->result_tape, false); tuplen = getlen(state, state->result_tape, false);
...@@ -1994,9 +2001,10 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, ...@@ -1994,9 +2001,10 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Note: READTUP expects we are positioned after the initial * Note: READTUP expects we are positioned after the initial
* length word of the tuple, so back up to that point. * length word of the tuple, so back up to that point.
*/ */
if (!LogicalTapeBackspace(state->tapeset, nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape, state->result_tape,
tuplen)) tuplen);
if (nmoved != tuplen)
elog(ERROR, "bogus tuple length in backward scan"); elog(ERROR, "bogus tuple length in backward scan");
READTUP(state, stup, state->result_tape, tuplen); READTUP(state, stup, state->result_tape, tuplen);
...@@ -3183,11 +3191,10 @@ tuplesort_restorepos(Tuplesortstate *state) ...@@ -3183,11 +3191,10 @@ tuplesort_restorepos(Tuplesortstate *state)
state->eof_reached = state->markpos_eof; state->eof_reached = state->markpos_eof;
break; break;
case TSS_SORTEDONTAPE: case TSS_SORTEDONTAPE:
if (!LogicalTapeSeek(state->tapeset, LogicalTapeSeek(state->tapeset,
state->result_tape, state->result_tape,
state->markpos_block, state->markpos_block,
state->markpos_offset)) state->markpos_offset);
elog(ERROR, "tuplesort_restorepos failed");
state->eof_reached = state->markpos_eof; state->eof_reached = state->markpos_eof;
break; break;
default: default:
......
...@@ -35,9 +35,9 @@ extern void LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum, ...@@ -35,9 +35,9 @@ extern void LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum,
size_t buffer_size); size_t buffer_size);
extern void LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum); extern void LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum);
extern void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum); extern void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum);
extern bool LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, extern size_t LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum,
size_t size); size_t size);
extern bool LogicalTapeSeek(LogicalTapeSet *lts, int tapenum, extern void LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,
long blocknum, int offset); long blocknum, int offset);
extern void LogicalTapeTell(LogicalTapeSet *lts, int tapenum, extern void LogicalTapeTell(LogicalTapeSet *lts, int tapenum,
long *blocknum, int *offset); long *blocknum, int *offset);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment