Commit 01ec2563 authored by Heikki Linnakangas

Simplify tape block format.

No more indirect blocks. The blocks form a linked list instead.

This saves some memory, because we don't need to have a buffer in memory to
hold the indirect block (or blocks). To reflect that, TAPE_BUFFER_OVERHEAD
is reduced from 3 to 1 buffer, which allows using more memory for building
the initial runs.

Reviewed by Peter Geoghegan and Robert Haas.

Discussion: https://www.postgresql.org/message-id/34678beb-938e-646e-db9f-a7def5c44ada%40iki.fi
parent b86515da
This diff is collapsed.
......@@ -240,16 +240,16 @@ typedef enum
* Parameters for calculation of number of tapes to use --- see inittapes()
* and tuplesort_merge_order().
*
* In this calculation we assume that each tape will cost us about 3 blocks
* worth of buffer space (which is an underestimate for very large data
* volumes, but it's probably close enough --- see logtape.c).
* In this calculation we assume that each tape will cost us about 1 block's
* worth of buffer space. This ignores the overhead of all the other data
* structures needed for each tape, but it's probably close enough.
*
* MERGE_BUFFER_SIZE is how much data we'd like to read from each input
* tape during a preread cycle (see discussion at top of file).
*/
#define MINORDER 6 /* minimum merge order */
#define MAXORDER 500 /* maximum merge order */
#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3)
#define TAPE_BUFFER_OVERHEAD BLCKSZ
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
/*
......@@ -1849,6 +1849,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
SortTuple *stup)
{
unsigned int tuplen;
size_t nmoved;
switch (state->status)
{
......@@ -1948,10 +1949,13 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* end of file; back up to fetch last tuple's ending length
* word. If seek fails we must have a completely empty file.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
2 * sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
2 * sizeof(unsigned int));
if (nmoved == 0)
return false;
else if (nmoved != 2 * sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
state->eof_reached = false;
}
else
......@@ -1960,31 +1964,34 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Back up and fetch previously-returned tuple's ending length
* word. If seek fails, assume we are at start of file.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
sizeof(unsigned int));
if (nmoved == 0)
return false;
else if (nmoved != sizeof(unsigned int))
elog(ERROR, "unexpected tape position");
tuplen = getlen(state, state->result_tape, false);
/*
* Back up to get ending length word of tuple before it.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + 2 * sizeof(unsigned int)))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + 2 * sizeof(unsigned int));
if (nmoved == tuplen + sizeof(unsigned int))
{
/*
* If that fails, presumably the prev tuple is the first
* in the file. Back up so that it becomes next to read
* in forward direction (not obviously right, but that is
* what in-memory case does).
* We backed up over the previous tuple, but there was no
* ending length word before it. That means that the prev
* tuple is the first tuple in the file. It is now the
* next to read in forward direction (not obviously right,
* but that is what in-memory case does).
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen + sizeof(unsigned int)))
elog(ERROR, "bogus tuple length in backward scan");
return false;
}
else if (nmoved != tuplen + 2 * sizeof(unsigned int))
elog(ERROR, "bogus tuple length in backward scan");
}
tuplen = getlen(state, state->result_tape, false);
......@@ -1994,9 +2001,10 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* Note: READTUP expects we are positioned after the initial
* length word of the tuple, so back up to that point.
*/
if (!LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen))
nmoved = LogicalTapeBackspace(state->tapeset,
state->result_tape,
tuplen);
if (nmoved != tuplen)
elog(ERROR, "bogus tuple length in backward scan");
READTUP(state, stup, state->result_tape, tuplen);
......@@ -3183,11 +3191,10 @@ tuplesort_restorepos(Tuplesortstate *state)
state->eof_reached = state->markpos_eof;
break;
case TSS_SORTEDONTAPE:
if (!LogicalTapeSeek(state->tapeset,
state->result_tape,
state->markpos_block,
state->markpos_offset))
elog(ERROR, "tuplesort_restorepos failed");
LogicalTapeSeek(state->tapeset,
state->result_tape,
state->markpos_block,
state->markpos_offset);
state->eof_reached = state->markpos_eof;
break;
default:
......
......@@ -35,9 +35,9 @@ extern void LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum,
size_t buffer_size);
extern void LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum);
extern void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum);
extern bool LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum,
extern size_t LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum,
size_t size);
extern bool LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,
extern void LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,
long blocknum, int offset);
extern void LogicalTapeTell(LogicalTapeSet *lts, int tapenum,
long *blocknum, int *offset);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment