Commit e9fd0415 authored by Robert Haas's avatar Robert Haas

Move heap-specific detoasting logic into a separate function.

The new function, heap_fetch_toast_slice, is shared between
toast_fetch_datum_slice and toast_fetch_datum, and does all the
work of scanning the TOAST table, fetching chunks, and storing
them into the space allocated for the result varlena.

As an incidental side effect, this allows toast_fetch_datum_slice
to perform the scan with only a single scankey if all chunks are
being fetched, which might have some tiny performance benefit.

Discussion: http://postgr.es/m/CA+TgmobBzxwFojJ0zV0Own3dr09y43hp+OzU2VW+nos4PMXWEg@mail.gmail.com
parent bf7427bd
...@@ -27,6 +27,9 @@ static struct varlena *toast_fetch_datum(struct varlena *attr); ...@@ -27,6 +27,9 @@ static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr, static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset, int32 sliceoffset,
int32 slicelength); int32 slicelength);
static void heap_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
int32 slicelength, struct varlena *result);
static struct varlena *toast_decompress_datum(struct varlena *attr); static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
...@@ -325,19 +328,9 @@ static struct varlena * ...@@ -325,19 +328,9 @@ static struct varlena *
toast_fetch_datum(struct varlena *attr) toast_fetch_datum(struct varlena *attr)
{ {
Relation toastrel; Relation toastrel;
Relation *toastidxs;
ScanKeyData toastkey;
SysScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
struct varlena *result; struct varlena *result;
struct varatt_external toast_pointer; struct varatt_external toast_pointer;
int32 attrsize; int32 attrsize;
int32 expectedchunk;
int32 totalchunks;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr)) if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
...@@ -346,7 +339,6 @@ toast_fetch_datum(struct varlena *attr) ...@@ -346,7 +339,6 @@ toast_fetch_datum(struct varlena *attr)
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
attrsize = toast_pointer.va_extsize; attrsize = toast_pointer.va_extsize;
totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
result = (struct varlena *) palloc(attrsize + VARHDRSZ); result = (struct varlena *) palloc(attrsize + VARHDRSZ);
...@@ -355,130 +347,19 @@ toast_fetch_datum(struct varlena *attr) ...@@ -355,130 +347,19 @@ toast_fetch_datum(struct varlena *attr)
else else
SET_VARSIZE(result, attrsize + VARHDRSZ); SET_VARSIZE(result, attrsize + VARHDRSZ);
if (attrsize == 0)
return result; /* Probably shouldn't happen, but just in case. */
/* /*
* Open the toast relation and its indexes * Open the toast relation and its indexes
*/ */
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
toasttupDesc = toastrel->rd_att;
/* Look for the valid index of the toast relation */
validIndex = toast_open_indexes(toastrel,
AccessShareLock,
&toastidxs,
&num_indexes);
/*
* Setup a scan key to fetch from the index by va_valueid
*/
ScanKeyInit(&toastkey,
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(toast_pointer.va_valueid));
/*
* Read the chunks by index
*
* Note that because the index is actually on (valueid, chunkidx) we will
* see the chunks in chunkidx order, even though we didn't explicitly ask
* for it.
*/
expectedchunk = 0;
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, 1, &toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
int32 curchunk;
Pointer chunk;
bool isnull;
char *chunkdata;
int32 chunksize;
int32 expected_size;
/*
* Have a chunk, extract the sequence number and the data
*/
curchunk = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
if (!VARATT_IS_EXTENDED(chunk))
{
chunksize = VARSIZE(chunk) - VARHDRSZ;
chunkdata = VARDATA(chunk);
}
else if (VARATT_IS_SHORT(chunk))
{
/* could happen due to heap_form_tuple doing its thing */
chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
chunkdata = VARDATA_SHORT(chunk);
}
else
{
/* should never happen */
elog(ERROR, "found toasted toast chunk for toast value %u in %s",
toast_pointer.va_valueid,
RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
/*
* Some checks on the data we've found
*/
if (curchunk != expectedchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
curchunk, expectedchunk,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel))));
if (curchunk > totalchunks - 1)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
curchunk,
0, totalchunks - 1,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel))));
expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
if (chunksize != expected_size)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, expected_size,
curchunk, totalchunks,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel))));
/*
* Copy the data into proper place in our result
*/
memcpy(VARDATA(result) + curchunk * TOAST_MAX_CHUNK_SIZE,
chunkdata,
chunksize);
expectedchunk++; /* Fetch all chunks */
} heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize, 0,
attrsize, result);
/*
* Final checks that we successfully fetched the datum
*/
if (expectedchunk != totalchunks)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("missing chunk number %d for toast value %u in %s",
expectedchunk,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel))));
/* /* Close toast table */
* End scan and close relations
*/
systable_endscan_ordered(toastscan);
toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
table_close(toastrel, AccessShareLock); table_close(toastrel, AccessShareLock);
return result; return result;
...@@ -500,22 +381,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -500,22 +381,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
int32 slicelength) int32 slicelength)
{ {
Relation toastrel; Relation toastrel;
Relation *toastidxs;
ScanKeyData toastkey[3];
int nscankeys;
SysScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
struct varlena *result; struct varlena *result;
struct varatt_external toast_pointer; struct varatt_external toast_pointer;
int32 attrsize; int32 attrsize;
int32 expectedchunk;
int startchunk;
int endchunk;
int totalchunks;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr)) if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums"); elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
...@@ -531,7 +399,6 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -531,7 +399,6 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
attrsize = toast_pointer.va_extsize; attrsize = toast_pointer.va_extsize;
totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
if (sliceoffset >= attrsize) if (sliceoffset >= attrsize)
{ {
...@@ -560,15 +427,47 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -560,15 +427,47 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
if (slicelength == 0) if (slicelength == 0)
return result; /* Can save a lot of work at this point! */ return result; /* Can save a lot of work at this point! */
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; /* Open the toast relation */
endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
Assert(endchunk < totalchunks);
/*
* Open the toast relation and its indexes
*/
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
toasttupDesc = toastrel->rd_att;
/* Fetch all chunks */
heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize,
sliceoffset, slicelength, result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
return result;
}
/*
* Fetch a TOAST slice from a heap table.
*
* toastrel is the relation from which chunks are to be fetched.
* valueid identifies the TOAST value from which chunks are being fetched.
* attrsize is the total size of the TOAST value.
* sliceoffset is the byte offset within the TOAST value from which to fetch.
* slicelength is the number of bytes to be fetched from the TOAST value.
* result is the varlena into which the results should be written.
*/
static void
heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
int32 sliceoffset, int32 slicelength,
struct varlena *result)
{
Relation *toastidxs;
ScanKeyData toastkey[3];
TupleDesc toasttupDesc = toastrel->rd_att;
int nscankeys;
SysScanDesc toastscan;
HeapTuple ttup;
int32 expectedchunk;
int32 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
int startchunk;
int endchunk;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
/* Look for the valid index of toast relation */ /* Look for the valid index of toast relation */
validIndex = toast_open_indexes(toastrel, validIndex = toast_open_indexes(toastrel,
...@@ -576,6 +475,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -576,6 +475,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
&toastidxs, &toastidxs,
&num_indexes); &num_indexes);
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
Assert(endchunk <= totalchunks);
/* /*
* Setup a scan key to fetch from the index. This is either two keys or * Setup a scan key to fetch from the index. This is either two keys or
* three depending on the number of chunks. * three depending on the number of chunks.
...@@ -583,12 +486,15 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -583,12 +486,15 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
ScanKeyInit(&toastkey[0], ScanKeyInit(&toastkey[0],
(AttrNumber) 1, (AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ, BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(toast_pointer.va_valueid)); ObjectIdGetDatum(valueid));
/* /*
* Use equality condition for one chunk, a range condition otherwise: * No additional condition if fetching all chunks. Otherwise, use an
* equality condition for one chunk, and a range condition otherwise.
*/ */
if (startchunk == endchunk) if (startchunk == 0 && endchunk == totalchunks - 1)
nscankeys = 1;
else if (startchunk == endchunk)
{ {
ScanKeyInit(&toastkey[1], ScanKeyInit(&toastkey[1],
(AttrNumber) 2, (AttrNumber) 2,
...@@ -609,15 +515,17 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -609,15 +515,17 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
nscankeys = 3; nscankeys = 3;
} }
/* Prepare for scan */
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, nscankeys, toastkey);
/* /*
* Read the chunks by index * Read the chunks by index
* *
* The index is on (valueid, chunkidx) so they will come in order * The index is on (valueid, chunkidx) so they will come in order
*/ */
init_toast_snapshot(&SnapshotToast);
expectedchunk = startchunk; expectedchunk = startchunk;
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, nscankeys, toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{ {
int32 curchunk; int32 curchunk;
...@@ -651,8 +559,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -651,8 +559,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
{ {
/* should never happen */ /* should never happen */
elog(ERROR, "found toasted toast chunk for toast value %u in %s", elog(ERROR, "found toasted toast chunk for toast value %u in %s",
toast_pointer.va_valueid, valueid, RelationGetRelationName(toastrel));
RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */ chunksize = 0; /* keep compiler quiet */
chunkdata = NULL; chunkdata = NULL;
} }
...@@ -664,16 +571,14 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -664,16 +571,14 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s", errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
curchunk, expectedchunk, curchunk, expectedchunk, valueid,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel)))); RelationGetRelationName(toastrel))));
if (curchunk > endchunk) if (curchunk > endchunk)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
curchunk, curchunk,
startchunk, endchunk, startchunk, endchunk, valueid,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel)))); RelationGetRelationName(toastrel))));
expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE); : attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
...@@ -682,8 +587,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -682,8 +587,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s", errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, expected_size, chunksize, expected_size,
curchunk, totalchunks, curchunk, totalchunks, valueid,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel)))); RelationGetRelationName(toastrel))));
/* /*
...@@ -711,18 +615,12 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, ...@@ -711,18 +615,12 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("missing chunk number %d for toast value %u in %s", errmsg_internal("missing chunk number %d for toast value %u in %s",
expectedchunk, expectedchunk, valueid,
toast_pointer.va_valueid,
RelationGetRelationName(toastrel)))); RelationGetRelationName(toastrel))));
/* /* End scan and close indexes. */
* End scan and close relations
*/
systable_endscan_ordered(toastscan); systable_endscan_ordered(toastscan);
toast_close_indexes(toastidxs, num_indexes, AccessShareLock); toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
table_close(toastrel, AccessShareLock);
return result;
} }
/* ---------- /* ----------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment