Commit 21b446dd authored by Tom Lane's avatar Tom Lane

Fix CLUSTER/VACUUM FULL for toast values owned by recently-updated rows.

In commit 7b0d0e93, I made CLUSTER and
VACUUM FULL try to preserve toast value OIDs from the original toast table
to the new one.  However, if we have to copy both live and recently-dead
versions of a row that has a toasted column, those versions may well
reference the same toast value with the same OID.  The patch then led to
duplicate-key failures as we tried to insert the toast value twice with the
same OID.  (The previous behavior was not very desirable either, since it
would have silently inserted the same value twice with different OIDs.
That wastes space, but what's worse is that the toast values inserted for
already-dead heap rows would not be reclaimed by subsequent ordinary
VACUUMs, since they go into the new toast table marked live not deleted.)

To fix, check if the copied OID already exists in the new toast table, and
if so, assume that it stores the desired value.  This is reasonably safe
since the only case where we will copy an OID from a previous toast pointer
is when toast_insert_or_update was given that toast pointer and so we just
pulled the data from the old table; if we got two different values that way
then we have big problems anyway.  We do have to assume that no other
backend is inserting items into the new toast table concurrently, but
that's surely safe for CLUSTER and VACUUM FULL.

Per bug #6393 from Maxim Boguk.  Back-patch to 9.0, same as the previous
patch.
parent de5a08c5
...@@ -76,7 +76,8 @@ do { \ ...@@ -76,7 +76,8 @@ do { \
static void toast_delete_datum(Relation rel, Datum value); static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value, static Datum toast_save_datum(Relation rel, Datum value,
struct varlena *oldexternal, int options); struct varlena *oldexternal, int options);
static bool toast_valueid_exists(Oid toastrelid, Oid valueid); static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr, static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length); int32 sliceoffset, int32 length);
...@@ -1342,7 +1343,34 @@ toast_save_datum(Relation rel, Datum value, ...@@ -1342,7 +1343,34 @@ toast_save_datum(Relation rel, Datum value,
/* Must copy to access aligned fields */ /* Must copy to access aligned fields */
VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
{
/* This value came from the old toast table; reuse its OID */
toast_pointer.va_valueid = old_toast_pointer.va_valueid; toast_pointer.va_valueid = old_toast_pointer.va_valueid;
/*
* There is a corner case here: the table rewrite might have
* to copy both live and recently-dead versions of a row, and
* those versions could easily reference the same toast value.
* When we copy the second or later version of such a row,
* reusing the OID will mean we select an OID that's already
* in the new toast table. Check for that, and if so, just
* fall through without writing the data again.
*
* While annoying and ugly-looking, this is a good thing
* because it ensures that we wind up with only one copy of
* the toast value when there is only one copy in the old
* toast table. Before we detected this case, we'd have made
* multiple copies, wasting space; and what's worse, the
* copies belonging to already-deleted heap tuples would not
* be reclaimed by VACUUM.
*/
if (toastrel_valueid_exists(toastrel,
toast_pointer.va_valueid))
{
/* Match, so short-circuit the data storage loop below */
data_todo = 0;
}
}
} }
if (toast_pointer.va_valueid == InvalidOid) if (toast_pointer.va_valueid == InvalidOid)
{ {
...@@ -1356,8 +1384,8 @@ toast_save_datum(Relation rel, Datum value, ...@@ -1356,8 +1384,8 @@ toast_save_datum(Relation rel, Datum value,
GetNewOidWithIndex(toastrel, GetNewOidWithIndex(toastrel,
RelationGetRelid(toastidx), RelationGetRelid(toastidx),
(AttrNumber) 1); (AttrNumber) 1);
} while (toast_valueid_exists(rel->rd_toastoid, } while (toastid_valueid_exists(rel->rd_toastoid,
toast_pointer.va_valueid)); toast_pointer.va_valueid));
} }
} }
...@@ -1495,24 +1523,18 @@ toast_delete_datum(Relation rel, Datum value) ...@@ -1495,24 +1523,18 @@ toast_delete_datum(Relation rel, Datum value)
/* ---------- /* ----------
* toast_valueid_exists - * toastrel_valueid_exists -
* *
* Test whether a toast value with the given ID exists in the toast relation * Test whether a toast value with the given ID exists in the toast relation
* ---------- * ----------
*/ */
static bool static bool
toast_valueid_exists(Oid toastrelid, Oid valueid) toastrel_valueid_exists(Relation toastrel, Oid valueid)
{ {
bool result = false; bool result = false;
Relation toastrel;
ScanKeyData toastkey; ScanKeyData toastkey;
SysScanDesc toastscan; SysScanDesc toastscan;
/*
* Open the toast relation
*/
toastrel = heap_open(toastrelid, AccessShareLock);
/* /*
* Setup a scan key to find chunks with matching va_valueid * Setup a scan key to find chunks with matching va_valueid
*/ */
...@@ -1530,10 +1552,27 @@ toast_valueid_exists(Oid toastrelid, Oid valueid) ...@@ -1530,10 +1552,27 @@ toast_valueid_exists(Oid toastrelid, Oid valueid)
if (systable_getnext(toastscan) != NULL) if (systable_getnext(toastscan) != NULL)
result = true; result = true;
/*
* End scan and close relations
*/
systable_endscan(toastscan); systable_endscan(toastscan);
return result;
}
/* ----------
* toastid_valueid_exists -
*
* As above, but work from toast rel's OID not an open relation
* ----------
*/
static bool
toastid_valueid_exists(Oid toastrelid, Oid valueid)
{
bool result;
Relation toastrel;
toastrel = heap_open(toastrelid, AccessShareLock);
result = toastrel_valueid_exists(toastrel, valueid);
heap_close(toastrel, AccessShareLock); heap_close(toastrel, AccessShareLock);
return result; return result;
......
...@@ -787,16 +787,19 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ...@@ -787,16 +787,19 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
* When doing swap by content, any toast pointers written into NewHeap * When doing swap by content, any toast pointers written into NewHeap
* must use the old toast table's OID, because that's where the toast * must use the old toast table's OID, because that's where the toast
* data will eventually be found. Set this up by setting rd_toastoid. * data will eventually be found. Set this up by setting rd_toastoid.
* This also tells tuptoaster.c to preserve the toast value OIDs, * This also tells toast_save_datum() to preserve the toast value
* which we want so as not to invalidate toast pointers in system * OIDs, which we want so as not to invalidate toast pointers in
* catalog caches. * system catalog caches, and to avoid making multiple copies of a
* single toast value.
* *
* Note that we must hold NewHeap open until we are done writing data, * Note that we must hold NewHeap open until we are done writing data,
* since the relcache will not guarantee to remember this setting once * since the relcache will not guarantee to remember this setting once
* the relation is closed. Also, this technique depends on the fact * the relation is closed. Also, this technique depends on the fact
* that no one will try to read from the NewHeap until after we've * that no one will try to read from the NewHeap until after we've
* finished writing it and swapping the rels --- otherwise they could * finished writing it and swapping the rels --- otherwise they could
* follow the toast pointers to the wrong place. * follow the toast pointers to the wrong place. (It would actually
* work for values copied over from the old toast table, but not for
* any values that we toast which were previously not toasted.)
*/ */
NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid; NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
} }
......
...@@ -159,7 +159,8 @@ typedef struct RelationData ...@@ -159,7 +159,8 @@ typedef struct RelationData
* have the existing toast table's OID, not the OID of the transient toast * have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in * table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new * toast pointers inserted into this rel. (Note it's set on the new
* version of the main heap, not the toast table itself.) * version of the main heap, not the toast table itself.) This also
* causes toast_save_datum() to try to preserve toast value OIDs.
*/ */
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */ Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment