Commit eba77534 authored by Peter Geoghegan's avatar Peter Geoghegan

Avoid amcheck inline compression false positives.

The previous tacit assumption that index_form_tuple() hides differences
in the TOAST state of its input datums was wrong.  Normalize input
varlena datums by decompressing compressed values, and forming a new
index tuple for fingerprinting using uncompressed inputs.  The final
normalized representation may actually be compressed once again within
index_form_tuple(), though that shouldn't matter.  When the original
tuple is found to have no datums that are compressed inline, fingerprint
the original tuple directly.

Normalization avoids false positive reports of corruption in certain
cases.  For example, the executor can apply toasting with some inline
compression to an entire heap tuple because its input has a single
external TOAST pointer.  Varlena datums for other attributes that are
not particularly good candidates for inline compression can be
compressed in the heap tuple in passing, without the representation of
the same values in index tuples ever receiving concomitant inline
compression.

Add a test case to recreate the issue in a simpler though less realistic
way: by exploiting differences in pg_attribute.attstorage between heap
and index relations.

This bug was discovered by me during testing of an upcoming set of nbtree
enhancements.  It was also independently reported by Andreas Kunert, as
bug #15597.  His test case was rather more realistic than the one I
ended up using.

Bug: #15597
Discussion: https://postgr.es/m/CAH2-WznrVd9ie+TTJ45nDT+v2nUt6YJwQrT9SebCdQKtAvfPZw@mail.gmail.com
Discussion: https://postgr.es/m/15597-294e5d3e7f01c407@postgresql.org
Backpatch: 11-, where heapallindexed verification was introduced.
parent 727921f4
......@@ -140,10 +140,30 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true);
(1 row)
--
-- BUG #15597: must not assume consistent input toasting state when forming
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
--
CREATE TABLE toast_bug(buggy text);
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
-- pg_attribute entry for toasty.buggy will have plain storage:
CREATE INDEX toasty ON toast_bug(buggy);
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
INSERT INTO toast_bug SELECT repeat('a', 2200);
-- Should not get false positive report of corruption:
SELECT bt_index_check('toasty', true);
bt_index_check
----------------
(1 row)
-- cleanup
DROP TABLE bttest_a;
DROP TABLE bttest_b;
DROP TABLE bttest_multi;
DROP TABLE delete_test_table;
DROP TABLE toast_bug;
DROP OWNED BY bttest_role; -- permissions
DROP ROLE bttest_role;
......@@ -88,10 +88,26 @@ DELETE FROM delete_test_table WHERE a > 10;
VACUUM delete_test_table;
SELECT bt_index_parent_check('delete_test_table_pkey', true);
--
-- BUG #15597: must not assume consistent input toasting state when forming
-- tuple. Bloom filter must fingerprint normalized index tuple representation.
--
CREATE TABLE toast_bug(buggy text);
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
-- pg_attribute entry for toasty.buggy will have plain storage:
CREATE INDEX toasty ON toast_bug(buggy);
-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
INSERT INTO toast_bug SELECT repeat('a', 2200);
-- Should not get false positive report of corruption:
SELECT bt_index_check('toasty', true);
-- cleanup
DROP TABLE bttest_a;
DROP TABLE bttest_b;
DROP TABLE bttest_multi;
DROP TABLE delete_test_table;
DROP TABLE toast_bug;
DROP OWNED BY bttest_role; -- permissions
DROP ROLE bttest_role;
......@@ -133,6 +133,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
static void bt_tuple_present_callback(Relation index, HeapTuple htup,
Datum *values, bool *isnull,
bool tupleIsAlive, void *checkstate);
static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
IndexTuple itup);
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
OffsetNumber offset);
static inline bool invariant_leq_offset(BtreeCheckState *state,
......@@ -908,7 +910,16 @@ bt_target_page_check(BtreeCheckState *state)
/* Fingerprint leaf page tuples (those that point to the heap) */
if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
{
IndexTuple norm;
norm = bt_normalize_tuple(state, itup);
bloom_add_element(state->filter, (unsigned char *) norm,
IndexTupleSize(norm));
/* Be tidy */
if (norm != itup)
pfree(norm);
}
/*
* * High key check *
......@@ -1672,35 +1683,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
bool *isnull, bool tupleIsAlive, void *checkstate)
{
BtreeCheckState *state = (BtreeCheckState *) checkstate;
IndexTuple itup;
IndexTuple itup, norm;
Assert(state->heapallindexed);
/*
* Generate an index tuple for fingerprinting.
*
* Index tuple formation is assumed to be deterministic, and IndexTuples
* are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
* that's ItemId metadata, which was not fingerprinted. (There will often
* be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
* but we only try to detect the absence of needed tuples, so that's
* okay.)
*
* Note that we rely on deterministic index_form_tuple() TOAST
* compression. If index_form_tuple() was ever enhanced to compress datums
* out-of-line, or otherwise varied when or how compression was applied,
* our assumption would break, leading to false positive reports of
* corruption. It's also possible that non-pivot tuples could in the
* future have alternative equivalent representations (e.g. by using the
* INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
* values as part of fingerprinting.
*/
/* Generate a normalized index tuple for fingerprinting */
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
itup->t_tid = htup->t_self;
norm = bt_normalize_tuple(state, itup);
/* Probe Bloom filter -- tuple should be present */
if (bloom_lacks_element(state->filter, (unsigned char *) itup,
IndexTupleSize(itup)))
if (bloom_lacks_element(state->filter, (unsigned char *) norm,
IndexTupleSize(norm)))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
......@@ -1714,6 +1708,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
state->heaptuplespresent++;
pfree(itup);
/* Cannot leak memory here */
if (norm != itup)
pfree(norm);
}
/*
* Normalize an index tuple for fingerprinting.
*
* In general, index tuple formation is assumed to be deterministic by
* heapallindexed verification, and IndexTuples are assumed immutable. While
* the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
* not fingerprinted. Normalization is required to compensate for corner
* cases where the determinism assumption doesn't quite work.
*
* There is currently one such case: index_form_tuple() does not try to hide
* the source TOAST state of input datums. The executor applies TOAST
* compression for heap tuples based on different criteria to the compression
* applied within btinsert()'s call to index_form_tuple(): it sometimes
* compresses more aggressively, resulting in compressed heap tuple datums but
* uncompressed corresponding index tuple datums. A subsequent heapallindexed
* verification will get a logically equivalent though bitwise unequal tuple
* from index_form_tuple(). False positive heapallindexed corruption reports
* could occur without normalizing away the inconsistency.
*
* Returned tuple is often caller's own original tuple. Otherwise, it is a
* new representation of caller's original index tuple, palloc()'d in caller's
* memory context.
*
* Note: This routine is not concerned with distinctions about the
* representation of tuples beyond those that might break heapallindexed
* verification. In particular, it won't try to normalize opclass-equal
* datums with potentially distinct representations (e.g., btree/numeric_ops
* index datums will not get their display scale normalized-away here).
* Normalization may need to be expanded to handle more cases in the future,
* though. For example, it's possible that non-pivot tuples could in the
* future have alternative logically equivalent representations due to using
* the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
*/
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
Datum normalized[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
bool toast_free[INDEX_MAX_KEYS];
bool formnewtup = false;
IndexTuple reformed;
int i;
/* Easy case: It's immediately clear that tuple has no varlena datums */
if (!IndexTupleHasVarwidths(itup))
return itup;
for (i = 0; i < tupleDescriptor->natts; i++)
{
Form_pg_attribute att;
att = TupleDescAttr(tupleDescriptor, i);
/* Assume untoasted/already normalized datum initially */
toast_free[i] = false;
normalized[i] = index_getattr(itup, att->attnum,
tupleDescriptor,
&isnull[i]);
if (att->attbyval || att->attlen != -1 || isnull[i])
continue;
/*
* Callers always pass a tuple that could safely be inserted into the
* index without further processing, so an external varlena header
* should never be encountered here
*/
if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
ItemPointerGetBlockNumber(&(itup->t_tid)),
ItemPointerGetOffsetNumber(&(itup->t_tid)),
RelationGetRelationName(state->rel))));
else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
{
formnewtup = true;
normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
toast_free[i] = true;
}
}
/* Easier case: Tuple has varlena datums, none of which are compressed */
if (!formnewtup)
return itup;
/*
* Hard case: Tuple had compressed varlena datums that necessitate
* creating normalized version of the tuple from uncompressed input datums
* (normalized input datums). This is rather naive, but shouldn't be
* necessary too often.
*
* Note that we rely on deterministic index_form_tuple() TOAST compression
* of normalized input.
*/
reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
reformed->t_tid = itup->t_tid;
/* Cannot leak memory here */
for (i = 0; i < tupleDescriptor->natts; i++)
if (toast_free[i])
pfree(DatumGetPointer(normalized[i]));
return reformed;
}
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment