Commit 2cc41acd authored by Kevin Grittner's avatar Kevin Grittner

Fix hash index vs "snapshot too old" problems

Hash indexes are not WAL-logged, and so do not maintain the LSN of
index pages.  Since the "snapshot too old" feature counts on
detecting error conditions using the LSN of a table and all indexes
on it, this makes it impossible to safely do early vacuuming on any
table with a hash index, so add this to the tests for whether the
xid used to vacuum a table can be adjusted based on
old_snapshot_threshold.

While at it, add a paragraph to the docs for old_snapshot_threshold
which specifically mentions this and other aspects of the feature
which may otherwise surprise users.

Problem reported and patch reviewed by Amit Kapila
parent 9b66aa00
...@@ -2077,6 +2077,19 @@ include_dir 'conf.d' ...@@ -2077,6 +2077,19 @@ include_dir 'conf.d'
allowed, please note that in many workloads extreme bloat or allowed, please note that in many workloads extreme bloat or
transaction ID wraparound may occur in much shorter time frames. transaction ID wraparound may occur in much shorter time frames.
</para> </para>
<para>
This setting does not attempt to guarantee that an error will be
generated under any particular circumstances. In fact, if the
correct results can be generated from (for example) a cursor which
has materialized a result set, no error will be generated even if the
underlying rows in the referenced table have been vacuumed away.
Some tables cannot safely be vacuumed early, and so will not be
affected by this setting. Examples include system catalogs and any
table which has a hash index. For such tables this setting will
neither reduce bloat nor create a possibility of a <literal>snapshot
too old</> error on scanning.
</para>
</listitem> </listitem>
</varlistentry> </varlistentry>
</variablelist> </variablelist>
......
...@@ -279,7 +279,6 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir) ...@@ -279,7 +279,6 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
buf = so->hashso_curbuf; buf = so->hashso_curbuf;
Assert(BufferIsValid(buf)); Assert(BufferIsValid(buf));
page = BufferGetPage(buf); page = BufferGetPage(buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
maxoffnum = PageGetMaxOffsetNumber(page); maxoffnum = PageGetMaxOffsetNumber(page);
for (offnum = ItemPointerGetOffsetNumber(current); for (offnum = ItemPointerGetOffsetNumber(current);
offnum <= maxoffnum; offnum <= maxoffnum;
......
...@@ -189,7 +189,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) ...@@ -189,7 +189,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
/* Read the metapage */ /* Read the metapage */
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
page = BufferGetPage(metabuf); page = BufferGetPage(metabuf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
metap = HashPageGetMeta(page); metap = HashPageGetMeta(page);
/* /*
...@@ -243,7 +242,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) ...@@ -243,7 +242,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
/* Fetch the primary bucket page for the bucket */ /* Fetch the primary bucket page for the bucket */
buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
page = BufferGetPage(buf); page = BufferGetPage(buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = (HashPageOpaque) PageGetSpecialPointer(page); opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == bucket); Assert(opaque->hasho_bucket == bucket);
...@@ -350,7 +348,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) ...@@ -350,7 +348,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
_hash_readnext(rel, &buf, &page, &opaque); _hash_readnext(rel, &buf, &page, &opaque);
if (BufferIsValid(buf)) if (BufferIsValid(buf))
{ {
TestForOldSnapshot(scan->xs_snapshot, rel, page);
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
offnum = _hash_binsearch(page, so->hashso_sk_hash); offnum = _hash_binsearch(page, so->hashso_sk_hash);
} }
...@@ -392,7 +389,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) ...@@ -392,7 +389,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
_hash_readprev(rel, &buf, &page, &opaque); _hash_readprev(rel, &buf, &page, &opaque);
if (BufferIsValid(buf)) if (BufferIsValid(buf))
{ {
TestForOldSnapshot(scan->xs_snapshot, rel, page);
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
offnum = _hash_binsearch_last(page, so->hashso_sk_hash); offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
} }
......
...@@ -5312,6 +5312,52 @@ RelationIdIsInInitFile(Oid relationId) ...@@ -5312,6 +5312,52 @@ RelationIdIsInInitFile(Oid relationId)
return RelationSupportsSysCache(relationId); return RelationSupportsSysCache(relationId);
} }
/*
 * Tells whether any index for the relation is unlogged.
 *
 * Any index using the hash AM is implicitly unlogged, since the hash AM
 * does not write WAL and therefore does not maintain page LSNs.
 *
 * Note: There doesn't seem to be any way to have an unlogged index attached
 * to a permanent table except to create a hash index, but it seems best to
 * keep this general so that it returns sensible results even when they seem
 * obvious (like for an unlogged table) and to handle possible future unlogged
 * indexes on permanent tables.
 *
 * Caller must hold a lock sufficient to keep the index list stable; each
 * index's pg_class entry is fetched from the syscache and released before
 * returning.
 */
bool
RelationHasUnloggedIndex(Relation rel)
{
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	bool		result = false;

	indexoidlist = RelationGetIndexList(rel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);
		HeapTuple	tp;
		Form_pg_class reltup;

		tp = SearchSysCache1(RELOID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(tp))
			elog(ERROR, "cache lookup failed for relation %u", indexoid);
		reltup = (Form_pg_class) GETSTRUCT(tp);

		/*
		 * Explicitly unlogged indexes, and hash indexes (which are never
		 * WAL-logged regardless of persistence), both count.
		 */
		if (reltup->relpersistence == RELPERSISTENCE_UNLOGGED ||
			reltup->relam == HASH_AM_OID)
			result = true;

		/* Release the cache entry before breaking out of the loop. */
		ReleaseSysCache(tp);

		if (result)
			break;
	}

	list_free(indexoidlist);

	return result;
}
/* /*
* Invalidate (remove) the init file during commit of a transaction that * Invalidate (remove) the init file during commit of a transaction that
* changed one or more of the relation cache entries that are kept in the * changed one or more of the relation cache entries that are kept in the
......
...@@ -1590,7 +1590,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, ...@@ -1590,7 +1590,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
&& old_snapshot_threshold >= 0 && old_snapshot_threshold >= 0
&& RelationNeedsWAL(relation) && RelationNeedsWAL(relation)
&& !IsCatalogRelation(relation) && !IsCatalogRelation(relation)
&& !RelationIsAccessibleInLogicalDecoding(relation)) && !RelationIsAccessibleInLogicalDecoding(relation)
&& !RelationHasUnloggedIndex(relation))
{ {
int64 ts = GetSnapshotCurrentTimestamp(); int64 ts = GetSnapshotCurrentTimestamp();
TransactionId xlimit = recentXmin; TransactionId xlimit = recentXmin;
......
...@@ -505,5 +505,6 @@ typedef struct ViewOptions ...@@ -505,5 +505,6 @@ typedef struct ViewOptions
/* routines in utils/cache/relcache.c */ /* routines in utils/cache/relcache.c */
extern void RelationIncrementReferenceCount(Relation rel); extern void RelationIncrementReferenceCount(Relation rel);
extern void RelationDecrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel);
extern bool RelationHasUnloggedIndex(Relation rel);
#endif /* REL_H */ #endif /* REL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment