Commit f3b5565d authored by Tom Lane

Use a safer method for determining whether relcache init file is stale.

When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry.  The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events.  In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session.  This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.

Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke.  Remarkably, this has been broken
since 2002 (in commit 78634044), but we had
never seen a reproducible test case until recently.  If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.

To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file.  This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search.  It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.

Back-patch to all supported branches.
parent 1497369e
...@@ -507,10 +507,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) ...@@ -507,10 +507,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
(void) GetCurrentCommandId(true); (void) GetCurrentCommandId(true);
/* /*
* If the relation being invalidated is one of those cached in the * If the relation being invalidated is one of those cached in the local
* relcache init file, mark that we need to zap that file at commit. * relcache init file, mark that we need to zap that file at commit.
* (Note: perhaps it would be better if this code were a bit more
* decoupled from the knowledge that the init file contains exactly those
* non-shared rels used in catalog caches.)
*/ */
if (RelationIdIsInInitFile(relId)) if (OidIsValid(dbId) && RelationSupportsSysCache(relId))
transInvalInfo->RelcacheInitFileInval = true; transInvalInfo->RelcacheInitFileInval = true;
} }
......
...@@ -133,14 +133,6 @@ bool criticalSharedRelcachesBuilt = false; ...@@ -133,14 +133,6 @@ bool criticalSharedRelcachesBuilt = false;
*/ */
static long relcacheInvalsReceived = 0L; static long relcacheInvalsReceived = 0L;
/*
* This list remembers the OIDs of the non-shared relations cached in the
* database's local relcache init file. Note that there is no corresponding
* list for the shared relcache init file, for reasons explained in the
* comments for RelationCacheInitFileRemove.
*/
static List *initFileRelationIds = NIL;
/* /*
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
* cleanup work. This list intentionally has limited size; if it overflows, * cleanup work. This list intentionally has limited size; if it overflows,
...@@ -3489,9 +3481,6 @@ RelationCacheInitializePhase3(void) ...@@ -3489,9 +3481,6 @@ RelationCacheInitializePhase3(void)
*/ */
InitCatalogCachePhase2(); InitCatalogCachePhase2();
/* reset initFileRelationIds list; we'll fill it during write */
initFileRelationIds = NIL;
/* now write the files */ /* now write the files */
write_relcache_init_file(true); write_relcache_init_file(true);
write_relcache_init_file(false); write_relcache_init_file(false);
...@@ -4915,10 +4904,6 @@ load_relcache_init_file(bool shared) ...@@ -4915,10 +4904,6 @@ load_relcache_init_file(bool shared)
for (relno = 0; relno < num_rels; relno++) for (relno = 0; relno < num_rels; relno++)
{ {
RelationCacheInsert(rels[relno], false); RelationCacheInsert(rels[relno], false);
/* also make a list of their OIDs, for RelationIdIsInInitFile */
if (!shared)
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
initFileRelationIds);
} }
pfree(rels); pfree(rels);
...@@ -4955,9 +4940,15 @@ write_relcache_init_file(bool shared) ...@@ -4955,9 +4940,15 @@ write_relcache_init_file(bool shared)
int magic; int magic;
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry; RelIdCacheEnt *idhentry;
MemoryContext oldcxt;
int i; int i;
/*
* If we have already received any relcache inval events, there's no
* chance of succeeding so we may as well skip the whole thing.
*/
if (relcacheInvalsReceived != 0L)
return;
/* /*
* We must write a temporary file and rename it into place. Otherwise, * We must write a temporary file and rename it into place. Otherwise,
* another backend starting at about the same time might crash trying to * another backend starting at about the same time might crash trying to
...@@ -5017,6 +5008,16 @@ write_relcache_init_file(bool shared) ...@@ -5017,6 +5008,16 @@ write_relcache_init_file(bool shared)
if (relform->relisshared != shared) if (relform->relisshared != shared)
continue; continue;
/*
* Ignore if not supposed to be in init file. We can allow any shared
* relation that's been loaded so far to be in the shared init file,
* but unshared relations must be used for catalog caches. (Note: if
* you want to change the criterion for rels to be kept in the init
* file, see also inval.c.)
*/
if (!shared && !RelationSupportsSysCache(RelationGetRelid(rel)))
continue;
/* first write the relcache entry proper */ /* first write the relcache entry proper */
write_item(rel, sizeof(RelationData), fp); write_item(rel, sizeof(RelationData), fp);
...@@ -5073,15 +5074,6 @@ write_relcache_init_file(bool shared) ...@@ -5073,15 +5074,6 @@ write_relcache_init_file(bool shared)
relform->relnatts * sizeof(int16), relform->relnatts * sizeof(int16),
fp); fp);
} }
/* also make a list of their OIDs, for RelationIdIsInInitFile */
if (!shared)
{
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
initFileRelationIds);
MemoryContextSwitchTo(oldcxt);
}
} }
if (FreeFile(fp)) if (FreeFile(fp))
...@@ -5140,21 +5132,6 @@ write_item(const void *data, Size len, FILE *fp) ...@@ -5140,21 +5132,6 @@ write_item(const void *data, Size len, FILE *fp)
elog(FATAL, "could not write init file"); elog(FATAL, "could not write init file");
} }
/*
 * RelationIdIsInInitFile
 *		Report whether the relation with the given OID is one of those
 *		recorded as being stored in the local relcache init file.
 *
 * The answer comes from a membership test against initFileRelationIds.
 *
 * Caveat: this effectively assumes that every backend running in a
 * database would choose to store the same set of relations in the init
 * file.  If that were not so, there would be cases where we fail to notice
 * that an init file invalidation is needed.  That does not seem likely to
 * be a problem in practice.
 */
bool
RelationIdIsInInitFile(Oid relationId)
{
	bool		is_listed;

	is_listed = list_member_oid(initFileRelationIds, relationId);

	return is_listed;
}
/* /*
* Invalidate (remove) the init file during commit of a transaction that * Invalidate (remove) the init file during commit of a transaction that
* changed one or more of the relation cache entries that are kept in the * changed one or more of the relation cache entries that are kept in the
......
...@@ -867,17 +867,23 @@ static const struct cachedesc cacheinfo[] = { ...@@ -867,17 +867,23 @@ static const struct cachedesc cacheinfo[] = {
} }
}; };
static CatCache *SysCache[ #define SysCacheSize ((int) lengthof(cacheinfo))
lengthof(cacheinfo)];
static int SysCacheSize = lengthof(cacheinfo); static CatCache *SysCache[SysCacheSize];
static bool CacheInitialized = false; static bool CacheInitialized = false;
static Oid SysCacheRelationOid[ /* Sorted array of OIDs of tables that have caches on them */
lengthof(cacheinfo)]; static Oid SysCacheRelationOid[SysCacheSize];
static int SysCacheRelationOidSize; static int SysCacheRelationOidSize;
/* Sorted array of OIDs of tables and indexes used by caches */
static Oid SysCacheSupportingRelOid[SysCacheSize * 2];
static int SysCacheSupportingRelOidSize;
static int oid_compare(const void *a, const void *b); static int oid_compare(const void *a, const void *b);
/* /*
* InitCatalogCache - initialize the caches * InitCatalogCache - initialize the caches
* *
...@@ -891,11 +897,11 @@ InitCatalogCache(void) ...@@ -891,11 +897,11 @@ InitCatalogCache(void)
{ {
int cacheId; int cacheId;
int i, int i,
j = 0; j;
Assert(!CacheInitialized); Assert(!CacheInitialized);
MemSet(SysCache, 0, sizeof(SysCache)); SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0;
for (cacheId = 0; cacheId < SysCacheSize; cacheId++) for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
{ {
...@@ -908,20 +914,39 @@ InitCatalogCache(void) ...@@ -908,20 +914,39 @@ InitCatalogCache(void)
if (!PointerIsValid(SysCache[cacheId])) if (!PointerIsValid(SysCache[cacheId]))
elog(ERROR, "could not initialize cache %u (%d)", elog(ERROR, "could not initialize cache %u (%d)",
cacheinfo[cacheId].reloid, cacheId); cacheinfo[cacheId].reloid, cacheId);
/* Accumulate data for OID lists, too */
SysCacheRelationOid[SysCacheRelationOidSize++] = SysCacheRelationOid[SysCacheRelationOidSize++] =
cacheinfo[cacheId].reloid; cacheinfo[cacheId].reloid;
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
cacheinfo[cacheId].reloid;
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
cacheinfo[cacheId].indoid;
/* see comments for RelationInvalidatesSnapshotsOnly */ /* see comments for RelationInvalidatesSnapshotsOnly */
Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid)); Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid));
} }
/* Sort and dedup OIDs. */ Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid));
Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid));
/* Sort and de-dup OID arrays, so we can use binary search. */
pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize, pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
sizeof(Oid), oid_compare); sizeof(Oid), oid_compare);
for (i = 1; i < SysCacheRelationOidSize; ++i) for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
{
if (SysCacheRelationOid[i] != SysCacheRelationOid[j]) if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
SysCacheRelationOid[++j] = SysCacheRelationOid[i]; SysCacheRelationOid[++j] = SysCacheRelationOid[i];
}
SysCacheRelationOidSize = j + 1; SysCacheRelationOidSize = j + 1;
pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
sizeof(Oid), oid_compare);
for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
{
if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
}
SysCacheSupportingRelOidSize = j + 1;
CacheInitialized = true; CacheInitialized = true;
} }
...@@ -1264,6 +1289,31 @@ RelationHasSysCache(Oid relid) ...@@ -1264,6 +1289,31 @@ RelationHasSysCache(Oid relid)
return false; return false;
} }
/*
 * RelationSupportsSysCache
 *		Report whether a relation supports a system cache, that is, whether
 *		it is either a cached table or the index used for a cache.
 *
 * SysCacheSupportingRelOid[] is kept sorted and de-duplicated, so the
 * lookup is a binary search over its first SysCacheSupportingRelOidSize
 * entries.  Returns true if relid is found there, false otherwise
 * (including when the array is empty).
 */
bool
RelationSupportsSysCache(Oid relid)
{
	int			lo = 0;
	int			hi = SysCacheSupportingRelOidSize;	/* exclusive bound */

	/* Invariant: if relid is present at all, its index is in [lo, hi). */
	while (lo < hi)
	{
		int			mid = lo + (hi - lo) / 2;
		Oid			probe = SysCacheSupportingRelOid[mid];

		if (probe < relid)
			lo = mid + 1;		/* target can only be to the right */
		else if (probe > relid)
			hi = mid;			/* target can only be to the left */
		else
			return true;		/* exact match */
	}

	return false;
}
/* /*
* OID comparator for pg_qsort * OID comparator for pg_qsort
...@@ -1271,8 +1321,8 @@ RelationHasSysCache(Oid relid) ...@@ -1271,8 +1321,8 @@ RelationHasSysCache(Oid relid)
static int static int
oid_compare(const void *a, const void *b) oid_compare(const void *a, const void *b)
{ {
Oid oa = *((Oid *) a); Oid oa = *((const Oid *) a);
Oid ob = *((Oid *) b); Oid ob = *((const Oid *) b);
if (oa == ob) if (oa == ob)
return 0; return 0;
......
...@@ -116,7 +116,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, ...@@ -116,7 +116,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
/* /*
* Routines to help manage rebuilding of relcache init files * Routines to help manage rebuilding of relcache init files
*/ */
extern bool RelationIdIsInInitFile(Oid relationId);
extern void RelationCacheInitFilePreInvalidate(void); extern void RelationCacheInitFilePreInvalidate(void);
extern void RelationCacheInitFilePostInvalidate(void); extern void RelationCacheInitFilePostInvalidate(void);
extern void RelationCacheInitFileRemove(void); extern void RelationCacheInitFileRemove(void);
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "access/attnum.h" #include "access/attnum.h"
#include "access/htup.h" #include "access/htup.h"
/* we purposedly do not include utils/catcache.h here */ /* we intentionally do not include utils/catcache.h here */
/* /*
* SysCache identifiers. * SysCache identifiers.
...@@ -131,8 +131,9 @@ struct catclist; ...@@ -131,8 +131,9 @@ struct catclist;
extern struct catclist *SearchSysCacheList(int cacheId, int nkeys, extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
Datum key1, Datum key2, Datum key3, Datum key4); Datum key1, Datum key2, Datum key3, Datum key4);
extern bool RelationInvalidatesSnapshotsOnly(Oid); extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
extern bool RelationHasSysCache(Oid); extern bool RelationHasSysCache(Oid relid);
extern bool RelationSupportsSysCache(Oid relid);
/* /*
* The use of the macros below rather than direct calls to the corresponding * The use of the macros below rather than direct calls to the corresponding
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment