Commit 2ada6779 authored by Tom Lane's avatar Tom Lane

Fix race condition in relcache init file invalidation.

The previous code tried to synchronize by unlinking the init file twice,
but that doesn't actually work: it leaves a window wherein a third process
could read the already-stale init file but miss the SI messages that would
tell it the data is stale.  The result would be bizarre failures in catalog
accesses, typically "could not read block 0 in file ..." later during
startup.

Instead, hold RelCacheInitLock across both the unlink and the sending of
the SI messages.  This is more straightforward, and might even be a bit
faster since only one unlink call is needed.

This has been wrong since it was put in (in 2002!), so back-patch to all
supported releases.
parent 1bb69245
...@@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ...@@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* after we send the SI messages. See AtEOXact_Inval() * after we send the SI messages. See AtEOXact_Inval()
*/ */
if (hdr->initfileinval) if (hdr->initfileinval)
RelationCacheInitFileInvalidate(true); RelationCacheInitFilePreInvalidate();
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs); SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
if (hdr->initfileinval) if (hdr->initfileinval)
RelationCacheInitFileInvalidate(false); RelationCacheInitFilePostInvalidate();
/* And now do the callbacks */ /* And now do the callbacks */
if (isCommit) if (isCommit)
......
...@@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, ...@@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
return numSharedInvalidMessagesArray; return numSharedInvalidMessagesArray;
} }
#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
{ \
DatabasePath = GetDatabasePath(dbo, tbo); \
elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
RelationCacheInitFileInvalidate(tf); \
pfree(DatabasePath); \
}
/* /*
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit() * ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
* to process invalidation messages added to commit records. * to process invalidation messages added to commit records.
* *
* Relcache init file invalidation requires processing both * Relcache init file invalidation requires processing both
* before and after we send the SI messages. See AtEOXact_Inval() * before and after we send the SI messages. See AtEOXact_Inval()
*
* We deliberately avoid SetDatabasePath() since it is intended to be used
* only once by normal backends, so we set DatabasePath directly then
* pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
*/ */
void void
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
...@@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, ...@@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
(RelcacheInitFileInval ? " and relcache file invalidation" : "")); (RelcacheInitFileInval ? " and relcache file invalidation" : ""));
if (RelcacheInitFileInval) if (RelcacheInitFileInval)
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true); {
/*
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
* but we should not use SetDatabasePath during recovery, since it is
* intended to be used only once by normal backends. Hence, a quick
* hack: set DatabasePath directly then unset after use.
*/
DatabasePath = GetDatabasePath(dbid, tsid);
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
DatabasePath);
RelationCacheInitFilePreInvalidate();
pfree(DatabasePath);
DatabasePath = NULL;
}
SendSharedInvalidMessages(msgs, nmsgs); SendSharedInvalidMessages(msgs, nmsgs);
if (RelcacheInitFileInval) if (RelcacheInitFileInval)
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false); RelationCacheInitFilePostInvalidate();
} }
/* /*
...@@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit) ...@@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
* unless we committed. * unless we committed.
*/ */
if (transInvalInfo->RelcacheInitFileInval) if (transInvalInfo->RelcacheInitFileInval)
RelationCacheInitFileInvalidate(true); RelationCacheInitFilePreInvalidate();
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
&transInvalInfo->CurrentCmdInvalidMsgs); &transInvalInfo->CurrentCmdInvalidMsgs);
...@@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit) ...@@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
SendSharedInvalidMessages); SendSharedInvalidMessages);
if (transInvalInfo->RelcacheInitFileInval) if (transInvalInfo->RelcacheInitFileInval)
RelationCacheInitFileInvalidate(false); RelationCacheInitFilePostInvalidate();
} }
else if (transInvalInfo != NULL) else if (transInvalInfo != NULL)
{ {
......
...@@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared) ...@@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
* updated by SI message processing, but we can't be sure whether what we * updated by SI message processing, but we can't be sure whether what we
* wrote out was up-to-date.) * wrote out was up-to-date.)
* *
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so * This mustn't run concurrently with the code that unlinks an init file
* grab a serialization lock for the duration. * and sends SI messages, so grab a serialization lock for the duration.
*/ */
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
...@@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId) ...@@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
* changed one or more of the relation cache entries that are kept in the * changed one or more of the relation cache entries that are kept in the
* local init file. * local init file.
* *
* We actually need to remove the init file twice: once just before sending * To be safe against concurrent inspection or rewriting of the init file,
* the SI messages that include relcache inval for such relations, and once * we must take RelCacheInitLock, then remove the old init file, then send
* just after sending them. The unlink before ensures that a backend that's * the SI messages that include relcache inval for such relations, and then
* currently starting cannot read the now-obsolete init file and then miss * release RelCacheInitLock. This serializes the whole affair against
* the SI messages that will force it to update its relcache entries. (This * write_relcache_init_file, so that we can be sure that any other process
* works because the backend startup sequence gets into the PGPROC array before * that's concurrently trying to create a new init file won't move an
* trying to load the init file.) The unlink after is to synchronize with a * already-stale version into place after we unlink. Also, because we unlink
* backend that may currently be trying to write an init file based on data * before sending the SI messages, a backend that's currently starting cannot
* that we've just rendered invalid. Such a backend will see the SI messages, * read the now-obsolete init file and then miss the SI messages that will
* but we can't leave the init file sitting around to fool later backends. * force it to update its relcache entries. (This works because the backend
* * startup sequence gets into the sinval array before trying to load the init
* Ignore any failure to unlink the file, since it might not be there if * file.)
* no backend has been started since the last removal. *
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
* send any pending SI messages between those calls.
* *
* Notice this deals only with the local init file, not the shared init file. * Notice this deals only with the local init file, not the shared init file.
* The reason is that there can never be a "significant" change to the * The reason is that there can never be a "significant" change to the
...@@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId) ...@@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
* be invalid enough to make it necessary to remove it. * be invalid enough to make it necessary to remove it.
*/ */
void void
RelationCacheInitFileInvalidate(bool beforeSend) RelationCacheInitFilePreInvalidate(void)
{ {
char initfilename[MAXPGPATH]; char initfilename[MAXPGPATH];
snprintf(initfilename, sizeof(initfilename), "%s/%s", snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME); DatabasePath, RELCACHE_INIT_FILENAME);
if (beforeSend) LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
{
/* no interlock needed here */ if (unlink(initfilename) < 0)
unlink(initfilename);
}
else
{ {
/* /*
* We need to interlock this against write_relcache_init_file, to * The file might not be there if no backend has been started since
* guard against possibility that someone renames a new-but- * the last removal. But complain about failures other than ENOENT.
* already-obsolete init file into place just after we unlink. With * Fortunately, it's not too late to abort the transaction if we
* the interlock, it's certain that write_relcache_init_file will * can't get rid of the would-be-obsolete init file.
* notice our SI inval message before renaming into place, or else
* that we will execute second and successfully unlink the file.
*/ */
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); if (errno != ENOENT)
unlink(initfilename); ereport(ERROR,
LWLockRelease(RelCacheInitLock); (errcode_for_file_access(),
errmsg("could not remove cache file \"%s\": %m",
initfilename)));
} }
} }
void
RelationCacheInitFilePostInvalidate(void)
{
LWLockRelease(RelCacheInitLock);
}
/* /*
* Remove the init files during postmaster startup. * Remove the init files during postmaster startup.
* *
......
...@@ -98,7 +98,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, ...@@ -98,7 +98,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
* Routines to help manage rebuilding of relcache init files * Routines to help manage rebuilding of relcache init files
*/ */
extern bool RelationIdIsInInitFile(Oid relationId); extern bool RelationIdIsInInitFile(Oid relationId);
extern void RelationCacheInitFileInvalidate(bool beforeSend); extern void RelationCacheInitFilePreInvalidate(void);
extern void RelationCacheInitFilePostInvalidate(void);
extern void RelationCacheInitFileRemove(void); extern void RelationCacheInitFileRemove(void);
/* should be used only by relcache.c and catcache.c */ /* should be used only by relcache.c and catcache.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment