Commit 1d4a0ab1 authored by Andres Freund's avatar Andres Freund

Avoid unlikely data-loss scenarios due to rename() without fsync.

Renaming a file using rename(2) is not guaranteed to be durable in face
of crashes. Use the previously added durable_rename()/durable_link_or_rename()
in various places where we previously just renamed files.

Most of the changed call sites are arguably not critical, but it seems
better to err on the side of too much durability.  The most prominent
known case where the previously missing fsyncs could cause data loss is
crashes at the end of a checkpoint. After the actual checkpoint has been
performed, old WAL files are recycled. When they're filled, their
contents are fdatasynced, but we did not fsync the containing
directory. An OS/hardware crash in an unfortunate moment could then end
up leaving that file with its old name, but new content; WAL replay
would thus not replay it.

Reported-By: Tomas Vondra
Author: Michael Paquier, Tomas Vondra, Andres Freund
Discussion: 56583BDD.9060302@2ndquadrant.com
Backpatch: All supported branches
parent 606e0f98
...@@ -741,11 +741,7 @@ pgss_shmem_shutdown(int code, Datum arg) ...@@ -741,11 +741,7 @@ pgss_shmem_shutdown(int code, Datum arg)
/* /*
* Rename file into place, so we atomically replace any old one. * Rename file into place, so we atomically replace any old one.
*/ */
if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0) (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not rename pg_stat_statement file \"%s\": %m",
PGSS_DUMP_FILE ".tmp")));
/* Unlink query-texts file; it's not needed while shutdown */ /* Unlink query-texts file; it's not needed while shutdown */
unlink(PGSS_TEXT_FILE); unlink(PGSS_TEXT_FILE);
......
...@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, ...@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
TLHistoryFilePath(path, newTLI); TLHistoryFilePath(path, newTLI);
/* /*
* Prefer link() to rename() here just to be really sure that we don't * Perform the rename using link if available, paranoidly trying to avoid
* overwrite an existing file. However, there shouldn't be one, so * overwriting an existing file (there shouldn't be one).
* rename() is an acceptable substitute except for the truly paranoid.
*/ */
#if HAVE_WORKING_LINK durable_link_or_rename(tmppath, path, ERROR);
if (link(tmppath, path) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not link file \"%s\" to \"%s\": %m",
tmppath, path)));
unlink(tmppath);
#else
if (rename(tmppath, path) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
tmppath, path)));
#endif
/* The history file can be archived immediately. */ /* The history file can be archived immediately. */
if (XLogArchivingActive()) if (XLogArchivingActive())
...@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size) ...@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
TLHistoryFilePath(path, tli); TLHistoryFilePath(path, tli);
/* /*
* Prefer link() to rename() here just to be really sure that we don't * Perform the rename using link if available, paranoidly trying to avoid
* overwrite an existing logfile. However, there shouldn't be one, so * overwriting an existing file (there shouldn't be one).
* rename() is an acceptable substitute except for the truly paranoid.
*/ */
#if HAVE_WORKING_LINK durable_link_or_rename(tmppath, path, ERROR);
if (link(tmppath, path) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not link file \"%s\" to \"%s\": %m",
tmppath, path)));
unlink(tmppath);
#else
if (rename(tmppath, path) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
tmppath, path)));
#endif
} }
/* /*
......
...@@ -3299,34 +3299,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, ...@@ -3299,34 +3299,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
} }
/* /*
* Prefer link() to rename() here just to be really sure that we don't * Perform the rename using link if available, paranoidly trying to avoid
* overwrite an existing logfile. However, there shouldn't be one, so * overwriting an existing file (there shouldn't be one).
* rename() is an acceptable substitute except for the truly paranoid.
*/ */
#if HAVE_WORKING_LINK if (durable_link_or_rename(tmppath, path, LOG) != 0)
if (link(tmppath, path) < 0)
{ {
if (use_lock) if (use_lock)
LWLockRelease(ControlFileLock); LWLockRelease(ControlFileLock);
ereport(LOG, /* durable_link_or_rename already emitted log message */
(errcode_for_file_access(),
errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
tmppath, path)));
return false;
}
unlink(tmppath);
#else
if (rename(tmppath, path) < 0)
{
if (use_lock)
LWLockRelease(ControlFileLock);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
tmppath, path)));
return false; return false;
} }
#endif
if (use_lock) if (use_lock)
LWLockRelease(ControlFileLock); LWLockRelease(ControlFileLock);
...@@ -5339,11 +5321,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) ...@@ -5339,11 +5321,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
* re-enter archive recovery mode in a subsequent crash. * re-enter archive recovery mode in a subsequent crash.
*/ */
unlink(RECOVERY_COMMAND_DONE); unlink(RECOVERY_COMMAND_DONE);
if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
ereport(LOG, ereport(LOG,
(errmsg("archive recovery complete"))); (errmsg("archive recovery complete")));
...@@ -6190,7 +6168,7 @@ StartupXLOG(void) ...@@ -6190,7 +6168,7 @@ StartupXLOG(void)
if (stat(TABLESPACE_MAP, &st) == 0) if (stat(TABLESPACE_MAP, &st) == 0)
{ {
unlink(TABLESPACE_MAP_OLD); unlink(TABLESPACE_MAP_OLD);
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0) if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
ereport(LOG, ereport(LOG,
(errmsg("ignoring file \"%s\" because no file \"%s\" exists", (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
TABLESPACE_MAP, BACKUP_LABEL_FILE), TABLESPACE_MAP, BACKUP_LABEL_FILE),
...@@ -6553,11 +6531,7 @@ StartupXLOG(void) ...@@ -6553,11 +6531,7 @@ StartupXLOG(void)
if (haveBackupLabel) if (haveBackupLabel)
{ {
unlink(BACKUP_LABEL_OLD); unlink(BACKUP_LABEL_OLD);
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0) durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
} }
/* /*
...@@ -6570,11 +6544,7 @@ StartupXLOG(void) ...@@ -6570,11 +6544,7 @@ StartupXLOG(void)
if (haveTblspcMap) if (haveTblspcMap)
{ {
unlink(TABLESPACE_MAP_OLD); unlink(TABLESPACE_MAP_OLD);
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0) durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, FATAL);
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
} }
/* Check that the GUCs used to generate the WAL allow recovery */ /* Check that the GUCs used to generate the WAL allow recovery */
...@@ -7351,11 +7321,7 @@ StartupXLOG(void) ...@@ -7351,11 +7321,7 @@ StartupXLOG(void)
*/ */
XLogArchiveCleanup(partialfname); XLogArchiveCleanup(partialfname);
if (rename(origpath, partialpath) != 0) durable_rename(origpath, partialpath, ERROR);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
origpath, partialpath)));
XLogArchiveNotify(partialfname); XLogArchiveNotify(partialfname);
} }
} }
...@@ -10911,7 +10877,7 @@ CancelBackup(void) ...@@ -10911,7 +10877,7 @@ CancelBackup(void)
/* remove leftover file from previously canceled backup if it exists */ /* remove leftover file from previously canceled backup if it exists */
unlink(BACKUP_LABEL_OLD); unlink(BACKUP_LABEL_OLD);
if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0) if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) != 0)
{ {
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
...@@ -10934,7 +10900,7 @@ CancelBackup(void) ...@@ -10934,7 +10900,7 @@ CancelBackup(void)
/* remove leftover file from previously canceled backup if it exists */ /* remove leftover file from previously canceled backup if it exists */
unlink(TABLESPACE_MAP_OLD); unlink(TABLESPACE_MAP_OLD);
if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0) if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
{ {
ereport(LOG, ereport(LOG,
(errmsg("online backup mode canceled"), (errmsg("online backup mode canceled"),
......
...@@ -470,11 +470,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname) ...@@ -470,11 +470,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname)
reload = true; reload = true;
} }
if (rename(path, xlogfpath) < 0) durable_rename(path, xlogfpath, ERROR);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
path, xlogfpath)));
/* /*
* Create .done file forcibly to prevent the restored segment from being * Create .done file forcibly to prevent the restored segment from being
...@@ -580,12 +576,7 @@ XLogArchiveForceDone(const char *xlog) ...@@ -580,12 +576,7 @@ XLogArchiveForceDone(const char *xlog)
StatusFilePath(archiveReady, xlog, ".ready"); StatusFilePath(archiveReady, xlog, ".ready");
if (stat(archiveReady, &stat_buf) == 0) if (stat(archiveReady, &stat_buf) == 0)
{ {
if (rename(archiveReady, archiveDone) < 0) (void) durable_rename(archiveReady, archiveDone, WARNING);
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
archiveReady, archiveDone)));
return; return;
} }
......
...@@ -728,9 +728,5 @@ pgarch_archiveDone(char *xlog) ...@@ -728,9 +728,5 @@ pgarch_archiveDone(char *xlog)
StatusFilePath(rlogready, xlog, ".ready"); StatusFilePath(rlogready, xlog, ".ready");
StatusFilePath(rlogdone, xlog, ".done"); StatusFilePath(rlogdone, xlog, ".done");
if (rename(rlogready, rlogdone) < 0) (void) durable_rename(rlogready, rlogdone, WARNING);
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
rlogready, rlogdone)));
} }
...@@ -604,29 +604,10 @@ CheckPointReplicationOrigin(void) ...@@ -604,29 +604,10 @@ CheckPointReplicationOrigin(void)
tmppath))); tmppath)));
} }
/* fsync the temporary file */
if (pg_fsync(tmpfd) != 0)
{
CloseTransientFile(tmpfd);
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
tmppath)));
}
CloseTransientFile(tmpfd); CloseTransientFile(tmpfd);
/* rename to permanent file, fsync file and directory */ /* fsync, rename to permanent file, fsync file and directory */
if (rename(tmppath, path) != 0) durable_rename(tmppath, path, PANIC);
{
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
tmppath, path)));
}
fsync_fname(path, false);
fsync_fname("pg_logical", true);
} }
/* /*
......
...@@ -7037,11 +7037,7 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt) ...@@ -7037,11 +7037,7 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt)
* at worst it can lose the parameters set by last ALTER SYSTEM * at worst it can lose the parameters set by last ALTER SYSTEM
* command. * command.
*/ */
if (rename(AutoConfTmpFileName, AutoConfFileName) < 0) durable_rename(AutoConfTmpFileName, AutoConfFileName, ERROR);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
AutoConfTmpFileName, AutoConfFileName)));
} }
PG_CATCH(); PG_CATCH();
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment