Commit 1b02be21 authored by Teodor Sigaev

Fsync directory after creating or unlinking file.

If a file was created or deleted just before a power loss, it's possible
that the file system will lose that change. To prevent this, call fsync()
on the parent directory wherever creating or unlinking a file is critical.

Author: Michael Paquier
Reviewed-by: Ashutosh Bapat, Takayuki Tsunakawa, me
parent 1f171a18
...@@ -577,6 +577,13 @@ ShutdownCLOG(void) ...@@ -577,6 +577,13 @@ ShutdownCLOG(void)
/* Flush dirty CLOG pages to disk */ /* Flush dirty CLOG pages to disk */
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false); TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
SimpleLruFlush(ClogCtl, false); SimpleLruFlush(ClogCtl, false);
/*
* fsync pg_xact to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_xact", true);
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false); TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
} }
...@@ -589,6 +596,13 @@ CheckPointCLOG(void) ...@@ -589,6 +596,13 @@ CheckPointCLOG(void)
/* Flush dirty CLOG pages to disk */ /* Flush dirty CLOG pages to disk */
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true); TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
SimpleLruFlush(ClogCtl, true); SimpleLruFlush(ClogCtl, true);
/*
* fsync pg_xact to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_xact", true);
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true); TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
} }
......
...@@ -746,6 +746,12 @@ ShutdownCommitTs(void) ...@@ -746,6 +746,12 @@ ShutdownCommitTs(void)
{ {
/* Flush dirty CommitTs pages to disk */ /* Flush dirty CommitTs pages to disk */
SimpleLruFlush(CommitTsCtl, false); SimpleLruFlush(CommitTsCtl, false);
/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_commit_ts", true);
} }
/* /*
...@@ -756,6 +762,12 @@ CheckPointCommitTs(void) ...@@ -756,6 +762,12 @@ CheckPointCommitTs(void)
{ {
/* Flush dirty CommitTs pages to disk */ /* Flush dirty CommitTs pages to disk */
SimpleLruFlush(CommitTsCtl, true); SimpleLruFlush(CommitTsCtl, true);
/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
*/
fsync_fname("pg_commit_ts", true);
} }
/* /*
......
...@@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) ...@@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
} }
LWLockRelease(TwoPhaseStateLock); LWLockRelease(TwoPhaseStateLock);
/*
* Unconditionally flush the parent directory to make any information
* durable on disk. Two-phase files could have been removed, and those
* removals need to be made persistent, as do any files newly created
* since the last checkpoint.
*/
fsync_fname(TWOPHASE_DIR, true);
TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE(); TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
if (log_checkpoints && serialized_xacts > 0) if (log_checkpoints && serialized_xacts > 0)
......
...@@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, ...@@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
if (!find_free) if (!find_free)
{ {
/* Force installation: get rid of any pre-existing segment file */ /* Force installation: get rid of any pre-existing segment file */
unlink(path); durable_unlink(path, DEBUG1);
} }
else else
{ {
...@@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) ...@@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
path))); path)));
return; return;
} }
rc = unlink(newpath); rc = durable_unlink(newpath, LOG);
#else #else
rc = unlink(path); rc = durable_unlink(path, LOG);
#endif #endif
if (rc != 0) if (rc != 0)
{ {
ereport(LOG, /* Message already logged by durable_unlink() */
(errcode_for_file_access(),
errmsg("could not remove old transaction log file \"%s\": %m",
path)));
return; return;
} }
CheckpointStats.ckpt_segs_removed++; CheckpointStats.ckpt_segs_removed++;
...@@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) ...@@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", errmsg("could not read file \"%s\": %m",
BACKUP_LABEL_FILE))); BACKUP_LABEL_FILE)));
if (unlink(BACKUP_LABEL_FILE) != 0) durable_unlink(BACKUP_LABEL_FILE, ERROR);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
/* /*
* Remove tablespace_map file if present, it is created only if there * Remove tablespace_map file if present, it is created only if there
* are tablespaces. * are tablespaces.
*/ */
unlink(TABLESPACE_MAP); durable_unlink(TABLESPACE_MAP, DEBUG1);
} }
PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive)); PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
} }
......
...@@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel) ...@@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel)
return 0; return 0;
} }
/*
 * durable_unlink -- delete a file and make the deletion crash-safe
 *
 * The file is first removed with unlink(); afterwards the directory that
 * contained it is fsync'ed through fsync_parent_path(), so that the removal
 * itself persists should the system crash after this routine has returned.
 *
 * fname is the path of the file to remove.  elevel is the severity used to
 * report a failed unlink(), and it is also handed to fsync_parent_path().
 *
 * Returns 0 when both the removal and the directory fsync succeeded, and -1
 * otherwise.  Callers must not rely on errno after return.
 */
int
durable_unlink(const char *fname, int elevel)
{
	int			unlink_rc = unlink(fname);

	if (unlink_rc >= 0)
	{
		/*
		 * The directory entry is gone; sync the parent directory so that
		 * the removal is persistent.
		 */
		return (fsync_parent_path(fname, elevel) == 0) ? 0 : -1;
	}

	/* Removal failed: report it at the level the caller asked for. */
	ereport(elevel,
			(errcode_for_file_access(),
			 errmsg("could not remove file \"%s\": %m",
					fname)));
	return -1;
}
/* /*
* durable_link_or_rename -- rename a file in a durable manner. * durable_link_or_rename -- rename a file in a durable manner.
* *
......
...@@ -119,6 +119,7 @@ extern int pg_fdatasync(int fd); ...@@ -119,6 +119,7 @@ extern int pg_fdatasync(int fd);
extern void pg_flush_data(int fd, off_t offset, off_t amount); extern void pg_flush_data(int fd, off_t offset, off_t amount);
extern void fsync_fname(const char *fname, bool isdir); extern void fsync_fname(const char *fname, bool isdir);
extern int durable_rename(const char *oldfile, const char *newfile, int loglevel); extern int durable_rename(const char *oldfile, const char *newfile, int loglevel);
extern int durable_unlink(const char *fname, int loglevel);
extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel); extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
extern void SyncDataDirectory(void); extern void SyncDataDirectory(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment