Commit bc34223b authored by Peter Eisentraut's avatar Peter Eisentraut

pg_basebackup pg_receivexlog: Issue fsync more carefully

Several places weren't careful about fsyncing in the way.  See 1d4a0ab1
and 606e0f98 for details about required fsyncs.

This adds a couple of functions in src/common/ that have an equivalent
in the backend: durable_rename(), fsync_parent_path()

From: Michael Paquier <michael.paquier@gmail.com>
parent bf5bb2e8
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <zlib.h> #include <zlib.h>
#endif #endif
#include "common/file_utils.h"
#include "common/string.h" #include "common/string.h"
#include "fe_utils/string_utils.h" #include "fe_utils/string_utils.h"
#include "getopt_long.h" #include "getopt_long.h"
...@@ -1196,6 +1197,10 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) ...@@ -1196,6 +1197,10 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
if (copybuf != NULL) if (copybuf != NULL)
PQfreemem(copybuf); PQfreemem(copybuf);
/* sync the resulting tar file, errors are not considered fatal */
if (strcmp(basedir, "-") != 0)
(void) fsync_fname(filename, false, progname);
} }
...@@ -1472,6 +1477,11 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) ...@@ -1472,6 +1477,11 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
if (basetablespace && writerecoveryconf) if (basetablespace && writerecoveryconf)
WriteRecoveryConf(); WriteRecoveryConf();
/*
* No data is synced here, everything is done for all tablespaces at the
* end.
*/
} }
/* /*
...@@ -1950,6 +1960,23 @@ BaseBackup(void) ...@@ -1950,6 +1960,23 @@ BaseBackup(void)
PQclear(res); PQclear(res);
PQfinish(conn); PQfinish(conn);
/*
* Make data persistent on disk once backup is completed. For tar
* format once syncing the parent directory is fine, each tar file
* created per tablespace has been already synced. In plain format,
* all the data of the base directory is synced, taking into account
* all the tablespaces. Errors are not considered fatal.
*/
if (format == 't')
{
if (strcmp(basedir, "-") != 0)
(void) fsync_fname(basedir, true, progname);
}
else
{
(void) fsync_pgdata(basedir, progname);
}
if (verbose) if (verbose)
fprintf(stderr, "%s: base backup completed\n", progname); fprintf(stderr, "%s: base backup completed\n", progname);
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "libpq-fe.h" #include "libpq-fe.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "common/file_utils.h"
/* fd and filename for currently open WAL file */ /* fd and filename for currently open WAL file */
...@@ -71,17 +72,13 @@ mark_file_as_archived(const char *basedir, const char *fname) ...@@ -71,17 +72,13 @@ mark_file_as_archived(const char *basedir, const char *fname)
return false; return false;
} }
if (fsync(fd) != 0) close(fd);
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, tmppath, strerror(errno));
close(fd);
if (fsync_fname(tmppath, false, progname) != 0)
return false; return false;
}
close(fd); if (fsync_parent_path(tmppath, progname) != 0)
return false;
return true; return true;
} }
...@@ -132,6 +129,16 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ...@@ -132,6 +129,16 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
{ {
/* File is open and ready to use */ /* File is open and ready to use */
walfile = f; walfile = f;
/*
* fsync, in case of a previous crash between padding and fsyncing the
* file.
*/
if (fsync_fname(fn, false, progname) != 0)
return false;
if (fsync_parent_path(fn, progname) != 0)
return false;
return true; return true;
} }
if (statbuf.st_size != 0) if (statbuf.st_size != 0)
...@@ -160,6 +167,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) ...@@ -160,6 +167,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
} }
free(zerobuf); free(zerobuf);
/*
* fsync WAL file and containing directory, to ensure the file is
* persistently created and zeroed. That's particularly important when
* using synchronous mode, where the file is modified and fsynced
* in-place, without a directory fsync.
*/
if (fsync_fname(fn, false, progname) != 0)
return false;
if (fsync_parent_path(fn, progname) != 0)
return false;
if (lseek(f, SEEK_SET, 0) != 0) if (lseek(f, SEEK_SET, 0) != 0)
{ {
fprintf(stderr, fprintf(stderr,
...@@ -220,10 +238,9 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) ...@@ -220,10 +238,9 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos)
snprintf(oldfn, sizeof(oldfn), "%s/%s%s", stream->basedir, current_walfile_name, stream->partial_suffix); snprintf(oldfn, sizeof(oldfn), "%s/%s%s", stream->basedir, current_walfile_name, stream->partial_suffix);
snprintf(newfn, sizeof(newfn), "%s/%s", stream->basedir, current_walfile_name); snprintf(newfn, sizeof(newfn), "%s/%s", stream->basedir, current_walfile_name);
if (rename(oldfn, newfn) != 0) if (durable_rename(oldfn, newfn, progname) != 0)
{ {
fprintf(stderr, _("%s: could not rename file \"%s\": %s\n"), /* durable_rename produced a log entry */
progname, current_walfile_name, strerror(errno));
return false; return false;
} }
} }
...@@ -341,14 +358,6 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content) ...@@ -341,14 +358,6 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
return false; return false;
} }
if (fsync(fd) != 0)
{
close(fd);
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, tmppath, strerror(errno));
return false;
}
if (close(fd) != 0) if (close(fd) != 0)
{ {
fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), fprintf(stderr, _("%s: could not close file \"%s\": %s\n"),
...@@ -359,10 +368,9 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content) ...@@ -359,10 +368,9 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
/* /*
* Now move the completed history file into place with its final name. * Now move the completed history file into place with its final name.
*/ */
if (rename(tmppath, path) < 0) if (durable_rename(tmppath, path, progname) < 0)
{ {
fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), /* durable_rename produced a log entry */
progname, tmppath, path, strerror(errno));
return false; return false;
} }
......
...@@ -34,7 +34,7 @@ static void pre_sync_fname(const char *fname, bool isdir, ...@@ -34,7 +34,7 @@ static void pre_sync_fname(const char *fname, bool isdir,
const char *progname); const char *progname);
#endif #endif
static void walkdir(const char *path, static void walkdir(const char *path,
void (*action) (const char *fname, bool isdir, const char *progname), int (*action) (const char *fname, bool isdir, const char *progname),
bool process_symlinks, const char *progname); bool process_symlinks, const char *progname);
/* /*
...@@ -120,7 +120,7 @@ fsync_pgdata(const char *pg_data, const char *progname) ...@@ -120,7 +120,7 @@ fsync_pgdata(const char *pg_data, const char *progname)
*/ */
static void static void
walkdir(const char *path, walkdir(const char *path,
void (*action) (const char *fname, bool isdir, const char *progname), int (*action) (const char *fname, bool isdir, const char *progname),
bool process_symlinks, const char *progname) bool process_symlinks, const char *progname)
{ {
DIR *dir; DIR *dir;
...@@ -228,7 +228,7 @@ pre_sync_fname(const char *fname, bool isdir, const char *progname) ...@@ -228,7 +228,7 @@ pre_sync_fname(const char *fname, bool isdir, const char *progname)
* directories on systems where that isn't allowed/required. Reports * directories on systems where that isn't allowed/required. Reports
* other errors non-fatally. * other errors non-fatally.
*/ */
void int
fsync_fname(const char *fname, bool isdir, const char *progname) fsync_fname(const char *fname, bool isdir, const char *progname)
{ {
int fd; int fd;
...@@ -256,10 +256,10 @@ fsync_fname(const char *fname, bool isdir, const char *progname) ...@@ -256,10 +256,10 @@ fsync_fname(const char *fname, bool isdir, const char *progname)
if (fd < 0) if (fd < 0)
{ {
if (errno == EACCES || (isdir && errno == EISDIR)) if (errno == EACCES || (isdir && errno == EISDIR))
return; return 0;
fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
progname, fname, strerror(errno)); progname, fname, strerror(errno));
return; return -1;
} }
returncode = fsync(fd); returncode = fsync(fd);
...@@ -269,8 +269,103 @@ fsync_fname(const char *fname, bool isdir, const char *progname) ...@@ -269,8 +269,103 @@ fsync_fname(const char *fname, bool isdir, const char *progname)
* those errors. Anything else needs to be reported. * those errors. Anything else needs to be reported.
*/ */
if (returncode != 0 && !(isdir && errno == EBADF)) if (returncode != 0 && !(isdir && errno == EBADF))
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, fname, strerror(errno)); progname, fname, strerror(errno));
return -1;
}
(void) close(fd); (void) close(fd);
return 0;
}
/*
* fsync_parent_path -- fsync the parent path of a file or directory
*
* This is aimed at making file operations persistent on disk in case of
* an OS crash or power failure.
*/
int
fsync_parent_path(const char *fname, const char *progname)
{
char parentpath[MAXPGPATH];
strlcpy(parentpath, fname, MAXPGPATH);
get_parent_directory(parentpath);
/*
* get_parent_directory() returns an empty string if the input argument is
* just a file name (see comments in path.c), so handle that as being the
* current directory.
*/
if (strlen(parentpath) == 0)
strlcpy(parentpath, ".", MAXPGPATH);
if (fsync_fname(parentpath, true, progname) != 0)
return -1;
return 0;
}
/*
* durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
*
* Wrapper around rename, similar to the backend version.
*/
int
durable_rename(const char *oldfile, const char *newfile, const char *progname)
{
int fd;
/*
* First fsync the old and target path (if it exists), to ensure that they
* are properly persistent on disk. Syncing the target file is not
* strictly necessary, but it makes it easier to reason about crashes;
* because it's then guaranteed that either source or target file exists
* after a crash.
*/
if (fsync_fname(oldfile, false, progname) != 0)
return -1;
fd = open(newfile, PG_BINARY | O_RDWR, 0);
if (fd < 0)
{
if (errno != ENOENT)
{
fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
progname, newfile, strerror(errno));
return -1;
}
}
else
{
if (fsync(fd) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, newfile, strerror(errno));
close(fd);
return -1;
}
close(fd);
}
/* Time to do the real deal... */
if (rename(oldfile, newfile) != 0)
{
fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"),
progname, oldfile, newfile, strerror(errno));
return -1;
}
/*
* To guarantee renaming the file is persistent, fsync the file with its
* new name, and its containing directory.
*/
if (fsync_fname(newfile, false, progname) != 0)
return -1;
if (fsync_parent_path(newfile, progname) != 0)
return -1;
return 0;
} }
...@@ -15,8 +15,11 @@ ...@@ -15,8 +15,11 @@
#ifndef FILE_UTILS_H #ifndef FILE_UTILS_H
#define FILE_UTILS_H #define FILE_UTILS_H
extern void fsync_fname(const char *fname, bool isdir, extern int fsync_fname(const char *fname, bool isdir,
const char *progname); const char *progname);
extern void fsync_pgdata(const char *pg_data, const char *progname); extern void fsync_pgdata(const char *pg_data, const char *progname);
extern int durable_rename(const char *oldfile, const char *newfile,
const char *progname);
extern int fsync_parent_path(const char *fname, const char *progname);
#endif /* FILE_UTILS_H */ #endif /* FILE_UTILS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment