Commit b1892aae authored by Jeff Davis

Revert WAL posix_fallocate() patches.

This reverts commit 269e7808
and commit 5b571bb8.

Unfortunately, the initial patch had insufficient performance testing,
and resulted in a regression.

Per report by Thom Brown.
parent be6fcb67
...@@ -19763,8 +19763,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` ...@@ -19763,8 +19763,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
......
...@@ -1230,7 +1230,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG ...@@ -1230,7 +1230,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
LIBS_including_readline="$LIBS" LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_REPLACE_FUNCS(fseeko) AC_REPLACE_FUNCS(fseeko)
case $host_os in case $host_os in
......
...@@ -3353,10 +3353,11 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) ...@@ -3353,10 +3353,11 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
{ {
char path[MAXPGPATH]; char path[MAXPGPATH];
char tmppath[MAXPGPATH]; char tmppath[MAXPGPATH];
char *zbuffer;
XLogSegNo installed_segno; XLogSegNo installed_segno;
int max_advance; int max_advance;
int fd; int fd;
bool zero_fill = true; int nbytes;
XLogFilePath(path, ThisTimeLineID, logsegno); XLogFilePath(path, ThisTimeLineID, logsegno);
...@@ -3390,6 +3391,16 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) ...@@ -3390,6 +3391,16 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
unlink(tmppath); unlink(tmppath);
/*
* Allocate a buffer full of zeros. This is done before opening the file
* so that we don't leak the file descriptor if palloc fails.
*
* Note: palloc zbuffer, instead of just using a local char array, to
* ensure it is reasonably well-aligned; this may save a few cycles
* transferring data to the kernel.
*/
zbuffer = (char *) palloc0(XLOG_BLCKSZ);
/* do not use get_sync_bit() here --- want to fsync only at end of fill */ /* do not use get_sync_bit() here --- want to fsync only at end of fill */
fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR); S_IRUSR | S_IWUSR);
...@@ -3398,66 +3409,38 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) ...@@ -3398,66 +3409,38 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", tmppath))); errmsg("could not create file \"%s\": %m", tmppath)));
#ifdef HAVE_POSIX_FALLOCATE
/* /*
* If posix_fallocate() is available and succeeds, then the file is * Zero-fill the file. We have to do this the hard way to ensure that all
* properly allocated and we don't need to zero-fill it (which is less * the file space has really been allocated --- on platforms that allow
* efficient). In case of an error, fall back to writing zeros, because on * "holes" in files, just seeking to the end doesn't allocate intermediate
* some platforms posix_fallocate() is available but will not always * space. This way, we know that we have all the space and (after the
* succeed in cases where zero-filling will. * fsync below) that all the indirect blocks are down on disk. Therefore,
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
* log file.
*/ */
if (posix_fallocate(fd, 0, XLogSegSize) == 0) for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
zero_fill = false;
#endif /* HAVE_POSIX_FALLOCATE */
if (zero_fill)
{ {
/* errno = 0;
* Allocate a buffer full of zeros. This is done before opening the if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
* file so that we don't leak the file descriptor if palloc fails.
*
* Note: palloc zbuffer, instead of just using a local char array, to
* ensure it is reasonably well-aligned; this may save a few cycles
* transferring data to the kernel.
*/
char *zbuffer = (char *) palloc0(XLOG_BLCKSZ);
int nbytes;
/*
* Zero-fill the file. We have to do this the hard way to ensure that
* all the file space has really been allocated --- on platforms that
* allow "holes" in files, just seeking to the end doesn't allocate
* intermediate space. This way, we know that we have all the space
* and (after the fsync below) that all the indirect blocks are down on
* disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync
* future writes to the log file.
*/
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
{ {
errno = 0; int save_errno = errno;
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
{
int save_errno = errno;
/* /*
* If we fail to make the file, delete it to release disk space * If we fail to make the file, delete it to release disk space
*/ */
unlink(tmppath); unlink(tmppath);
close(fd); close(fd);
/* if write didn't set errno, assume no disk space */ /* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC; errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", errmsg("could not write to file \"%s\": %m", tmppath)));
tmppath)));
}
} }
pfree(zbuffer);
} }
pfree(zbuffer);
if (pg_fsync(fd) != 0) if (pg_fsync(fd) != 0)
{ {
......
...@@ -369,9 +369,6 @@ ...@@ -369,9 +369,6 @@
/* Define to 1 if you have the `posix_fadvise' function. */ /* Define to 1 if you have the `posix_fadvise' function. */
#undef HAVE_POSIX_FADVISE #undef HAVE_POSIX_FADVISE
/* Define to 1 if you have the `posix_fallocate' function. */
#undef HAVE_POSIX_FALLOCATE
/* Define to 1 if you have the POSIX signal interface. */ /* Define to 1 if you have the POSIX signal interface. */
#undef HAVE_POSIX_SIGNALS #undef HAVE_POSIX_SIGNALS
......
...@@ -276,9 +276,6 @@ ...@@ -276,9 +276,6 @@
/* Define to 1 if you have the <poll.h> header file. */ /* Define to 1 if you have the <poll.h> header file. */
/* #undef HAVE_POLL_H */ /* #undef HAVE_POLL_H */
/* Define to 1 if you have the `posix_fallocate' function. */
/* #undef HAVE_POSIX_FALLOCATE */
/* Define to 1 if you have the POSIX signal interface. */ /* Define to 1 if you have the POSIX signal interface. */
/* #undef HAVE_POSIX_SIGNALS */ /* #undef HAVE_POSIX_SIGNALS */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment