Commit b1892aae authored by Jeff Davis's avatar Jeff Davis

Revert WAL posix_fallocate() patches.

This reverts commit 269e7808
and commit 5b571bb8.

Unfortunately, the initial patch had insufficient performance testing,
and resulted in a regression.

Per report by Thom Brown.
parent be6fcb67
......@@ -19763,8 +19763,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
......
......@@ -1230,7 +1230,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
......
......@@ -3353,10 +3353,11 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
char *zbuffer;
XLogSegNo installed_segno;
int max_advance;
int fd;
bool zero_fill = true;
int nbytes;
XLogFilePath(path, ThisTimeLineID, logsegno);
......@@ -3390,6 +3391,16 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
unlink(tmppath);
/*
* Allocate a buffer full of zeros. This is done before opening the file
* so that we don't leak the file descriptor if palloc fails.
*
* Note: palloc zbuffer, instead of just using a local char array, to
* ensure it is reasonably well-aligned; this may save a few cycles
* transferring data to the kernel.
*/
zbuffer = (char *) palloc0(XLOG_BLCKSZ);
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
S_IRUSR | S_IWUSR);
......@@ -3398,66 +3409,38 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", tmppath)));
#ifdef HAVE_POSIX_FALLOCATE
/*
* If posix_fallocate() is available and succeeds, then the file is
* properly allocated and we don't need to zero-fill it (which is less
* efficient). In case of an error, fall back to writing zeros, because on
* some platforms posix_fallocate() is available but will not always
* succeed in cases where zero-filling will.
* Zero-fill the file. We have to do this the hard way to ensure that all
* the file space has really been allocated --- on platforms that allow
* "holes" in files, just seeking to the end doesn't allocate intermediate
* space. This way, we know that we have all the space and (after the
* fsync below) that all the indirect blocks are down on disk. Therefore,
* fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
* log file.
*/
if (posix_fallocate(fd, 0, XLogSegSize) == 0)
zero_fill = false;
#endif /* HAVE_POSIX_FALLOCATE */
if (zero_fill)
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
{
/*
* Allocate a buffer full of zeros. This is done before opening the
* file so that we don't leak the file descriptor if palloc fails.
*
* Note: palloc zbuffer, instead of just using a local char array, to
* ensure it is reasonably well-aligned; this may save a few cycles
* transferring data to the kernel.
*/
char *zbuffer = (char *) palloc0(XLOG_BLCKSZ);
int nbytes;
/*
* Zero-fill the file. We have to do this the hard way to ensure that
* all the file space has really been allocated --- on platforms that
* allow "holes" in files, just seeking to the end doesn't allocate
* intermediate space. This way, we know that we have all the space
* and (after the fsync below) that all the indirect blocks are down on
* disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync
* future writes to the log file.
*/
for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
errno = 0;
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
{
errno = 0;
if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
{
int save_errno = errno;
int save_errno = errno;
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
/*
* If we fail to make the file, delete it to release disk space
*/
unlink(tmppath);
close(fd);
close(fd);
/* if write didn't set errno, assume no disk space */
errno = save_errno ? save_errno : ENOSPC;
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
tmppath)));
}
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
pfree(zbuffer);
}
pfree(zbuffer);
if (pg_fsync(fd) != 0)
{
......
......@@ -369,9 +369,6 @@
/* Define to 1 if you have the `posix_fadvise' function. */
#undef HAVE_POSIX_FADVISE
/* Define to 1 if you have the `posix_fallocate' function. */
#undef HAVE_POSIX_FALLOCATE
/* Define to 1 if you have the POSIX signal interface. */
#undef HAVE_POSIX_SIGNALS
......
......@@ -276,9 +276,6 @@
/* Define to 1 if you have the <poll.h> header file. */
/* #undef HAVE_POLL_H */
/* Define to 1 if you have the `posix_fallocate' function. */
/* #undef HAVE_POSIX_FALLOCATE */
/* Define to 1 if you have the POSIX signal interface. */
/* #undef HAVE_POSIX_SIGNALS */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment