Commit 899bd785 authored by Tom Lane

Avoid SIGBUS on Linux when a DSM memory request overruns tmpfs.

On Linux, shared memory segments created with shm_open() are backed by
swap files created in tmpfs.  If the swap file needs to be extended,
but there's no tmpfs space left, you get a very unfriendly SIGBUS trap.
To avoid this, force allocation of the full request size when we create
the segment.  This adds a few cycles, but none that we wouldn't expend
later anyway, assuming the request isn't hugely bigger than the actual
need.

Make this code #ifdef __linux__, because (a) there's not currently a
reason to think the same problem exists on other platforms, and (b)
applying posix_fallocate() to an FD created by shm_open() isn't very
portable anyway.

Back-patch to 9.4 where the DSM code came in.

Thomas Munro, per a bug report from Amul Sul

Discussion: https://postgr.es/m/1002664500.12301802.1471008223422.JavaMail.yahoo@mail.yahoo.com
parent 716ea626
...@@ -12970,7 +12970,7 @@ fi ...@@ -12970,7 +12970,7 @@ fi
LIBS_including_readline="$LIBS" LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l for ac_func in cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l
do : do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
......
...@@ -1399,7 +1399,7 @@ PGAC_FUNC_WCSTOMBS_L ...@@ -1399,7 +1399,7 @@ PGAC_FUNC_WCSTOMBS_L
LIBS_including_readline="$LIBS" LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS([cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l]) AC_CHECK_FUNCS([cbrt clock_gettime dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range utime utimes wcstombs_l])
AC_REPLACE_FUNCS(fseeko) AC_REPLACE_FUNCS(fseeko)
case $host_os in case $host_os in
......
...@@ -73,6 +73,7 @@ ...@@ -73,6 +73,7 @@
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
void **impl_private, void **mapped_address, void **impl_private, void **mapped_address,
Size *mapped_size, int elevel); Size *mapped_size, int elevel);
static int dsm_impl_posix_resize(int fd, off_t size);
#endif #endif
#ifdef USE_DSM_SYSV #ifdef USE_DSM_SYSV
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size, static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
...@@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, ...@@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
} }
request_size = st.st_size; request_size = st.st_size;
} }
else if (*mapped_size != request_size && ftruncate(fd, request_size)) else if (*mapped_size != request_size &&
dsm_impl_posix_resize(fd, request_size) != 0)
{ {
int save_errno; int save_errno;
...@@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, ...@@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
return true; return true;
} }
#endif
/*
 * Set the size of a virtual memory region associated with a file descriptor.
 * If necessary, also ensure that virtual memory is actually allocated by the
 * operating system, to avoid nasty surprises later.
 *
 * fd is the descriptor to resize; size is the new length in bytes.
 *
 * Returns zero on success.  Returns non-zero if either truncation or
 * allocation fails, and sets errno to describe the failure.
 */
static int
dsm_impl_posix_resize(int fd, off_t size)
{
	int			rc;

	/* Truncate (or extend) the file to the requested size. */
	rc = ftruncate(fd, size);

	/*
	 * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
	 * ftruncate, the file may contain a hole.  Accessing memory backed by a
	 * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
	 * is no more tmpfs space available.  So we ask tmpfs to allocate pages
	 * here, so we can fail gracefully with ENOSPC now rather than risking
	 * SIGBUS later.
	 */
#if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
	if (rc == 0)
	{
		/*
		 * Unlike ftruncate(), posix_fallocate() reports failure by returning
		 * the error number itself; it does not return -1 and does not set
		 * errno.  So all tests below must look at rc, not errno.
		 *
		 * We may get interrupted, if so just retry.
		 */
		do
		{
			rc = posix_fallocate(fd, 0, size);
		} while (rc == EINTR);

		if (rc == ENOSYS)
		{
			/*
			 * Kernel too old (< 2.6.23).  Rather than fail, just trust that
			 * we won't hit the problem (it typically doesn't show up without
			 * many-GB-sized requests, anyway).
			 */
			rc = 0;
		}

		/*
		 * The caller expects errno to be set on failure, so copy the
		 * returned error number into it.  We still return rc itself, which
		 * is non-zero exactly when the allocation failed.
		 */
		if (rc != 0)
		{
			errno = rc;
		}
	}
#endif							/* HAVE_POSIX_FALLOCATE && __linux__ */

	return rc;
}
#endif /* USE_DSM_POSIX */
#ifdef USE_DSM_SYSV #ifdef USE_DSM_SYSV
/* /*
......
...@@ -393,6 +393,9 @@ ...@@ -393,6 +393,9 @@
/* Define to 1 if you have the `posix_fadvise' function. */ /* Define to 1 if you have the `posix_fadvise' function. */
#undef HAVE_POSIX_FADVISE #undef HAVE_POSIX_FADVISE
/* Define to 1 if you have the `posix_fallocate' function. */
#undef HAVE_POSIX_FALLOCATE
/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */ /* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
#undef HAVE_PPC_LWARX_MUTEX_HINT #undef HAVE_PPC_LWARX_MUTEX_HINT
......
...@@ -261,6 +261,9 @@ ...@@ -261,6 +261,9 @@
/* Define to 1 if you have the <poll.h> header file. */ /* Define to 1 if you have the <poll.h> header file. */
/* #undef HAVE_POLL_H */ /* #undef HAVE_POLL_H */
/* Define to 1 if you have the `posix_fallocate' function. */
/* #undef HAVE_POSIX_FALLOCATE */
/* Define to 1 if you have the `pstat' function. */ /* Define to 1 if you have the `pstat' function. */
/* #undef HAVE_PSTAT */ /* #undef HAVE_PSTAT */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment