Commit 0ac5e5a7 authored by Robert Haas's avatar Robert Haas

Allow dynamic allocation of shared memory segments.

Patch by myself and Amit Kapila.  Design help from Noah Misch.  Review
by Andres Freund.
parent f5665151
......@@ -8384,6 +8384,180 @@ if test "$ac_res" != no; then
fi
{ $as_echo "$as_me:$LINENO: checking for library containing shm_open" >&5
$as_echo_n "checking for library containing shm_open... " >&6; }
if test "${ac_cv_search_shm_open+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_func_search_save_LIBS=$LIBS
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char shm_open ();
int
main ()
{
return shm_open ();
;
return 0;
}
_ACEOF
for ac_lib in '' rt; do
if test -z "$ac_lib"; then
ac_res="none required"
else
ac_res=-l$ac_lib
LIBS="-l$ac_lib $ac_func_search_save_LIBS"
fi
rm -f conftest.$ac_objext conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
$as_echo "$ac_try_echo") >&5
(eval "$ac_link") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
$as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
$as_test_x conftest$ac_exeext
}; then
ac_cv_search_shm_open=$ac_res
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -rf conftest.dSYM
rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
conftest$ac_exeext
if test "${ac_cv_search_shm_open+set}" = set; then
break
fi
done
if test "${ac_cv_search_shm_open+set}" = set; then
:
else
ac_cv_search_shm_open=no
fi
rm conftest.$ac_ext
LIBS=$ac_func_search_save_LIBS
fi
{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_open" >&5
$as_echo "$ac_cv_search_shm_open" >&6; }
ac_res=$ac_cv_search_shm_open
if test "$ac_res" != no; then
test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
fi
{ $as_echo "$as_me:$LINENO: checking for library containing shm_unlink" >&5
$as_echo_n "checking for library containing shm_unlink... " >&6; }
if test "${ac_cv_search_shm_unlink+set}" = set; then
$as_echo_n "(cached) " >&6
else
ac_func_search_save_LIBS=$LIBS
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char shm_unlink ();
int
main ()
{
return shm_unlink ();
;
return 0;
}
_ACEOF
for ac_lib in '' rt; do
if test -z "$ac_lib"; then
ac_res="none required"
else
ac_res=-l$ac_lib
LIBS="-l$ac_lib $ac_func_search_save_LIBS"
fi
rm -f conftest.$ac_objext conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
$as_echo "$ac_try_echo") >&5
(eval "$ac_link") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
$as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
$as_test_x conftest$ac_exeext
}; then
ac_cv_search_shm_unlink=$ac_res
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -rf conftest.dSYM
rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
conftest$ac_exeext
if test "${ac_cv_search_shm_unlink+set}" = set; then
break
fi
done
if test "${ac_cv_search_shm_unlink+set}" = set; then
:
else
ac_cv_search_shm_unlink=no
fi
rm conftest.$ac_ext
LIBS=$ac_func_search_save_LIBS
fi
{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_unlink" >&5
$as_echo "$ac_cv_search_shm_unlink" >&6; }
ac_res=$ac_cv_search_shm_unlink
if test "$ac_res" != no; then
test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
fi
# Solaris:
{ $as_echo "$as_me:$LINENO: checking for library containing fdatasync" >&5
$as_echo_n "checking for library containing fdatasync... " >&6; }
......@@ -19763,7 +19937,8 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
......
......@@ -883,6 +883,8 @@ case $host_os in
esac
AC_SEARCH_LIBS(getopt_long, [getopt gnugetopt])
AC_SEARCH_LIBS(crypt, crypt)
AC_SEARCH_LIBS(shm_open, rt)
AC_SEARCH_LIBS(shm_unlink, rt)
# Solaris:
AC_SEARCH_LIBS(fdatasync, [rt posix4])
# Required for thread_test.c on Solaris 2.5:
......@@ -1230,7 +1232,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
......
......@@ -1194,6 +1194,32 @@ include 'filename'
</listitem>
</varlistentry>
<varlistentry id="guc-dynamic-shared-memory-type" xreflabel="dynamic_shared_memory_type">
<term><varname>dynamic_shared_memory_type</varname> (<type>enum</type>)</term>
<indexterm>
<primary><varname>dynamic_shared_memory_type</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Specifies the dynamic shared memory implementation that the server
should use. Possible values are <literal>posix</> (for POSIX shared
memory allocated using <literal>shm_open</>), <literal>sysv</literal>
(for System V shared memory allocated via <literal>shmget</>),
<literal>windows</> (for Windows shared memory), <literal>mmap</>
(to simulate shared memory using memory-mapped files stored in the
data directory), and <literal>none</> (to disable this feature).
Not all values are supported on all platforms; the first supported
option is the default for that platform. The use of the
<literal>mmap</> option, which is not the default on any platform,
is generally discouraged because the operating system may write
modified pages back to disk repeatedly, increasing system I/O load;
however, it may be useful for debugging, when the
<literal>pg_dynshmem</> directory is stored on a RAM disk, or when
other shared memory facilities are not available.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>
......
......@@ -29,6 +29,7 @@
#endif
#include "miscadmin.h"
#include "portability/mem.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
......@@ -36,31 +37,6 @@
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
#define IPCProtection (0600) /* access/modify by user only */
#ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
#define PG_SHMAT_FLAGS SHM_SHARE_MMU
#else
#define PG_SHMAT_FLAGS 0
#endif
/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
#ifndef MAP_HASSEMAPHORE
#define MAP_HASSEMAPHORE 0
#endif
#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
/* Some really old systems don't define MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *) -1)
#endif
unsigned long UsedShmemSegID = 0;
void *UsedShmemSegAddr = NULL;
......
......@@ -15,7 +15,7 @@ override CFLAGS+= -fno-inline
endif
endif
OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \
sinval.o sinvaladt.o standby.o
OBJS = dsm_impl.o dsm.o ipc.o ipci.o pmsignal.o procarray.o procsignal.o \
shmem.o shmqueue.o sinval.o sinvaladt.o standby.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* dsm.c
* manage dynamic shared memory segments
*
* This file provides a set of services to make programming with dynamic
* shared memory segments more convenient. Unlike the low-level
* facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
* created using this module will be cleaned up automatically. Mappings
* will be removed when the resource owner under which they were created
* is cleaned up, unless dsm_keep_mapping() is used, in which case they
* have session lifespan. Segments will be removed when there are no
* remaining mappings, or at postmaster shutdown in any case. After a
* hard postmaster crash, remaining segments will be removed, if they
* still exist, at the next postmaster startup.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/storage/ipc/dsm.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#ifndef WIN32
#include <sys/mman.h>
#endif
#include <sys/stat.h>
#include "lib/ilist.h"
#include "miscadmin.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner_private.h"
#define PG_DYNSHMEM_STATE_FILE PG_DYNSHMEM_DIR "/state"
#define PG_DYNSHMEM_NEW_STATE_FILE PG_DYNSHMEM_DIR "/state.new"
#define PG_DYNSHMEM_STATE_BUFSIZ 512
#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
/*
* There's no point in getting too cheap here, because the minimum allocation
* is one OS page, which is probably at least 4KB and could easily be as high
as 64KB.  Each slot consumes sizeof(dsm_control_item), currently 8 bytes.
*/
#define PG_DYNSHMEM_FIXED_SLOTS 64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
#define INVALID_CONTROL_SLOT ((uint32) -1)
/* Backend-local state for a dynamic shared memory segment. */
struct dsm_segment
{
    dlist_node  node;           /* List link in dsm_segment_list. */
    ResourceOwner resowner;     /* Resource owner. */
    dsm_handle  handle;         /* Segment name. */
    uint32      control_slot;   /* Slot in control segment. */
    void       *impl_private;   /* Implementation-specific private data. */
    void       *mapped_address; /* Mapping address, or NULL if unmapped. */
    uint64      mapped_size;    /* Size of our mapping. */
};

/*
 * Shared-memory state for a dynamic shared memory segment.
 *
 * refcnt is 1 plus the number of processes currently mapping the segment:
 * a value of 2 or more means the segment is active, 1 means it is moribund
 * (the last mapping is gone and destruction is in progress; see dsm_detach),
 * and 0 means the slot itself is unused.
 */
typedef struct dsm_control_item
{
    dsm_handle  handle;         /* Implementation-level segment identifier. */
    uint32      refcnt;         /* 2+ = active, 1 = moribund, 0 = gone */
} dsm_control_item;

/* Layout of the dynamic shared memory control segment. */
typedef struct dsm_control_header
{
    uint32      magic;          /* Always PG_DYNSHMEM_CONTROL_MAGIC. */
    uint32      nitems;         /* Number of item[] slots handed out so far. */
    uint32      maxitems;       /* Allocated length of item[]. */
    dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;
/* Forward declarations of file-local routines. */
static void dsm_cleanup_using_control_segment(void);
static void dsm_cleanup_for_mmap(void);
static bool dsm_read_state_file(dsm_handle *h);
static void dsm_write_state_file(dsm_handle h);
static void dsm_postmaster_shutdown(int code, Datum arg);
static void dsm_backend_shutdown(int code, Datum arg);
static dsm_segment *dsm_create_descriptor(void);
static bool dsm_control_segment_sane(dsm_control_header *control,
                        uint64 mapped_size);
static uint64 dsm_control_bytes_needed(uint32 nitems);

/* Has this backend initialized the dynamic shared memory system yet? */
static bool dsm_init_done = false;

/*
 * List of dynamic shared memory segments used by this backend.
 *
 * At process exit time, we must decrement the reference count of each
 * segment we have attached; this list makes it possible to find all such
 * segments.
 *
 * This list should always be empty in the postmaster.  We could probably
 * allow the postmaster to map dynamic shared memory segments before it
 * begins to start child processes, provided that each process adjusted
 * the reference counts for those segments in the control segment at
 * startup time, but there's no obvious need for such a facility, which
 * would also be complex to handle in the EXEC_BACKEND case.  Once the
 * postmaster has begun spawning children, there's an additional problem:
 * each new mapping would require an update to the control segment,
 * which requires locking, in which the postmaster must not be involved.
 */
static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);

/*
 * Control segment information.
 *
 * Unlike ordinary shared memory segments, the control segment is not
 * reference counted; instead, it lasts for the postmaster's entire
 * life cycle.  For simplicity, it doesn't have a dsm_segment object either.
 */
static dsm_handle dsm_control_handle;           /* Control segment's name. */
static dsm_control_header *dsm_control;         /* Our mapping of it. */
static uint64 dsm_control_mapped_size = 0;      /* Size of that mapping. */
static void *dsm_control_impl_private = NULL;   /* Impl-private mapping data. */
/*
 * Start up the dynamic shared memory system.
 *
 * This is called just once during each cluster lifetime, at postmaster
 * startup time.  It removes any segments left behind by a previous,
 * dead postmaster, then creates and initializes a fresh control segment
 * and records its handle in the on-disk state file so that a future
 * postmaster can clean up after us in turn.
 */
void
dsm_postmaster_startup(void)
{
    void       *dsm_control_address = NULL;
    uint32      maxitems;
    uint64      segsize;

    Assert(!IsUnderPostmaster);

    /* If dynamic shared memory is disabled, there's nothing to do. */
    if (dynamic_shared_memory_type == DSM_IMPL_NONE)
        return;

    /*
     * Check for, and remove, shared memory segments left behind by a dead
     * postmaster.  This isn't necessary on Windows, which always removes them
     * when the last reference is gone.
     */
    switch (dynamic_shared_memory_type)
    {
        case DSM_IMPL_POSIX:
        case DSM_IMPL_SYSV:
            dsm_cleanup_using_control_segment();
            break;
        case DSM_IMPL_MMAP:
            dsm_cleanup_for_mmap();
            break;
        case DSM_IMPL_WINDOWS:
            /* Nothing to do. */
            break;
        default:
            elog(ERROR, "unknown dynamic shared memory type: %d",
                 dynamic_shared_memory_type);
    }

    /* Determine size for new control segment. */
    maxitems = PG_DYNSHMEM_FIXED_SLOTS
        + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
    elog(DEBUG2, "dynamic shared memory system will support %u segments",
         maxitems);
    segsize = dsm_control_bytes_needed(maxitems);

    /*
     * Loop until we find an unused identifier for the new control segment.
     * We pick a random handle and retry if DSM_OP_CREATE reports failure
     * (presumably a name collision with an existing segment; any other
     * failure is raised at ERROR level inside dsm_impl_op).
     */
    for (;;)
    {
        Assert(dsm_control_address == NULL);
        Assert(dsm_control_mapped_size == 0);
        dsm_control_handle = random();
        if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
                        &dsm_control_impl_private, &dsm_control_address,
                        &dsm_control_mapped_size, ERROR))
            break;
    }
    dsm_control = dsm_control_address;
    /* Make sure the control segment is torn down at postmaster exit. */
    on_shmem_exit(dsm_postmaster_shutdown, 0);
    elog(DEBUG2, "created dynamic shared memory control segment %u ("
         UINT64_FORMAT " bytes)", dsm_control_handle, segsize);
    /* Persist the handle so the next postmaster can find this segment. */
    dsm_write_state_file(dsm_control_handle);

    /* Initialize control segment. */
    dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
    dsm_control->nitems = 0;
    dsm_control->maxitems = maxitems;
}
/*
 * Determine whether the control segment from the previous postmaster
 * invocation still exists.  If so, remove the dynamic shared memory
 * segments to which it refers, and then the control segment itself.
 */
static void
dsm_cleanup_using_control_segment(void)
{
    void       *mapped_address = NULL;
    void       *junk_mapped_address = NULL;
    void       *impl_private = NULL;
    void       *junk_impl_private = NULL;
    uint64      mapped_size = 0;
    uint64      junk_mapped_size = 0;
    uint32      nitems;
    uint32      i;
    dsm_handle  old_control_handle;
    dsm_control_header *old_control;

    /*
     * Read the state file.  If it doesn't exist or is empty, there's nothing
     * more to do.
     */
    if (!dsm_read_state_file(&old_control_handle))
        return;

    /*
     * Try to attach the segment.  If this fails, it probably just means that
     * the operating system has been rebooted and the segment no longer exists,
     * or an unrelated process has used the same shm ID.  So just fall out
     * quietly.
     */
    if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
                     &mapped_address, &mapped_size, DEBUG1))
        return;

    /*
     * We've managed to reattach it, but the contents might not be sane.
     * If they aren't, we disregard the segment after all.
     */
    old_control = (dsm_control_header *) mapped_address;
    if (!dsm_control_segment_sane(old_control, mapped_size))
    {
        dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
                    &mapped_address, &mapped_size, LOG);
        return;
    }

    /*
     * OK, the control segment looks basically valid, so we can use it to
     * get a list of segments that need to be removed.
     */
    nitems = old_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        dsm_handle  handle;
        uint32      refcnt;

        /* If the reference count is 0, the slot is actually unused. */
        refcnt = old_control->item[i].refcnt;
        if (refcnt == 0)
            continue;

        /* Log debugging information. */
        handle = old_control->item[i].handle;
        elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
             handle, refcnt);

        /* Destroy the referenced segment. */
        dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                    &junk_mapped_address, &junk_mapped_size, LOG);
    }

    /* Destroy the old control segment, too. */
    elog(DEBUG2,
         "cleaning up dynamic shared memory control segment with ID %u",
         old_control_handle);
    dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
                &mapped_address, &mapped_size, LOG);
}
/*
 * When we're using the mmap shared memory implementation, "shared memory"
 * segments might even manage to survive an operating system reboot.
 * But there's no guarantee as to exactly what will survive: some segments
 * may survive, and others may not, and the contents of some may be out
 * of date.  In particular, the control segment may be out of date, so we
 * can't rely on it to figure out what to remove.  However, since we know
 * what directory contains the files we used as shared memory, we can simply
 * scan the directory and blow everything away that shouldn't be there.
 */
static void
dsm_cleanup_for_mmap(void)
{
    DIR        *dirdesc;
    struct dirent *de;

    /* Open the directory; can't use AllocateDir in postmaster. */
    dirdesc = opendir(PG_DYNSHMEM_DIR);
    if (dirdesc == NULL)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not open directory \"%s\": %m",
                        PG_DYNSHMEM_DIR)));

    /* Remove every entry whose name carries our backing-file prefix. */
    while ((de = readdir(dirdesc)) != NULL)
    {
        char        path[MAXPGPATH];

        /* Skip anything that isn't one of our backing files. */
        if (strncmp(de->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
                    strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) != 0)
            continue;

        snprintf(path, MAXPGPATH, PG_DYNSHMEM_DIR "/%s", de->d_name);
        elog(DEBUG2, "removing file \"%s\"", path);

        if (unlink(path) != 0)
        {
            /* Preserve unlink's errno across closedir for %m. */
            int         save_errno = errno;

            closedir(dirdesc);
            errno = save_errno;
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not remove file \"%s\": %m", path)));
        }
    }

    /* Cleanup complete. */
    closedir(dirdesc);
}
/*
 * Read and parse the state file.
 *
 * If the state file is empty or the contents are garbled, it probably means
 * that the operating system rebooted before the data written by the previous
 * postmaster made it to disk.  In that case, we can just ignore it; any shared
 * memory from before the reboot should be gone anyway.
 *
 * Returns true, setting *h to the old control segment's handle, if a
 * well-formed state file was found; returns false if the file is absent
 * or its contents don't parse.  Raises ERROR on unexpected I/O failures.
 */
static bool
dsm_read_state_file(dsm_handle *h)
{
    int         statefd;
    char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
    int         nbytes = 0;
    char       *endptr,
               *s;
    dsm_handle  handle;

    /* Read the state file to get the ID of the old control segment. */
    statefd = open(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
    if (statefd < 0)
    {
        /* No file at all just means there's nothing to clean up. */
        if (errno == ENOENT)
            return false;
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not open file \"%s\": %m",
                        PG_DYNSHMEM_STATE_FILE)));
    }
    /* Read at most BUFSIZ-1 bytes, leaving room for the terminating NUL. */
    nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
    if (nbytes < 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read file \"%s\": %m",
                        PG_DYNSHMEM_STATE_FILE)));
    /* make sure buffer is NUL terminated */
    statebuf[nbytes] = '\0';
    close(statefd);

    /*
     * We expect to find the handle of the old control segment here,
     * on a line by itself.  Trailing spaces or tabs before the newline
     * (or end of data) are tolerated; any other trailing text means the
     * file is garbled and we report failure.
     */
    handle = strtoul(statebuf, &endptr, 10);
    for (s = endptr; *s == ' ' || *s == '\t'; ++s)
        ;
    if (*s != '\n' && *s != '\0')
        return false;

    /* Looks good. */
    *h = handle;
    return true;
}
/*
 * Write a control segment handle to the state file, so that if the
 * postmaster is killed without running its on_shmem_exit hooks, the
 * next postmaster can clean things up after restart.
 *
 * The file is written under a temporary name and renamed into place so
 * that a reader never sees a partially-written state file.
 */
static void
dsm_write_state_file(dsm_handle h)
{
    int         statefd;
    char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
    int         nbytes;

    /* Create or truncate the file. */
    statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
                   O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
    if (statefd < 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not create file \"%s\": %m",
                        PG_DYNSHMEM_NEW_STATE_FILE)));

    /*
     * Write contents.  Use the handle passed by the caller, not the global
     * dsm_control_handle: the previous coding ignored the parameter, which
     * happened to work only because the sole caller passed the global.
     */
    snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", h);
    nbytes = strlen(statebuf);
    if (write(statefd, statebuf, nbytes) != nbytes)
    {
        if (errno == 0)
            errno = ENOSPC;     /* if no error signalled, assume no space */
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not write file \"%s\": %m",
                        PG_DYNSHMEM_NEW_STATE_FILE)));
    }

    /* Close file. */
    close(statefd);

    /*
     * Atomically rename file into place, so that no one ever sees a partially
     * written state file.
     */
    if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not rename file \"%s\": %m",
                        PG_DYNSHMEM_NEW_STATE_FILE)));
}
/*
 * At shutdown time, we iterate over the control segment and remove all
 * remaining dynamic shared memory segments.  We avoid throwing errors here;
 * the postmaster is shutting down either way, and this is just non-critical
 * resource cleanup.
 */
static void
dsm_postmaster_shutdown(int code, Datum arg)
{
    uint32      nitems;
    uint32      i;
    void       *dsm_control_address;
    void       *junk_mapped_address = NULL;
    void       *junk_impl_private = NULL;
    uint64      junk_mapped_size = 0;

    /*
     * If some other backend exited uncleanly, it might have corrupted the
     * control segment while it was dying.  In that case, we warn and ignore
     * the contents of the control segment.  This may end up leaving behind
     * stray shared memory segments, but there's not much we can do about
     * that if the metadata is gone.
     *
     * (nitems is fetched before the sanity check but only used after the
     * check has passed, so a corrupt value is never acted upon.)
     */
    nitems = dsm_control->nitems;
    if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
    {
        ereport(LOG,
                (errmsg("dynamic shared memory control segment is corrupt")));
        return;
    }

    /* Remove any remaining segments. */
    for (i = 0; i < nitems; ++i)
    {
        dsm_handle  handle;

        /* If the reference count is 0, the slot is actually unused. */
        if (dsm_control->item[i].refcnt == 0)
            continue;

        /* Log debugging information. */
        handle = dsm_control->item[i].handle;
        elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
             handle);

        /* Destroy the segment. */
        dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                    &junk_mapped_address, &junk_mapped_size, LOG);
    }

    /* Remove the control segment itself. */
    elog(DEBUG2,
         "cleaning up dynamic shared memory control segment with ID %u",
         dsm_control_handle);
    dsm_control_address = dsm_control;
    dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
                &dsm_control_impl_private, &dsm_control_address,
                &dsm_control_mapped_size, LOG);
    dsm_control = dsm_control_address;

    /* And, finally, remove the state file. */
    if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not unlink file \"%s\": %m",
                        PG_DYNSHMEM_STATE_FILE)));
}
/*
 * Prepare this backend for dynamic shared memory usage.  Under EXEC_BACKEND,
 * we must reread the state file and map the control segment; in other cases,
 * we'll have inherited the postmaster's mapping and global variables.
 *
 * Called lazily from dsm_create()/dsm_attach() the first time this backend
 * touches dynamic shared memory; sets dsm_init_done on success.
 */
static void
dsm_backend_startup(void)
{
    /* If dynamic shared memory is disabled, reject this. */
    if (dynamic_shared_memory_type == DSM_IMPL_NONE)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("dynamic shared memory is disabled"),
                 errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));

#ifdef EXEC_BACKEND
    {
        dsm_handle  control_handle;
        void       *control_address = NULL;

        /* Read the control segment information from the state file. */
        if (!dsm_read_state_file(&control_handle))
            ereport(ERROR,
                    (errcode(ERRCODE_INTERNAL_ERROR),
                     errmsg("could not parse dynamic shared memory state file")));

        /* Attach control segment. */
        dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
                    &dsm_control_impl_private, &control_address,
                    &dsm_control_mapped_size, ERROR);
        dsm_control_handle = control_handle;
        dsm_control = control_address;

        /* If control segment doesn't look sane, something is badly wrong. */
        if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
        {
            /* Detach before erroring out, to avoid leaking the mapping. */
            dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
                        &dsm_control_impl_private, &control_address,
                        &dsm_control_mapped_size, WARNING);
            ereport(FATAL,
                    (errcode(ERRCODE_INTERNAL_ERROR),
                     errmsg("dynamic shared memory control segment is not valid")));
        }
    }
#endif

    /* Arrange to detach segments on exit. */
    on_shmem_exit(dsm_backend_shutdown, 0);

    dsm_init_done = true;
}
/*
 * Create a new dynamic shared memory segment.
 *
 * Picks a random unused handle, creates and maps a backing segment of the
 * requested size, and registers it in the control segment.  The new slot's
 * reference count starts at 2, because a count of 1 means the segment is
 * slated for destruction (see dsm_detach).  Raises ERROR if the control
 * segment has no free slots left.
 */
dsm_segment *
dsm_create(uint64 size)
{
    dsm_segment *seg = dsm_create_descriptor();
    uint32      i;
    uint32      nitems;

    /* Unsafe in postmaster (and pointless in a stand-alone backend). */
    Assert(IsUnderPostmaster);

    if (!dsm_init_done)
        dsm_backend_startup();

    /*
     * Loop until we find an unused segment identifier; a false return from
     * DSM_OP_CREATE here indicates a handle collision, since other failures
     * are raised at ERROR level inside dsm_impl_op.
     */
    for (;;)
    {
        Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
        seg->handle = random();
        if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
                        &seg->mapped_address, &seg->mapped_size, ERROR))
            break;
    }

    /* Lock the control segment so we can register the new segment. */
    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);

    /* Search the control segment for an unused slot. */
    nitems = dsm_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        if (dsm_control->item[i].refcnt == 0)
        {
            dsm_control->item[i].handle = seg->handle;
            /* refcnt of 1 triggers destruction, so start at 2 */
            dsm_control->item[i].refcnt = 2;
            seg->control_slot = i;
            LWLockRelease(DynamicSharedMemoryControlLock);
            return seg;
        }
    }

    /* Verify that we can support an additional mapping. */
    if (nitems >= dsm_control->maxitems)
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                 errmsg("too many dynamic shared memory segments")));

    /* Enter the handle into a new array slot. */
    dsm_control->item[nitems].handle = seg->handle;
    /* refcnt of 1 triggers destruction, so start at 2 */
    dsm_control->item[nitems].refcnt = 2;
    seg->control_slot = nitems;
    dsm_control->nitems++;
    LWLockRelease(DynamicSharedMemoryControlLock);

    return seg;
}
/*
 * Attach a dynamic shared memory segment.
 *
 * See comments for dsm_segment_handle() for an explanation of how this
 * is intended to be used.
 *
 * This function will return NULL if the segment isn't known to the system.
 * This can happen if we're asked to attach the segment, but then everyone
 * else detaches it (causing it to be destroyed) before we get around to
 * attaching it.
 */
dsm_segment *
dsm_attach(dsm_handle h)
{
    dsm_segment *seg;
    dlist_iter  iter;
    uint32      i;
    uint32      nitems;

    /* Unsafe in postmaster (and pointless in a stand-alone backend). */
    Assert(IsUnderPostmaster);

    if (!dsm_init_done)
        dsm_backend_startup();

    /*
     * Since this is just a debugging cross-check, we could leave it out
     * altogether, or include it only in assert-enabled builds.  But since
     * the list of attached segments should normally be very short, let's
     * include it always for right now.
     *
     * If you're hitting this error, you probably want to attempt to
     * find an existing mapping via dsm_find_mapping() before calling
     * dsm_attach() to create a new one.
     */
    dlist_foreach(iter, &dsm_segment_list)
    {
        seg = dlist_container(dsm_segment, node, iter.cur);
        if (seg->handle == h)
            elog(ERROR, "can't attach the same segment more than once");
    }

    /* Create a new segment descriptor. */
    seg = dsm_create_descriptor();
    seg->handle = h;

    /* Bump reference count for this segment in shared memory. */
    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
    nitems = dsm_control->nitems;
    for (i = 0; i < nitems; ++i)
    {
        /* If the reference count is 0, the slot is actually unused. */
        if (dsm_control->item[i].refcnt == 0)
            continue;

        /*
         * If the reference count is 1, the slot is still in use, but the
         * segment is in the process of going away.  Treat that as if we
         * didn't find a match.
         */
        if (dsm_control->item[i].refcnt == 1)
            break;

        /* Otherwise, if the descriptor matches, we've found a match. */
        if (dsm_control->item[i].handle == seg->handle)
        {
            dsm_control->item[i].refcnt++;
            seg->control_slot = i;
            break;
        }
    }
    LWLockRelease(DynamicSharedMemoryControlLock);

    /*
     * If we didn't find the handle we're looking for in the control
     * segment, it probably means that everyone else who had it mapped,
     * including the original creator, died before we got to this point.
     * It's up to the caller to decide what to do about that.
     * (control_slot still holds INVALID_CONTROL_SLOT in that case —
     * presumably set by dsm_create_descriptor; its body isn't visible here.)
     */
    if (seg->control_slot == INVALID_CONTROL_SLOT)
    {
        dsm_detach(seg);
        return NULL;
    }

    /* Here's where we actually try to map the segment. */
    dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
                &seg->mapped_address, &seg->mapped_size, ERROR);

    return seg;
}
/*
 * At backend shutdown time, detach any segments that are still attached.
 *
 * We repeatedly detach the head of dsm_segment_list until the list is
 * empty; each dsm_detach() call is expected to remove the segment from
 * the list (its body isn't visible here — verify).
 */
static void
dsm_backend_shutdown(int code, Datum arg)
{
    for (;;)
    {
        dsm_segment *head;

        if (dlist_is_empty(&dsm_segment_list))
            break;
        head = dlist_head_element(dsm_segment, node, &dsm_segment_list);
        dsm_detach(head);
    }
}
/*
 * Resize an existing shared memory segment.
 *
 * This may cause the shared memory segment to be remapped at a different
 * address.  For the caller's convenience, we return the mapped address.
 */
void *
dsm_resize(dsm_segment *seg, uint64 size)
{
    /* Only segments registered in the control segment may be resized. */
    Assert(seg->control_slot != INVALID_CONTROL_SLOT);
    dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
                &seg->mapped_address, &seg->mapped_size, ERROR);
    return seg->mapped_address;
}
/*
* Remap an existing shared memory segment.
*
* This is intended to be used when some other process has extended the
* mapping using dsm_resize(), but we've still only got the initial
* portion mapped. Since this might change the address at which the
* segment is mapped, we return the new mapped address.
*/
void *
dsm_remap(dsm_segment *seg)
{
	/*
	 * DSM_OP_ATTACH with request_size 0 remaps the segment at its current
	 * size; the implementation discards any existing smaller mapping first.
	 * Errors are raised at ERROR, so we only get here on success.
	 */
	dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
				&seg->mapped_address, &seg->mapped_size, ERROR);

	return seg->mapped_address;
}
/*
* Detach from a shared memory segment, destroying the segment if we
* remove the last reference.
*
* This function should never fail. It will often be invoked when aborting
* a transaction, and a further error won't serve any purpose. It's not a
* complete disaster if we fail to unmap or destroy the segment; it means a
* resource leak, but that doesn't necessarily preclude further operations.
*/
void
dsm_detach(dsm_segment *seg)
{
	/*
	 * Try to remove the mapping, if one exists.  Normally, there will be,
	 * but maybe not, if we failed partway through a create or attach
	 * operation.  We remove the mapping before decrementing the reference
	 * count so that the process that sees a zero reference count can be
	 * certain that no remaining mappings exist.  Even if this fails, we
	 * pretend that it works, because retrying is likely to fail in the
	 * same way.
	 */
	if (seg->mapped_address != NULL)
	{
		/* WARNING, not ERROR: per the header comment, this must not throw. */
		dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
					&seg->mapped_address, &seg->mapped_size, WARNING);
		seg->impl_private = NULL;
		seg->mapped_address = NULL;
		seg->mapped_size = 0;
	}

	/* Reduce reference count, if we previously increased it. */
	if (seg->control_slot != INVALID_CONTROL_SLOT)
	{
		uint32		refcnt;
		uint32		control_slot = seg->control_slot;

		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
		Assert(dsm_control->item[control_slot].handle == seg->handle);
		Assert(dsm_control->item[control_slot].refcnt > 1);
		refcnt = --dsm_control->item[control_slot].refcnt;
		/* Forget the slot before releasing the lock; we no longer own it. */
		seg->control_slot = INVALID_CONTROL_SLOT;
		LWLockRelease(DynamicSharedMemoryControlLock);

		/*
		 * If new reference count is 1, try to destroy the segment.  (A
		 * refcnt of 1 marks a slot whose segment is being torn down; 0
		 * means the slot itself is free for reuse.)
		 */
		if (refcnt == 1)
		{
			/*
			 * If we fail to destroy the segment here, or are killed before
			 * we finish doing so, the reference count will remain at 1, which
			 * will mean that nobody else can attach to the segment.  At
			 * postmaster shutdown time, or when a new postmaster is started
			 * after a hard kill, another attempt will be made to remove the
			 * segment.
			 *
			 * The main case we're worried about here is being killed by
			 * a signal before we can finish removing the segment.  In that
			 * case, it's important to be sure that the segment still gets
			 * removed.  If we actually fail to remove the segment for some
			 * other reason, the postmaster may not have any better luck than
			 * we did.  There's not much we can do about that, though.
			 */
			if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
							&seg->mapped_address, &seg->mapped_size, WARNING))
			{
				/* Retake the lock just long enough to mark the slot free. */
				LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
				Assert(dsm_control->item[control_slot].handle == seg->handle);
				Assert(dsm_control->item[control_slot].refcnt == 1);
				dsm_control->item[control_slot].refcnt = 0;
				LWLockRelease(DynamicSharedMemoryControlLock);
			}
		}
	}

	/* Clean up our remaining backend-private data structures. */
	if (seg->resowner != NULL)
		ResourceOwnerForgetDSM(seg->resowner, seg);
	dlist_delete(&seg->node);
	pfree(seg);
}
/*
* Keep a dynamic shared memory mapping until end of session.
*
* By default, mappings are owned by the current resource owner, which
* typically means they stick around for the duration of the current query
* only.
*/
void
dsm_keep_mapping(dsm_segment *seg)
{
	/* Nothing to do if the mapping is already session-lifespan. */
	if (seg->resowner == NULL)
		return;

	/* Dissociate from the resource owner so query end won't detach it. */
	ResourceOwnerForgetDSM(seg->resowner, seg);
	seg->resowner = NULL;
}
/*
* Find an existing mapping for a shared memory segment, if there is one.
*/
dsm_segment *
dsm_find_mapping(dsm_handle h)
{
	dlist_iter	iter;

	/* Linear scan of this backend's known segments for a matching handle. */
	dlist_foreach(iter, &dsm_segment_list)
	{
		dsm_segment *candidate = dlist_container(dsm_segment, node, iter.cur);

		if (candidate->handle == h)
			return candidate;
	}

	/* Not mapped in this backend. */
	return NULL;
}
/*
* Get the address at which a dynamic shared memory segment is mapped.
*/
void *
dsm_segment_address(dsm_segment *seg)
{
	/* Caller must not ask for the address of an unmapped segment. */
	Assert(seg->mapped_address != NULL);
	return seg->mapped_address;
}
/*
* Get the size of a mapping.
*/
uint64
dsm_segment_map_length(dsm_segment *seg)
{
	/*
	 * Only valid for a mapped segment.  Note this is the locally mapped
	 * length, which can lag the true segment size after another process
	 * calls dsm_resize() (see dsm_remap).
	 */
	Assert(seg->mapped_address != NULL);
	return seg->mapped_size;
}
/*
* Get a handle for a mapping.
*
* To establish communication via dynamic shared memory between two backends,
* one of them should first call dsm_create() to establish a new shared
* memory mapping. That process should then call dsm_segment_handle() to
* obtain a handle for the mapping, and pass that handle to the
* coordinating backend via some means (e.g. bgw_main_arg, or via the
 * main shared memory segment).  The recipient, once in possession of the
* handle, should call dsm_attach().
*/
dsm_handle
dsm_segment_handle(dsm_segment *seg)
{
	/* The handle is process-independent, unlike the mapped address. */
	return seg->handle;
}
/*
* Create a segment descriptor.
*/
static dsm_segment *
dsm_create_descriptor(void)
{
	dsm_segment *seg;

	/*
	 * Reserve resource-owner space first, so that the Remember call at the
	 * bottom cannot fail after we have already linked the segment into the
	 * list.
	 */
	ResourceOwnerEnlargeDSMs(CurrentResourceOwner);

	seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
	dlist_push_head(&dsm_segment_list, &seg->node);

	/* seg->handle must be initialized by the caller */
	seg->control_slot = INVALID_CONTROL_SLOT;
	seg->impl_private = NULL;
	seg->mapped_address = NULL;
	seg->mapped_size = 0;

	/* By default, the mapping lives only as long as the current owner. */
	seg->resowner = CurrentResourceOwner;
	ResourceOwnerRememberDSM(CurrentResourceOwner, seg);

	return seg;
}
/*
* Sanity check a control segment.
*
* The goal here isn't to detect everything that could possibly be wrong with
* the control segment; there's not enough information for that. Rather, the
* goal is to make sure that someone can iterate over the items in the segment
* without overrunning the end of the mapping and crashing. We also check
* the magic number since, if that's messed up, this may not even be one of
* our segments at all.
*/
static bool
dsm_control_segment_sane(dsm_control_header *control, uint64 mapped_size)
{
	/* The fixed-size header must fit before we may read any field of it. */
	if (mapped_size < offsetof(dsm_control_header, item))
		return false;

	/* A wrong magic number means this may not even be one of our segments. */
	if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
		return false;

	/*
	 * The declared item array must fit within the mapping, and the used
	 * item count must not exceed the declared capacity; otherwise iterating
	 * the items could run off the end of the mapping.
	 */
	return dsm_control_bytes_needed(control->maxitems) <= mapped_size
		&& control->nitems <= control->maxitems;
}
/*
* Compute the number of control-segment bytes needed to store a given
* number of items.
*/
static uint64
dsm_control_bytes_needed(uint32 nitems)
{
	/* Header, plus one dsm_control_item per slot; widen before multiplying. */
	return offsetof(dsm_control_header, item)
		+ sizeof(dsm_control_item) * (uint64) nitems;
}
/*-------------------------------------------------------------------------
*
* dsm_impl.c
* manage dynamic shared memory segments
*
* This file provides low-level APIs for creating and destroying shared
* memory segments using several different possible techniques. We refer
* to these segments as dynamic because they can be created, altered, and
* destroyed at any point during the server life cycle. This is unlike
* the main shared memory segment, of which there is always exactly one
* and which is always mapped at a fixed address in every PostgreSQL
* background process.
*
* Because not all systems provide the same primitives in this area, nor
* do all primitives behave the same way on all systems, we provide
* several implementations of this facility. Many systems implement
* POSIX shared memory (shm_open etc.), which is well-suited to our needs
* in this area, with the exception that shared memory identifiers live
* in a flat system-wide namespace, raising the uncomfortable prospect of
* name collisions with other processes (including other copies of
* PostgreSQL) running on the same system. Some systems only support
* the older System V shared memory interface (shmget etc.) which is
* also usable; however, the default allocation limits are often quite
* small, and the namespace is even more restricted.
*
* We also provide an mmap-based shared memory implementation. This may
* be useful on systems that provide shared memory via a special-purpose
* filesystem; by opting for this implementation, the user can even
* control precisely where their shared memory segments are placed. It
* can also be used as a fallback for systems where shm_open and shmget
* are not available or can't be used for some reason. Of course,
* mapping a file residing on an actual spinning disk is a fairly poor
* approximation for shared memory because writeback may hurt performance
* substantially, but there should be few systems where we must make do
* with such poor tools.
*
 * As ever, Windows requires its own implementation.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
 *	  src/backend/storage/ipc/dsm_impl.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#ifndef WIN32
#include <sys/mman.h>
#endif
#include <sys/stat.h>
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
#include "portability/mem.h"
#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#ifdef USE_DSM_POSIX
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
void **impl_private, void **mapped_address,
uint64 *mapped_size, int elevel);
#endif
#ifdef USE_DSM_SYSV
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
void **impl_private, void **mapped_address,
uint64 *mapped_size, int elevel);
#endif
#ifdef USE_DSM_WINDOWS
static bool dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
void **impl_private, void **mapped_address,
uint64 *mapped_size, int elevel);
#endif
#ifdef USE_DSM_MMAP
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
void **impl_private, void **mapped_address,
uint64 *mapped_size, int elevel);
#endif
static int errcode_for_dynamic_shared_memory(void);
/*
 * Table mapping the names accepted by the dynamic_shared_memory_type GUC
 * onto the corresponding DSM_IMPL_* constants.  Only implementations
 * compiled into this build are offered.
 */
const struct config_enum_entry dynamic_shared_memory_options[] = {
#ifdef USE_DSM_POSIX
	{ "posix", DSM_IMPL_POSIX, false},
#endif
#ifdef USE_DSM_SYSV
	{ "sysv", DSM_IMPL_SYSV, false},
#endif
#ifdef USE_DSM_WINDOWS
	{ "windows", DSM_IMPL_WINDOWS, false},
#endif
#ifdef USE_DSM_MMAP
	{ "mmap", DSM_IMPL_MMAP, false},
#endif
	{ "none", DSM_IMPL_NONE, false},
	{NULL, 0, false}			/* list terminator */
};
/* Implementation selector. */
int dynamic_shared_memory_type;
/* Size of buffer to be used for zero-filling. */
#define ZBUFFER_SIZE 8192
/*------
* Perform a low-level shared memory operation in a platform-specific way,
* as dictated by the selected implementation. Each implementation is
* required to implement the following primitives.
*
* DSM_OP_CREATE. Create a segment whose size is the request_size and
* map it.
*
* DSM_OP_ATTACH. Map the segment, whose size must be the request_size.
* The segment may already be mapped; any existing mapping should be removed
* before creating a new one.
*
* DSM_OP_DETACH. Unmap the segment.
*
* DSM_OP_RESIZE. Resize the segment to the given request_size and
* remap the segment at that new size.
*
* DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
* segment.
*
* Arguments:
* op: The operation to be performed.
* handle: The handle of an existing object, or for DSM_OP_CREATE, the
* a new handle the caller wants created.
* request_size: For DSM_OP_CREATE, the requested size. For DSM_OP_RESIZE,
* the new size. Otherwise, 0.
* impl_private: Private, implementation-specific data. Will be a pointer
* to NULL for the first operation on a shared memory segment within this
* backend; thereafter, it will point to the value to which it was set
* on the previous call.
* mapped_address: Pointer to start of current mapping; pointer to NULL
* if none. Updated with new mapping address.
* mapped_size: Pointer to size of current mapping; pointer to 0 if none.
* Updated with new mapped size.
* elevel: Level at which to log errors.
*
* Return value: true on success, false on failure. When false is returned,
* a message should first be logged at the specified elevel, except in the
* case where DSM_OP_CREATE experiences a name collision, which should
* silently return false.
*-----
*/
bool
dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
			void **impl_private, void **mapped_address, uint64 *mapped_size,
			int elevel)
{
	/* Only create and resize operations carry a size request. */
	Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
	/* Create and attach require that nothing be currently mapped. */
	Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
		   (*mapped_address == NULL && *mapped_size == 0));

	/* request_size is uint64; reject sizes the platform cannot represent. */
	if (request_size > (size_t) -1)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("requested shared memory size overflows size_t")));

	/* Dispatch to whichever implementation the GUC selected. */
	switch (dynamic_shared_memory_type)
	{
#ifdef USE_DSM_POSIX
		case DSM_IMPL_POSIX:
			return dsm_impl_posix(op, handle, request_size, impl_private,
								  mapped_address, mapped_size, elevel);
#endif
#ifdef USE_DSM_SYSV
		case DSM_IMPL_SYSV:
			return dsm_impl_sysv(op, handle, request_size, impl_private,
								 mapped_address, mapped_size, elevel);
#endif
#ifdef USE_DSM_WINDOWS
		case DSM_IMPL_WINDOWS:
			return dsm_impl_windows(op, handle, request_size, impl_private,
									mapped_address, mapped_size, elevel);
#endif
#ifdef USE_DSM_MMAP
		case DSM_IMPL_MMAP:
			return dsm_impl_mmap(op, handle, request_size, impl_private,
								 mapped_address, mapped_size, elevel);
#endif
	}
	elog(ERROR, "unexpected dynamic shared memory type: %d",
		 dynamic_shared_memory_type);
	return false;				/* not reached; silences compiler warning */
}
/*
* Does the current dynamic shared memory implementation support resizing
* segments? (The answer here could be platform-dependent in the future,
* since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
* can't resize segments to anything larger than 256MB that way. For now,
* we keep it simple.)
*/
bool
dsm_impl_can_resize(void)
{
	/*
	 * Of the implementations present here, only POSIX shared memory
	 * supports DSM_OP_RESIZE; every other value of the GUC (including
	 * "none" or anything unexpected) yields false.
	 */
	return dynamic_shared_memory_type == DSM_IMPL_POSIX;
}
#ifdef USE_DSM_POSIX
/*
* Operating system primitives to support POSIX shared memory.
*
* POSIX shared memory segments are created and attached using shm_open()
* and shm_unlink(); other operations, such as sizing or mapping the
* segment, are performed as if the shared memory segments were files.
*
* Indeed, on some platforms, they may be implemented that way. While
* POSIX shared memory segments seem intended to exist in a flat namespace,
* some operating systems may implement them as files, even going so far
* to treat a request for /xyz as a request to create a file by that name
* in the root directory. Users of such broken platforms should select
* a different shared memory implementation.
*/
static bool
dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
			   void **impl_private, void **mapped_address, uint64 *mapped_size,
			   int elevel)
{
	char		name[64];
	int			flags;
	int			fd;
	char	   *address;

	/* POSIX shm names live in a flat namespace; derive ours from the handle. */
	snprintf(name, 64, "/PostgreSQL.%u", handle);

	/* Handle teardown cases. */
	if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
	{
		/* Unmap first, if mapped; a failed munmap aborts the teardown. */
		if (*mapped_address != NULL
			&& munmap(*mapped_address, *mapped_size) != 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		*mapped_address = NULL;
		*mapped_size = 0;
		/* For DESTROY, also remove the name from the shm namespace. */
		if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not remove shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		return true;
	}

	/*
	 * Create new segment or open an existing one for attach or resize.
	 *
	 * Even though we're not going through fd.c, we should be safe against
	 * running out of file descriptors, because of NUM_RESERVED_FDS.  We're
	 * only opening one extra descriptor here, and we'll close it before
	 * returning.
	 */
	flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
	if ((fd = shm_open(name, flags, 0600)) == -1)
	{
		/*
		 * EEXIST arises only for DSM_OP_CREATE (O_EXCL); per the dsm_impl_op
		 * contract, a create-time name collision fails silently so the
		 * caller can retry with another handle.
		 */
		if (errno != EEXIST)
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not open shared memory segment \"%s\": %m",
							name)));
		return false;
	}

	/*
	 * If we're attaching the segment, determine the current size; if we are
	 * creating or resizing the segment, set the size to the requested value.
	 */
	if (op == DSM_OP_ATTACH)
	{
		struct stat st;

		if (fstat(fd, &st) != 0)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			close(fd);
			errno = save_errno;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not stat shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		request_size = st.st_size;
	}
	else if (*mapped_size != request_size && ftruncate(fd, request_size))
	{
		int			save_errno;

		/* Back out what's already been done. */
		save_errno = errno;
		close(fd);
		/* Only on CREATE do we own the name and may remove it on failure. */
		if (op == DSM_OP_CREATE)
			shm_unlink(name);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
						name, request_size)));
		return false;
	}

	/*
	 * If we're reattaching or resizing, we must remove any existing mapping,
	 * unless we've already got the right thing mapped.
	 */
	if (*mapped_address != NULL)
	{
		/* Correct size already mapped: nothing more to do. */
		if (*mapped_size == request_size)
			return true;
		if (munmap(*mapped_address, *mapped_size) != 0)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			close(fd);
			if (op == DSM_OP_CREATE)
				shm_unlink(name);
			errno = save_errno;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		*mapped_address = NULL;
		*mapped_size = 0;
	}

	/* Map it. */
	address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
				   MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
	if (address == MAP_FAILED)
	{
		int			save_errno;

		/* Back out what's already been done. */
		save_errno = errno;
		close(fd);
		if (op == DSM_OP_CREATE)
			shm_unlink(name);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not map shared memory segment \"%s\": %m",
						name)));
		return false;
	}
	*mapped_address = address;
	*mapped_size = request_size;
	/* The descriptor is only needed for the syscalls above; release it. */
	close(fd);

	return true;
}
#endif
#ifdef USE_DSM_SYSV
/*
* Operating system primitives to support System V shared memory.
*
* System V shared memory segments are manipulated using shmget(), shmat(),
* shmdt(), and shmctl(). There's no portable way to resize such
* segments. As the default allocation limits for System V shared memory
* are usually quite low, the POSIX facilities may be preferable; but
* those are not supported everywhere.
*/
static bool
dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
			  void **impl_private, void **mapped_address, uint64 *mapped_size,
			  int elevel)
{
	key_t		key;
	int			ident;
	char	   *address;
	char		name[64];
	int		   *ident_cache;

	/* Resize is not supported for System V shared memory. */
	if (op == DSM_OP_RESIZE)
	{
		elog(elevel, "System V shared memory segments cannot be resized");
		return false;
	}

	/* Since resize isn't supported, reattach is a no-op. */
	if (op == DSM_OP_ATTACH && *mapped_address != NULL)
		return true;

	/*
	 * POSIX shared memory and mmap-based shared memory identify segments
	 * with names.  To avoid needless error message variation, we use the
	 * handle as the name.
	 */
	snprintf(name, 64, "%u", handle);

	/*
	 * The System V shared memory namespace is very restricted; names are
	 * of type key_t, which is expected to be some sort of integer data type,
	 * but not necessarily the same one as dsm_handle.  Since we use
	 * dsm_handle to identify shared memory segments across processes, this
	 * might seem like a problem, but it's really not.  If dsm_handle is
	 * bigger than key_t, the cast below might truncate away some bits from
	 * the handle the user-provided, but it'll truncate exactly the same bits
	 * away in exactly the same fashion every time we use that handle, which
	 * is all that really matters.  Conversely, if dsm_handle is smaller than
	 * key_t, we won't use the full range of available key space, but that's
	 * no big deal either.
	 *
	 * We do make sure that the key isn't negative, because that might not
	 * be portable.
	 */
	key = (key_t) handle;
	if (key < 1)				/* avoid compiler warning if type is unsigned */
		key = -key;

	/*
	 * There's one special key, IPC_PRIVATE, which can't be used.  If we end
	 * up with that value by chance during a create operation, just pretend
	 * it already exists, so that caller will retry.  If we run into it
	 * anywhere else, the caller has passed a handle that doesn't correspond
	 * to anything we ever created, which should not happen.
	 */
	if (key == IPC_PRIVATE)
	{
		if (op != DSM_OP_CREATE)
			elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
		errno = EEXIST;
		return false;
	}

	/*
	 * Before we can do anything with a shared memory segment, we have to
	 * map the shared memory key to a shared memory identifier using shmget().
	 * To avoid repeated lookups, we store the key using impl_private.
	 */
	if (*impl_private != NULL)
	{
		/* Identifier already looked up earlier in this backend. */
		ident_cache = *impl_private;
		ident = *ident_cache;
	}
	else
	{
		int			flags = IPCProtection;
		size_t		segsize;

		/*
		 * Allocate the memory BEFORE acquiring the resource, so that we don't
		 * leak the resource if memory allocation fails.
		 */
		ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));

		/*
		 * When using shmget to find an existing segment, we must pass the
		 * size as 0.  Passing a non-zero size which is greater than the
		 * actual size will result in EINVAL.
		 */
		segsize = 0;

		if (op == DSM_OP_CREATE)
		{
			flags |= IPC_CREAT | IPC_EXCL;
			segsize = request_size;
		}

		if ((ident = shmget(key, segsize, flags)) == -1)
		{
			/* EEXIST (create collision) fails silently, per convention. */
			if (errno != EEXIST)
			{
				int			save_errno = errno;

				/* Don't leak the cache allocation on failure. */
				pfree(ident_cache);
				errno = save_errno;
				ereport(elevel,
						(errcode_for_dynamic_shared_memory(),
						 errmsg("could not get shared memory segment: %m")));
			}
			return false;
		}

		*ident_cache = ident;
		*impl_private = ident_cache;
	}

	/* Handle teardown cases. */
	if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
	{
		/* Drop the cached identifier; 'ident' itself remains usable below. */
		pfree(ident_cache);
		*impl_private = NULL;
		if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		*mapped_address = NULL;
		*mapped_size = 0;
		if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not remove shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		return true;
	}

	/* If we're attaching it, we must use IPC_STAT to determine the size. */
	if (op == DSM_OP_ATTACH)
	{
		struct shmid_ds shm;

		if (shmctl(ident, IPC_STAT, &shm) != 0)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			/*
			 * NOTE(review): op is known to be DSM_OP_ATTACH inside this
			 * branch, so this IPC_RMID call can never execute; it appears
			 * to be copied from the later shmat() error path.
			 */
			if (op == DSM_OP_CREATE)
				shmctl(ident, IPC_RMID, NULL);
			errno = save_errno;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not stat shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		request_size = shm.shm_segsz;
	}

	/* Map it. */
	address = shmat(ident, NULL, PG_SHMAT_FLAGS);
	if (address == (void *) -1)
	{
		int			save_errno;

		/* Back out what's already been done. */
		save_errno = errno;
		/* A just-created but unmappable segment must be removed again. */
		if (op == DSM_OP_CREATE)
			shmctl(ident, IPC_RMID, NULL);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not map shared memory segment \"%s\": %m",
						name)));
		return false;
	}
	*mapped_address = address;
	*mapped_size = request_size;

	return true;
}
#endif
#ifdef USE_DSM_WINDOWS
/*
* Operating system primitives to support Windows shared memory.
*
* Windows shared memory implementation is done using file mapping
* which can be backed by either physical file or system paging file.
* Current implementation uses system paging file as other effects
* like performance are not clear for physical file and it is used in similar
* way for main shared memory in windows.
*
* A memory mapping object is a kernel object - they always get deleted when
* the last reference to them goes away, either explicitly via a CloseHandle or
* when the process containing the reference exits.
*/
static bool
dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
				 void **impl_private, void **mapped_address,
				 uint64 *mapped_size, int elevel)
{
	char	   *address;
	HANDLE		hmap;
	char		name[64];
	MEMORY_BASIC_INFORMATION info;

	/* Resize is not supported for Windows shared memory. */
	if (op == DSM_OP_RESIZE)
	{
		elog(elevel, "Windows shared memory segments cannot be resized");
		return false;
	}

	/* Since resize isn't supported, reattach is a no-op. */
	if (op == DSM_OP_ATTACH && *mapped_address != NULL)
		return true;

	/*
	 * Storing the shared memory segment in the Global\ namespace, can
	 * allow any process running in any session to access that file
	 * mapping object provided that the caller has the required access rights.
	 * But to avoid issues faced in main shared memory, we are using the naming
	 * convention similar to main shared memory.  We can change here once
	 * issue mentioned in GetSharedMemName is resolved.
	 */
	snprintf(name, 64, "Global/PostgreSQL.%u", handle);

	/*
	 * Handle teardown cases.  Since Windows automatically destroys the object
	 * when no references remain, we can treat it the same as detach.
	 */
	if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
	{
		if (*mapped_address != NULL
			&& UnmapViewOfFile(*mapped_address) == 0)
		{
			_dosmaperr(GetLastError());
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		/* impl_private holds the mapping-object handle; close it. */
		if (*impl_private != NULL
			&& CloseHandle(*impl_private) == 0)
		{
			_dosmaperr(GetLastError());
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not remove shared memory segment \"%s\": %m",
							name)));
			return false;
		}

		*impl_private = NULL;
		*mapped_address = NULL;
		*mapped_size = 0;
		return true;
	}

	/* Create new segment or open an existing one for attach. */
	if (op == DSM_OP_CREATE)
	{
		DWORD		size_high = (DWORD) (request_size >> 32);
		DWORD		size_low = (DWORD) request_size;

		hmap = CreateFileMapping(INVALID_HANDLE_VALUE,	/* Use the pagefile */
								 NULL,	/* Default security attrs */
								 PAGE_READWRITE,		/* Memory is read/write */
								 size_high,		/* Upper 32 bits of size */
								 size_low,		/* Lower 32 bits of size */
								 name);

		/*
		 * NOTE(review): _dosmaperr runs unconditionally here, so errno is
		 * set from GetLastError() even when CreateFileMapping succeeded —
		 * presumably relying on ERROR_ALREADY_EXISTS being reported for a
		 * pre-existing object; verify errno is sane on a clean success.
		 */
		_dosmaperr(GetLastError());
		if (errno == EEXIST)
		{
			/*
			 * On Windows, when the segment already exists, a handle for the
			 * existing segment is returned.  We must close it before
			 * returning.  We don't do _dosmaperr here, so errno won't be
			 * modified.
			 */
			CloseHandle(hmap);
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not open shared memory segment \"%s\": %m",
							name)));
			return false;
		}
	}
	else
	{
		hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
							   FALSE,	/* do not inherit the name */
							   name);	/* name of mapping object */
		_dosmaperr(GetLastError());
	}

	/* NULL handle means create/open failed; errno was set above. */
	if (!hmap)
	{
		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not open shared memory segment \"%s\": %m",
						name)));
		return false;
	}

	/* Map it. */
	address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
							0, 0, 0);
	if (!address)
	{
		int			save_errno;

		_dosmaperr(GetLastError());
		/* Back out what's already been done. */
		save_errno = errno;
		CloseHandle(hmap);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not map shared memory segment \"%s\": %m",
						name)));
		return false;
	}

	/*
	 * VirtualQuery gives size in page_size units, which is 4K for Windows.
	 * We need size only when we are attaching, but it's better to get the
	 * size when creating new segment to keep size consistent both for
	 * DSM_OP_CREATE and DSM_OP_ATTACH.
	 */
	if (VirtualQuery(address, &info, sizeof(info)) == 0)
	{
		int			save_errno;

		_dosmaperr(GetLastError());
		/* Back out what's already been done. */
		save_errno = errno;
		UnmapViewOfFile(address);
		CloseHandle(hmap);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not stat shared memory segment \"%s\": %m",
						name)));
		return false;
	}

	*mapped_address = address;
	*mapped_size = info.RegionSize;
	/* Keep the kernel handle alive for the lifetime of the mapping. */
	*impl_private = hmap;

	return true;
}
#endif
#ifdef USE_DSM_MMAP
/*
* Operating system primitives to support mmap-based shared memory.
*
* Calling this "shared memory" is somewhat of a misnomer, because what
* we're really doing is creating a bunch of files and mapping them into
* our address space. The operating system may feel obliged to
* synchronize the contents to disk even if nothing is being paged out,
* which will not serve us well. The user can relocate the pg_dynshmem
* directory to a ramdisk to avoid this problem, if available.
*/
static bool
dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
			  void **impl_private, void **mapped_address, uint64 *mapped_size,
			  int elevel)
{
	char		name[64];
	int			flags;
	int			fd;
	char	   *address;

	/* Backing store is an ordinary file under PG_DYNSHMEM_DIR. */
	snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
			 handle);

	/* Handle teardown cases. */
	if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
	{
		if (*mapped_address != NULL
			&& munmap(*mapped_address, *mapped_size) != 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		*mapped_address = NULL;
		*mapped_size = 0;
		/* For DESTROY, also remove the backing file. */
		if (op == DSM_OP_DESTROY && unlink(name) != 0)
		{
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not remove shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		return true;
	}

	/* Create new segment or open an existing one for attach or resize. */
	flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
	if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
	{
		/* Create-time name collision (EEXIST) fails silently, by contract. */
		if (errno != EEXIST)
			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not open shared memory segment \"%s\": %m",
							name)));
		return false;
	}

	/*
	 * If we're attaching the segment, determine the current size; if we are
	 * creating or resizing the segment, set the size to the requested value.
	 */
	if (op == DSM_OP_ATTACH)
	{
		struct stat st;

		if (fstat(fd, &st) != 0)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			CloseTransientFile(fd);
			errno = save_errno;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not stat shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		request_size = st.st_size;
	}
	else if (*mapped_size > request_size && ftruncate(fd, request_size))
	{
		int			save_errno;

		/*
		 * Back out what's already been done.  The fd came from
		 * OpenTransientFile and the backing store is a plain file, so we
		 * must use CloseTransientFile/unlink here, not close/shm_unlink
		 * (which belong to the POSIX shm implementation).
		 */
		save_errno = errno;
		CloseTransientFile(fd);
		if (op == DSM_OP_CREATE)
			unlink(name);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
						name, request_size)));
		return false;
	}
	else if (*mapped_size < request_size)
	{
		/*
		 * Allocate a buffer full of zeros.
		 *
		 * Note: palloc zbuffer, instead of just using a local char array,
		 * to ensure it is reasonably well-aligned; this may save a few
		 * cycles transferring data to the kernel.
		 */
		char	   *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
		uint64		remaining = request_size;	/* uint64: don't truncate >4GB */
		bool		success = true;

		/*
		 * Zero-fill the file.  We have to do this the hard way to ensure
		 * that all the file space has really been allocated, so that we
		 * don't later seg fault when accessing the memory mapping.  This
		 * is pretty pessimal.
		 */
		while (success && remaining > 0)
		{
			uint64		goal = remaining;

			if (goal > ZBUFFER_SIZE)
				goal = ZBUFFER_SIZE;
			if (write(fd, zbuffer, goal) == goal)
				remaining -= goal;
			else
				success = false;
		}

		if (!success)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			CloseTransientFile(fd);
			if (op == DSM_OP_CREATE)
				unlink(name);
			/* A short write with no errno most likely means out of space. */
			errno = save_errno ? save_errno : ENOSPC;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
							name, request_size)));
			return false;
		}
	}

	/*
	 * If we're reattaching or resizing, we must remove any existing mapping,
	 * unless we've already got the right thing mapped.
	 */
	if (*mapped_address != NULL)
	{
		if (*mapped_size == request_size)
			return true;
		if (munmap(*mapped_address, *mapped_size) != 0)
		{
			int			save_errno;

			/* Back out what's already been done. */
			save_errno = errno;
			CloseTransientFile(fd);
			if (op == DSM_OP_CREATE)
				unlink(name);
			errno = save_errno;

			ereport(elevel,
					(errcode_for_dynamic_shared_memory(),
					 errmsg("could not unmap shared memory segment \"%s\": %m",
							name)));
			return false;
		}
		*mapped_address = NULL;
		*mapped_size = 0;
	}

	/* Map it. */
	address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
				   MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
	if (address == MAP_FAILED)
	{
		int			save_errno;

		/* Back out what's already been done. */
		save_errno = errno;
		CloseTransientFile(fd);
		if (op == DSM_OP_CREATE)
			unlink(name);
		errno = save_errno;

		ereport(elevel,
				(errcode_for_dynamic_shared_memory(),
				 errmsg("could not map shared memory segment \"%s\": %m",
						name)));
		return false;
	}
	*mapped_address = address;
	*mapped_size = request_size;
	/* The descriptor is only needed while setting up the mapping. */
	CloseTransientFile(fd);

	return true;
}
#endif
/*
 * Choose an appropriate SQLSTATE error code for a dynamic shared memory
 * failure, based on the errno left behind by the failed operation.
 *
 * EFBIG and ENOMEM mean we ran into a size or memory limit, so report
 * those as out-of-memory; anything else is treated as an ordinary
 * file-access error.
 */
static int
errcode_for_dynamic_shared_memory(void)
{
	if (errno == EFBIG || errno == ENOMEM)
		return errcode(ERRCODE_OUT_OF_MEMORY);
	else
		return errcode_for_file_access();
}
......@@ -30,6 +30,7 @@
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
......@@ -249,6 +250,10 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
ShmemBackendArrayAllocation();
#endif
/* Initialize dynamic shared memory facilities. */
if (!IsUnderPostmaster)
dsm_postmaster_startup();
/*
* Now give loadable modules a chance to set up their shmem allocations
*/
......
......@@ -61,6 +61,7 @@
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/dsm_impl.h"
#include "storage/standby.h"
#include "storage/fd.h"
#include "storage/proc.h"
......@@ -385,6 +386,7 @@ static const struct config_enum_entry synchronous_commit_options[] = {
*/
extern const struct config_enum_entry wal_level_options[];
extern const struct config_enum_entry sync_method_options[];
extern const struct config_enum_entry dynamic_shared_memory_options[];
/*
* GUC option variables that are exported from this module
......@@ -3335,6 +3337,16 @@ static struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
{
{"dynamic_shared_memory_type", PGC_POSTMASTER, RESOURCES_MEM,
gettext_noop("Selects the dynamic shared memory implementation used."),
NULL
},
&dynamic_shared_memory_type,
DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE, dynamic_shared_memory_options,
NULL, NULL, NULL
},
{
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
gettext_noop("Selects the method used for forcing WAL updates to disk."),
......
......@@ -123,6 +123,13 @@
#work_mem = 1MB # min 64kB
#maintenance_work_mem = 16MB # min 1MB
#max_stack_depth = 2MB # min 100kB
#dynamic_shared_memory_type = posix # the default is the first option
# supported by the operating system:
# posix
# sysv
# windows
# mmap
# use none to disable dynamic shared memory
# - Disk -
......
......@@ -98,6 +98,11 @@ typedef struct ResourceOwnerData
int nfiles; /* number of owned temporary files */
File *files; /* dynamically allocated array */
int maxfiles; /* currently allocated array size */
/* We have built-in support for remembering dynamic shmem segments */
int ndsms; /* number of owned shmem segments */
dsm_segment **dsms; /* dynamically allocated array */
int maxdsms; /* currently allocated array size */
} ResourceOwnerData;
......@@ -132,6 +137,7 @@ static void PrintPlanCacheLeakWarning(CachedPlan *plan);
static void PrintTupleDescLeakWarning(TupleDesc tupdesc);
static void PrintSnapshotLeakWarning(Snapshot snapshot);
static void PrintFileLeakWarning(File file);
static void PrintDSMLeakWarning(dsm_segment *seg);
/*****************************************************************************
......@@ -271,6 +277,21 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
PrintRelCacheLeakWarning(owner->relrefs[owner->nrelrefs - 1]);
RelationClose(owner->relrefs[owner->nrelrefs - 1]);
}
/*
* Release dynamic shared memory segments. Note that dsm_detach()
* will remove the segment from my list, so I just have to iterate
* until there are none.
*
* As in the preceding cases, warn if there are any leftovers at commit
* time.
*/
while (owner->ndsms > 0)
{
if (isCommit)
PrintDSMLeakWarning(owner->dsms[owner->ndsms - 1]);
dsm_detach(owner->dsms[owner->ndsms - 1]);
}
}
else if (phase == RESOURCE_RELEASE_LOCKS)
{
......@@ -402,6 +423,7 @@ ResourceOwnerDelete(ResourceOwner owner)
Assert(owner->ncatrefs == 0);
Assert(owner->ncatlistrefs == 0);
Assert(owner->nrelrefs == 0);
Assert(owner->ndsms == 0);
Assert(owner->nplanrefs == 0);
Assert(owner->ntupdescs == 0);
Assert(owner->nsnapshots == 0);
......@@ -438,6 +460,8 @@ ResourceOwnerDelete(ResourceOwner owner)
pfree(owner->snapshots);
if (owner->files)
pfree(owner->files);
if (owner->dsms)
pfree(owner->dsms);
pfree(owner);
}
......@@ -1230,3 +1254,88 @@ PrintFileLeakWarning(File file)
"temporary file leak: File %d still referenced",
file);
}
/*
 * Make sure there is room for at least one more entry in a ResourceOwner's
 * dynamic shmem segment reference array.
 *
 * This is separate from actually inserting an entry because if we run out
 * of memory, it's critical to do so *before* acquiring the resource.
 */
void
ResourceOwnerEnlargeDSMs(ResourceOwner owner)
{
	int			newmax;

	/* Fast path: the existing array still has a free slot. */
	if (owner->ndsms < owner->maxdsms)
		return;

	if (owner->dsms != NULL)
	{
		/* Grow the existing array by doubling its capacity. */
		newmax = owner->maxdsms * 2;
		owner->dsms = (dsm_segment **)
			repalloc(owner->dsms, newmax * sizeof(dsm_segment *));
	}
	else
	{
		/* First use: allocate a modest initial array in TopMemoryContext. */
		newmax = 16;
		owner->dsms = (dsm_segment **)
			MemoryContextAlloc(TopMemoryContext,
							   newmax * sizeof(dsm_segment *));
	}
	owner->maxdsms = newmax;
}
/*
 * Remember that a dynamic shmem segment is owned by a ResourceOwner
 *
 * Caller must have previously done ResourceOwnerEnlargeDSMs()
 */
void
ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
{
	/* Enlarge was called first, so a free slot is guaranteed. */
	Assert(owner->ndsms < owner->maxdsms);
	owner->dsms[owner->ndsms++] = seg;
}
/*
 * Forget that a dynamic shmem segment is owned by a ResourceOwner
 */
void
ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
{
	dsm_segment **dsms = owner->dsms;
	int			lastidx = owner->ndsms - 1;
	int			i;

	/* Scan backwards: the segment sought is usually the newest entry. */
	for (i = lastidx; i >= 0; i--)
	{
		if (dsms[i] != seg)
			continue;
		/* Found it; close the gap by shifting later entries down one. */
		for (; i < lastidx; i++)
			dsms[i] = dsms[i + 1];
		owner->ndsms = lastidx;
		return;
	}
	elog(ERROR,
		 "dynamic shared memory segment %u is not owned by resource owner %s",
		 dsm_segment_handle(seg), owner->name);
}
/*
 * Debugging subroutine: warn about a dynamic shared memory segment that is
 * still referenced when its owning ResourceOwner is released at commit.
 */
static void
PrintDSMLeakWarning(dsm_segment *seg)
{
	elog(WARNING,
		 "dynamic shared memory leak: segment %u still referenced",
		 dsm_segment_handle(seg));
}
......@@ -182,6 +182,7 @@ const char *subdirs[] = {
"pg_xlog",
"pg_xlog/archive_status",
"pg_clog",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
......
......@@ -424,6 +424,9 @@
/* Define to 1 if you have the `setsid' function. */
#undef HAVE_SETSID
/* Define to 1 if you have the `shm_open' function. */
#undef HAVE_SHM_OPEN
/* Define to 1 if you have the `sigprocmask' function. */
#undef HAVE_SIGPROCMASK
......
/*-------------------------------------------------------------------------
*
* mem.h
* portability definitions for various memory operations
*
* Copyright (c) 2001-2013, PostgreSQL Global Development Group
*
* src/include/portability/mem.h
*
*-------------------------------------------------------------------------
*/
#ifndef MEM_H
#define MEM_H
#define IPCProtection (0600) /* access/modify by user only */
#ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
#define PG_SHMAT_FLAGS SHM_SHARE_MMU
#else
#define PG_SHMAT_FLAGS 0
#endif
/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
#ifndef MAP_HASSEMAPHORE
#define MAP_HASSEMAPHORE 0
#endif
/* Standard flag set for anonymous shared memory mappings via mmap(). */
#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
/* Some really old systems don't define MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *) -1)
#endif
#endif /* MEM_H */
/*-------------------------------------------------------------------------
*
* dsm.h
* manage dynamic shared memory segments
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/dsm.h
*
*-------------------------------------------------------------------------
*/
#ifndef DSM_H
#define DSM_H
#include "storage/dsm_impl.h"
/* Opaque handle for a dynamic shared memory segment mapping. */
typedef struct dsm_segment dsm_segment;
/* Initialization function. */
extern void dsm_postmaster_startup(void);
/* Functions that create, update, or remove mappings. */
/* dsm_create: make a new segment of the given size — presumably returns it mapped; see dsm.c */
extern dsm_segment *dsm_create(uint64 size);
/* dsm_attach: map an existing segment identified by its handle */
extern dsm_segment *dsm_attach(dsm_handle h);
/* dsm_resize / dsm_remap: change or refresh a mapping; return the (possibly new) mapped address */
extern void *dsm_resize(dsm_segment *seg, uint64 size);
extern void *dsm_remap(dsm_segment *seg);
extern void dsm_detach(dsm_segment *seg);
/* Resource management functions. */
extern void dsm_keep_mapping(dsm_segment *seg);
extern dsm_segment *dsm_find_mapping(dsm_handle h);
/* Informational functions. */
extern void *dsm_segment_address(dsm_segment *seg);
extern uint64 dsm_segment_map_length(dsm_segment *seg);
extern dsm_handle dsm_segment_handle(dsm_segment *seg);
#endif /* DSM_H */
/*-------------------------------------------------------------------------
*
* dsm_impl.h
* low-level dynamic shared memory primitives
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/dsm_impl.h
*
*-------------------------------------------------------------------------
*/
#ifndef DSM_IMPL_H
#define DSM_IMPL_H
/* Dynamic shared memory implementations. */
#define DSM_IMPL_NONE 0
#define DSM_IMPL_POSIX 1
#define DSM_IMPL_SYSV 2
#define DSM_IMPL_WINDOWS 3
#define DSM_IMPL_MMAP 4
/*
 * Determine which dynamic shared memory implementations will be supported
 * on this platform, and which one will be the default.
 */
#ifdef WIN32
#define USE_DSM_WINDOWS
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_WINDOWS
#else
#ifdef HAVE_SHM_OPEN
#define USE_DSM_POSIX
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_POSIX
#endif
/* System V shared memory is assumed available on all non-Windows platforms. */
#define USE_DSM_SYSV
#ifndef DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_SYSV
#endif
#define USE_DSM_MMAP
#endif
/* GUC. */
extern int dynamic_shared_memory_type;
/*
 * Directory for on-disk state.
 *
 * This is used by all implementations for crash recovery and by the mmap
 * implementation for storage.
 */
#define PG_DYNSHMEM_DIR "pg_dynshmem"
#define PG_DYNSHMEM_MMAP_FILE_PREFIX "mmap."
/* A "name" for a dynamic shared memory segment. */
typedef uint32 dsm_handle;
/* All the shared-memory operations we know about. */
typedef enum
{
	DSM_OP_CREATE,
	DSM_OP_ATTACH,
	DSM_OP_DETACH,
	DSM_OP_RESIZE,
	DSM_OP_DESTROY
} dsm_op;
/* Create, attach to, detach from, resize, or destroy a segment. */
extern bool dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
		   void **impl_private, void **mapped_address, uint64 *mapped_size,
		   int elevel);
/* Some implementations cannot resize segments.  Can this one? */
extern bool dsm_impl_can_resize(void);
#endif /* DSM_IMPL_H */
......@@ -80,6 +80,7 @@ typedef enum LWLockId
OldSerXidLock,
SyncRepLock,
BackgroundWorkerLock,
DynamicSharedMemoryControlLock,
/* Individual lock IDs end here */
FirstBufMappingLock,
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
......
......@@ -16,6 +16,7 @@
#ifndef RESOWNER_PRIVATE_H
#define RESOWNER_PRIVATE_H
#include "storage/dsm.h"
#include "storage/fd.h"
#include "storage/lock.h"
#include "utils/catcache.h"
......@@ -80,4 +81,11 @@ extern void ResourceOwnerRememberFile(ResourceOwner owner,
extern void ResourceOwnerForgetFile(ResourceOwner owner,
File file);
/* support for dynamic shared memory management */
extern void ResourceOwnerEnlargeDSMs(ResourceOwner owner);
extern void ResourceOwnerRememberDSM(ResourceOwner owner,
dsm_segment *);
extern void ResourceOwnerForgetDSM(ResourceOwner owner,
dsm_segment *);
#endif /* RESOWNER_PRIVATE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment