Commit b0fc0df9 authored by Robert Haas

Dramatically reduce System V shared memory consumption.

Except when compiling with EXEC_BACKEND, we'll now allocate only a tiny
amount of System V shared memory (as an interlock to protect the data
directory) and allocate the rest as anonymous shared memory via mmap.
This will hopefully spare most users the hassle of adjusting operating
system parameters before being able to start PostgreSQL with a
reasonable value for shared_buffers.

There are a bunch of documentation updates needed here, and we might
need to adjust some of the HINT messages related to shared memory as
well.  But it's not 100% clear how portable this is, so before we
write the documentation, let's give it a spin on the buildfarm and
see what turns red.
parent c5b3451a
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <unistd.h>
#include <sys/file.h> #include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h> #include <sys/stat.h>
#ifdef HAVE_SYS_IPC_H #ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h> #include <sys/ipc.h>
...@@ -43,9 +44,22 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ ...@@ -43,9 +44,22 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
#define PG_SHMAT_FLAGS 0 #define PG_SHMAT_FLAGS 0
#endif #endif
/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
#ifndef MAP_HASSEMAPHORE
#define MAP_HASSEMAPHORE 0
#endif
#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
unsigned long UsedShmemSegID = 0; unsigned long UsedShmemSegID = 0;
void *UsedShmemSegAddr = NULL; void *UsedShmemSegAddr = NULL;
static Size AnonymousShmemSize;
static PGShmemHeader *AnonymousShmem;
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
static void IpcMemoryDetach(int status, Datum shmaddr); static void IpcMemoryDetach(int status, Datum shmaddr);
...@@ -218,8 +232,13 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size) ...@@ -218,8 +232,13 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
static void static void
IpcMemoryDetach(int status, Datum shmaddr) IpcMemoryDetach(int status, Datum shmaddr)
{ {
/* Detach System V shared memory block. */
if (shmdt(DatumGetPointer(shmaddr)) < 0) if (shmdt(DatumGetPointer(shmaddr)) < 0)
elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr)); elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
/* Release anonymous shared memory block, if any. */
if (AnonymousShmem != NULL
&& munmap(AnonymousShmem, AnonymousShmemSize) < 0)
elog(LOG, "munmap(%p) failed: %m", AnonymousShmem);
} }
/****************************************************************************/ /****************************************************************************/
...@@ -357,10 +376,59 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) ...@@ -357,10 +376,59 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
PGShmemHeader *hdr; PGShmemHeader *hdr;
IpcMemoryId shmid; IpcMemoryId shmid;
struct stat statbuf; struct stat statbuf;
Size allocsize = size;
/* Room for a header? */ /* Room for a header? */
Assert(size > MAXALIGN(sizeof(PGShmemHeader))); Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
/*
* As of PostgreSQL 9.3, we normally allocate only a very small amount of
* System V shared memory, and only for the purposes of providing an
* interlock to protect the data directory. The real shared memory block
* is allocated using mmap(). This works around the problem that many
* systems have very low limits on the amount of System V shared memory
* that can be allocated. Even a limit of a few megabytes will be enough
* to run many copies of PostgreSQL without needing to adjust system
* settings.
*
* However, we disable this logic in the EXEC_BACKEND case, and fall back
* to the old method of allocating the entire segment using System V shared
* memory, because there's no way to attach an mmap'd segment to a process
* after exec(). Since EXEC_BACKEND is intended only for developer use,
* this shouldn't be a big problem.
*/
#ifndef EXEC_BACKEND
	{
		long		pagesize = sysconf(_SC_PAGE_SIZE);
		void	   *mapped;

		/*
		 * Round the request up to a whole number of pages.
		 *
		 * pagesize will, for practical purposes, always be a power of two.
		 * But just in case it isn't, we do it this way instead of using
		 * TYPEALIGN().
		 */
		AnonymousShmemSize = size;
		if (size % pagesize != 0)
			AnonymousShmemSize += pagesize - (size % pagesize);

		/*
		 * We assume that no one will attempt to run PostgreSQL 9.3 or later
		 * on systems that are ancient enough that anonymous shared memory is
		 * not supported, such as pre-2.4 versions of Linux.  If that turns out
		 * to be false, we might need to add a run-time test here and do this
		 * only if the running kernel supports it.
		 *
		 * Map the page-rounded size, so that the length requested here is the
		 * same AnonymousShmemSize that is reported in the error message and
		 * handed to munmap() when the block is released.
		 *
		 * Note that mmap() reports failure by returning MAP_FAILED, not NULL,
		 * so that is the value we must test for.  The result is kept in a
		 * local until it is known to be good, so AnonymousShmem is never set
		 * to MAP_FAILED.
		 */
		mapped = mmap(NULL, AnonymousShmemSize, PROT_READ | PROT_WRITE,
					  PG_MMAP_FLAGS, -1, 0);
		if (mapped == MAP_FAILED)
			ereport(FATAL,
					(errmsg("could not map %lu bytes of anonymous shared memory: %m",
							(unsigned long) AnonymousShmemSize)));
		AnonymousShmem = mapped;

		/* Now we can allocate a minimal SHM block. */
		allocsize = sizeof(PGShmemHeader);
	}
#endif
/* Make sure PGSharedMemoryAttach doesn't fail without need */ /* Make sure PGSharedMemoryAttach doesn't fail without need */
UsedShmemSegAddr = NULL; UsedShmemSegAddr = NULL;
...@@ -370,7 +438,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) ...@@ -370,7 +438,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
for (NextShmemSegID++;; NextShmemSegID++) for (NextShmemSegID++;; NextShmemSegID++)
{ {
/* Try to create new segment */ /* Try to create new segment */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize);
if (memAddress) if (memAddress)
break; /* successful create and attach */ break; /* successful create and attach */
...@@ -409,7 +477,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) ...@@ -409,7 +477,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
/* /*
* Now try again to create the segment. * Now try again to create the segment.
*/ */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, size); memAddress = InternalIpcMemoryCreate(NextShmemSegID, allocsize);
if (memAddress) if (memAddress)
break; /* successful create and attach */ break; /* successful create and attach */
...@@ -448,7 +516,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) ...@@ -448,7 +516,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
UsedShmemSegAddr = memAddress; UsedShmemSegAddr = memAddress;
UsedShmemSegID = (unsigned long) NextShmemSegID; UsedShmemSegID = (unsigned long) NextShmemSegID;
return hdr; /*
* If AnonymousShmem is NULL here, then we're not using anonymous shared
* memory, and should return a pointer to the System V shared memory block.
* Otherwise, the System V shared memory block is only a shim, and we must
* return a pointer to the real block.
*/
if (AnonymousShmem == NULL)
return hdr;
memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
return AnonymousShmem;
} }
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
...@@ -516,6 +594,11 @@ PGSharedMemoryDetach(void) ...@@ -516,6 +594,11 @@ PGSharedMemoryDetach(void)
elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr); elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
UsedShmemSegAddr = NULL; UsedShmemSegAddr = NULL;
} }
/*
 * Release anonymous shared memory block, if any.
 *
 * Clear the pointer afterwards, as is done for UsedShmemSegAddr after
 * shmdt(), so that a repeated detach call does not munmap() the same
 * address range a second time.
 */
if (AnonymousShmem != NULL)
{
	if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
		elog(LOG, "munmap(%p) failed: %m", AnonymousShmem);
	AnonymousShmem = NULL;
}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment