Commit 11a65eed authored by Robert Haas

Get rid of the dynamic shared memory state file.

Instead of storing the ID of the dynamic shared memory control
segment in a file within the data directory, store it in the main
control segment.  This avoids a number of nasty corner cases,
most seriously that doing an online backup and then using it on
the same machine (e.g. to fire up a standby) would result in the
standby clobbering all of the master's dynamic shared memory
segments.

Per complaints from Heikki Linnakangas, Fujii Masao, and Tom
Lane.
parent 0886fc6a
......@@ -30,6 +30,7 @@
#include "miscadmin.h"
#include "portability/mem.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "utils/guc.h"
......@@ -421,7 +422,8 @@ CreateAnonymousSegment(Size *size)
* zero will be passed.
*/
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port)
PGSharedMemoryCreate(Size size, bool makePrivate, int port,
PGShmemHeader **shim)
{
IpcMemoryKey NextShmemSegID;
void *memAddress;
......@@ -509,10 +511,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
/*
* The segment appears to be from a dead Postgres process, or from a
* previous cycle of life in this same process. Zap it, if possible.
* previous cycle of life in this same process. Zap it, if possible,
* and any associated dynamic shared memory segments, as well.
* This probably shouldn't fail, but if it does, assume the segment
* belongs to someone else after all, and continue quietly.
*/
if (hdr->dsm_control != 0)
dsm_cleanup_using_control_segment(hdr->dsm_control);
shmdt(memAddress);
if (shmctl(shmid, IPC_RMID, NULL) < 0)
continue;
......@@ -539,6 +544,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
hdr->dsm_control = 0;
/* Fill in the data directory ID info, too */
if (stat(DataDir, &statbuf) < 0)
......@@ -554,6 +560,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
*/
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
*shim = hdr;
/* Save info for possible future use */
UsedShmemSegAddr = memAddress;
......@@ -608,6 +615,7 @@ PGSharedMemoryReAttach(void)
if (hdr != origUsedShmemSegAddr)
elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
hdr, origUsedShmemSegAddr);
dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
UsedShmemSegAddr = hdr; /* probably redundant */
}
......
......@@ -117,7 +117,8 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
*
*/
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port)
PGSharedMemoryCreate(Size size, bool makePrivate, int port,
PGShmemHeader **shim)
{
void *memAddress;
PGShmemHeader *hdr;
......@@ -245,12 +246,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
*/
hdr->totalsize = size;
hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
hdr->dsm_control = 0;
/* Save info for possible future use */
UsedShmemSegAddr = memAddress;
UsedShmemSegSize = size;
UsedShmemSegID = hmap2;
*shim = NULL;
return hdr;
}
......@@ -289,6 +292,7 @@ PGSharedMemoryReAttach(void)
hdr, origUsedShmemSegAddr);
if (hdr->magic != PGShmemMagic)
elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
dsm_set_control_handle(hdr->dsm_control);
UsedShmemSegAddr = hdr; /* probably redundant */
}
......
......@@ -39,13 +39,11 @@
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner_private.h"
#define PG_DYNSHMEM_STATE_FILE PG_DYNSHMEM_DIR "/state"
#define PG_DYNSHMEM_NEW_STATE_FILE PG_DYNSHMEM_DIR "/state.new"
#define PG_DYNSHMEM_STATE_BUFSIZ 512
#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
/*
......@@ -95,10 +93,7 @@ typedef struct dsm_control_header
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;
static void dsm_cleanup_using_control_segment(void);
static void dsm_cleanup_for_mmap(void);
static bool dsm_read_state_file(dsm_handle *h);
static void dsm_write_state_file(dsm_handle h);
static void dsm_postmaster_shutdown(int code, Datum arg);
static dsm_segment *dsm_create_descriptor(void);
static bool dsm_control_segment_sane(dsm_control_header *control,
......@@ -146,7 +141,7 @@ static void *dsm_control_impl_private = NULL;
* startup time.
*/
void
dsm_postmaster_startup(void)
dsm_postmaster_startup(PGShmemHeader *shim)
{
void *dsm_control_address = NULL;
uint32 maxitems;
......@@ -159,26 +154,13 @@ dsm_postmaster_startup(void)
return;
/*
* Check for, and remove, shared memory segments left behind by a dead
* postmaster. This isn't necessary on Windows, which always removes them
* when the last reference is gone.
* If we're using the mmap implementations, clean up any leftovers.
* Cleanup isn't needed on Windows, and happens earlier in startup for
* POSIX and System V shared memory, via a direct call to
* dsm_cleanup_using_control_segment.
*/
switch (dynamic_shared_memory_type)
{
case DSM_IMPL_POSIX:
case DSM_IMPL_SYSV:
dsm_cleanup_using_control_segment();
break;
case DSM_IMPL_MMAP:
if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
dsm_cleanup_for_mmap();
break;
case DSM_IMPL_WINDOWS:
/* Nothing to do. */
break;
default:
elog(ERROR, "unknown dynamic shared memory type: %d",
dynamic_shared_memory_type);
}
/* Determine size for new control segment. */
maxitems = PG_DYNSHMEM_FIXED_SLOTS
......@@ -187,23 +169,30 @@ dsm_postmaster_startup(void)
maxitems);
segsize = dsm_control_bytes_needed(maxitems);
/* Loop until we find an unused identifier for the new control segment. */
/*
* Loop until we find an unused identifier for the new control segment.
* We sometimes use 0 as a sentinel value indicating that no control
* segment is known to exist, so avoid using that value for a real
* control segment.
*/
for (;;)
{
Assert(dsm_control_address == NULL);
Assert(dsm_control_mapped_size == 0);
dsm_control_handle = random();
if (dsm_control_handle == 0)
continue;
if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
&dsm_control_impl_private, &dsm_control_address,
&dsm_control_mapped_size, ERROR))
break;
}
dsm_control = dsm_control_address;
on_shmem_exit(dsm_postmaster_shutdown, 0);
on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
elog(DEBUG2,
"created dynamic shared memory control segment %u (%zu bytes)",
dsm_control_handle, segsize);
dsm_write_state_file(dsm_control_handle);
shim->dsm_control = dsm_control_handle;
/* Initialize control segment. */
dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
......@@ -216,8 +205,8 @@ dsm_postmaster_startup(void)
* invocation still exists. If so, remove the dynamic shared memory
* segments to which it refers, and then the control segment itself.
*/
static void
dsm_cleanup_using_control_segment(void)
void
dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
{
void *mapped_address = NULL;
void *junk_mapped_address = NULL;
......@@ -227,14 +216,10 @@ dsm_cleanup_using_control_segment(void)
Size junk_mapped_size = 0;
uint32 nitems;
uint32 i;
dsm_handle old_control_handle;
dsm_control_header *old_control;
/*
* Read the state file. If it doesn't exist or is empty, there's nothing
* more to do.
*/
if (!dsm_read_state_file(&old_control_handle))
/* If dynamic shared memory is disabled, there's nothing to do. */
if (dynamic_shared_memory_type == DSM_IMPL_NONE)
return;
/*
......@@ -346,111 +331,6 @@ dsm_cleanup_for_mmap(void)
FreeDir(dir);
}
/*
 * Read and parse the state file.
 *
 * On success, store the handle of the old control segment in *h and return
 * true.  Return false if the state file does not exist, or if it is empty
 * or its contents are garbled.  The latter probably means that the operating
 * system rebooted before the data written by the previous postmaster made it
 * to disk.  In that case, we can just ignore it; any shared memory from
 * before the reboot should be gone anyway.
 *
 * Failure to open (other than ENOENT) or read the file is reported via
 * ereport(ERROR).
 */
static bool
dsm_read_state_file(dsm_handle *h)
{
	int			statefd;
	char		statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
	int			nbytes;
	char	   *endptr;
	char	   *s;
	unsigned long value;
	dsm_handle	handle;

	/* Read the state file to get the ID of the old control segment. */
	statefd = BasicOpenFile(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
	if (statefd < 0)
	{
		if (errno == ENOENT)
			return false;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m",
						PG_DYNSHMEM_STATE_FILE)));
	}

	nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
	if (nbytes < 0)
	{
		/* close() may overwrite errno, which %m below depends on */
		int			save_errno = errno;

		close(statefd);
		errno = save_errno;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read file \"%s\": %m",
						PG_DYNSHMEM_STATE_FILE)));
	}

	/* make sure buffer is NUL terminated */
	statebuf[nbytes] = '\0';
	close(statefd);

	/*
	 * We expect to find the handle of the old control segment here, on a
	 * line by itself.
	 */
	errno = 0;
	value = strtoul(statebuf, &endptr, 10);

	/*
	 * If no digits were consumed (for instance, the file is empty), or the
	 * number is out of range, the contents are garbled.  Without this check
	 * an empty file would be reported as a valid handle of zero.
	 */
	if (endptr == statebuf || errno == ERANGE)
		return false;

	/* Reject values that do not survive conversion to dsm_handle. */
	handle = (dsm_handle) value;
	if ((unsigned long) handle != value)
		return false;

	/* Only trailing blanks may follow the number on its line. */
	for (s = endptr; *s == ' ' || *s == '\t'; ++s)
		;
	if (*s != '\n' && *s != '\0')
		return false;

	/* Looks good. */
	*h = handle;
	return true;
}
/*
 * Write the control segment handle h to the state file, so that if the
 * postmaster is killed without running its on_shmem_exit hooks, the
 * next postmaster can clean things up after restart.
 *
 * The handle is written in decimal on a line by itself to a temporary file,
 * which is then renamed into place.  Any failure to create, write, close or
 * rename the file is reported via ereport(ERROR).
 */
static void
dsm_write_state_file(dsm_handle h)
{
	int			statefd;
	char		statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
	int			nbytes;

	/* Create or truncate the file. */
	statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
				   O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
	if (statefd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m",
						PG_DYNSHMEM_NEW_STATE_FILE)));

	/* Write contents: the handle we were asked to record, not a global. */
	snprintf(statebuf, sizeof(statebuf), "%u\n", h);
	nbytes = (int) strlen(statebuf);

	/* Clear errno so that a short write with no error can be detected. */
	errno = 0;
	if (write(statefd, statebuf, nbytes) != nbytes)
	{
		int			save_errno = errno;

		/* Don't leak the descriptor; close() may clobber errno. */
		close(statefd);

		/* if no error signalled, assume no space */
		errno = save_errno ? save_errno : ENOSPC;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write file \"%s\": %m",
						PG_DYNSHMEM_NEW_STATE_FILE)));
	}

	/* Close file; a deferred write error can surface here. */
	if (close(statefd) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m",
						PG_DYNSHMEM_NEW_STATE_FILE)));

	/*
	 * Atomically rename file into place, so that no one ever sees a partially
	 * written state file.
	 */
	if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not rename file \"%s\": %m",
						PG_DYNSHMEM_NEW_STATE_FILE)));
}
/*
* At shutdown time, we iterate over the control segment and remove all
* remaining dynamic shared memory segments. We avoid throwing errors here;
......@@ -466,6 +346,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
void *junk_mapped_address = NULL;
void *junk_impl_private = NULL;
Size junk_mapped_size = 0;
PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
/*
* If some other backend exited uncleanly, it might have corrupted the
......@@ -510,13 +391,7 @@ dsm_postmaster_shutdown(int code, Datum arg)
&dsm_control_impl_private, &dsm_control_address,
&dsm_control_mapped_size, LOG);
dsm_control = dsm_control_address;
/* And, finally, remove the state file. */
if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not unlink file \"%s\": %m",
PG_DYNSHMEM_STATE_FILE)));
shim->dsm_control = 0;
}
/*
......@@ -536,25 +411,18 @@ dsm_backend_startup(void)
#ifdef EXEC_BACKEND
{
dsm_handle control_handle;
void *control_address = NULL;
/* Read the control segment information from the state file. */
if (!dsm_read_state_file(&control_handle))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("could not parse dynamic shared memory state file")));
/* Attach control segment. */
dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
Assert(dsm_control_handle != 0);
dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
&dsm_control_impl_private, &control_address,
&dsm_control_mapped_size, ERROR);
dsm_control_handle = control_handle;
dsm_control = control_address;
/* If control segment doesn't look sane, something is badly wrong. */
if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
{
dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
&dsm_control_impl_private, &control_address,
&dsm_control_mapped_size, WARNING);
ereport(FATAL,
......@@ -567,6 +435,20 @@ dsm_backend_startup(void)
dsm_init_done = true;
}
#ifdef EXEC_BACKEND
/*
 * Record the handle of the dynamic shared memory control segment.
 *
 * When running under EXEC_BACKEND, we get a callback here when the main
 * shared memory segment is re-attached (PGSharedMemoryReAttach passes the
 * dsm_control value stored in the segment's PGShmemHeader), so that we can
 * record the control handle retrieved from it.  dsm_backend_startup later
 * uses dsm_control_handle to attach the control segment.
 *
 * h must be nonzero, since zero is used as a sentinel meaning that no
 * control segment is known to exist.  The handle must not already have been
 * set in this process; both conditions are checked by the assertion.
 */
void
dsm_set_control_handle(dsm_handle h)
{
/* Expect a first-time call with a real (nonzero) handle. */
Assert(dsm_control_handle == 0 && h != 0);
dsm_control_handle = h;
}
#endif
/*
* Create a new dynamic shared memory segment.
*/
......
......@@ -90,6 +90,8 @@ RequestAddinShmemSpace(Size size)
void
CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
{
PGShmemHeader *shim = NULL;
if (!IsUnderPostmaster)
{
PGShmemHeader *seghdr;
......@@ -149,7 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
/*
* Create the shmem segment
*/
seghdr = PGSharedMemoryCreate(size, makePrivate, port);
seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim);
InitShmemAccess(seghdr);
......@@ -254,7 +256,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
/* Initialize dynamic shared memory facilities. */
if (!IsUnderPostmaster)
dsm_postmaster_startup();
dsm_postmaster_startup(shim);
/*
* Now give loadable modules a chance to set up their shmem allocations
......
......@@ -18,10 +18,16 @@
typedef struct dsm_segment dsm_segment;
/* Startup and shutdown functions. */
extern void dsm_postmaster_startup(void);
struct PGShmemHeader; /* avoid including pg_shmem.h */
extern void dsm_cleanup_using_control_segment(dsm_handle old_control_handle);
extern void dsm_postmaster_startup(struct PGShmemHeader *);
extern void dsm_backend_shutdown(void);
extern void dsm_detach_all(void);
#ifdef EXEC_BACKEND
extern void dsm_set_control_handle(dsm_handle h);
#endif
/* Functions that create, update, or remove mappings. */
extern dsm_segment *dsm_create(Size size);
extern dsm_segment *dsm_attach(dsm_handle h);
......
......@@ -24,6 +24,8 @@
#ifndef PG_SHMEM_H
#define PG_SHMEM_H
#include "storage/dsm_impl.h"
typedef struct PGShmemHeader /* standard header for all Postgres shmem */
{
int32 magic; /* magic # to identify Postgres segments */
......@@ -31,6 +33,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
pid_t creatorPID; /* PID of creating process */
Size totalsize; /* total size of segment */
Size freeoffset; /* offset to first free space */
dsm_handle dsm_control; /* ID of dynamic shared memory control seg */
void *index; /* pointer to ShmemIndex table */
#ifndef WIN32 /* Windows doesn't have useful inode#s */
dev_t device; /* device data directory is on */
......@@ -61,7 +64,7 @@ extern void PGSharedMemoryReAttach(void);
#endif
extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
int port);
int port, PGShmemHeader **shim);
extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
extern void PGSharedMemoryDetach(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment