Commit 6ddd5137 authored by Robert Haas

Simple table of contents for a shared memory segment.

This interface is intended to make it simple to divide a dynamic shared
memory segment into different regions with distinct purposes.  It
therefore serves much the same purpose that ShmemIndex accomplishes for
the main shared memory segment, but it is intended to be more
lightweight.

Patch by me.  Review by Andres Freund.
parent 05ff5062
......@@ -16,6 +16,6 @@ endif
endif
OBJS = dsm_impl.o dsm.o ipc.o ipci.o pmsignal.o procarray.o procsignal.o \
-shmem.o shmqueue.o sinval.o sinvaladt.o standby.o
+shmem.o shmqueue.o shm_toc.o sinval.o sinvaladt.o standby.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* shm_toc.c
* shared memory segment table of contents
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/backend/storage/ipc/shm_toc.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/barrier.h"
#include "storage/shm_toc.h"
#include "storage/spin.h"
/*
 * One table-of-contents entry: associates a caller-chosen key with the
 * location of a data structure inside the segment.
 */
typedef struct shm_toc_entry
{
uint64 key; /* Arbitrary identifier chosen by the caller of shm_toc_insert() */
uint64 offset; /* Byte offset from the start of the shm_toc, not an absolute pointer */
} shm_toc_entry;
/*
 * Header placed at the very start of a managed segment.  The entry array
 * grows forward from the header, while shm_toc_allocate() hands out chunks
 * backwards from the end of the segment.
 */
struct shm_toc
{
uint64 toc_magic; /* Magic number for this TOC; compared by shm_toc_attach() */
slock_t toc_mutex; /* Spinlock for mutual exclusion */
Size toc_total_bytes; /* Bytes managed by this TOC, including this header */
Size toc_allocated_bytes; /* Bytes allocated of those managed */
Size toc_nentry; /* Number of entries in TOC */
/* Entries in insertion order; only the first toc_nentry are valid. */
shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
};
/*
 * Set up a table of contents at the start of a region of shared memory.
 *
 * 'address' is the start of the region and 'nbytes' is its total size, which
 * must be larger than the fixed-size TOC header.  The new TOC starts out with
 * no entries and no allocated chunks.  The return value is 'address' viewed
 * as a shm_toc.
 */
shm_toc *
shm_toc_create(uint64 magic, void *address, Size nbytes)
{
	shm_toc    *header;

	Assert(nbytes > offsetof(shm_toc, toc_entry));

	header = (shm_toc *) address;

	header->toc_magic = magic;
	SpinLockInit(&header->toc_mutex);

	/* Nothing has been inserted or allocated yet. */
	header->toc_nentry = 0;
	header->toc_allocated_bytes = 0;
	header->toc_total_bytes = nbytes;

	return header;
}
/*
 * Attach to an existing table of contents.
 *
 * 'address' must point at memory that was previously set up by
 * shm_toc_create().  Returns the TOC, or NULL if the magic number found at
 * the target address doesn't match 'magic'.
 */
shm_toc *
shm_toc_attach(uint64 magic, void *address)
{
	shm_toc    *toc = (shm_toc *) address;

	if (toc->toc_magic != magic)
		return NULL;

	/*
	 * Sanity-check the header.  shm_toc_create() requires the segment to be
	 * strictly larger than the fixed-size header, so insist on the same here.
	 */
	Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes);
	Assert(toc->toc_total_bytes > offsetof(shm_toc, toc_entry));

	return toc;
}
/*
 * Allocate shared memory from a segment managed by a table of contents.
 *
 * This is not a full-blown allocator; there's no way to free memory.  It's
 * just a way of dividing a single physical shared memory segment into logical
 * chunks that may be used for different purposes.
 *
 * We allocate backwards from the end of the segment, so that the TOC entries
 * can grow forward from the start of the segment.
 *
 * The request is rounded up with BUFFERALIGN.  Every chunk therefore starts
 * at the end of the segment minus a multiple of the buffer alignment; the
 * returned addresses are only as well aligned as that end address is.
 *
 * Returns a pointer to the new chunk.  Raises ERROR "out of shared memory"
 * if the (rounded) request does not fit in the remaining space.
 */
void *
shm_toc_allocate(shm_toc *toc, Size nbytes)
{
	volatile shm_toc *vtoc = toc;
	Size		request;
	Size		total_bytes;
	Size		allocated_bytes;
	Size		nentry;
	Size		toc_bytes;

	/*
	 * Make sure request is well-aligned.  Rounding up a request close to the
	 * top of the Size range wraps around to a small value; such a request can
	 * never be satisfied, so it is rejected below instead of being silently
	 * served with a chunk that is far too small.
	 */
	request = BUFFERALIGN(nbytes);

	SpinLockAcquire(&toc->toc_mutex);

	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/* Check for memory exhaustion and overflow. */
	if (request < nbytes ||
		toc_bytes + request > total_bytes ||
		toc_bytes + request < toc_bytes)
	{
		/* Never raise an error while holding a spinlock. */
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}
	vtoc->toc_allocated_bytes += request;

	SpinLockRelease(&toc->toc_mutex);

	/* The new chunk sits just below everything allocated before it. */
	return ((char *) toc) + (total_bytes - allocated_bytes - request);
}
/*
* Return the number of bytes that can still be allocated.
*/
extern Size
shm_toc_freespace(shm_toc *toc)
{
volatile shm_toc *vtoc = toc;
Size total_bytes;
Size allocated_bytes;
Size nentry;
Size toc_bytes;
SpinLockAcquire(&toc->toc_mutex);
total_bytes = vtoc->toc_total_bytes;
allocated_bytes = vtoc->toc_allocated_bytes;
nentry = vtoc->toc_nentry;
SpinLockRelease(&toc->toc_mutex);
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
}
/*
 * Insert a TOC entry.
 *
 * The idea here is that the process setting up the shared memory segment
 * will register the addresses of data structures within the segment using
 * this function.  Each data structure will be identified using a 64-bit key,
 * which is assumed to be a well-known or discoverable integer.  Other
 * processes accessing the shared memory segment can pass the same key to
 * shm_toc_lookup() to discover the addresses of those data structures.
 *
 * Since the shared memory segment may be mapped at different addresses within
 * different backends, we store relative rather than absolute pointers.
 *
 * This won't scale well to a large number of keys.  Hopefully, that isn't
 * necessary; if it proves to be, we might need to provide a more sophisticated
 * data structure here.  But the real idea here is just to give someone mapping
 * a dynamic shared memory segment the ability to find the bare minimum number
 * of pointers that they need to bootstrap.  If you're storing a lot of stuff
 * in here, you're doing it wrong.
 *
 * 'address' must lie inside the segment, after the start of the TOC.  Raises
 * ERROR "out of shared memory" if there is no room for another entry.
 */
void
shm_toc_insert(shm_toc *toc, uint64 key, void *address)
{
	volatile shm_toc *vtoc = toc;
	Size		total_bytes;
	Size		allocated_bytes;
	Size		nentry;
	Size		toc_bytes;
	Size		offset;

	/* Relativize pointer. */
	Assert(address > (void *) toc);
	offset = ((char *) address) - (char *) toc;

	SpinLockAcquire(&toc->toc_mutex);

	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/* Check for memory exhaustion and overflow. */
	if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
		toc_bytes + sizeof(shm_toc_entry) < toc_bytes)
	{
		/* Never raise an error while holding a spinlock. */
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}

	Assert(offset < total_bytes);
	vtoc->toc_entry[nentry].key = key;
	vtoc->toc_entry[nentry].offset = offset;

	/*
	 * By placing a write barrier after filling in the entry and before
	 * updating the number of entries, we make it safe to read the TOC
	 * unlocked.
	 */
	pg_write_barrier();

	vtoc->toc_nentry++;

	SpinLockRelease(&toc->toc_mutex);
}
/*
 * Find the address registered under a key.
 *
 * No lock is taken here, in contrast to the rest of this file; a read barrier
 * is all we use.  Taking the spinlock would likely be cheap enough, but a
 * group of worker processes may well all read several entries from the same
 * TOC at about the same moment, so staying lock-free seems worthwhile.
 *
 * Returns the address of the first entry whose key matches, translated into
 * this process's mapping of the segment, or NULL if there is none.
 */
void *
shm_toc_lookup(shm_toc *toc, uint64 key)
{
	uint64		count;
	uint64		idx = 0;

	/* Fetch the entry count first; only then may entries be inspected. */
	count = toc->toc_nentry;
	pg_read_barrier();

	/* Linear scan over the entries that were visible at that point. */
	while (idx < count)
	{
		if (toc->toc_entry[idx].key == key)
			return ((char *) toc) + toc->toc_entry[idx].offset;
		idx++;
	}

	/* Ran off the end without a match. */
	return NULL;
}
/*
 * Compute the shared memory needed for a TOC plus everything it will hold:
 * the fixed header, one entry per estimated key, and the estimated chunk
 * space.  The sums go through add_size()/mul_size().
 */
Size
shm_toc_estimate(shm_toc_estimator *e)
{
	Size		entry_space;
	Size		payload_space;

	/* Room for the entry array ... */
	entry_space = mul_size(e->number_of_keys, sizeof(shm_toc_entry));

	/* ... plus the chunks themselves ... */
	payload_space = add_size(entry_space, e->space_for_chunks);

	/* ... plus the fixed-size header in front of them. */
	return add_size(offsetof(shm_toc, toc_entry), payload_space);
}
/*-------------------------------------------------------------------------
*
* shm_toc.h
* shared memory segment table of contents
*
* This is intended to provide a simple way to divide a chunk of shared
* memory (probably dynamic shared memory allocated via dsm_create) into
* a number of regions and keep track of the addresses of those regions or
* key data structures within those regions. This is not intended to
* scale to a large number of keys and will perform poorly if used that
* way; if you need a large number of pointers, store them within some
* other data structure within the segment and only put the pointer to
* the data structure itself in the table of contents.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/shm_toc.h
*
*-------------------------------------------------------------------------
*/
#ifndef SHM_TOC_H
#define SHM_TOC_H
#include "storage/shmem.h"
/* Opaque handle; the structure itself is defined in shm_toc.c. */
struct shm_toc;
typedef struct shm_toc shm_toc;
/* Initialize a TOC at 'address', managing 'nbytes' bytes starting there. */
extern shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes);
/* Attach to an existing TOC; returns NULL if the magic number doesn't match. */
extern shm_toc *shm_toc_attach(uint64 magic, void *address);
/* Carve a chunk out of the segment; there is no way to free it again. */
extern void *shm_toc_allocate(shm_toc *toc, Size nbytes);
/* Number of bytes that can still be allocated. */
extern Size shm_toc_freespace(shm_toc *toc);
/* Register 'address' (inside the segment) under 'key'. */
extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address);
/* Address registered under 'key', or NULL if there is no such entry. */
extern void *shm_toc_lookup(shm_toc *toc, uint64 key);
/*
 * Tools for estimating how large a chunk of shared memory will be needed
 * to store a TOC and its dependent objects.
 *
 * An estimator accumulates two running totals, which shm_toc_estimate()
 * turns into a segment size.
 */
typedef struct
{
/* Sum of the chunk sizes reported so far, each rounded up with BUFFERALIGN */
Size space_for_chunks;
/* Sum of the key counts reported so far */
Size number_of_keys;
} shm_toc_estimator;
/*
 * Reset both running totals of estimator 'e' to zero.  'e' is evaluated
 * twice, so it must not have side effects.
 */
#define shm_toc_initialize_estimator(e) \
((e)->space_for_chunks = 0, (e)->number_of_keys = 0)
/*
 * Account for one chunk of 'sz' bytes.  The size is rounded up with
 * BUFFERALIGN, as shm_toc_allocate() does.  'e' is evaluated twice.
 */
#define shm_toc_estimate_chunk(e, sz) \
((e)->space_for_chunks = add_size((e)->space_for_chunks, \
BUFFERALIGN((sz))))
/* Account for 'cnt' additional TOC entries.  'e' is evaluated twice. */
#define shm_toc_estimate_keys(e, cnt) \
((e)->number_of_keys = add_size((e)->number_of_keys, (cnt)))
/* Total bytes needed for the TOC header, the entries and the chunks. */
extern Size shm_toc_estimate(shm_toc_estimator *);
#endif /* SHM_TOC_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment