Commit 51ee9fa1 authored by Tom Lane's avatar Tom Lane

Add support to dynahash.c for partitioning shared hashtables according

to the low-order bits of the entry hash value.  Also make some incidental
cleanups in the dynahash API, such as not exporting the hash header
structs to the world.
parent c0e9b313
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/shmem.c,v 1.93 2006/07/14 14:52:22 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/ipc/shmem.c,v 1.94 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -211,9 +211,6 @@ InitShmemIndex(void)
{
HASHCTL info;
int hash_flags;
ShmemIndexEnt *result,
item;
bool found;
/*
* Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
......@@ -227,32 +224,11 @@ InitShmemIndex(void)
info.entrysize = sizeof(ShmemIndexEnt);
hash_flags = HASH_ELEM;
/* This will acquire the shmem index lock, but not release it. */
ShmemIndex = ShmemInitHash("ShmemIndex",
SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
&info, hash_flags);
if (!ShmemIndex)
elog(FATAL, "could not initialize Shmem Index");
/*
* Now, create an entry in the hashtable for the index itself.
*/
if (!IsUnderPostmaster)
{
MemSet(item.key, 0, SHMEM_INDEX_KEYSIZE);
strncpy(item.key, "ShmemIndex", SHMEM_INDEX_KEYSIZE);
result = (ShmemIndexEnt *)
hash_search(ShmemIndex, (void *) &item, HASH_ENTER, &found);
Assert(!found);
result->location = MAKE_OFFSET(ShmemIndex->hctl);
result->size = SHMEM_INDEX_SIZE;
}
/* now release the lock acquired in ShmemInitStruct */
LWLockRelease(ShmemIndexLock);
}
/*
......@@ -295,7 +271,7 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
/* look it up in the shmem index */
location = ShmemInitStruct(name,
sizeof(HASHHDR) + infoP->dsize * sizeof(HASHSEGMENT),
hash_get_shared_size(infoP, hash_flags),
&found);
/*
......@@ -312,9 +288,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
if (found)
hash_flags |= HASH_ATTACH;
/* Now provide the header and directory pointers */
/* Pass location of hashtable header to hash_create */
infoP->hctl = (HASHHDR *) location;
infoP->dir = (HASHSEGMENT *) (((char *) location) + sizeof(HASHHDR));
return hash_create(name, init_size, infoP, hash_flags);
}
......@@ -363,14 +338,16 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr)
* If the shmem index doesn't exist, we are bootstrapping: we must
* be trying to init the shmem index itself.
*
* Notice that the ShmemIndexLock is held until the shmem index
* has been completely initialized.
* Notice that the ShmemIndexLock is released before the shmem
* index has been initialized. This should be OK because no
* other process can be accessing shared memory yet.
*/
Assert(shmemseghdr->indexoffset == 0);
structPtr = ShmemAlloc(size);
shmemseghdr->indexoffset = MAKE_OFFSET(structPtr);
*foundPtr = FALSE;
}
LWLockRelease(ShmemIndexLock);
return structPtr;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.166 2006/07/14 14:52:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.167 2006/07/22 23:04:39 tgl Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
......@@ -1958,7 +1958,7 @@ GetLockStatusData(void)
{
LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
proclockTable = LockMethodProcLockHash[i];
els += proclockTable->hctl->nentries;
els += hash_get_num_entries(proclockTable);
}
data->nelements = els;
......
......@@ -3,17 +3,36 @@
* dynahash.c
* dynamic hash tables
*
* dynahash.c supports both local-to-a-backend hash tables and hash tables in
* shared memory. For shared hash tables, it is the caller's responsibility
* to provide appropriate access interlocking. The simplest convention is
* that a single LWLock protects the whole hash table. Searches (HASH_FIND or
* hash_seq_search) need only shared lock, but any update requires exclusive
* lock. For heavily-used shared tables, the single-lock approach creates a
* concurrency bottleneck, so we also support "partitioned" locking wherein
* there are multiple LWLocks guarding distinct subsets of the table. To use
* a hash table in partitioned mode, the HASH_PARTITION flag must be given
* to hash_create. This prevents any attempt to split buckets on-the-fly.
* Therefore, each hash bucket chain operates independently, and no fields
* of the hash header change after init except nentries and freeList.
* A partitioned table uses a spinlock to guard changes of those two fields.
* This lets any subset of the hash buckets be treated as a separately
* lockable partition. We expect callers to use the low-order bits of a
* lookup key's hash value as a partition number --- this will work because
* of the way calc_bucket() maps hash values to bucket numbers.
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.69 2006/07/14 14:52:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.70 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Original comments:
*
* Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
* Coded into C, with minor code improvements, and with hsearch(3) interface,
......@@ -45,9 +64,107 @@
#include "postgres.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
/*
* Constants
*
* A hash table has a top-level "directory", each of whose entries points
* to a "segment" of ssize bucket headers. The maximum number of hash
* buckets is thus dsize * ssize (but dsize may be expansible). Of course,
* the number of records in the table can be larger, but we don't want a
* whole lot of records per bucket or performance goes down.
*
* In a hash table allocated in shared memory, the directory cannot be
* expanded because it must stay at a fixed address. The directory size
* should be selected using hash_select_dirsize (and you'd better have
* a good idea of the maximum number of entries!). For non-shared hash
* tables, the initial directory size can be left at the default.
*/
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1 /* default fill factor */
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;
/*
* Header structure for a hash table --- contains all changeable info
*
* In a shared-memory hash table, the HASHHDR is in shared memory, while
* each backend has a local HTAB struct. For a non-shared table, there isn't
* any functional difference between HASHHDR and HTAB, but we separate them
* anyway to share code between shared and non-shared tables.
*/
struct HASHHDR
{
/* In a partitioned table, take this lock to touch nentries or freeList */
slock_t mutex; /* unused if not partitioned table */
/* These fields change during entry addition/deletion */
long nentries; /* number of entries in hash table */
HASHELEMENT *freeList; /* linked list of free elements */
/* These fields can change, but not in a partitioned table */
/* Also, dsize can't change in a shared table, even if unpartitioned */
long dsize; /* directory size */
long nsegs; /* number of allocated segments (<= dsize) */
uint32 max_bucket; /* ID of maximum bucket in use */
uint32 high_mask; /* mask to modulo into entire table */
uint32 low_mask; /* mask to modulo into lower half of table */
/* These fields are fixed at hashtable creation */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
long num_partitions; /* # partitions (must be power of 2), or 0 */
long ffactor; /* target fill factor */
long max_dsize; /* 'dsize' limit if directory is fixed size */
long ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
int nelem_alloc; /* number of entries to allocate at once */
#ifdef HASH_STATISTICS
/*
* Count statistics here. NB: stats code doesn't bother with mutex,
* so counts could be corrupted a bit in a partitioned table.
*/
long accesses;
long collisions;
#endif
};
#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
/*
* Top control structure for a hashtable --- in a shared table, each backend
* has its own copy (OK since no fields change at runtime)
*/
struct HTAB
{
HASHHDR *hctl; /* => shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
/* We keep local copies of these fixed values to reduce contention */
Size keysize; /* hash key length in bytes */
long ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
};
/*
* Key (also entry) part of a HASHELEMENT
*/
......@@ -58,6 +175,12 @@
*/
#define MOD(x,y) ((x) & ((y)-1))
#if HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
#endif
/*
* Private function prototypes
*/
......@@ -66,6 +189,7 @@ static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp, int nelem);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
......@@ -85,13 +209,6 @@ DynaHashAlloc(Size size)
}
#if HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
#endif
/************************** CREATE ROUTINES **********************/
/*
......@@ -185,17 +302,26 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
if (flags & HASH_SHARED_MEM)
{
/*
* ctl structure is preallocated for shared memory tables. Note that
* HASH_DIRSIZE and HASH_ALLOC had better be set as well.
* ctl structure and directory are preallocated for shared memory
* tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set
* as well.
*/
hashp->hctl = info->hctl;
hashp->dir = info->dir;
hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
hashp->hcxt = NULL;
hashp->isshared = true;
/* hash table already exists, we're just attaching to it */
if (flags & HASH_ATTACH)
{
/* make local copies of some heavily-used values */
hctl = hashp->hctl;
hashp->keysize = hctl->keysize;
hashp->ssize = hctl->ssize;
hashp->sshift = hctl->sshift;
return hashp;
}
}
else
{
......@@ -218,9 +344,16 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hdefault(hashp);
hctl = hashp->hctl;
#ifdef HASH_STATISTICS
hctl->accesses = hctl->collisions = 0;
#endif
if (flags & HASH_PARTITION)
{
/* Doesn't make sense to partition a local hash table */
Assert(flags & HASH_SHARED_MEM);
/* # of partitions had better be a power of 2 */
Assert(info->num_partitions == (1L << my_log2(info->num_partitions)));
hctl->num_partitions = info->num_partitions;
}
if (flags & HASH_SEGMENT)
{
......@@ -252,6 +385,11 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
hctl->entrysize = info->entrysize;
}
/* make local copies of heavily-used constant fields */
hashp->keysize = hctl->keysize;
hashp->ssize = hctl->ssize;
hashp->sshift = hctl->sshift;
/* Build the hash directory structure */
if (!init_htab(hashp, nelem))
{
......@@ -292,22 +430,29 @@ hdefault(HTAB *hashp)
MemSet(hctl, 0, sizeof(HASHHDR));
hctl->ssize = DEF_SEGSIZE;
hctl->sshift = DEF_SEGSIZE_SHIFT;
hctl->dsize = DEF_DIRSIZE;
hctl->ffactor = DEF_FFACTOR;
hctl->nentries = 0;
hctl->freeList = NULL;
hctl->dsize = DEF_DIRSIZE;
hctl->nsegs = 0;
/* rather pointless defaults for key & entry size */
hctl->keysize = sizeof(char *);
hctl->entrysize = 2 * sizeof(char *);
hctl->num_partitions = 0; /* not partitioned */
hctl->ffactor = DEF_FFACTOR;
/* table has no fixed maximum size */
hctl->max_dsize = NO_MAX_DSIZE;
/* garbage collection for HASH_REMOVE */
hctl->freeList = NULL;
hctl->ssize = DEF_SEGSIZE;
hctl->sshift = DEF_SEGSIZE_SHIFT;
#ifdef HASH_STATISTICS
hctl->accesses = hctl->collisions = 0;
#endif
}
/*
......@@ -342,6 +487,10 @@ choose_nelem_alloc(Size entrysize)
return nelem_alloc;
}
/*
* Compute derived fields of hctl and build the initial directory/segment
* arrays
*/
static bool
init_htab(HTAB *hashp, long nelem)
{
......@@ -351,6 +500,12 @@ init_htab(HTAB *hashp, long nelem)
int nbuckets;
int nsegs;
/*
* initialize mutex if it's a partitioned table
*/
if (IS_PARTITIONED(hctl))
SpinLockInit(&hctl->mutex);
/*
* Divide number of elements by the fill factor to determine a desired
* number of buckets. Allocate space for the next greater power of two
......@@ -360,6 +515,15 @@ init_htab(HTAB *hashp, long nelem)
nbuckets = 1 << my_log2(lnbuckets);
/*
* In a partitioned table, nbuckets must be at least equal to
* num_partitions; were it less, keys with apparently different partition
* numbers would map to the same bucket, breaking partition independence.
* (Normally nbuckets will be much bigger; this is just a safety check.)
*/
while (nbuckets < hctl->num_partitions)
nbuckets <<= 1;
hctl->max_bucket = hctl->low_mask = nbuckets - 1;
hctl->high_mask = (nbuckets << 1) - 1;
......@@ -491,6 +655,19 @@ hash_select_dirsize(long num_entries)
return nDirEntries;
}
/*
* Compute the required initial memory allocation for a shared-memory
* hashtable with the given parameters. We need space for the HASHHDR
* and for the (non expansible) directory.
*/
Size
hash_get_shared_size(HASHCTL *info, int flags)
{
Assert(flags & HASH_DIRSIZE);
Assert(info->dsize == info->max_dsize);
return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
}
/********************** DESTROY ROUTINES ************************/
......@@ -521,7 +698,7 @@ hash_stats(const char *where, HTAB *hashp)
where, hashp->hctl->accesses, hashp->hctl->collisions);
fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
hashp->hctl->nentries, hashp->hctl->keysize,
hashp->hctl->nentries, (long) hashp->hctl->keysize,
hashp->hctl->max_bucket, hashp->hctl->nsegs);
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
where, hash_accesses, hash_collisions);
......@@ -533,6 +710,19 @@ hash_stats(const char *where, HTAB *hashp)
/*******************************SEARCH ROUTINES *****************************/
/*
* get_hash_value -- exported routine to calculate a key's hash value
*
* We export this because for partitioned tables, callers need to compute
* the partition number (from the low-order bits of the hash value) before
* searching.
*/
uint32
get_hash_value(HTAB *hashp, const void *keyPtr)
{
return hashp->hash(keyPtr, hashp->keysize);
}
/* Convert a hash value to a bucket number */
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
......@@ -546,8 +736,9 @@ calc_bucket(HASHHDR *hctl, uint32 hash_val)
return bucket;
}
/*----------
/*
* hash_search -- look up key in table and perform action
* hash_search_with_hash_value -- same, with key's hash value already computed
*
* action is one of:
* HASH_FIND: look up key in table
......@@ -568,17 +759,32 @@ calc_bucket(HASHHDR *hctl, uint32 hash_val)
* If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
* existing entry in the table, FALSE otherwise. This is needed in the
* HASH_ENTER case, but is redundant with the return value otherwise.
*----------
*
* For hash_search_with_hash_value, the hashvalue parameter must have been
* calculated with get_hash_value().
*/
void *
hash_search(HTAB *hashp,
const void *keyPtr,
HASHACTION action,
bool *foundPtr)
{
return hash_search_with_hash_value(hashp,
keyPtr,
hashp->hash(keyPtr, hashp->keysize),
action,
foundPtr);
}
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
uint32 hashvalue,
HASHACTION action,
bool *foundPtr)
{
HASHHDR *hctl = hashp->hctl;
Size keysize = hctl->keysize;
uint32 hashvalue;
Size keysize;
uint32 bucket;
long segment_num;
long segment_ndx;
......@@ -595,11 +801,10 @@ hash_search(HTAB *hashp,
/*
* Do the initial lookup
*/
hashvalue = hashp->hash(keyPtr, keysize);
bucket = calc_bucket(hctl, hashvalue);
segment_num = bucket >> hctl->sshift;
segment_ndx = MOD(bucket, hctl->ssize);
segment_num = bucket >> hashp->sshift;
segment_ndx = MOD(bucket, hashp->ssize);
segp = hashp->dir[segment_num];
......@@ -613,6 +818,7 @@ hash_search(HTAB *hashp,
* Follow collision chain looking for matching key
*/
match = hashp->match; /* save one fetch in inner loop */
keysize = hashp->keysize; /* ditto */
while (currBucket != NULL)
{
......@@ -643,15 +849,25 @@ hash_search(HTAB *hashp,
case HASH_REMOVE:
if (currBucket != NULL)
{
Assert(hctl->nentries > 0);
hctl->nentries--;
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hctl;
/* if partitioned, must lock to touch nentries and freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
Assert(hctlv->nentries > 0);
hctlv->nentries--;
/* remove record from hash bucket's chain. */
*prevBucketPtr = currBucket->link;
/* add the record to the freelist for this table. */
currBucket->link = hctl->freeList;
hctl->freeList = currBucket;
currBucket->link = hctlv->freeList;
hctlv->freeList = currBucket;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
/*
* better hope the caller is synchronizing access to this
......@@ -672,32 +888,23 @@ hash_search(HTAB *hashp,
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
/* get the next free element */
currBucket = hctl->freeList;
currBucket = get_hash_entry(hashp);
if (currBucket == NULL)
{
/* no free elements. allocate another chunk of buckets */
if (!element_alloc(hashp, hctl->nelem_alloc))
{
/* out of memory */
if (action == HASH_ENTER_NULL)
return NULL;
/* report a generic message */
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
currBucket = hctl->freeList;
Assert(currBucket != NULL);
/* out of memory */
if (action == HASH_ENTER_NULL)
return NULL;
/* report a generic message */
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
hctl->freeList = currBucket->link;
/* link into hashbucket chain */
*prevBucketPtr = currBucket;
currBucket->link = NULL;
......@@ -708,8 +915,10 @@ hash_search(HTAB *hashp,
/* caller is expected to fill the data field on return */
/* Check if it is time to split the segment */
if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
/* Check if it is time to split a bucket */
/* Can't split if running in partitioned mode */
if (!IS_PARTITIONED(hctl) &&
hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
{
/*
* NOTE: failure to expand table is not a fatal error, it just
......@@ -726,6 +935,61 @@ hash_search(HTAB *hashp,
return NULL; /* keep compiler quiet */
}
/*
* create a new entry if possible
*/
static HASHBUCKET
get_hash_entry(HTAB *hashp)
{
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hashp->hctl;
HASHBUCKET newElement;
for (;;)
{
/* if partitioned, must lock to touch nentries and freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
/* try to get an entry from the freelist */
newElement = hctlv->freeList;
if (newElement != NULL)
break;
/* no free elements. allocate another chunk of buckets */
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
if (!element_alloc(hashp, hctlv->nelem_alloc))
{
/* out of memory */
return NULL;
}
}
/* remove entry from freelist, bump nentries */
hctlv->freeList = newElement->link;
hctlv->nentries++;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
return newElement;
}
/*
* hash_get_num_entries -- get the number of entries in a hashtable
*/
long
hash_get_num_entries(HTAB *hashp)
{
/*
* We currently don't bother with the mutex; it's only sensible to call
* this function if you've got lock on all partitions of the table.
*/
return hashp->hctl->nentries;
}
/*
* hash_seq_init/_search
* Sequentially search through hash table and return
......@@ -736,6 +1000,9 @@ hash_search(HTAB *hashp,
* UNDEFINED (it might be the one that curIndex is pointing at!). Also,
* if elements are added to the table while the scan is in progress, it is
* unspecified whether they will be visited by the scan or not.
*
* NOTE: to use this with a partitioned hashtable, caller had better hold
* at least shared lock on all partitions of the table throughout the scan!
*/
void
hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
......@@ -773,7 +1040,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
curBucket = status->curBucket;
hashp = status->hashp;
hctl = hashp->hctl;
ssize = hctl->ssize;
ssize = hashp->ssize;
max_bucket = hctl->max_bucket;
if (curBucket > max_bucket)
......@@ -782,7 +1049,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
/*
* first find the right segment in the table directory.
*/
segment_num = curBucket >> hctl->sshift;
segment_num = curBucket >> hashp->sshift;
segment_ndx = MOD(curBucket, ssize);
segp = hashp->dir[segment_num];
......@@ -840,13 +1107,15 @@ expand_table(HTAB *hashp)
HASHBUCKET currElement,
nextElement;
Assert(!IS_PARTITIONED(hctl));
#ifdef HASH_STATISTICS
hash_expansions++;
#endif
new_bucket = hctl->max_bucket + 1;
new_segnum = new_bucket >> hctl->sshift;
new_segndx = MOD(new_bucket, hctl->ssize);
new_segnum = new_bucket >> hashp->sshift;
new_segndx = MOD(new_bucket, hashp->ssize);
if (new_segnum >= hctl->nsegs)
{
......@@ -885,8 +1154,8 @@ expand_table(HTAB *hashp)
* split at this point. With a different way of reducing the hash value,
* that might not be true!
*/
old_segnum = old_bucket >> hctl->sshift;
old_segndx = MOD(old_bucket, hctl->ssize);
old_segnum = old_bucket >> hashp->sshift;
old_segndx = MOD(old_bucket, hashp->ssize);
old_seg = hashp->dir[old_segnum];
new_seg = hashp->dir[new_segnum];
......@@ -963,12 +1232,12 @@ seg_alloc(HTAB *hashp)
HASHSEGMENT segp;
CurrentDynaHashCxt = hashp->hcxt;
segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->hctl->ssize);
segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
if (!segp)
return NULL;
MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->hctl->ssize);
MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
return segp;
}
......@@ -979,29 +1248,44 @@ seg_alloc(HTAB *hashp)
static bool
element_alloc(HTAB *hashp, int nelem)
{
HASHHDR *hctl = hashp->hctl;
/* use volatile pointer to prevent code rearrangement */
volatile HASHHDR *hctlv = hashp->hctl;
Size elementSize;
HASHELEMENT *firstElement;
HASHELEMENT *tmpElement;
HASHELEMENT *prevElement;
int i;
/* Each element has a HASHELEMENT header plus user data. */
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctlv->entrysize);
CurrentDynaHashCxt = hashp->hcxt;
tmpElement = (HASHELEMENT *)
hashp->alloc(nelem * elementSize);
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
if (!tmpElement)
if (!firstElement)
return false;
/* link all the new entries into the freelist */
/* prepare to link all the new entries into the freelist */
prevElement = NULL;
tmpElement = firstElement;
for (i = 0; i < nelem; i++)
{
tmpElement->link = hctl->freeList;
hctl->freeList = tmpElement;
tmpElement->link = prevElement;
prevElement = tmpElement;
tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
}
/* if partitioned, must lock to touch freeList */
if (IS_PARTITIONED(hctlv))
SpinLockAcquire(&hctlv->mutex);
/* freelist could be nonempty if two backends did this concurrently */
firstElement->link = hctlv->freeList;
hctlv->freeList = prevElement;
if (IS_PARTITIONED(hctlv))
SpinLockRelease(&hctlv->mutex);
return true;
}
......
/*-------------------------------------------------------------------------
*
* hsearch.h
* for hash tables, particularly hash tables in shared memory
* exported definitions for utils/hash/dynahash.c; see notes therein
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.43 2006/06/25 18:29:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.44 2006/07/22 23:04:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -43,27 +43,6 @@ typedef void *(*HashCopyFunc) (void *dest, const void *src, Size keysize);
*/
typedef void *(*HashAllocFunc) (Size request);
/*
* Constants
*
* A hash table has a top-level "directory", each of whose entries points
* to a "segment" of ssize bucket headers. The maximum number of hash
* buckets is thus dsize * ssize (but dsize may be expansible). Of course,
* the number of records in the table can be larger, but we don't want a
* whole lot of records per bucket or performance goes down.
*
* In a hash table allocated in shared memory, the directory cannot be
* expanded because it must stay at a fixed address. The directory size
* should be selected using hash_select_dirsize (and you'd better have
* a good idea of the maximum number of entries!). For non-shared hash
* tables, the initial directory size can be left at the default.
*/
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1 /* default fill factor */
/*
* HASHELEMENT is the private part of a hashtable entry. The caller's data
* follows the HASHELEMENT structure (on a MAXALIGN'd boundary). The hash key
......@@ -75,81 +54,42 @@ typedef struct HASHELEMENT
uint32 hashvalue; /* hash function result for this entry */
} HASHELEMENT;
/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;
/* Hash table header struct is an opaque type known only within dynahash.c */
typedef struct HASHHDR HASHHDR;
/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;
/* Header structure for a hash table --- contains all changeable info */
typedef struct HASHHDR
{
long dsize; /* Directory Size */
long ssize; /* Segment Size --- must be power of 2 */
int sshift; /* Segment shift = log2(ssize) */
uint32 max_bucket; /* ID of Maximum bucket in use */
uint32 high_mask; /* Mask to modulo into entire table */
uint32 low_mask; /* Mask to modulo into lower half of table */
long ffactor; /* Fill factor */
long nentries; /* Number of entries in hash table */
long nsegs; /* Number of allocated segments */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
long max_dsize; /* 'dsize' limit if directory is fixed size */
int nelem_alloc; /* number of entries to allocate at once */
HASHELEMENT *freeList; /* linked list of free elements */
#ifdef HASH_STATISTICS
long accesses;
long collisions;
#endif
} HASHHDR;
/*
* Top control structure for a hashtable --- need not be shared, since
* no fields change at runtime
*/
typedef struct HTAB
{
HASHHDR *hctl; /* shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
} HTAB;
/* Hash table control struct is an opaque type known only within dynahash.c */
typedef struct HTAB HTAB;
/* Parameter data structure for hash_create */
/* Only those fields indicated by hash_flags need be set */
typedef struct HASHCTL
{
long ssize; /* Segment Size */
long dsize; /* (initial) Directory Size */
long max_dsize; /* limit to dsize if directory size is limited */
long ffactor; /* Fill factor */
long num_partitions; /* # partitions (must be power of 2) */
long ssize; /* segment size */
long dsize; /* (initial) directory size */
long max_dsize; /* limit to dsize if dir size is limited */
long ffactor; /* fill factor */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
HASHSEGMENT *dir; /* directory of segment starts */
HASHHDR *hctl; /* location of header in shared mem */
MemoryContext hcxt; /* memory context to use for allocations */
HASHHDR *hctl; /* location of header in shared mem */
} HASHCTL;
/* Flags to indicate which parameters are supplied */
#define HASH_PARTITION 0x001 /* Hashtable is used w/partitioned locking */
#define HASH_SEGMENT 0x002 /* Set segment size */
#define HASH_DIRSIZE 0x004 /* Set directory size */
#define HASH_DIRSIZE 0x004 /* Set directory size (initial and max) */
#define HASH_FFACTOR 0x008 /* Set fill factor */
#define HASH_FUNCTION 0x010 /* Set user defined hash function */
#define HASH_ELEM 0x020 /* Set key/entry size */
#define HASH_ELEM 0x020 /* Set keysize and entrysize */
#define HASH_SHARED_MEM 0x040 /* Hashtable is in shared memory */
#define HASH_ATTACH 0x080 /* Do not initialize hctl */
#define HASH_ALLOC 0x100 /* Set memory allocator */
#define HASH_CONTEXT 0x200 /* Set explicit memory context */
#define HASH_CONTEXT 0x200 /* Set memory allocation context */
#define HASH_COMPARE 0x400 /* Set user defined comparison function */
#define HASH_KEYCOPY 0x800 /* Set user defined key-copying function */
......@@ -183,10 +123,16 @@ extern void hash_destroy(HTAB *hashp);
extern void hash_stats(const char *where, HTAB *hashp);
extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr);
extern uint32 get_hash_value(HTAB *hashp, const void *keyPtr);
extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
uint32 hashvalue, HASHACTION action,
bool *foundPtr);
extern long hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern Size hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
extern Size hash_get_shared_size(HASHCTL *info, int flags);
/*
* prototypes for functions in hashfn.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment