Commit 42748087 authored by Tom Lane

First non-stub implementation of shared free space map. It's not super
useful as yet, since its primary source of information is (full) VACUUM,
which makes a concerted effort to get rid of free space before telling
the map about it ... next stop is concurrent VACUUM ...
parent 755e367c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.75 2001/06/12 05:55:49 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.76 2001/07/02 20:50:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,7 +30,8 @@
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "miscadmin.h"
#include "storage/sinval.h" /* for DatabaseHasActiveBackends */
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/syscache.h"
@@ -372,6 +373,11 @@ dropdb(const char *dbname)
*/
DropBuffers(db_id);
/*
* Also, clean out any entries in the shared free space map.
*/
FreeSpaceMapForgetDatabase(db_id);
/*
* Remove the database's subdirectory and everything in it.
*/
......
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.200 2001/06/29 20:14:27 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.201 2001/07/02 20:50:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,6 +37,7 @@
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
@@ -146,6 +147,8 @@ static void vacuum_index(VacPageList vacpagelist, Relation indrel,
double num_tuples, int keep_tuples);
static void scan_index(Relation indrel, double num_tuples);
static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
static void vac_update_fsm(Relation onerel, VacPageList fraged_pages,
BlockNumber rel_pages);
static VacPage copy_vac_page(VacPage vacpage);
static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
static void get_indices(Relation relation, int *nindices, Relation **Irel);
@@ -579,6 +582,9 @@ vacuum_rel(Oid relid)
activate_indexes_of_a_table(relid, true);
#endif /* NOT_USED */
/* update shared free space map with final free space info */
vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages);
/* all done with this class, but hold lock until commit */
heap_close(onerel, NoLock);
@@ -1157,6 +1163,10 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
* useful as move targets, since we only want to move down. Note
* that since we stop the outer loop at last_move_dest_block, pages
* removed here cannot have had anything moved onto them already.
*
* Also note that we don't change the stored fraged_pages list,
* only our local variable num_fraged_pages; so the forgotten pages
* are still available to be loaded into the free space map later.
*/
while (num_fraged_pages > 0 &&
fraged_pages->pagedesc[num_fraged_pages-1]->blkno >= blkno)
@@ -2080,6 +2090,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (blkno < nblocks)
{
blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
onerel->rd_nblocks = blkno; /* update relcache immediately */
onerel->rd_targblock = InvalidBlockNumber;
vacrelstats->rel_pages = blkno; /* set new number of blocks */
}
@@ -2145,6 +2157,8 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
RelationGetRelationName(onerel),
vacrelstats->rel_pages, relblocks);
relblocks = smgrtruncate(DEFAULT_SMGR, onerel, relblocks);
onerel->rd_nblocks = relblocks; /* update relcache immediately */
onerel->rd_targblock = InvalidBlockNumber;
vacrelstats->rel_pages = relblocks; /* set new number of
* blocks */
}
@@ -2414,6 +2428,45 @@ vac_update_relstats(Oid relid, BlockNumber num_pages, double num_tuples,
heap_close(rd, RowExclusiveLock);
}
/*
* Update the shared Free Space Map with the info we now have about
* free space in the relation, discarding any old info the map may have.
*/
static void
vac_update_fsm(Relation onerel, VacPageList fraged_pages,
BlockNumber rel_pages)
{
int nPages = fraged_pages->num_pages;
int i;
BlockNumber *pages;
Size *spaceAvail;
/* +1 to avoid palloc(0) */
pages = (BlockNumber *) palloc((nPages + 1) * sizeof(BlockNumber));
spaceAvail = (Size *) palloc((nPages + 1) * sizeof(Size));
for (i = 0; i < nPages; i++)
{
pages[i] = fraged_pages->pagedesc[i]->blkno;
spaceAvail[i] = fraged_pages->pagedesc[i]->free;
/*
* fraged_pages may contain entries for pages that we later decided
* to truncate from the relation; don't enter them into the map!
*/
if (pages[i] >= rel_pages)
{
nPages = i;
break;
}
}
MultiRecordFreeSpace(&onerel->rd_node,
0, MaxBlockNumber,
nPages, pages, spaceAvail);
pfree(pages);
pfree(spaceAvail);
}
/* Copy a VacPage structure */
static VacPage
copy_vac_page(VacPage vacpage)
......
@@ -8,12 +8,52 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.2 2001/06/29 21:08:24 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.3 2001/07/02 20:50:46 tgl Exp $
*
*
* NOTES:
*
* The only really interesting aspect of this code is the heuristics for
* deciding how much information we can afford to keep about each relation,
* given that we have a limited amount of workspace in shared memory.
* These currently work as follows:
*
* The number of distinct relations tracked is limited by a configuration
* variable (MaxFSMRelations). When this would be exceeded, we discard the
* least frequently used relation. A previously-unknown relation is always
* entered into the map with useCount 1 on first reference, even if this
* causes an existing entry with higher useCount to be discarded. This may
* cause a little bit of thrashing among the bottom entries in the list,
* but if we didn't do it then there'd be no way for a relation not in the
* map to get in once the map is full. Note we allow a relation to be in the
* map even if no pages are currently stored for it: this allows us to track
* its useCount & threshold, which may eventually go high enough to give it
* priority for page storage.
*
* The total number of pages tracked is also set by a configuration variable
* (MaxFSMPages). We allocate these dynamically among the known relations,
* giving preference to the more-frequently-referenced relations and those
* with smaller tuples. This is done by a thresholding mechanism: for each
* relation we keep track of a current target threshold, and allow only pages
* with free space >= threshold to be stored in the map. The threshold starts
* at BLCKSZ/2 (a somewhat arbitrary choice) for a newly entered relation.
* On each GetPageWithFreeSpace or RecordAndGetPageWithFreeSpace call, the threshold is moved towards the
* request size using a slow exponential moving average; this means that
* for heavily used relations, the threshold will approach the average
* freespace request size (average tuple size). Whenever we run out of
* storage space in the map, we double the threshold of all existing relations
* (but not to more than BLCKSZ, so as to prevent runaway thresholds).
* Infrequently used relations will thus tend to have large thresholds.
*
* XXX this thresholding mechanism is experimental; need to see how well
* it works in practice.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "storage/freespace.h"
#include "storage/itemid.h"
#include "storage/shmem.h"
@@ -33,10 +73,12 @@ typedef struct FSMChunk FSMChunk;
/* Header for whole map */
struct FSMHeader
{
HTAB *relationHash; /* hashtable of FSMRelation entries */
FSMRelation *relationList; /* FSMRelations in order by recency of use */
int numRelations; /* number of FSMRelations now in use */
HTAB *relHash; /* hashtable of FSMRelation entries */
FSMRelation *relList; /* FSMRelations in useCount order */
FSMRelation *relListTail; /* tail of FSMRelation list */
int numRels; /* number of FSMRelations now in use */
FSMChunk *freeChunks; /* linked list of currently-free chunks */
int numFreeChunks; /* number of free chunks */
};
/*
@@ -47,14 +89,28 @@ struct FSMHeader
struct FSMRelation
{
RelFileNode key; /* hash key (must be first) */
FSMRelation *nextRel; /* next rel in order by recency of use */
FSMRelation *priorRel; /* prior rel in order by recency of use */
FSMRelation *nextRel; /* next rel in useCount order */
FSMRelation *priorRel; /* prior rel in useCount order */
int useCount; /* use count for prioritizing rels */
Size threshold; /* minimum amount of free space to keep */
int nextPage; /* index (from 0) to start next search at */
int numPages; /* total number of pages we have info about */
int numChunks; /* number of FSMChunks allocated to rel */
FSMChunk *relChunks; /* linked list of page info chunks */
};
#define SHMEM_FSMHASH_KEYSIZE sizeof(RelFileNode)
#define SHMEM_FSMHASH_DATASIZE (sizeof(FSMRelation) - SHMEM_FSMHASH_KEYSIZE)
/*
* Info about individual pages in a relation is stored in chunks to reduce
* allocation overhead. Note that we allow any chunk of a relation's list
* to be partly full, not only the last chunk; this speeds deletion of
* individual page entries. When the total number of unused slots reaches
* CHUNKPAGES, we compact out the unused slots so that we can return a chunk
* to the freelist; but there's no point in doing the compaction before that.
*/
#define CHUNKPAGES 32 /* each chunk can store this many pages */
struct FSMChunk
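[Editor's note: the diff collapses the body of FSMChunk at this point. A
plausible reconstruction, inferred only from the accesses visible later in
this file (chunk->next, chunk->numPages, chunk->pages[i], and
chunk->bytes[i] cast to and from ItemLength), not copied from the source:]

/* inferred shape only -- the diff elides the actual declaration */
struct FSMChunk
{
	FSMChunk   *next;				/* linked-list link */
	int			numPages;			/* number of pages described in this chunk */
	BlockNumber pages[CHUNKPAGES];	/* page numbers, kept in ascending order */
	ItemLength	bytes[CHUNKPAGES];	/* free space available on each page */
};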
@@ -75,6 +131,33 @@ int MaxFSMPages;
static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
static FSMRelation *create_fsm_rel(RelFileNode *rel);
static void delete_fsm_rel(FSMRelation *fsmrel);
static void link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel);
static void unlink_fsm_rel(FSMRelation *fsmrel);
static void free_chunk_chain(FSMChunk *fchunk);
static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
Size spaceAvail);
static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
FSMChunk **outChunk, int *outChunkRelIndex);
static bool insert_fsm_page_entry(FSMRelation *fsmrel,
BlockNumber page, Size spaceAvail,
FSMChunk *chunk, int chunkRelIndex);
static bool push_fsm_page_entry(BlockNumber page, Size spaceAvail,
FSMChunk *chunk, int chunkRelIndex);
static void delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk,
int chunkRelIndex);
static void compact_fsm_page_list(FSMRelation *fsmrel);
static void acquire_fsm_free_space(void);
/*
* Exported routines
*/
/*
* InitFreeSpaceMap -- Initialize the freespace module.
*
@@ -82,8 +165,7 @@ static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
* It builds the empty free space map table. FreeSpaceLock must also be
* initialized at some point, but is not touched here --- we assume there is
* no need for locking, since only the calling process can be accessing shared
* memory as yet. FreeSpaceShmemSize() was called previously while computing
* the space needed for shared memory.
* memory as yet.
*/
void
InitFreeSpaceMap(void)
@@ -104,17 +186,18 @@ InitFreeSpaceMap(void)
info.datasize = SHMEM_FSMHASH_DATASIZE;
info.hash = tag_hash;
FreeSpaceMap->relationHash = ShmemInitHash("Free Space Map Hash",
MaxFSMRelations / 10,
MaxFSMRelations,
&info,
(HASH_ELEM | HASH_FUNCTION));
FreeSpaceMap->relHash = ShmemInitHash("Free Space Map Hash",
MaxFSMRelations / 10,
MaxFSMRelations,
&info,
(HASH_ELEM | HASH_FUNCTION));
if (!FreeSpaceMap->relationHash)
if (!FreeSpaceMap->relHash)
elog(FATAL, "Insufficient shared memory for free space map");
/* Allocate FSMChunks and fill up the free-chunks list */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
FreeSpaceMap->numFreeChunks = nchunks;
chunks = (FSMChunk *) ShmemAlloc(nchunks * sizeof(FSMChunk));
if (chunks == NULL)
@@ -130,21 +213,15 @@ InitFreeSpaceMap(void)
FreeSpaceMap->freeChunks = prevchunk;
}
/*
* Estimate amount of shmem space needed for FSM.
*/
int
FreeSpaceShmemSize(void)
{
int size;
int nchunks;
/*
* There is no point in allowing less than one "chunk" per relation,
* so force MaxFSMPages to be at least CHUNKPAGES * MaxFSMRelations.
*/
Assert(MaxFSMRelations > 0);
if (MaxFSMPages < CHUNKPAGES * MaxFSMRelations)
MaxFSMPages = CHUNKPAGES * MaxFSMRelations;
/* table header */
size = MAXALIGN(sizeof(FSMHeader));
@@ -161,27 +238,135 @@ FreeSpaceShmemSize(void)
return size;
}
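[Editor's note: a worked sizing example, under assumed settings (not taken
from this commit) of MaxFSMRelations = 100 and MaxFSMPages = 10000: the
floor is CHUNKPAGES * MaxFSMRelations = 32 * 100 = 3200, so the configured
10000 stands, and InitFreeSpaceMap then carves out (10000 - 1) / 32 + 1 =
313 chunks, i.e. 313 * 32 = 10016 page slots in shared memory.]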
/*
* GetPageWithFreeSpace - try to find a page in the given relation with
* at least the specified amount of free space.
*
* If successful, return the block number; if not, return InvalidBlockNumber.
*
* The caller must be prepared for the possibility that the returned page
* will turn out to have too little space available by the time the caller
* gets a lock on it. In that case, the caller should report the actual
* amount of free space available on that page (via RecordFreeSpace) and
* then try again. If InvalidBlockNumber is returned, extend the relation.
*/
BlockNumber
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
{
return InvalidBlockNumber; /* stub */
FSMRelation *fsmrel;
BlockNumber freepage;
SpinAcquire(FreeSpaceLock);
/*
* We always add a rel to the hashtable when it is inquired about.
*/
fsmrel = create_fsm_rel(rel);
/*
* Adjust the threshold towards the space request. This essentially
* implements an exponential moving average with an equivalent period
* of about 63 requests. Ignore silly requests, however, to ensure
* that the average stays in bounds.
*
* In theory, if the threshold increases here we should immediately
* delete any pages that fall below the new threshold. In practice
* it seems OK to wait until we have a need to compact space.
*/
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{
int cur_avg = (int) fsmrel->threshold;
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->threshold = (Size) cur_avg;
}
freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock);
return freepage;
}
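[Editor's note: the caller-side retry loop that the comment above prescribes
looks roughly like this. A sketch only: lock_page_and_get_free_space() and
extend_relation() are hypothetical stand-ins for buffer-manager work, and
only the freespace.h entry points are real:]

/* hypothetical stand-ins, declared so the sketch is self-contained */
extern Size lock_page_and_get_free_space(RelFileNode *rel, BlockNumber blk);
extern BlockNumber extend_relation(RelFileNode *rel);

static BlockNumber
get_page_for_insert(RelFileNode *rel, Size spaceNeeded)
{
	BlockNumber blk = GetPageWithFreeSpace(rel, spaceNeeded);

	while (blk != InvalidBlockNumber)
	{
		/* hypothetical: lock the page and measure its actual free space */
		Size	actual = lock_page_and_get_free_space(rel, blk);

		if (actual >= spaceNeeded)
			return blk;			/* the map's info held up; use this page */

		/*
		 * Stale info: report the true amount and ask for another page,
		 * using the combo call to save a spinlock/lookup cycle.
		 */
		blk = RecordAndGetPageWithFreeSpace(rel, blk, actual, spaceNeeded);
	}
	return extend_relation(rel);	/* hypothetical: no luck, add a page */
}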
/*
* RecordFreeSpace - record the amount of free space available on a page.
*
* The FSM is at liberty to forget about the page instead, if the amount of
* free space is too small to be interesting. The only guarantee is that
* a subsequent request to get a page with more than spaceAvail space free
* will not return this page.
*/
void
RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
{
/* stub */
FSMRelation *fsmrel;
/* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(spaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock);
/*
* We choose not to add rels to the hashtable unless they've been
* inquired about with GetPageWithFreeSpace. Also, a Record operation
* does not increase the useCount or change threshold, only Get does.
*/
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
fsm_record_free_space(fsmrel, page, spaceAvail);
SpinRelease(FreeSpaceLock);
}
/*
* RecordAndGetPageWithFreeSpace - combo form to save one spinlock and
* hash table lookup cycle.
*/
BlockNumber
RecordAndGetPageWithFreeSpace(RelFileNode *rel,
BlockNumber oldPage,
Size oldSpaceAvail,
Size spaceNeeded)
{
return InvalidBlockNumber; /* stub */
FSMRelation *fsmrel;
BlockNumber freepage;
/* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(oldSpaceAvail < BLCKSZ);
SpinAcquire(FreeSpaceLock);
/*
* We always add a rel to the hashtable when it is inquired about.
*/
fsmrel = create_fsm_rel(rel);
/*
* Adjust the threshold towards the space request, same as in
* GetPageWithFreeSpace.
*
* Note that we do this before storing data for oldPage, which means
* this isn't exactly equivalent to Record followed by Get; but it
* seems appropriate to adjust the threshold first.
*/
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{
int cur_avg = (int) fsmrel->threshold;
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->threshold = (Size) cur_avg;
}
/* Do the Record */
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/* Do the Get */
freepage = find_free_space(fsmrel, spaceNeeded);
SpinRelease(FreeSpaceLock);
return freepage;
}
/*
* MultiRecordFreeSpace - record available-space info about multiple pages
* of a relation in one call.
*
* First, if minPage <= maxPage, the FSM must discard any entries it has for
* pages in that page number range (inclusive). This allows obsolete info
* to be discarded. Second, if nPages > 0, record the page numbers and free
* space amounts in the given arrays. As with RecordFreeSpace, the FSM is at
* liberty to discard some of the information. However, it *must* discard
* previously stored info in the minPage..maxPage range (for example, this
* case is used to remove info about deleted pages during relation truncation).
*/
void
MultiRecordFreeSpace(RelFileNode *rel,
BlockNumber minPage,
@@ -190,13 +375,668 @@ MultiRecordFreeSpace(RelFileNode *rel,
BlockNumber *pages,
Size *spaceAvail)
{
/* stub */
FSMRelation *fsmrel;
int i;
SpinAcquire(FreeSpaceLock);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
/*
* Remove entries in specified range
*/
if (minPage <= maxPage)
{
FSMChunk *chunk;
int chunkRelIndex;
bool done;
/* Use lookup to locate first entry >= minPage */
lookup_fsm_page_entry(fsmrel, minPage, &chunk, &chunkRelIndex);
/* Set free space to 0 for each page within range */
done = false;
while (chunk && !done)
{
int numPages = chunk->numPages;
for (; chunkRelIndex < numPages; chunkRelIndex++)
{
if (chunk->pages[chunkRelIndex] > maxPage)
{
done = true;
break;
}
chunk->bytes[chunkRelIndex] = 0;
}
chunk = chunk->next;
chunkRelIndex = 0;
}
/* Now compact out the zeroed entries */
compact_fsm_page_list(fsmrel);
}
/*
* Add new entries, if appropriate.
*
* XXX we could probably be smarter about this than doing it
* completely separately for each one. FIXME later.
*/
for (i = 0; i < nPages; i++)
fsm_record_free_space(fsmrel, pages[i], spaceAvail[i]);
}
SpinRelease(FreeSpaceLock);
}
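[Editor's note: for contrast, the two call patterns this contract serves,
both quoted from elsewhere in this commit:]

/* vacuum.c: discard all stored info, then load fresh per-page figures */
MultiRecordFreeSpace(&onerel->rd_node, 0, MaxBlockNumber,
					 nPages, pages, spaceAvail);

/* smgr.c: discard info for truncated-away blocks, record nothing new */
MultiRecordFreeSpace(&reln->rd_node, nblocks, MaxBlockNumber,
					 0, NULL, NULL);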
/*
* FreeSpaceMapForgetRel - forget all about a relation.
*
* This is called when a relation is deleted. Although we could just let
* the rel age out of the map, it's better to reclaim and reuse the space
* sooner.
*/
void
FreeSpaceMapForgetRel(RelFileNode *rel)
{
/* stub */
FSMRelation *fsmrel;
SpinAcquire(FreeSpaceLock);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
delete_fsm_rel(fsmrel);
SpinRelease(FreeSpaceLock);
}
/*
* FreeSpaceMapForgetDatabase - forget all relations of a database.
*
* This is called during DROP DATABASE. As above, might as well reclaim
* map space sooner instead of later.
*
* XXX when we implement tablespaces, target Oid will need to be tablespace
* ID not database ID.
*/
void
FreeSpaceMapForgetDatabase(Oid dbid)
{
FSMRelation *fsmrel,
*nextrel;
SpinAcquire(FreeSpaceLock);
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
{
nextrel = fsmrel->nextRel; /* in case we delete it */
if (fsmrel->key.tblNode == dbid)
delete_fsm_rel(fsmrel);
}
SpinRelease(FreeSpaceLock);
}
/*
* Internal routines. These all assume the caller holds the FreeSpaceLock.
*/
/*
* Lookup a relation in the hash table. If not present, return NULL.
* The relation's useCount is not changed.
*/
static FSMRelation *
lookup_fsm_rel(RelFileNode *rel)
{
FSMRelation *fsmrel;
bool found;
fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
(Pointer) rel,
HASH_FIND,
&found);
if (!fsmrel)
elog(ERROR, "FreeSpaceMap hashtable corrupted");
if (!found)
return NULL;
return fsmrel;
}
/*
* Lookup a relation in the hash table, creating an entry if not present.
*
* On successful lookup, the relation's useCount is incremented, possibly
* causing it to move up in the priority list.
*/
static FSMRelation *
create_fsm_rel(RelFileNode *rel)
{
FSMRelation *fsmrel,
*oldrel;
bool found;
fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
(Pointer) rel,
HASH_ENTER,
&found);
if (!fsmrel)
elog(ERROR, "FreeSpaceMap hashtable corrupted");
if (!found)
{
/* New hashtable entry, initialize it (hash_search set the key) */
fsmrel->useCount = 1;
fsmrel->threshold = BLCKSZ/2; /* starting point for new entry */
fsmrel->nextPage = 0;
fsmrel->numPages = 0;
fsmrel->numChunks = 0;
fsmrel->relChunks = NULL;
/* Discard lowest-priority existing rel, if we are over limit */
if (FreeSpaceMap->numRels >= MaxFSMRelations)
delete_fsm_rel(FreeSpaceMap->relListTail);
/*
* Add new entry in front of any others with useCount 1 (since it
* is more recently used than them).
*/
oldrel = FreeSpaceMap->relListTail;
while (oldrel && oldrel->useCount <= 1)
oldrel = oldrel->priorRel;
link_fsm_rel_after(fsmrel, oldrel);
FreeSpaceMap->numRels++;
}
else
{
int myCount;
/* Existing entry, advance its useCount */
if (++(fsmrel->useCount) >= INT_MAX/2)
{
/* When useCounts threaten to overflow, reduce 'em all 2X */
for (oldrel = FreeSpaceMap->relList;
oldrel != NULL;
oldrel = oldrel->nextRel)
{
oldrel->useCount >>= 1;
}
}
/* If warranted, move it up the priority list */
oldrel = fsmrel->priorRel;
myCount = fsmrel->useCount;
if (oldrel && oldrel->useCount <= myCount)
{
unlink_fsm_rel(fsmrel);
while (oldrel && oldrel->useCount <= myCount)
oldrel = oldrel->priorRel;
link_fsm_rel_after(fsmrel, oldrel);
}
}
return fsmrel;
}
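[Editor's note: a concrete trace of this policy. With MaxFSMRelations = 3
and a priority list A (useCount 5), B (2), C (1), the first reference to a
new relation D evicts the tail entry C, and D enters with useCount 1,
linked after the last rel whose useCount exceeds 1. A second reference
bumps D to useCount 2 and moves it up to sit just after A, ahead of B,
since ties are broken in favor of the more recently bumped entry.]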
/*
* Remove an existing FSMRelation entry.
*/
static void
delete_fsm_rel(FSMRelation *fsmrel)
{
FSMRelation *result;
bool found;
free_chunk_chain(fsmrel->relChunks);
unlink_fsm_rel(fsmrel);
FreeSpaceMap->numRels--;
result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
(Pointer) &(fsmrel->key),
HASH_REMOVE,
&found);
if (!result || !found)
elog(ERROR, "FreeSpaceMap hashtable corrupted");
}
/*
* Link a FSMRelation into the priority list just after the given existing
* entry (or at the head of the list, if oldrel is NULL).
*/
static void
link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel)
{
if (oldrel == NULL)
{
/* insert at head */
fsmrel->priorRel = NULL;
fsmrel->nextRel = FreeSpaceMap->relList;
FreeSpaceMap->relList = fsmrel;
if (fsmrel->nextRel != NULL)
fsmrel->nextRel->priorRel = fsmrel;
else
FreeSpaceMap->relListTail = fsmrel;
}
else
{
/* insert after oldrel */
fsmrel->priorRel = oldrel;
fsmrel->nextRel = oldrel->nextRel;
oldrel->nextRel = fsmrel;
if (fsmrel->nextRel != NULL)
fsmrel->nextRel->priorRel = fsmrel;
else
FreeSpaceMap->relListTail = fsmrel;
}
}
/*
* Delink a FSMRelation from the priority list.
*/
static void
unlink_fsm_rel(FSMRelation *fsmrel)
{
if (fsmrel->priorRel != NULL)
fsmrel->priorRel->nextRel = fsmrel->nextRel;
else
FreeSpaceMap->relList = fsmrel->nextRel;
if (fsmrel->nextRel != NULL)
fsmrel->nextRel->priorRel = fsmrel->priorRel;
else
FreeSpaceMap->relListTail = fsmrel->priorRel;
}
/*
* Return all the FSMChunks in the chain starting at fchunk to the freelist.
* (Caller must handle unlinking them from wherever they were.)
*/
static void
free_chunk_chain(FSMChunk *fchunk)
{
int nchunks;
FSMChunk *lchunk;
if (fchunk == NULL)
return;
nchunks = 1;
lchunk = fchunk;
while (lchunk->next != NULL)
{
nchunks++;
lchunk = lchunk->next;
}
lchunk->next = FreeSpaceMap->freeChunks;
FreeSpaceMap->freeChunks = fchunk;
FreeSpaceMap->numFreeChunks += nchunks;
}
/*
* Look to see if a page with at least the specified amount of space is
* available in the given FSMRelation. If so, return its page number,
* and advance the nextPage counter so that the next inquiry will return
* a different page if possible. Return InvalidBlockNumber if no success.
*/
static BlockNumber
find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
{
int pagesToCheck, /* outer loop counter */
pageIndex, /* current page index relative to relation */
chunkRelIndex; /* current page index relative to curChunk */
FSMChunk *curChunk;
pageIndex = fsmrel->nextPage;
/* Last operation may have left nextPage pointing past end */
if (pageIndex >= fsmrel->numPages)
pageIndex = 0;
curChunk = fsmrel->relChunks;
chunkRelIndex = pageIndex;
for (pagesToCheck = fsmrel->numPages; pagesToCheck > 0; pagesToCheck--)
{
/*
* Make sure we are in the right chunk. First time through, we
* may have to advance through multiple chunks; subsequent loops
* should do this at most once.
*/
while (chunkRelIndex >= curChunk->numPages)
{
chunkRelIndex -= curChunk->numPages;
curChunk = curChunk->next;
}
/* Check the next page */
if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
{
fsmrel->nextPage = pageIndex+1;
return curChunk->pages[chunkRelIndex];
}
/* Advance pageIndex and chunkRelIndex, wrapping around if needed */
if (++pageIndex >= fsmrel->numPages)
{
pageIndex = 0;
curChunk = fsmrel->relChunks;
chunkRelIndex = 0;
}
else
chunkRelIndex++;
}
return InvalidBlockNumber; /* nothing found */
}
/*
* fsm_record_free_space - guts of RecordFreeSpace, which is also used by
* RecordAndGetPageWithFreeSpace.
*/
static void
fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
{
FSMChunk *chunk;
int chunkRelIndex;
if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
{
/* Found an existing entry for page; update or delete it */
if (spaceAvail >= fsmrel->threshold)
chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
else
delete_fsm_page_entry(fsmrel, chunk, chunkRelIndex);
}
else
{
/*
* No existing entry; add one if spaceAvail exceeds threshold.
*
* CORNER CASE: if we have to do acquire_fsm_free_space then
* our own threshold will increase, possibly meaning that we
* shouldn't store the page after all. Loop to redo the test
* if that happens. The loop also covers the possibility that
* acquire_fsm_free_space must be executed more than once to
* free any space (ie, thresholds must be more than doubled).
*/
while (spaceAvail >= fsmrel->threshold)
{
if (insert_fsm_page_entry(fsmrel, page, spaceAvail,
chunk, chunkRelIndex))
break;
/* No space, acquire some and recheck threshold */
acquire_fsm_free_space();
if (spaceAvail < fsmrel->threshold)
break;
/*
* Need to redo the lookup since our own page list may well
* have lost entries, so position is not correct anymore.
*/
if (lookup_fsm_page_entry(fsmrel, page,
&chunk, &chunkRelIndex))
elog(ERROR, "fsm_record_free_space: unexpected match");
}
}
}
/*
* Look for an entry for a specific page (block number) in a FSMRelation.
* Returns TRUE if a matching entry exists, else FALSE.
*
* The output arguments *outChunk, *outChunkRelIndex are set to indicate where
* the entry exists (if TRUE result) or could be inserted (if FALSE result).
* *outChunk is set to NULL if there is no place to insert, ie, the entry would
* need to be added to a new chunk.
*/
static bool
lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
FSMChunk **outChunk, int *outChunkRelIndex)
{
FSMChunk *chunk;
int chunkRelIndex;
for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
{
int numPages = chunk->numPages;
/* Can skip the chunk quickly if page must be after last in chunk */
if (numPages > 0 && page <= chunk->pages[numPages-1])
{
for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
{
if (page <= chunk->pages[chunkRelIndex])
{
*outChunk = chunk;
*outChunkRelIndex = chunkRelIndex;
return (page == chunk->pages[chunkRelIndex]);
}
}
/* Should not get here, given above test */
Assert(false);
}
/*
* If we are about to fall off the end, and there's space available
* in the end chunk, return a pointer to it.
*/
if (chunk->next == NULL && numPages < CHUNKPAGES)
{
*outChunk = chunk;
*outChunkRelIndex = numPages;
return false;
}
}
/*
* Adding the page would require a new chunk (or, perhaps, compaction
* of available free space --- not my problem here).
*/
*outChunk = NULL;
*outChunkRelIndex = 0;
return false;
}
/*
* Insert a new page entry into a FSMRelation's list at given position
* (chunk == NULL implies at end).
*
* If there is no space available to insert the entry, return FALSE.
*/
static bool
insert_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail,
FSMChunk *chunk, int chunkRelIndex)
{
FSMChunk *newChunk;
int newChunkRelIndex;
if (fsmrel->numPages >= fsmrel->numChunks * CHUNKPAGES)
{
/* No free space within chunk list, so need another chunk */
if ((newChunk = FreeSpaceMap->freeChunks) == NULL)
return false; /* can't do it */
FreeSpaceMap->freeChunks = newChunk->next;
FreeSpaceMap->numFreeChunks--;
newChunk->next = NULL;
newChunk->numPages = 0;
if (fsmrel->relChunks == NULL)
fsmrel->relChunks = newChunk;
else
{
FSMChunk *priorChunk = fsmrel->relChunks;
while (priorChunk->next != NULL)
priorChunk = priorChunk->next;
priorChunk->next = newChunk;
}
fsmrel->numChunks++;
if (chunk == NULL)
{
/* Original search found that new page belongs at end */
chunk = newChunk;
chunkRelIndex = 0;
}
}
/* Try to insert it the easy way, ie, just move down subsequent data */
if (chunk &&
push_fsm_page_entry(page, spaceAvail, chunk, chunkRelIndex))
{
fsmrel->numPages++;
fsmrel->nextPage++; /* don't return same page twice running */
return true;
}
/*
* There is space available, but evidently it's before the place
* where the page entry needs to go. Compact the list and try again.
* This will require us to redo the search for the appropriate place.
*/
compact_fsm_page_list(fsmrel);
if (lookup_fsm_page_entry(fsmrel, page, &newChunk, &newChunkRelIndex))
elog(ERROR, "insert_fsm_page_entry: entry already exists!");
if (newChunk &&
push_fsm_page_entry(page, spaceAvail, newChunk, newChunkRelIndex))
{
fsmrel->numPages++;
fsmrel->nextPage++; /* don't return same page twice running */
return true;
}
/* Shouldn't get here given the initial if-test for space available */
elog(ERROR, "insert_fsm_page_entry: failed to insert entry!");
return false; /* keep compiler quiet */
}
/*
* Auxiliary routine for insert_fsm_page_entry: try to push entries to the
* right to insert at chunk/chunkRelIndex. Return TRUE if successful.
* Note that the FSMRelation's own fields are not updated.
*/
static bool
push_fsm_page_entry(BlockNumber page, Size spaceAvail,
FSMChunk *chunk, int chunkRelIndex)
{
int i;
if (chunk->numPages >= CHUNKPAGES)
{
if (chunk->next == NULL)
return false; /* no space */
/* try to push chunk's last item to next chunk */
if (! push_fsm_page_entry(chunk->pages[CHUNKPAGES-1],
chunk->bytes[CHUNKPAGES-1],
chunk->next, 0))
return false;
/* successfully pushed it */
chunk->numPages--;
}
for (i = chunk->numPages; i > chunkRelIndex; i--)
{
chunk->pages[i] = chunk->pages[i-1];
chunk->bytes[i] = chunk->bytes[i-1];
}
chunk->numPages++;
chunk->pages[chunkRelIndex] = page;
chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
return true;
}
/*
* Delete one page entry from a FSMRelation's list. Compact free space
* in the list, but only if a chunk can be returned to the freelist.
*/
static void
delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, int chunkRelIndex)
{
int i,
lim;
Assert(chunk && chunkRelIndex >= 0 && chunkRelIndex < chunk->numPages);
/* Compact out space in this chunk */
lim = --chunk->numPages;
for (i = chunkRelIndex; i < lim; i++)
{
chunk->pages[i] = chunk->pages[i+1];
chunk->bytes[i] = chunk->bytes[i+1];
}
/* Compact the whole list if a chunk can be freed */
fsmrel->numPages--;
if (fsmrel->numPages <= (fsmrel->numChunks-1) * CHUNKPAGES)
compact_fsm_page_list(fsmrel);
}
/*
* Remove any pages with free space less than the current threshold for the
* FSMRelation, compact out free slots in non-last chunks, and return any
* completely freed chunks to the freelist.
*/
static void
compact_fsm_page_list(FSMRelation *fsmrel)
{
Size threshold = fsmrel->threshold;
FSMChunk *srcChunk,
*dstChunk;
int srcIndex,
dstIndex,
dstPages,
dstChunkCnt;
srcChunk = dstChunk = fsmrel->relChunks;
srcIndex = dstIndex = 0;
dstPages = 0; /* total pages kept */
dstChunkCnt = 1; /* includes current dstChunk */
while (srcChunk != NULL)
{
int srcPages = srcChunk->numPages;
while (srcIndex < srcPages)
{
if ((Size) srcChunk->bytes[srcIndex] >= threshold)
{
if (dstIndex >= CHUNKPAGES)
{
/*
* At this point srcChunk must be pointing to a later
* chunk, so it's OK to overwrite dstChunk->numPages.
*/
dstChunk->numPages = dstIndex;
dstChunk = dstChunk->next;
dstChunkCnt++;
dstIndex = 0;
}
dstChunk->pages[dstIndex] = srcChunk->pages[srcIndex];
dstChunk->bytes[dstIndex] = srcChunk->bytes[srcIndex];
dstIndex++;
dstPages++;
}
srcIndex++;
}
srcChunk = srcChunk->next;
srcIndex = 0;
}
if (dstPages == 0)
{
/* No chunks to be kept at all */
fsmrel->nextPage = 0;
fsmrel->numPages = 0;
fsmrel->numChunks = 0;
fsmrel->relChunks = NULL;
free_chunk_chain(dstChunk);
}
else
{
/* we deliberately don't change nextPage here */
fsmrel->numPages = dstPages;
fsmrel->numChunks = dstChunkCnt;
dstChunk->numPages = dstIndex;
free_chunk_chain(dstChunk->next);
dstChunk->next = NULL;
}
}
/*
* Acquire some free space by raising the thresholds of all FSMRelations.
*
* Note there is no guarantee as to how much space will be freed by a single
* invocation; caller may repeat if necessary.
*/
static void
acquire_fsm_free_space(void)
{
FSMRelation *fsmrel;
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
{
fsmrel->threshold *= 2;
/* Limit thresholds to BLCKSZ so they can't get ridiculously large */
if (fsmrel->threshold > BLCKSZ)
fsmrel->threshold = BLCKSZ;
/* Release any pages that don't meet the new threshold */
compact_fsm_page_list(fsmrel);
}
}
@@ -210,7 +1050,58 @@ FreeSpaceMapForgetRel(RelFileNode *rel)
void
DumpFreeSpace(void)
{
/* stub */
FSMRelation *fsmrel;
FSMRelation *prevrel = NULL;
int relNum = 0;
FSMChunk *chunk;
int chunkRelIndex;
int nChunks;
int nPages;
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
{
relNum++;
fprintf(stderr, "Map %d: rel %u/%u useCount %d threshold %u nextPage %d\nMap= ",
relNum, fsmrel->key.tblNode, fsmrel->key.relNode,
fsmrel->useCount, fsmrel->threshold, fsmrel->nextPage);
nChunks = nPages = 0;
for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
{
int numPages = chunk->numPages;
nChunks++;
for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
{
nPages++;
fprintf(stderr, " %u:%u",
chunk->pages[chunkRelIndex],
chunk->bytes[chunkRelIndex]);
}
}
fprintf(stderr, "\n");
/* Cross-check local counters and list links */
if (nPages != fsmrel->numPages)
fprintf(stderr, "DumpFreeSpace: %d pages in rel, but numPages = %d\n",
nPages, fsmrel->numPages);
if (nChunks != fsmrel->numChunks)
fprintf(stderr, "DumpFreeSpace: %d chunks in rel, but numChunks = %d\n",
nChunks, fsmrel->numChunks);
if (prevrel != fsmrel->priorRel)
fprintf(stderr, "DumpFreeSpace: broken list links\n");
prevrel = fsmrel;
}
if (prevrel != FreeSpaceMap->relListTail)
fprintf(stderr, "DumpFreeSpace: broken list links\n");
/* Cross-check global counters */
if (relNum != FreeSpaceMap->numRels)
fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
relNum, FreeSpaceMap->numRels);
nChunks = 0;
for (chunk = FreeSpaceMap->freeChunks; chunk; chunk = chunk->next)
nChunks++;
if (nChunks != FreeSpaceMap->numFreeChunks)
fprintf(stderr, "DumpFreeSpace: %d chunks in list, but numFreeChunks = %d\n",
nChunks, FreeSpaceMap->numFreeChunks);
}
#endif /* FREESPACE_DEBUG */
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.51 2001/06/29 21:08:24 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.52 2001/07/02 20:50:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -469,11 +469,13 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
if (smgrsw[which].smgr_truncate)
{
/*
* Tell the free space map to forget this relation, so that it
* stops caching info about the deleted blocks. XXX perhaps
* tell it to forget only info about blocks beyond nblocks?
* Tell the free space map to forget anything it may have stored
* for the about-to-be-deleted blocks. We want to be sure it won't
* return bogus block numbers later on.
*/
FreeSpaceMapForgetRel(&reln->rd_node);
MultiRecordFreeSpace(&reln->rd_node,
nblocks, MaxBlockNumber,
0, NULL, NULL);
newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
if (newblks == InvalidBlockNumber)
......
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: block.h,v 1.12 2001/01/24 19:43:27 momjian Exp $
* $Id: block.h,v 1.13 2001/07/02 20:50:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -32,6 +32,8 @@ typedef uint32 BlockNumber;
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)
#define MaxBlockNumber ((BlockNumber) 0xFFFFFFFE)
/*
* BlockId:
*
......
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: freespace.h,v 1.1 2001/06/27 23:31:39 tgl Exp $
* $Id: freespace.h,v 1.2 2001/07/02 20:50:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -45,6 +45,7 @@ extern void MultiRecordFreeSpace(RelFileNode *rel,
BlockNumber *pages,
Size *spaceAvail);
extern void FreeSpaceMapForgetRel(RelFileNode *rel);
extern void FreeSpaceMapForgetDatabase(Oid dbid);
#ifdef FREESPACE_DEBUG
extern void DumpFreeSpace(void);
......