Commit 4b6c198a authored by Tom Lane

Add code to dump contents of free space map into $PGDATA/global/pg_fsm.cache

at database shutdown, and then load it again at database startup.  This
preserves our hard-won knowledge of free space across restarts (given
an orderly shutdown, that is).
parent 21591967
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.147 2002/12/15 16:17:38 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.148 2003/03/06 00:04:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -34,6 +34,7 @@
#include "executor/executor.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
......@@ -398,10 +399,12 @@ BootstrapMain(int argc, char *argv[])
case BS_XLOG_STARTUP:
StartupXLOG();
LoadFreeSpaceMap();
proc_exit(0); /* done */
case BS_XLOG_SHUTDOWN:
ShutdownXLOG();
DumpFreeSpaceMap();
proc_exit(0); /* done */
default:
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.16 2003/03/04 21:51:21 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.17 2003/03/06 00:04:27 tgl Exp $
*
*
* NOTES:
......@@ -45,22 +45,26 @@
* So the actual arithmetic is: for each relation compute myRequest as the
* number of chunks needed to hold its RRFS page count (not counting the
* first, guaranteed chunk); compute sumRequests as the sum of these values
* over all relations; then for each relation figure its actual allocation
* over all relations; then for each relation figure its target allocation
* as
* 1 + round(spareChunks * myRequest / sumRequests)
* where spareChunks = totalChunks - numRels is the number of chunks we have
* a choice what to do with. We round off these numbers because truncating
* all of them would waste significant space. But because of roundoff, it's
* possible for the last few relations to get less space than they should;
* the computed allocation must be checked against remaining available space.
* the target allocation must be checked against remaining available space.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <unistd.h>
#include "miscadmin.h"
#include "storage/fd.h"
#include "storage/freespace.h"
#include "storage/itemptr.h"
#include "storage/lwlock.h"
......@@ -105,6 +109,53 @@ typedef BlockIdData IndexFSMPageData;
#define IndexFSMPageSetPageNum(ptr, pg) \
BlockIdSet(ptr, pg)
/*----------
* During database shutdown, we store the contents of FSM into a disk file,
* which is re-read during startup. This way we don't have a startup
* transient condition where FSM isn't really functioning.
*
* The file format is:
* label "FSM\0"
* endian constant 0x01020304 for detecting endianness problems
* version#
* numRels
* -- for each rel, in *reverse* usage order:
* relfilenode
* isIndex
* avgRequest
* lastPageCount
* storedPages
* arena data array of storedPages FSMPageData or IndexFSMPageData
*----------
*/
/*
 * Name of FSM cache file (relative to $PGDATA).  DumpFreeSpaceMap and
 * LoadFreeSpaceMap both prefix this with DataDir to build the full path.
 */
#define FSM_CACHE_FILENAME "global/pg_fsm.cache"
/*
 * Fixed values in header.  LoadFreeSpaceMap rejects the file unless all
 * three of these match what it reads back.
 */
/* Label string; with its terminating NUL it exactly fills label[4] */
#define FSM_CACHE_LABEL "FSM"
/* Multi-byte constant used to detect a file written with other endianness */
#define FSM_CACHE_ENDIAN 0x01020304
/* File format version (NOTE(review): looks like a YYYYMMDD date -- confirm) */
#define FSM_CACHE_VERSION 20030305
/*
 * File header layout
 *
 * Written once at the start of the cache file by DumpFreeSpaceMap.
 * LoadFreeSpaceMap refuses the file unless label, endian and version equal
 * the FSM_CACHE_* constants and numRels is non-negative.
 */
typedef struct FsmCacheFileHeader
{
char label[4]; /* FSM_CACHE_LABEL, including terminating NUL */
uint32 endian; /* FSM_CACHE_ENDIAN as stored by the writer */
uint32 version; /* FSM_CACHE_VERSION of the writer */
int32 numRels; /* number of per-relation entries that follow */
} FsmCacheFileHeader;
/*
 * Per-relation header
 *
 * One of these precedes each relation's page data in the cache file.  The
 * fields are copied from the relation's FSMRelation entry at dump time.
 * The header is followed by storedPages entries, which are IndexFSMPageData
 * if isIndex is true and FSMPageData otherwise.
 */
typedef struct FsmCacheRelHeader
{
RelFileNode key; /* hash key of the FSMRelation that was dumped */
bool isIndex; /* if true, we store only page numbers */
uint32 avgRequest; /* moving average of space requests */
int32 lastPageCount; /* FSMRelation's lastPageCount at dump time */
int32 storedPages; /* # of page entries following this header */
} FsmCacheRelHeader;
/*
* Shared free-space-map objects
......@@ -172,6 +223,7 @@ static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
static FSMRelation *create_fsm_rel(RelFileNode *rel);
static void delete_fsm_rel(FSMRelation *fsmrel);
static int realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex);
static void link_fsm_rel_usage(FSMRelation *fsmrel);
static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
static void link_fsm_rel_storage(FSMRelation *fsmrel);
......@@ -416,54 +468,18 @@ RecordRelationFreeSpace(RelFileNode *rel,
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int myRequest;
int myAlloc;
int curAlloc;
int curAllocPages;
FSMPageData *newLocation;
/*
* Delete existing entries, and update request status.
*/
fsmrel->storedPages = 0;
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
fsmrel->lastPageCount = nPages;
fsmrel->isIndex = false;
myRequest = fsm_calc_request(fsmrel);
FreeSpaceMap->sumRequests += myRequest;
myAlloc = fsm_calc_target_allocation(myRequest);
/*
* Need to reallocate space if (a) my target allocation is more
* than my current allocation, AND (b) my actual immediate need
* (myRequest+1 chunks) is more than my current allocation.
* Otherwise just store the new data in-place.
*/
curAlloc = fsm_current_allocation(fsmrel);
if (myAlloc > curAlloc && (myRequest+1) > curAlloc && nPages > 0)
{
/* Remove entry from storage list, and compact */
unlink_fsm_rel_storage(fsmrel);
compact_fsm_storage();
/* Reattach to end of storage list */
link_fsm_rel_storage(fsmrel);
/* And allocate storage */
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
FreeSpaceMap->usedChunks += myAlloc;
curAlloc = myAlloc;
/* Watch out for roundoff error */
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
{
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
}
}
curAlloc = realloc_fsm_rel(fsmrel, nPages, false);
curAllocPages = curAlloc * CHUNKPAGES;
/*
* If the data fits in our current allocation, just copy it;
* otherwise must compress.
*/
newLocation = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
curAllocPages = curAlloc * CHUNKPAGES;
if (nPages <= curAllocPages)
{
int i;
......@@ -539,48 +555,13 @@ RecordIndexFreeSpace(RelFileNode *rel,
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int myRequest;
int myAlloc;
int curAlloc;
int curAllocPages;
int i;
IndexFSMPageData *newLocation;
/*
* Delete existing entries, and update request status.
*/
fsmrel->storedPages = 0;
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
fsmrel->lastPageCount = nPages;
fsmrel->isIndex = true;
myRequest = fsm_calc_request(fsmrel);
FreeSpaceMap->sumRequests += myRequest;
myAlloc = fsm_calc_target_allocation(myRequest);
/*
* Need to reallocate space if (a) my target allocation is more
* than my current allocation, AND (b) my actual immediate need
* (myRequest+1 chunks) is more than my current allocation.
* Otherwise just store the new data in-place.
*/
curAlloc = fsm_current_allocation(fsmrel);
if (myAlloc > curAlloc && (myRequest+1) > curAlloc && nPages > 0)
{
/* Remove entry from storage list, and compact */
unlink_fsm_rel_storage(fsmrel);
compact_fsm_storage();
/* Reattach to end of storage list */
link_fsm_rel_storage(fsmrel);
/* And allocate storage */
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
FreeSpaceMap->usedChunks += myAlloc;
curAlloc = myAlloc;
/* Watch out for roundoff error */
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
{
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
}
}
curAlloc = realloc_fsm_rel(fsmrel, nPages, true);
curAllocPages = curAlloc * INDEXCHUNKPAGES;
/*
* If the data fits in our current allocation, just copy it;
* otherwise must compress. But compression is easy: we merely
......@@ -588,7 +569,6 @@ RecordIndexFreeSpace(RelFileNode *rel,
*/
newLocation = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
curAllocPages = curAlloc * INDEXCHUNKPAGES;
if (nPages > curAllocPages)
nPages = curAllocPages;
......@@ -715,6 +695,254 @@ PrintFreeSpaceMapStatistics(int elevel)
(double) FreeSpaceShmemSize() / 1024.0);
}
/*
 * DumpFreeSpaceMap - dump contents of FSM into a disk file for later reload
 *
 * The file is written as a FsmCacheFileHeader followed, for each relation
 * (walking the usage list from its tail via priorUsage), by a
 * FsmCacheRelHeader and that relation's stored page entries copied directly
 * out of the arena.
 *
 * This is expected to be called during database shutdown, after updates to
 * the FSM have stopped.  We lock the FreeSpaceLock but that's purely pro
 * forma --- if anyone else is still accessing FSM, there's a problem.
 *
 * Failure is not fatal: it is reported at LOG level and any partially
 * written file is removed.  Because fwrite() only hands data to the stdio
 * buffer, we explicitly flush before declaring success; otherwise a late
 * write error (out of disk space, say) would go unnoticed and leave a
 * truncated cache file behind.
 */
void
DumpFreeSpaceMap(void)
{
    FILE       *fp;
    char        cachefilename[MAXPGPATH];
    FsmCacheFileHeader header;
    FSMRelation *fsmrel;

    /* Try to create file */
    snprintf(cachefilename, sizeof(cachefilename), "%s/%s",
             DataDir, FSM_CACHE_FILENAME);

    unlink(cachefilename);      /* in case it exists w/wrong permissions */

    fp = AllocateFile(cachefilename, PG_BINARY_W);
    if (fp == NULL)
    {
        elog(LOG, "Failed to write %s: %m", cachefilename);
        return;
    }

    LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);

    /* Write file header; MemSet first so any padding bytes are zeroed */
    MemSet(&header, 0, sizeof(header));
    strcpy(header.label, FSM_CACHE_LABEL);
    header.endian = FSM_CACHE_ENDIAN;
    header.version = FSM_CACHE_VERSION;
    header.numRels = FreeSpaceMap->numRels;
    if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header))
        goto write_failed;

    /* For each relation, in order from least to most recently used... */
    for (fsmrel = FreeSpaceMap->usageListTail;
         fsmrel != NULL;
         fsmrel = fsmrel->priorUsage)
    {
        FsmCacheRelHeader relheader;
        int         nPages;

        /* Write relation header */
        MemSet(&relheader, 0, sizeof(relheader));
        relheader.key = fsmrel->key;
        relheader.isIndex = fsmrel->isIndex;
        relheader.avgRequest = fsmrel->avgRequest;
        relheader.lastPageCount = fsmrel->lastPageCount;
        relheader.storedPages = fsmrel->storedPages;
        if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader))
            goto write_failed;

        /* Write the per-page data directly from the arena */
        nPages = fsmrel->storedPages;
        if (nPages > 0)
        {
            Size        len;
            char       *data;

            /* Entry size depends on whether this is an index or a heap */
            if (fsmrel->isIndex)
                len = nPages * sizeof(IndexFSMPageData);
            else
                len = nPages * sizeof(FSMPageData);
            data = (char *)
                (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
            if (fwrite(data, 1, len, fp) != len)
                goto write_failed;
        }
    }

    /*
     * Push buffered data out now, while we can still report and clean up
     * after a failure; errors deferred to close time would be lost.
     */
    if (fflush(fp) != 0)
        goto write_failed;

    /* Clean up */
    LWLockRelease(FreeSpaceLock);
    FreeFile(fp);

    return;

write_failed:
    /* Report first, so that %m still reflects the failing call's errno */
    elog(LOG, "Failed to write %s: %m", cachefilename);

    /* Clean up */
    LWLockRelease(FreeSpaceLock);
    FreeFile(fp);

    /* Remove busted cache file */
    unlink(cachefilename);
}
/*
 * LoadFreeSpaceMap - load contents of FSM from a disk file
 *
 * Counterpart of DumpFreeSpaceMap; meant to run during database startup
 * before any FSM updates begin.  FreeSpaceLock is taken only pro forma ---
 * nobody else should be touching the FSM yet.
 *
 * Notes: a missing cache file draws no complaint.  If a file is found it
 * is removed once we are done with it, whether or not it could be read
 * completely.  Hence a crash without a clean shutdown means the next cycle
 * of life starts with no FSM data.  Doing otherwise would require far more
 * validation here, since a leftover dump is likely to be out-of-date (for
 * instance, it could mention deleted or truncated rels).
 */
void
LoadFreeSpaceMap(void)
{
    char        path[MAXPGPATH];
    FILE       *cacheFile;
    FsmCacheFileHeader fileHdr;
    int         remaining;

    /* Build the cache file's path and try to open it */
    snprintf(path, sizeof(path), "%s/%s", DataDir, FSM_CACHE_FILENAME);

    cacheFile = AllocateFile(path, PG_BINARY_R);
    if (cacheFile == NULL)
    {
        /* Only complain when the failure is something other than "no file" */
        if (errno != ENOENT)
            elog(LOG, "Failed to read %s: %m", path);
        return;
    }

    LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);

    /* The file header must be complete and carry the expected constants */
    if (!(fread(&fileHdr, 1, sizeof(fileHdr), cacheFile) == sizeof(fileHdr) &&
          strcmp(fileHdr.label, FSM_CACHE_LABEL) == 0 &&
          fileHdr.endian == FSM_CACHE_ENDIAN &&
          fileHdr.version == FSM_CACHE_VERSION &&
          fileHdr.numRels >= 0))
    {
        elog(LOG, "Bogus file header in %s", path);
        goto cleanup;
    }

    /* Entries were dumped least-recently-used first; consume them in turn */
    remaining = fileHdr.numRels;
    while (remaining-- > 0)
    {
        FsmCacheRelHeader relHdr;
        FSMRelation *rel;
        Size        itemSize;
        Size        nbytes;
        char       *pageBuf;
        char       *dest;
        int         nStored;
        int         chunks;
        int         capacity;

        /* Sanity-check the relation header as far as we are able to */
        if (!(fread(&relHdr, 1, sizeof(relHdr), cacheFile) == sizeof(relHdr) &&
              (relHdr.isIndex == false || relHdr.isIndex == true) &&
              relHdr.avgRequest < BLCKSZ &&
              relHdr.lastPageCount >= 0 &&
              relHdr.storedPages >= 0))
        {
            elog(LOG, "Bogus rel header in %s", path);
            goto cleanup;
        }

        /* MaxFSMPages may be smaller now than when the file was written */
        if (relHdr.lastPageCount > MaxFSMPages)
            relHdr.lastPageCount = MaxFSMPages;

        /* Pull this relation's page entries into a scratch buffer */
        nStored = relHdr.storedPages;
        itemSize = relHdr.isIndex ? sizeof(IndexFSMPageData)
                                  : sizeof(FSMPageData);
        nbytes = nStored * itemSize;
        pageBuf = (char *) palloc(nbytes + 1);  /* +1 to avoid palloc(0) */
        if (fread(pageBuf, 1, nbytes, cacheFile) != nbytes)
        {
            elog(LOG, "Premature EOF in %s", path);
            pfree(pageBuf);
            goto cleanup;
        }

        /*
         * Make the in-memory entry and give it storage.  Because the file
         * lists rels in reverse usage order, creating them one after
         * another leaves the in-memory usage order correct at the end; and
         * should MaxFSMRelations have shrunk, it is the least recently
         * used rels that get dropped.
         */
        rel = create_fsm_rel(&relHdr.key);
        rel->avgRequest = relHdr.avgRequest;
        chunks = realloc_fsm_rel(rel, relHdr.lastPageCount, relHdr.isIndex);
        dest = (char *) (FreeSpaceMap->arena + rel->firstChunk * CHUNKBYTES);

        if (!relHdr.isIndex)
        {
            /* Heap: copy as-is when it fits, else compress the entries */
            capacity = chunks * CHUNKPAGES;
            if (nStored > capacity)
            {
                pack_existing_pages((FSMPageData *) dest, capacity,
                                    (FSMPageData *) pageBuf, nStored);
                rel->storedPages = capacity;
            }
            else
            {
                memcpy(dest, pageBuf, nStored * sizeof(FSMPageData));
                rel->storedPages = nStored;
            }
        }
        else
        {
            /* Index: "compression" is just forgetting the surplus pages */
            capacity = chunks * INDEXCHUNKPAGES;
            if (nStored > capacity)
                nStored = capacity;
            memcpy(dest, pageBuf, nStored * sizeof(IndexFSMPageData));
            rel->storedPages = nStored;
        }

        pfree(pageBuf);
    }

cleanup:
    /* Reached on success and on failure alike */
    LWLockRelease(FreeSpaceLock);
    FreeFile(cacheFile);

    /* Remove cache file before it can become stale; see notes above */
    unlink(path);
}
/*
* Internal routines. These all assume the caller holds the FreeSpaceLock.
......@@ -812,6 +1040,57 @@ delete_fsm_rel(FSMRelation *fsmrel)
elog(ERROR, "FreeSpaceMap hashtable corrupted");
}
/*
 * Reallocate space for a FSMRelation.
 *
 * Common code used by RecordRelationFreeSpace, RecordIndexFreeSpace and
 * LoadFreeSpaceMap.  Any pages currently stored for the rel are discarded,
 * its request bookkeeping is refreshed for the new page count and rel kind,
 * and, if warranted, it is moved to fresh storage at the end of the arena.
 *
 * Returns the rel's resulting allocation, measured in chunks.
 */
static int
realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
{
    int         request;
    int         target;
    int         have;

    /* Forget the old contents, then swap old request for new in the sum */
    fsmrel->storedPages = 0;
    FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
    fsmrel->lastPageCount = nPages;
    fsmrel->isIndex = isIndex;
    request = fsm_calc_request(fsmrel);
    FreeSpaceMap->sumRequests += request;
    target = fsm_calc_target_allocation(request);
    have = fsm_current_allocation(fsmrel);

    /*
     * Stay put unless the target allocation exceeds what we hold AND the
     * immediate need (request+1 chunks) exceeds it too.  With no pages to
     * store there is never a reason to move.
     */
    if (nPages <= 0 || target <= have || (request + 1) <= have)
        return have;

    /* Detach from the storage list, squeeze out holes, reattach at the end */
    unlink_fsm_rel_storage(fsmrel);
    compact_fsm_storage();
    link_fsm_rel_storage(fsmrel);

    /* Claim chunks starting at the current end of used space */
    fsmrel->firstChunk = FreeSpaceMap->usedChunks;
    FreeSpaceMap->usedChunks += target;
    if (FreeSpaceMap->usedChunks <= FreeSpaceMap->totalChunks)
        return target;

    /* Roundoff pushed us past the end of the arena; take what is left */
    FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
    return FreeSpaceMap->totalChunks - fsmrel->firstChunk;
}
/*
* Link a FSMRelation into the LRU list (always at the head).
*/
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.315 2003/02/10 04:44:46 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.316 2003/03/06 00:04:27 tgl Exp $
*
* NOTES
* this is the "main" module of the postgres backend and
......@@ -46,6 +46,7 @@
#include "parser/analyze.h"
#include "parser/parser.h"
#include "rewrite/rewriteHandler.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "tcop/fastpath.h"
......@@ -1740,6 +1741,13 @@ PostgresMain(int argc, char *argv[], const char *username)
*/
StartupXLOG();
on_shmem_exit(ShutdownXLOG, 0);
/*
* Read any existing FSM cache file, and register to write one out
* at exit.
*/
LoadFreeSpaceMap();
on_shmem_exit(DumpFreeSpaceMap, 0);
}
/*
......@@ -1781,7 +1789,7 @@ PostgresMain(int argc, char *argv[], const char *username)
if (!IsUnderPostmaster)
{
puts("\nPOSTGRES backend interactive interface ");
puts("$Revision: 1.315 $ $Date: 2003/02/10 04:44:46 $\n");
puts("$Revision: 1.316 $ $Date: 2003/03/06 00:04:27 $\n");
}
/*
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: freespace.h,v 1.9 2003/03/04 21:51:22 tgl Exp $
* $Id: freespace.h,v 1.10 2003/03/06 00:04:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -60,6 +60,9 @@ extern void FreeSpaceMapForgetDatabase(Oid dbid);
extern void PrintFreeSpaceMapStatistics(int elevel);
extern void DumpFreeSpaceMap(void);
extern void LoadFreeSpaceMap(void);
#ifdef FREESPACE_DEBUG
extern void DumpFreeSpace(void);
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment