Commit 391eb5e5 authored by Tom Lane

Reimplement free-space-map management as per recent discussions.

Adjustable threshold is gone in favor of keeping track of total requested
page storage and doling out proportional fractions to each relation
(with a minimum amount per relation, and some quantization of the results
to avoid thrashing with small changes in page counts).  Provide special-
case code for indexes so as not to waste space storing useless page
free space counts.  Restructure internal data storage to be a flat array
instead of list-of-chunks; this may cost a little more work in data
copying when reorganizing, but allows binary search to be used during
lookup_fsm_page_entry().
parent a455c942
<!-- <!--
$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.169 2003/02/19 04:06:28 momjian Exp $ $Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.170 2003/03/04 21:51:19 tgl Exp $
--> -->
<Chapter Id="runtime"> <Chapter Id="runtime">
...@@ -1725,7 +1725,9 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' ...@@ -1725,7 +1725,9 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
<listitem> <listitem>
<para> <para>
Sets the maximum number of disk pages for which free space will Sets the maximum number of disk pages for which free space will
be tracked in the shared free-space map. The default is 10000. be tracked in the shared free-space map. Six bytes of shared memory
are consumed for each page slot. This setting must be more than
16 * <varname>max_fsm_relations</varname>. The default is 20000.
This option can only be set at server start. This option can only be set at server start.
</para> </para>
</listitem> </listitem>
...@@ -1735,9 +1737,11 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' ...@@ -1735,9 +1737,11 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir'
<term><varname>MAX_FSM_RELATIONS</varname> (<type>integer</type>)</term> <term><varname>MAX_FSM_RELATIONS</varname> (<type>integer</type>)</term>
<listitem> <listitem>
<para> <para>
Sets the maximum number of relations (tables) for which free Sets the maximum number of relations (tables and indexes) for which
space will be tracked in the shared free-space map. The default free space will be tracked in the shared free-space map. Roughly
is 1000. This option can only be set at server start. fifty bytes of shared memory are consumed for each slot.
The default is 1000.
This option can only be set at server start.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
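As a rough worked example based on the figures quoted above (the per-slot costs are approximate, not exact struct sizes): the defaults of max_fsm_pages = 20000 and max_fsm_relations = 1000 satisfy the 20000 > 16 * 1000 requirement, and consume on the order of 20000 * 6 bytes (about 120 kB) for page slots plus 1000 * 50 bytes (about 50 kB) for relation slots of shared memory.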
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.63 2003/02/23 23:20:52 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.64 2003/03/04 21:51:20 tgl Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -401,15 +401,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) ...@@ -401,15 +401,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
* that the page is still free. (For example, an already-free page * that the page is still free. (For example, an already-free page
* could have been re-used between the time the last VACUUM scanned * could have been re-used between the time the last VACUUM scanned
* it and the time the VACUUM made its FSM updates.) * it and the time the VACUUM made its FSM updates.)
*
* The request size should be more than half of what btvacuumcleanup
* logs as the per-page free space. We use BLCKSZ/2 and BLCKSZ-1
* to try to get some use out of FSM's space management algorithm.
* XXX this needs some more thought...
*/ */
for (;;) for (;;)
{ {
blkno = GetPageWithFreeSpace(&rel->rd_node, BLCKSZ/2); blkno = GetFreeIndexPage(&rel->rd_node);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
break; break;
buf = ReadBuffer(rel, blkno); buf = ReadBuffer(rel, blkno);
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.100 2003/02/24 00:57:17 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.101 2003/03/04 21:51:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -697,7 +697,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -697,7 +697,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
BlockNumber num_pages; BlockNumber num_pages;
BlockNumber blkno; BlockNumber blkno;
PageFreeSpaceInfo *pageSpaces; BlockNumber *freePages;
int nFreePages, int nFreePages,
maxFreePages; maxFreePages;
BlockNumber pages_deleted = 0; BlockNumber pages_deleted = 0;
...@@ -712,7 +712,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -712,7 +712,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
maxFreePages = MaxFSMPages; maxFreePages = MaxFSMPages;
if ((BlockNumber) maxFreePages > num_pages) if ((BlockNumber) maxFreePages > num_pages)
maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */ maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */
pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo)); freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
nFreePages = 0; nFreePages = 0;
/* Create a temporary memory context to run _bt_pagedel in */ /* Create a temporary memory context to run _bt_pagedel in */
...@@ -740,12 +740,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -740,12 +740,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
{ {
/* Okay to recycle this page */ /* Okay to recycle this page */
if (nFreePages < maxFreePages) if (nFreePages < maxFreePages)
{ freePages[nFreePages++] = blkno;
pageSpaces[nFreePages].blkno = blkno;
/* claimed avail-space must be < BLCKSZ */
pageSpaces[nFreePages].avail = BLCKSZ-1;
nFreePages++;
}
pages_deleted++; pages_deleted++;
} }
else if (P_ISDELETED(opaque)) else if (P_ISDELETED(opaque))
...@@ -781,12 +776,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -781,12 +776,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
if (ndel && info->vacuum_full) if (ndel && info->vacuum_full)
{ {
if (nFreePages < maxFreePages) if (nFreePages < maxFreePages)
{ freePages[nFreePages++] = blkno;
pageSpaces[nFreePages].blkno = blkno;
/* claimed avail-space must be < BLCKSZ */
pageSpaces[nFreePages].avail = BLCKSZ-1;
nFreePages++;
}
} }
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
...@@ -805,8 +795,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -805,8 +795,7 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
{ {
BlockNumber new_pages = num_pages; BlockNumber new_pages = num_pages;
while (nFreePages > 0 && while (nFreePages > 0 && freePages[nFreePages-1] == new_pages-1)
pageSpaces[nFreePages-1].blkno == new_pages-1)
{ {
new_pages--; new_pages--;
pages_deleted--; pages_deleted--;
...@@ -841,12 +830,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -841,12 +830,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
/* /*
* Update the shared Free Space Map with the info we now have about * Update the shared Free Space Map with the info we now have about
* free space in the index, discarding any old info the map may have. * free pages in the index, discarding any old info the map may have.
* We do not need to sort the page numbers; they're in order already. * We do not need to sort the page numbers; they're in order already.
*/ */
MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces); RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);
pfree(pageSpaces); pfree(freePages);
MemoryContextDelete(mycontext); MemoryContextDelete(mycontext);
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.250 2003/02/24 00:57:17 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.251 2003/03/04 21:51:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -336,6 +336,13 @@ vacuum(VacuumStmt *vacstmt) ...@@ -336,6 +336,13 @@ vacuum(VacuumStmt *vacstmt)
*/ */
StartTransactionCommand(true); StartTransactionCommand(true);
/*
* If it was a database-wide VACUUM, print FSM usage statistics
* (we don't make you be superuser to see these).
*/
if (vacstmt->relation == NULL)
PrintFreeSpaceMapStatistics(elevel);
/* /*
* If we completed a database-wide VACUUM without skipping any * If we completed a database-wide VACUUM without skipping any
* relations, update the database's pg_database row with info * relations, update the database's pg_database row with info
...@@ -2781,31 +2788,48 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages, ...@@ -2781,31 +2788,48 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
BlockNumber rel_pages) BlockNumber rel_pages)
{ {
int nPages = fraged_pages->num_pages; int nPages = fraged_pages->num_pages;
int i; VacPage *pagedesc = fraged_pages->pagedesc;
Size threshold;
PageFreeSpaceInfo *pageSpaces; PageFreeSpaceInfo *pageSpaces;
int outPages;
int i;
/*
* We only report pages with free space at least equal to the average
* request size --- this avoids cluttering FSM with uselessly-small bits
* of space. Although FSM would discard pages with little free space
* anyway, it's important to do this prefiltering because (a) it reduces
* the time spent holding the FSM lock in RecordRelationFreeSpace, and
* (b) FSM uses the number of pages reported as a statistic for guiding
* space management. If we didn't threshold our reports the same way
* vacuumlazy.c does, we'd be skewing that statistic.
*/
threshold = GetAvgFSMRequestSize(&onerel->rd_node);
/* +1 to avoid palloc(0) */ /* +1 to avoid palloc(0) */
pageSpaces = (PageFreeSpaceInfo *) pageSpaces = (PageFreeSpaceInfo *)
palloc((nPages + 1) * sizeof(PageFreeSpaceInfo)); palloc((nPages + 1) * sizeof(PageFreeSpaceInfo));
outPages = 0;
for (i = 0; i < nPages; i++) for (i = 0; i < nPages; i++)
{ {
pageSpaces[i].blkno = fraged_pages->pagedesc[i]->blkno;
pageSpaces[i].avail = fraged_pages->pagedesc[i]->free;
/* /*
* fraged_pages may contain entries for pages that we later * fraged_pages may contain entries for pages that we later
* decided to truncate from the relation; don't enter them into * decided to truncate from the relation; don't enter them into
* the free space map! * the free space map!
*/ */
if (pageSpaces[i].blkno >= rel_pages) if (pagedesc[i]->blkno >= rel_pages)
{
nPages = i;
break; break;
if (pagedesc[i]->free >= threshold)
{
pageSpaces[outPages].blkno = pagedesc[i]->blkno;
pageSpaces[outPages].avail = pagedesc[i]->free;
outPages++;
} }
} }
MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces); RecordRelationFreeSpace(&onerel->rd_node, outPages, pageSpaces);
pfree(pageSpaces); pfree(pageSpaces);
} }
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.26 2003/02/24 00:57:17 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.27 2003/03/04 21:51:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -51,21 +51,11 @@ ...@@ -51,21 +51,11 @@
/* /*
* Space/time tradeoff parameters: do these need to be user-tunable? * Space/time tradeoff parameters: do these need to be user-tunable?
* *
* A page with less than PAGE_SPACE_THRESHOLD free space will be forgotten
* immediately, and not even passed to the free space map. Removing the
* uselessly small entries early saves cycles, and in particular reduces
* the amount of time we spend holding the FSM lock when we finally call
* MultiRecordFreeSpace. Since the FSM will ignore pages below its own
* runtime threshold anyway, there's no point in making this really small.
* XXX Is it worth trying to measure average tuple size, and using that to
* set the threshold? Problem is we don't know average tuple size very
* accurately for the first few pages...
*
* To consider truncating the relation, we want there to be at least * To consider truncating the relation, we want there to be at least
* relsize / REL_TRUNCATE_FRACTION potentially-freeable pages. * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
* is less) potentially-freeable pages.
*/ */
#define PAGE_SPACE_THRESHOLD ((Size) (BLCKSZ / 32)) #define REL_TRUNCATE_MINIMUM 1000
#define REL_TRUNCATE_FRACTION 16 #define REL_TRUNCATE_FRACTION 16
/* MAX_TUPLES_PER_PAGE can be a conservative upper limit */ /* MAX_TUPLES_PER_PAGE can be a conservative upper limit */
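A minimal standalone sketch of the new truncation test, using the constants defined above; the page counts in main() are made-up examples, not values from the source:

#include <stdio.h>

#define REL_TRUNCATE_MINIMUM  1000
#define REL_TRUNCATE_FRACTION 16

/* Same condition that lazy_vacuum_rel applies below (illustrative copy). */
static int
worth_truncating(unsigned rel_pages, unsigned possibly_freeable)
{
    return possibly_freeable >= REL_TRUNCATE_MINIMUM ||
           possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION;
}

int
main(void)
{
    printf("%d\n", worth_truncating(100000, 1500)); /* 1: meets the 1000-page minimum */
    printf("%d\n", worth_truncating(100, 6));       /* 1: meets 100/16 = 6 */
    printf("%d\n", worth_truncating(100000, 500));  /* 0: below both thresholds */
    return 0;
}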
...@@ -78,6 +68,7 @@ typedef struct LVRelStats ...@@ -78,6 +68,7 @@ typedef struct LVRelStats
BlockNumber rel_pages; BlockNumber rel_pages;
double rel_tuples; double rel_tuples;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
Size threshold; /* minimum interesting free space */
/* List of TIDs of tuples we intend to delete */ /* List of TIDs of tuples we intend to delete */
/* NB: this list is ordered by TID address */ /* NB: this list is ordered by TID address */
int num_dead_tuples; /* current # of entries */ int num_dead_tuples; /* current # of entries */
...@@ -149,6 +140,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) ...@@ -149,6 +140,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
/* Set threshold for interesting free space = average request size */
/* XXX should we scale it up or down? Adjust vacuum.c too, if so */
vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
/* Open all indexes of the relation */ /* Open all indexes of the relation */
vac_open_indexes(onerel, &nindexes, &Irel); vac_open_indexes(onerel, &nindexes, &Irel);
hasindex = (nindexes > 0); hasindex = (nindexes > 0);
...@@ -166,7 +161,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) ...@@ -166,7 +161,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
* number of pages. Otherwise, the time taken isn't worth it. * number of pages. Otherwise, the time taken isn't worth it.
*/ */
possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
if (possibly_freeable > vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
lazy_truncate_heap(onerel, vacrelstats); lazy_truncate_heap(onerel, vacrelstats);
/* Update shared free space map with final free space info */ /* Update shared free space map with final free space info */
...@@ -943,8 +939,21 @@ lazy_record_free_space(LVRelStats *vacrelstats, ...@@ -943,8 +939,21 @@ lazy_record_free_space(LVRelStats *vacrelstats,
PageFreeSpaceInfo *pageSpaces; PageFreeSpaceInfo *pageSpaces;
int n; int n;
/* Ignore pages with little free space */ /*
if (avail < PAGE_SPACE_THRESHOLD) * A page with less than stats->threshold free space will be forgotten
* immediately, and never passed to the free space map. Removing the
* uselessly small entries early saves cycles, and in particular reduces
* the amount of time we spend holding the FSM lock when we finally call
* RecordRelationFreeSpace. Since the FSM will probably drop pages with
* little free space anyway, there's no point in making this really small.
*
* XXX Is it worth trying to measure average tuple size, and using that to
* adjust the threshold? Would be worthwhile if FSM has no stats yet
* for this relation. But changing the threshold as we scan the rel
* might lead to bizarre behavior, too. Also, it's probably better if
* vacuum.c has the same thresholding behavior as we do here.
*/
if (avail < vacrelstats->threshold)
return; return;
/* Copy pointers to local variables for notational simplicity */ /* Copy pointers to local variables for notational simplicity */
...@@ -1079,13 +1088,13 @@ lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats) ...@@ -1079,13 +1088,13 @@ lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
int nPages = vacrelstats->num_free_pages; int nPages = vacrelstats->num_free_pages;
/* /*
* Sort data into order, as required by MultiRecordFreeSpace. * Sort data into order, as required by RecordRelationFreeSpace.
*/ */
if (nPages > 1) if (nPages > 1)
qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo), qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo),
vac_cmp_page_spaces); vac_cmp_page_spaces);
MultiRecordFreeSpace(&onerel->rd_node, 0, nPages, pageSpaces); RecordRelationFreeSpace(&onerel->rd_node, nPages, pageSpaces);
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.15 2003/02/23 06:17:13 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.16 2003/03/04 21:51:21 tgl Exp $
* *
* *
* NOTES: * NOTES:
...@@ -20,66 +20,126 @@ ...@@ -20,66 +20,126 @@
* *
* The number of distinct relations tracked is limited by a configuration * The number of distinct relations tracked is limited by a configuration
* variable (MaxFSMRelations). When this would be exceeded, we discard the * variable (MaxFSMRelations). When this would be exceeded, we discard the
* least frequently used relation. A previously-unknown relation is always * least recently used relation. A doubly-linked list with move-to-front
* entered into the map with useCount 1 on first reference, even if this * behavior keeps track of which relation is least recently used.
* causes an existing entry with higher useCount to be discarded. This may
* cause a little bit of thrashing among the bottom entries in the list,
* but if we didn't do it then there'd be no way for a relation not in the
* map to get in once the map is full. Note we allow a relation to be in the
* map even if no pages are currently stored for it: this allows us to track
* its useCount & threshold, which may eventually go high enough to give it
* priority for page storage.
* *
* The total number of pages tracked is also set by a configuration variable * For each known relation, we track the average request size given to
* (MaxFSMPages). We allocate these dynamically among the known relations, * GetPageWithFreeSpace() as well as the most recent number of pages given
* giving preference to the more-frequently-referenced relations and those * to RecordRelationFreeSpace(). The average request size is not directly
* with smaller tuples. This is done by a thresholding mechanism: for each * used in this module, but we expect VACUUM to use it to filter out
* relation we keep track of a current target threshold, and allow only pages * uninteresting amounts of space before calling RecordRelationFreeSpace().
* with free space >= threshold to be stored in the map. The threshold starts * The sum of the RRFS page counts is thus the total number of "interesting"
* at BLCKSZ/2 (a somewhat arbitrary choice) for a newly entered relation. * pages that we would like to track; this is called DesiredFSMPages.
* On each GetFreeSpace reference, the threshold is moved towards the
* request size using a slow exponential moving average; this means that
* for heavily used relations, the threshold will approach the average
* freespace request size (average tuple size). Whenever we run out of
* storage space in the map, we double the threshold of all existing relations
* (but not to more than BLCKSZ, so as to prevent runaway thresholds).
* Infrequently used relations will thus tend to have large thresholds.
* *
* XXX this thresholding mechanism is experimental; need to see how well * The number of pages actually tracked is limited by a configuration variable
* it works in practice. * (MaxFSMPages). When this is less than DesiredFSMPages, each relation
* gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages.
* We discard pages with less free space to reach this target.
*
* Actually, our space allocation is done in "chunks" of CHUNKPAGES pages,
* with each relation guaranteed at least one chunk. This reduces thrashing
* of the storage allocations when there are small changes in the RRFS page
* counts from one VACUUM to the next. (XXX it might also be worthwhile to
* impose some kind of moving-average smoothing on the RRFS page counts?)
*
* So the actual arithmetic is: for each relation compute myRequest as the
* number of chunks needed to hold its RRFS page count (not counting the
* first, guaranteed chunk); compute sumRequests as the sum of these values
* over all relations; then for each relation figure its actual allocation
* as
* 1 + round(spareChunks * myRequest / sumRequests)
* where spareChunks = totalChunks - numRels is the number of chunks we have
* a choice what to do with. We round off these numbers because truncating
* all of them would waste significant space. But because of roundoff, it's
* possible for the last few relations to get less space than they should;
* the computed allocation must be checked against remaining available space.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
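A standalone sketch of the chunk-allocation arithmetic described above. The real fsm_calc_target_allocation (declared further down) reads sumRequests and the chunk counts from the shared FSMHeader rather than taking them as arguments, and its result is still clamped against the chunks actually remaining; this version only makes the formula explicit:

#include <math.h>
#include <stdio.h>

/* 1 guaranteed chunk, plus a rounded proportional share of the spare chunks. */
static int
target_allocation(int myRequest, double sumRequests, int totalChunks, int numRels)
{
    int spareChunks = totalChunks - numRels;

    if (sumRequests <= 0)
        return 1;
    return 1 + (int) rint(spareChunks * myRequest / sumRequests);
}

int
main(void)
{
    /* e.g. max_fsm_pages = 20000 and CHUNKPAGES = 16 give 1250 chunks total */
    int     totalChunks = 1250;
    int     numRels = 100;
    double  sumRequests = 2000.0;   /* sum of per-relation requests, in chunks */

    /* A relation asking for 40 extra chunks gets 1 + rint(1150*40/2000) = 24 */
    printf("%d\n", target_allocation(40, sumRequests, totalChunks, numRels));
    return 0;
}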
#include "postgres.h" #include "postgres.h"
#include <limits.h> #include <limits.h>
#include <math.h>
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/itemid.h" #include "storage/itemptr.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
/* Initial value for average-request moving average */
#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32))
/*
* Number of pages and bytes per allocation chunk. Indexes can squeeze 50%
* more pages into the same space because they don't need to remember how much
* free space is on each page. The nominal number of pages, CHUNKPAGES, is for
* regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be
* even so that no space is wasted in the index case.
*/
#define CHUNKPAGES 16
#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))
#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))
/*
* Typedefs and macros for items in the page-storage arena. We use the
* existing ItemPointer and BlockId data structures, which are designed
* to pack well (they should be 6 and 4 bytes apiece regardless of machine
* alignment issues). Unfortunately we can't use the ItemPointer access
* macros, because they include Asserts insisting that ip_posid != 0.
*/
typedef ItemPointerData FSMPageData;
typedef BlockIdData IndexFSMPageData;
#define FSMPageGetPageNum(ptr) \
BlockIdGetBlockNumber(&(ptr)->ip_blkid)
#define FSMPageGetSpace(ptr) \
((Size) (ptr)->ip_posid)
#define FSMPageSetPageNum(ptr, pg) \
BlockIdSet(&(ptr)->ip_blkid, pg)
#define FSMPageSetSpace(ptr, sz) \
((ptr)->ip_posid = (OffsetNumber) (sz))
#define IndexFSMPageGetPageNum(ptr) \
BlockIdGetBlockNumber(ptr)
#define IndexFSMPageSetPageNum(ptr, pg) \
BlockIdSet(ptr, pg)
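The ItemPointerData and BlockIdData layouts come from PostgreSQL headers that are not part of this diff. The stand-in structs below (an assumption about the layout, chosen to match the 6-byte and 4-byte sizes stated above) just demonstrate the packing and the resulting chunk capacities:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins with natural 2-byte alignment: 4 and 6 bytes, no padding. */
typedef struct { uint16_t bi_hi; uint16_t bi_lo; } BlockIdStandIn;
typedef struct { BlockIdStandIn ip_blkid; uint16_t ip_posid; } FSMPageStandIn;

#define CHUNKPAGES      16
#define CHUNKBYTES      (CHUNKPAGES * sizeof(FSMPageStandIn))          /* 96 bytes */
#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(BlockIdStandIn)))  /* 24 pages */

int
main(void)
{
    printf("heap FSM entry:  %zu bytes\n", sizeof(FSMPageStandIn));    /* 6 */
    printf("index FSM entry: %zu bytes\n", sizeof(BlockIdStandIn));    /* 4 */
    printf("one chunk: %zu bytes = %d heap pages or %d index pages\n",
           (size_t) CHUNKBYTES, CHUNKPAGES, INDEXCHUNKPAGES);
    return 0;
}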
/* /*
* Shared free-space-map objects * Shared free-space-map objects
* *
* The per-relation objects are indexed by a hash table, and are also members
* of two linked lists: one ordered by recency of usage (most recent first),
* and the other ordered by physical location of the associated storage in
* the page-info arena.
*
* Each relation owns one or more chunks of per-page storage in the "arena".
* The chunks for each relation are always consecutive, so that it can treat
* its page storage as a simple array. We further insist that its page data
* be ordered by block number, so that binary search is possible.
*
* Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs. * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
* This assumes that all processes accessing the map will have the shared * This assumes that all processes accessing the map will have the shared
* memory segment mapped at the same place in their address space. * memory segment mapped at the same place in their address space.
*/ */
typedef struct FSMHeader FSMHeader; typedef struct FSMHeader FSMHeader;
typedef struct FSMRelation FSMRelation; typedef struct FSMRelation FSMRelation;
typedef struct FSMChunk FSMChunk;
/* Header for whole map */ /* Header for whole map */
struct FSMHeader struct FSMHeader
{ {
HTAB *relHash; /* hashtable of FSMRelation entries */ HTAB *relHash; /* hashtable of FSMRelation entries */
FSMRelation *relList; /* FSMRelations in useCount order */ FSMRelation *usageList; /* FSMRelations in usage-recency order */
FSMRelation *relListTail; /* tail of FSMRelation list */ FSMRelation *usageListTail; /* tail of usage-recency list */
FSMRelation *firstRel; /* FSMRelations in arena storage order */
FSMRelation *lastRel; /* tail of storage-order list */
int numRels; /* number of FSMRelations now in use */ int numRels; /* number of FSMRelations now in use */
FSMChunk *freeChunks; /* linked list of currently-free chunks */ double sumRequests; /* sum of requested chunks over all rels */
int numFreeChunks; /* number of free chunks */ char *arena; /* arena for page-info storage */
int totalChunks; /* total size of arena, in chunks */
int usedChunks; /* # of chunks assigned */
/* NB: there are totalChunks - usedChunks free chunks at end of arena */
}; };
/* /*
...@@ -90,36 +150,16 @@ struct FSMHeader ...@@ -90,36 +150,16 @@ struct FSMHeader
struct FSMRelation struct FSMRelation
{ {
RelFileNode key; /* hash key (must be first) */ RelFileNode key; /* hash key (must be first) */
FSMRelation *nextRel; /* next rel in useCount order */ FSMRelation *nextUsage; /* next rel in usage-recency order */
FSMRelation *priorRel; /* prior rel in useCount order */ FSMRelation *priorUsage; /* prior rel in usage-recency order */
int useCount; /* use count for prioritizing rels */ FSMRelation *nextPhysical; /* next rel in arena-storage order */
Size threshold; /* minimum amount of free space to keep */ FSMRelation *priorPhysical; /* prior rel in arena-storage order */
bool isIndex; /* if true, we store only page numbers */
Size avgRequest; /* moving average of space requests */
int lastPageCount; /* pages passed to RecordRelationFreeSpace */
int firstChunk; /* chunk # of my first chunk in arena */
int storedPages; /* # of pages stored in arena */
int nextPage; /* index (from 0) to start next search at */ int nextPage; /* index (from 0) to start next search at */
int numPages; /* total number of pages we have info
* about */
int numChunks; /* number of FSMChunks allocated to rel */
FSMChunk *relChunks; /* linked list of page info chunks */
FSMChunk *lastChunk; /* last chunk in linked list */
};
/*
* Info about individual pages in a relation is stored in chunks to reduce
* allocation overhead. Note that we allow any chunk of a relation's list
* to be partly full, not only the last chunk; this speeds deletion of
* individual page entries. When the total number of unused slots reaches
* CHUNKPAGES, we compact out the unused slots so that we can return a chunk
* to the freelist; but there's no point in doing the compaction before that.
*/
#define CHUNKPAGES 32 /* each chunk can store this many pages */
struct FSMChunk
{
FSMChunk *next; /* linked-list link */
int numPages; /* number of pages described here */
BlockNumber pages[CHUNKPAGES]; /* page numbers within relation */
ItemLength bytes[CHUNKPAGES]; /* free space available on each
* page */
}; };
...@@ -132,24 +172,26 @@ static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */ ...@@ -132,24 +172,26 @@ static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
static FSMRelation *lookup_fsm_rel(RelFileNode *rel); static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
static FSMRelation *create_fsm_rel(RelFileNode *rel); static FSMRelation *create_fsm_rel(RelFileNode *rel);
static void delete_fsm_rel(FSMRelation *fsmrel); static void delete_fsm_rel(FSMRelation *fsmrel);
static void link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel); static void link_fsm_rel_usage(FSMRelation *fsmrel);
static void unlink_fsm_rel(FSMRelation *fsmrel); static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
static void free_chunk_chain(FSMChunk *fchunk); static void link_fsm_rel_storage(FSMRelation *fsmrel);
static void unlink_fsm_rel_storage(FSMRelation *fsmrel);
static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded); static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
static BlockNumber find_index_free_space(FSMRelation *fsmrel);
static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
Size spaceAvail); Size spaceAvail);
static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
FSMChunk **outChunk, int *outChunkRelIndex); int *outPageIndex);
static bool insert_fsm_page_entry(FSMRelation *fsmrel, static void compact_fsm_storage(void);
BlockNumber page, Size spaceAvail, static void push_fsm_rels_after(FSMRelation *afterRel);
FSMChunk *chunk, int chunkRelIndex); static void pack_incoming_pages(FSMPageData *newLocation, int newPages,
static bool append_fsm_chunk(FSMRelation *fsmrel); PageFreeSpaceInfo *pageSpaces, int nPages);
static bool push_fsm_page_entry(BlockNumber page, Size spaceAvail, static void pack_existing_pages(FSMPageData *newLocation, int newPages,
FSMChunk *chunk, int chunkRelIndex); FSMPageData *oldLocation, int oldPages);
static void delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, static int fsm_calc_request(FSMRelation *fsmrel);
int chunkRelIndex); static int fsm_calc_target_allocation(int myRequest);
static void compact_fsm_page_list(FSMRelation *fsmrel); static int fsm_current_chunks(FSMRelation *fsmrel);
static void acquire_fsm_free_space(void); static int fsm_current_allocation(FSMRelation *fsmrel);
/* /*
...@@ -170,8 +212,6 @@ void ...@@ -170,8 +212,6 @@ void
InitFreeSpaceMap(void) InitFreeSpaceMap(void)
{ {
HASHCTL info; HASHCTL info;
FSMChunk *chunks,
*prevchunk;
int nchunks; int nchunks;
/* Create table header */ /* Create table header */
...@@ -194,22 +234,20 @@ InitFreeSpaceMap(void) ...@@ -194,22 +234,20 @@ InitFreeSpaceMap(void)
if (!FreeSpaceMap->relHash) if (!FreeSpaceMap->relHash)
elog(FATAL, "Insufficient shared memory for free space map"); elog(FATAL, "Insufficient shared memory for free space map");
/* Allocate FSMChunks and fill up the free-chunks list */ /* Allocate page-storage arena */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
FreeSpaceMap->numFreeChunks = nchunks; /* This check ensures spareChunks will be greater than zero */
if (nchunks <= MaxFSMRelations)
elog(FATAL, "max_fsm_pages must exceed max_fsm_relations * %d",
CHUNKPAGES);
chunks = (FSMChunk *) ShmemAlloc(nchunks * sizeof(FSMChunk)); FreeSpaceMap->arena = (char *) ShmemAlloc(nchunks * CHUNKBYTES);
if (chunks == NULL) if (FreeSpaceMap->arena == NULL)
elog(FATAL, "Insufficient shared memory for free space map"); elog(FATAL, "Insufficient shared memory for free space map");
prevchunk = NULL; FreeSpaceMap->totalChunks = nchunks;
while (nchunks-- > 0) FreeSpaceMap->usedChunks = 0;
{ FreeSpaceMap->sumRequests = 0;
chunks->next = prevchunk;
prevchunk = chunks;
chunks++;
}
FreeSpaceMap->freeChunks = prevchunk;
} }
/* /*
...@@ -227,10 +265,13 @@ FreeSpaceShmemSize(void) ...@@ -227,10 +265,13 @@ FreeSpaceShmemSize(void)
/* hash table, including the FSMRelation objects */ /* hash table, including the FSMRelation objects */
size += hash_estimate_size(MaxFSMRelations, sizeof(FSMRelation)); size += hash_estimate_size(MaxFSMRelations, sizeof(FSMRelation));
/* FSMChunk objects */ /* page-storage arena */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
size += MAXALIGN(nchunks * sizeof(FSMChunk)); if (nchunks >= (INT_MAX / CHUNKBYTES))
elog(FATAL, "max_fsm_pages is too large");
size += MAXALIGN(nchunks * CHUNKBYTES);
return size; return size;
} }
...@@ -244,8 +285,9 @@ FreeSpaceShmemSize(void) ...@@ -244,8 +285,9 @@ FreeSpaceShmemSize(void)
* The caller must be prepared for the possibility that the returned page * The caller must be prepared for the possibility that the returned page
* will turn out to have too little space available by the time the caller * will turn out to have too little space available by the time the caller
* gets a lock on it. In that case, the caller should report the actual * gets a lock on it. In that case, the caller should report the actual
* amount of free space available on that page (via RecordFreeSpace) and * amount of free space available on that page and then try again (see
* then try again. If InvalidBlockNumber is returned, extend the relation. * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
* extend the relation.
*/ */
BlockNumber BlockNumber
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
...@@ -261,21 +303,17 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -261,21 +303,17 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
fsmrel = create_fsm_rel(rel); fsmrel = create_fsm_rel(rel);
/* /*
* Adjust the threshold towards the space request. This essentially * Update the moving average of space requests. This code implements an
* implements an exponential moving average with an equivalent period * exponential moving average with an equivalent period of about 63
* of about 63 requests. Ignore silly requests, however, to ensure * requests. Ignore silly requests, however, to ensure that the average
* that the average stays in bounds. * stays sane.
*
* In theory, if the threshold increases here we should immediately
* delete any pages that fall below the new threshold. In practice it
* seems OK to wait until we have a need to compact space.
*/ */
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{ {
int cur_avg = (int) fsmrel->threshold; int cur_avg = (int) fsmrel->avgRequest;
cur_avg += ((int) spaceNeeded - cur_avg) / 32; cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->threshold = (Size) cur_avg; fsmrel->avgRequest = (Size) cur_avg;
} }
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
LWLockRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
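A tiny standalone sketch of the averaging rule used above, with made-up request sizes; it shows the stored average drifting from INITIAL_AVERAGE toward the typical request size, which is what the "about 63 requests" equivalent period refers to (integer truncation means it settles within roughly 32 bytes of the steady-state request):

#include <stdio.h>

#define BLCKSZ          8192
#define INITIAL_AVERAGE (BLCKSZ / 32)   /* 256 with the default 8 kB BLCKSZ */

int
main(void)
{
    int cur_avg = INITIAL_AVERAGE;
    int i;

    /* Feed 100 identical 120-byte requests through the same update rule. */
    for (i = 0; i < 100; i++)
    {
        int spaceNeeded = 120;

        if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
            cur_avg += (spaceNeeded - cur_avg) / 32;
        if (i == 0 || i == 31 || i == 63 || i == 99)
            printf("after %3d requests: avg = %d\n", i + 1, cur_avg);
    }
    return 0;
}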
...@@ -283,37 +321,10 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) ...@@ -283,37 +321,10 @@ GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
} }
/* /*
* RecordFreeSpace - record the amount of free space available on a page. * RecordAndGetPageWithFreeSpace - update info about a page and try again.
* *
* The FSM is at liberty to forget about the page instead, if the amount of * We provide this combo form, instead of a separate Record operation,
* free space is too small to be interesting. The only guarantee is that * to save one lock and hash table lookup cycle.
* a subsequent request to get a page with more than spaceAvail space free
* will not return this page.
*/
void
RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
{
FSMRelation *fsmrel;
/* Sanity check: ensure spaceAvail will fit into ItemLength */
AssertArg(spaceAvail < BLCKSZ);
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/*
* We choose not to add rels to the hashtable unless they've been
* inquired about with GetPageWithFreeSpace. Also, a Record operation
* does not increase the useCount or change threshold, only Get does.
*/
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
fsm_record_free_space(fsmrel, page, spaceAvail);
LWLockRelease(FreeSpaceLock);
}
/*
* RecordAndGetPageWithFreeSpace - combo form to save one lock and
* hash table lookup cycle.
*/ */
BlockNumber BlockNumber
RecordAndGetPageWithFreeSpace(RelFileNode *rel, RecordAndGetPageWithFreeSpace(RelFileNode *rel,
...@@ -324,7 +335,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -324,7 +335,7 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
FSMRelation *fsmrel; FSMRelation *fsmrel;
BlockNumber freepage; BlockNumber freepage;
/* Sanity check: ensure spaceAvail will fit into ItemLength */ /* Sanity check: ensure spaceAvail will fit into OffsetNumber */
AssertArg(oldSpaceAvail < BLCKSZ); AssertArg(oldSpaceAvail < BLCKSZ);
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
...@@ -334,23 +345,19 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -334,23 +345,19 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
*/ */
fsmrel = create_fsm_rel(rel); fsmrel = create_fsm_rel(rel);
/* Do the Record */
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/* /*
* Adjust the threshold towards the space request, same as in * Update the moving average of space requests, same as in
* GetPageWithFreeSpace. * GetPageWithFreeSpace.
*
* Note that we do this before storing data for oldPage, which means this
* isn't exactly equivalent to Record followed by Get; but it seems
* appropriate to adjust the threshold first.
*/ */
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{ {
int cur_avg = (int) fsmrel->threshold; int cur_avg = (int) fsmrel->avgRequest;
cur_avg += ((int) spaceNeeded - cur_avg) / 32; cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->threshold = (Size) cur_avg; fsmrel->avgRequest = (Size) cur_avg;
} }
/* Do the Record */
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/* Do the Get */ /* Do the Get */
freepage = find_free_space(fsmrel, spaceNeeded); freepage = find_free_space(fsmrel, spaceNeeded);
LWLockRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
...@@ -358,91 +365,270 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel, ...@@ -358,91 +365,270 @@ RecordAndGetPageWithFreeSpace(RelFileNode *rel,
} }
/* /*
* MultiRecordFreeSpace - record available-space info about multiple pages * GetAvgFSMRequestSize - get average FSM request size for a relation.
* of a relation in one call. *
* If the relation is not known to FSM, return a default value.
*/
Size
GetAvgFSMRequestSize(RelFileNode *rel)
{
Size result;
FSMRelation *fsmrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
result = fsmrel->avgRequest;
else
result = INITIAL_AVERAGE;
LWLockRelease(FreeSpaceLock);
return result;
}
/*
* RecordRelationFreeSpace - record available-space info about a relation.
* *
* First, the FSM must discard any entries it has for pages >= minPage. * Any pre-existing info about the relation is assumed obsolete and discarded.
* This allows obsolete info to be discarded (for example, it is used when
* truncating a relation). Any entries before minPage should be kept.
* *
* Second, if nPages > 0, record the page numbers and free space amounts in * The given pageSpaces[] array must be sorted in order by blkno. Note that
* the given pageSpaces[] array. As with RecordFreeSpace, the FSM is at * the FSM is at liberty to discard some or all of the data.
* liberty to discard some of this information. The pageSpaces[] array must
* be sorted in order by blkno, and may not contain entries before minPage.
*/ */
void void
MultiRecordFreeSpace(RelFileNode *rel, RecordRelationFreeSpace(RelFileNode *rel,
BlockNumber minPage, int nPages,
int nPages, PageFreeSpaceInfo *pageSpaces)
PageFreeSpaceInfo *pageSpaces)
{ {
FSMRelation *fsmrel; FSMRelation *fsmrel;
int i;
/* Limit nPages to something sane */
if (nPages < 0)
nPages = 0;
else if (nPages > MaxFSMPages)
nPages = MaxFSMPages;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/*
* Note we don't record info about a relation unless there's already
* an FSM entry for it, implying someone has done GetPageWithFreeSpace
* for it. Inactive rels thus will not clutter the map simply by being
* vacuumed.
*/
fsmrel = lookup_fsm_rel(rel); fsmrel = lookup_fsm_rel(rel);
if (fsmrel) if (fsmrel)
{ {
int myRequest;
int myAlloc;
int curAlloc;
int curAllocPages;
FSMPageData *newLocation;
/* /*
* Remove entries >= minPage * Delete existing entries, and update request status.
*/ */
FSMChunk *chunk; fsmrel->storedPages = 0;
int chunkRelIndex; FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
fsmrel->lastPageCount = nPages;
/* Use lookup to locate first entry >= minPage */ fsmrel->isIndex = false;
lookup_fsm_page_entry(fsmrel, minPage, &chunk, &chunkRelIndex); myRequest = fsm_calc_request(fsmrel);
/* Set free space to 0 for each page >= minPage */ FreeSpaceMap->sumRequests += myRequest;
while (chunk) myAlloc = fsm_calc_target_allocation(myRequest);
/*
* Need to reallocate space if (a) my target allocation is more
* than my current allocation, AND (b) my actual immediate need
* (myRequest+1 chunks) is more than my current allocation.
* Otherwise just store the new data in-place.
*/
curAlloc = fsm_current_allocation(fsmrel);
if (myAlloc > curAlloc && (myRequest+1) > curAlloc && nPages > 0)
{ {
int numPages = chunk->numPages; /* Remove entry from storage list, and compact */
unlink_fsm_rel_storage(fsmrel);
for (i = chunkRelIndex; i < numPages; i++) compact_fsm_storage();
chunk->bytes[i] = 0; /* Reattach to end of storage list */
chunk = chunk->next; link_fsm_rel_storage(fsmrel);
chunkRelIndex = 0; /* And allocate storage */
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
FreeSpaceMap->usedChunks += myAlloc;
curAlloc = myAlloc;
/* Watch out for roundoff error */
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
{
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
}
} }
/* Now compact out the zeroed entries, along with any other junk */
compact_fsm_page_list(fsmrel);
/* /*
* Add new entries, if appropriate. * If the data fits in our current allocation, just copy it;
* * otherwise must compress.
* This can be much cheaper than a full fsm_record_free_space()
* call because we know we are appending to the end of the relation.
*/ */
for (i = 0; i < nPages; i++) newLocation = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
curAllocPages = curAlloc * CHUNKPAGES;
if (nPages <= curAllocPages)
{ {
BlockNumber page = pageSpaces[i].blkno; int i;
Size avail = pageSpaces[i].avail;
FSMChunk *chunk;
/* Check caller provides sorted data */ for (i = 0; i < nPages; i++)
if (i > 0 ? (page <= pageSpaces[i-1].blkno) : (page < minPage)) {
elog(ERROR, "MultiRecordFreeSpace: data not in page order"); BlockNumber page = pageSpaces[i].blkno;
Size avail = pageSpaces[i].avail;
/* Check caller provides sorted data */
if (i > 0 && page <= pageSpaces[i-1].blkno)
elog(ERROR, "RecordRelationFreeSpace: data not in page order");
FSMPageSetPageNum(newLocation, page);
FSMPageSetSpace(newLocation, avail);
newLocation++;
}
fsmrel->storedPages = nPages;
}
else
{
pack_incoming_pages(newLocation, curAllocPages,
pageSpaces, nPages);
fsmrel->storedPages = curAllocPages;
}
}
LWLockRelease(FreeSpaceLock);
}
/* Ignore pages too small to fit */ /*
if (avail < fsmrel->threshold) * GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes
continue; */
BlockNumber
GetFreeIndexPage(RelFileNode *rel)
{
FSMRelation *fsmrel;
BlockNumber freepage;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/*
* We always add a rel to the hashtable when it is inquired about.
*/
fsmrel = create_fsm_rel(rel);
freepage = find_index_free_space(fsmrel);
LWLockRelease(FreeSpaceLock);
return freepage;
}
/*
* RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes
*/
void
RecordIndexFreeSpace(RelFileNode *rel,
int nPages,
BlockNumber *pages)
{
FSMRelation *fsmrel;
/* Limit nPages to something sane */
if (nPages < 0)
nPages = 0;
else if (nPages > MaxFSMPages)
nPages = MaxFSMPages;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/*
* Note we don't record info about a relation unless there's already
* an FSM entry for it, implying someone has done GetFreeIndexPage
* for it. Inactive rels thus will not clutter the map simply by being
* vacuumed.
*/
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int myRequest;
int myAlloc;
int curAlloc;
int curAllocPages;
int i;
IndexFSMPageData *newLocation;
/* Get another chunk if needed */ /*
/* We may need to loop if acquire_fsm_free_space() fails */ * Delete existing entries, and update request status.
while ((chunk = fsmrel->lastChunk) == NULL || */
chunk->numPages >= CHUNKPAGES) fsmrel->storedPages = 0;
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
fsmrel->lastPageCount = nPages;
fsmrel->isIndex = true;
myRequest = fsm_calc_request(fsmrel);
FreeSpaceMap->sumRequests += myRequest;
myAlloc = fsm_calc_target_allocation(myRequest);
/*
* Need to reallocate space if (a) my target allocation is more
* than my current allocation, AND (b) my actual immediate need
* (myRequest+1 chunks) is more than my current allocation.
* Otherwise just store the new data in-place.
*/
curAlloc = fsm_current_allocation(fsmrel);
if (myAlloc > curAlloc && (myRequest+1) > curAlloc && nPages > 0)
{
/* Remove entry from storage list, and compact */
unlink_fsm_rel_storage(fsmrel);
compact_fsm_storage();
/* Reattach to end of storage list */
link_fsm_rel_storage(fsmrel);
/* And allocate storage */
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
FreeSpaceMap->usedChunks += myAlloc;
curAlloc = myAlloc;
/* Watch out for roundoff error */
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
{ {
if (!append_fsm_chunk(fsmrel)) FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
acquire_fsm_free_space(); curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
} }
}
/*
* If the data fits in our current allocation, just copy it;
* otherwise must compress. But compression is easy: we merely
* forget extra pages.
*/
newLocation = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
curAllocPages = curAlloc * INDEXCHUNKPAGES;
if (nPages > curAllocPages)
nPages = curAllocPages;
/* Recheck in case threshold was raised by acquire */ for (i = 0; i < nPages; i++)
if (avail < fsmrel->threshold) {
continue; BlockNumber page = pages[i];
/* Okay to store */ /* Check caller provides sorted data */
chunk->pages[chunk->numPages] = page; if (i > 0 && page <= pages[i-1])
chunk->bytes[chunk->numPages] = (ItemLength) avail; elog(ERROR, "RecordIndexFreeSpace: data not in page order");
chunk->numPages++; IndexFSMPageSetPageNum(newLocation, page);
fsmrel->numPages++; newLocation++;
} }
fsmrel->storedPages = nPages;
}
LWLockRelease(FreeSpaceLock);
}
/*
* FreeSpaceMapTruncateRel - adjust for truncation of a relation.
*
* We need to delete any stored data past the new relation length, so that
* we don't bogusly return removed block numbers.
*/
void
FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks)
{
FSMRelation *fsmrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int pageIndex;
/* Use lookup to locate first entry >= nblocks */
(void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex);
/* Delete all such entries */
fsmrel->storedPages = pageIndex;
/* XXX should we adjust rel's lastPageCount and sumRequests? */
} }
LWLockRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
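The commit message notes that storing each relation's pages as a flat, blkno-sorted array lets lookup_fsm_page_entry() use a binary search (the truncation code above relies on it to find the first entry >= nblocks). The function body is not included in this excerpt; the sketch below only illustrates that idea over a plain sorted array, using the hypothetical name lookup_page_entry, and returns whether the block was found plus the index at which it is (or would be) stored:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

/* Binary search over a blkno-sorted array standing in for FSMPageData entries. */
static bool
lookup_page_entry(const BlockNumber *pages, int storedPages,
                  BlockNumber page, int *outPageIndex)
{
    int low = 0;
    int high = storedPages;     /* search the half-open range [low, high) */

    while (low < high)
    {
        int mid = (low + high) / 2;

        if (pages[mid] < page)
            low = mid + 1;
        else
            high = mid;
    }
    *outPageIndex = low;        /* index of the entry, or its insertion point */
    return (low < storedPages && pages[low] == page);
}

int
main(void)
{
    BlockNumber pages[] = {3, 7, 42, 99};
    int idx;

    printf("%d %d\n", lookup_page_entry(pages, 4, 42, &idx));  /* found at index 2 */
    printf("%d\n", idx);
    printf("%d %d\n", lookup_page_entry(pages, 4, 50, &idx), idx); /* 0 3: not found */
    return 0;
}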
...@@ -482,15 +668,53 @@ FreeSpaceMapForgetDatabase(Oid dbid) ...@@ -482,15 +668,53 @@ FreeSpaceMapForgetDatabase(Oid dbid)
*nextrel; *nextrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel) for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = nextrel)
{ {
nextrel = fsmrel->nextRel; /* in case we delete it */ nextrel = fsmrel->nextUsage; /* in case we delete it */
if (fsmrel->key.tblNode == dbid) if (fsmrel->key.tblNode == dbid)
delete_fsm_rel(fsmrel); delete_fsm_rel(fsmrel);
} }
LWLockRelease(FreeSpaceLock); LWLockRelease(FreeSpaceLock);
} }
/*
* PrintFreeSpaceMapStatistics - print statistics about FSM contents
*
* The info is sent to elog() with the specified message level. This is
* intended for use during VACUUM.
*/
void
PrintFreeSpaceMapStatistics(int elevel)
{
FSMRelation *fsmrel;
int storedPages = 0;
int numRels;
double sumRequests;
double needed;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* Count total space used --- tedious, but seems useful */
for (fsmrel = FreeSpaceMap->firstRel;
fsmrel != NULL;
fsmrel = fsmrel->nextPhysical)
{
storedPages += fsmrel->storedPages;
}
/* Copy other stats before dropping lock */
numRels = FreeSpaceMap->numRels;
sumRequests = FreeSpaceMap->sumRequests;
LWLockRelease(FreeSpaceLock);
/* Convert stats to actual number of page slots needed */
needed = (sumRequests + numRels) * CHUNKPAGES;
elog(elevel, "Free space map: %d relations, %d pages stored; %.0f total pages needed."
"\n\tAllocated FSM size: %d relations + %d pages = %.0f KB shared mem.",
numRels, storedPages, needed,
MaxFSMRelations, MaxFSMPages,
(double) FreeSpaceShmemSize() / 1024.0);
}
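As a worked example of the arithmetic above (made-up numbers): if a database-wide VACUUM leaves the map tracking numRels = 500 relations whose requests sum to sumRequests = 2000 chunks, the report computes needed = (2000 + 500) * CHUNKPAGES = 2500 * 16 = 40000 page slots; against the default max_fsm_pages of 20000, that is a hint to enlarge the map.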
/* /*
* Internal routines. These all assume the caller holds the FreeSpaceLock. * Internal routines. These all assume the caller holds the FreeSpaceLock.
...@@ -498,7 +722,8 @@ FreeSpaceMapForgetDatabase(Oid dbid) ...@@ -498,7 +722,8 @@ FreeSpaceMapForgetDatabase(Oid dbid)
/* /*
* Lookup a relation in the hash table. If not present, return NULL. * Lookup a relation in the hash table. If not present, return NULL.
* The relation's useCount is not changed. *
* The relation's position in the LRU list is not changed.
*/ */
static FSMRelation * static FSMRelation *
lookup_fsm_rel(RelFileNode *rel) lookup_fsm_rel(RelFileNode *rel)
...@@ -518,14 +743,12 @@ lookup_fsm_rel(RelFileNode *rel) ...@@ -518,14 +743,12 @@ lookup_fsm_rel(RelFileNode *rel)
/* /*
* Lookup a relation in the hash table, creating an entry if not present. * Lookup a relation in the hash table, creating an entry if not present.
* *
* On successful lookup, the relation's useCount is incremented, possibly * On successful lookup, the relation is moved to the front of the LRU list.
* causing it to move up in the priority list.
*/ */
static FSMRelation * static FSMRelation *
create_fsm_rel(RelFileNode *rel) create_fsm_rel(RelFileNode *rel)
{ {
FSMRelation *fsmrel, FSMRelation *fsmrel;
*oldrel;
bool found; bool found;
fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash, fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
...@@ -538,49 +761,31 @@ create_fsm_rel(RelFileNode *rel) ...@@ -538,49 +761,31 @@ create_fsm_rel(RelFileNode *rel)
if (!found) if (!found)
{ {
/* New hashtable entry, initialize it (hash_search set the key) */ /* New hashtable entry, initialize it (hash_search set the key) */
fsmrel->useCount = 1; fsmrel->isIndex = false; /* until we learn different */
fsmrel->threshold = BLCKSZ / 2; /* starting point for new entry */ fsmrel->avgRequest = INITIAL_AVERAGE;
fsmrel->lastPageCount = 0;
fsmrel->firstChunk = -1; /* no space allocated */
fsmrel->storedPages = 0;
fsmrel->nextPage = 0; fsmrel->nextPage = 0;
fsmrel->numPages = 0;
fsmrel->numChunks = 0;
fsmrel->relChunks = NULL;
fsmrel->lastChunk = NULL;
/* Discard lowest-priority existing rel, if we are over limit */ /* Discard lowest-priority existing rel, if we are over limit */
if (FreeSpaceMap->numRels >= MaxFSMRelations) if (FreeSpaceMap->numRels >= MaxFSMRelations)
delete_fsm_rel(FreeSpaceMap->relListTail); delete_fsm_rel(FreeSpaceMap->usageListTail);
/* /* Add new entry at front of LRU list */
* Add new entry in front of any others with useCount 1 (since it link_fsm_rel_usage(fsmrel);
* is more recently used than them). fsmrel->nextPhysical = NULL; /* not in physical-storage list */
*/ fsmrel->priorPhysical = NULL;
oldrel = FreeSpaceMap->relListTail;
while (oldrel && oldrel->useCount <= 1)
oldrel = oldrel->priorRel;
link_fsm_rel_after(fsmrel, oldrel);
FreeSpaceMap->numRels++; FreeSpaceMap->numRels++;
/* sumRequests is unchanged because request must be zero */
} }
else else
{ {
int myCount; /* Existing entry, move to front of LRU list */
if (fsmrel->priorUsage != NULL)
/* Existing entry, advance its useCount */
if (++(fsmrel->useCount) >= INT_MAX / 2)
{
/* When useCounts threaten to overflow, reduce 'em all 2X */
for (oldrel = FreeSpaceMap->relList;
oldrel != NULL;
oldrel = oldrel->nextRel)
oldrel->useCount >>= 1;
}
/* If warranted, move it up the priority list */
oldrel = fsmrel->priorRel;
myCount = fsmrel->useCount;
if (oldrel && oldrel->useCount <= myCount)
{ {
unlink_fsm_rel(fsmrel); unlink_fsm_rel_usage(fsmrel);
while (oldrel && oldrel->useCount <= myCount) link_fsm_rel_usage(fsmrel);
oldrel = oldrel->priorRel;
link_fsm_rel_after(fsmrel, oldrel);
} }
} }
...@@ -595,8 +800,9 @@ delete_fsm_rel(FSMRelation *fsmrel) ...@@ -595,8 +800,9 @@ delete_fsm_rel(FSMRelation *fsmrel)
{ {
FSMRelation *result; FSMRelation *result;
free_chunk_chain(fsmrel->relChunks); FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
unlink_fsm_rel(fsmrel); unlink_fsm_rel_usage(fsmrel);
unlink_fsm_rel_storage(fsmrel);
FreeSpaceMap->numRels--; FreeSpaceMap->numRels--;
result = (FSMRelation *) hash_search(FreeSpaceMap->relHash, result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
(void *) &(fsmrel->key), (void *) &(fsmrel->key),
...@@ -607,74 +813,78 @@ delete_fsm_rel(FSMRelation *fsmrel) ...@@ -607,74 +813,78 @@ delete_fsm_rel(FSMRelation *fsmrel)
} }
/* /*
* Link a FSMRelation into the priority list just after the given existing * Link a FSMRelation into the LRU list (always at the head).
* entry (or at the head of the list, if oldrel is NULL).
*/ */
static void static void
link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel) link_fsm_rel_usage(FSMRelation *fsmrel)
{ {
if (oldrel == NULL) fsmrel->priorUsage = NULL;
{ fsmrel->nextUsage = FreeSpaceMap->usageList;
/* insert at head */ FreeSpaceMap->usageList = fsmrel;
fsmrel->priorRel = NULL; if (fsmrel->nextUsage != NULL)
fsmrel->nextRel = FreeSpaceMap->relList; fsmrel->nextUsage->priorUsage = fsmrel;
FreeSpaceMap->relList = fsmrel;
if (fsmrel->nextRel != NULL)
fsmrel->nextRel->priorRel = fsmrel;
else
FreeSpaceMap->relListTail = fsmrel;
}
else else
{ FreeSpaceMap->usageListTail = fsmrel;
/* insert after oldrel */
fsmrel->priorRel = oldrel;
fsmrel->nextRel = oldrel->nextRel;
oldrel->nextRel = fsmrel;
if (fsmrel->nextRel != NULL)
fsmrel->nextRel->priorRel = fsmrel;
else
FreeSpaceMap->relListTail = fsmrel;
}
} }
/* /*
* Delink a FSMRelation from the priority list. * Delink a FSMRelation from the LRU list.
*/ */
static void static void
unlink_fsm_rel(FSMRelation *fsmrel) unlink_fsm_rel_usage(FSMRelation *fsmrel)
{ {
if (fsmrel->priorRel != NULL) if (fsmrel->priorUsage != NULL)
fsmrel->priorRel->nextRel = fsmrel->nextRel; fsmrel->priorUsage->nextUsage = fsmrel->nextUsage;
else else
FreeSpaceMap->relList = fsmrel->nextRel; FreeSpaceMap->usageList = fsmrel->nextUsage;
if (fsmrel->nextRel != NULL) if (fsmrel->nextUsage != NULL)
fsmrel->nextRel->priorRel = fsmrel->priorRel; fsmrel->nextUsage->priorUsage = fsmrel->priorUsage;
else else
FreeSpaceMap->relListTail = fsmrel->priorRel; FreeSpaceMap->usageListTail = fsmrel->priorUsage;
/*
* We don't bother resetting fsmrel's links, since it's about to be
* deleted or relinked at the head.
*/
} }
/* /*
* Return all the FSMChunks in the chain starting at fchunk to the freelist. * Link a FSMRelation into the storage-order list (always at the tail).
* (Caller must handle unlinking them from wherever they were.)
*/ */
static void static void
free_chunk_chain(FSMChunk *fchunk) link_fsm_rel_storage(FSMRelation *fsmrel)
{ {
int nchunks; fsmrel->nextPhysical = NULL;
FSMChunk *lchunk; fsmrel->priorPhysical = FreeSpaceMap->lastRel;
if (FreeSpaceMap->lastRel != NULL)
FreeSpaceMap->lastRel->nextPhysical = fsmrel;
else
FreeSpaceMap->firstRel = fsmrel;
FreeSpaceMap->lastRel = fsmrel;
}
if (fchunk == NULL) /*
return; * Delink a FSMRelation from the storage-order list, if it's in it.
nchunks = 1; */
lchunk = fchunk; static void
while (lchunk->next != NULL) unlink_fsm_rel_storage(FSMRelation *fsmrel)
{
if (fsmrel->priorPhysical != NULL || FreeSpaceMap->firstRel == fsmrel)
{ {
nchunks++; if (fsmrel->priorPhysical != NULL)
lchunk = lchunk->next; fsmrel->priorPhysical->nextPhysical = fsmrel->nextPhysical;
else
FreeSpaceMap->firstRel = fsmrel->nextPhysical;
if (fsmrel->nextPhysical != NULL)
fsmrel->nextPhysical->priorPhysical = fsmrel->priorPhysical;
else
FreeSpaceMap->lastRel = fsmrel->priorPhysical;
} }
lchunk->next = FreeSpaceMap->freeChunks; /* mark as not in list, since we may not put it back immediately */
FreeSpaceMap->freeChunks = fchunk; fsmrel->nextPhysical = NULL;
FreeSpaceMap->numFreeChunks += nchunks; fsmrel->priorPhysical = NULL;
/* Also mark it as having no storage */
fsmrel->firstChunk = -1;
fsmrel->storedPages = 0;
} }
/* /*
...@@ -688,104 +898,108 @@ free_chunk_chain(FSMChunk *fchunk) ...@@ -688,104 +898,108 @@ free_chunk_chain(FSMChunk *fchunk)
static BlockNumber static BlockNumber
find_free_space(FSMRelation *fsmrel, Size spaceNeeded) find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
{ {
FSMPageData *info;
int pagesToCheck, /* outer loop counter */ int pagesToCheck, /* outer loop counter */
pageIndex, /* current page index relative to relation */ pageIndex; /* current page index */
chunkRelIndex; /* current page index relative to curChunk */
FSMChunk *curChunk;
if (fsmrel->isIndex)
elog(ERROR, "find_free_space: called for an index relation");
info = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
pageIndex = fsmrel->nextPage; pageIndex = fsmrel->nextPage;
/* Last operation may have left nextPage pointing past end */ /* Last operation may have left nextPage pointing past end */
if (pageIndex >= fsmrel->numPages) if (pageIndex >= fsmrel->storedPages)
pageIndex = 0; pageIndex = 0;
curChunk = fsmrel->relChunks;
chunkRelIndex = pageIndex;
for (pagesToCheck = fsmrel->numPages; pagesToCheck > 0; pagesToCheck--) for (pagesToCheck = fsmrel->storedPages; pagesToCheck > 0; pagesToCheck--)
{ {
/* FSMPageData *page = info + pageIndex;
* Make sure we are in the right chunk. First time through, we Size spaceAvail = FSMPageGetSpace(page);
* may have to advance through multiple chunks; subsequent loops
* should do this at most once. /* Check this page */
*/ if (spaceAvail >= spaceNeeded)
while (chunkRelIndex >= curChunk->numPages)
{
chunkRelIndex -= curChunk->numPages;
curChunk = curChunk->next;
}
/* Check the next page */
if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
{ {
/* /*
* Found what we want --- adjust the entry. In theory we could * Found what we want --- adjust the entry, and update nextPage.
* delete the entry immediately if it drops below threshold,
* but it seems better to wait till we next need space.
*/ */
curChunk->bytes[chunkRelIndex] -= (ItemLength) spaceNeeded; FSMPageSetSpace(page, spaceAvail - spaceNeeded);
fsmrel->nextPage = pageIndex + 1; fsmrel->nextPage = pageIndex + 1;
return curChunk->pages[chunkRelIndex]; return FSMPageGetPageNum(page);
} }
/* Advance pageIndex and chunkRelIndex, wrapping around if needed */ /* Advance pageIndex, wrapping around if needed */
if (++pageIndex >= fsmrel->numPages) if (++pageIndex >= fsmrel->storedPages)
{
pageIndex = 0; pageIndex = 0;
curChunk = fsmrel->relChunks;
chunkRelIndex = 0;
}
else
chunkRelIndex++;
} }
return InvalidBlockNumber; /* nothing found */ return InvalidBlockNumber; /* nothing found */
} }
/* /*
* fsm_record_free_space - guts of RecordFreeSpace, which is also used by * As above, but for index case --- we only deal in whole pages.
* RecordAndGetPageWithFreeSpace. */
static BlockNumber
find_index_free_space(FSMRelation *fsmrel)
{
IndexFSMPageData *info;
BlockNumber result;
/*
* If isIndex isn't set, it could be that RecordIndexFreeSpace() has
* never yet been called on this relation, and we're still looking
* at the default setting from create_fsm_rel(). If so, just act as
* though there's no space.
*/
if (!fsmrel->isIndex)
{
if (fsmrel->storedPages == 0)
return InvalidBlockNumber;
elog(ERROR, "find_index_free_space: called for a non-index relation");
}
/*
* For indexes, there's no need for the nextPage state variable; we just
* remove and return the first available page. (We could save cycles here
* by returning the last page, but it seems better to encourage re-use
* of lower-numbered pages.)
*/
if (fsmrel->storedPages <= 0)
return InvalidBlockNumber; /* no pages available */
info = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
result = IndexFSMPageGetPageNum(info);
fsmrel->storedPages--;
memmove(info, info + 1, fsmrel->storedPages * sizeof(IndexFSMPageData));
return result;
}
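For indexes the stored entries are bare block numbers kept in ascending order, so handing out a page is just a pop of the first element, which biases re-use toward low-numbered pages as the comment above notes. A tiny sketch of that operation (plain unsigned ints stand in for IndexFSMPageData):

#include <string.h>

/* Remove and return the lowest stored page number, or -1 if none remain. */
long
pop_first_page(unsigned *pages, int *storedPages)
{
    unsigned result;

    if (*storedPages <= 0)
        return -1;
    result = pages[0];
    (*storedPages)--;
    memmove(pages, pages + 1, *storedPages * sizeof(unsigned));
    return (long) result;
}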
/*
* fsm_record_free_space - guts of RecordFreeSpace operation (now only
* provided as part of RecordAndGetPageWithFreeSpace).
*/ */
static void static void
fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail) fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
{ {
FSMChunk *chunk; int pageIndex;
int chunkRelIndex;
if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex)) if (fsmrel->isIndex)
elog(ERROR, "fsm_record_free_space: called for an index relation");
if (lookup_fsm_page_entry(fsmrel, page, &pageIndex))
{ {
/* Found an existing entry for page; update or delete it */ /* Found an existing entry for page; update it */
if (spaceAvail >= fsmrel->threshold) FSMPageData *info;
chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
else info = (FSMPageData *)
delete_fsm_page_entry(fsmrel, chunk, chunkRelIndex); (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
info += pageIndex;
FSMPageSetSpace(info, spaceAvail);
} }
else else
{ {
/* /*
* No existing entry; add one if spaceAvail exceeds threshold. * No existing entry; ignore the call. We used to add the page
* * to the FSM --- but in practice, if the page hasn't got enough
* CORNER CASE: if we have to do acquire_fsm_free_space then our own * space to satisfy the caller who's kicking it back to us, then
* threshold will increase, possibly meaning that we shouldn't * it's probably uninteresting to everyone else as well.
* store the page after all. Loop to redo the test if that
* happens. The loop also covers the possibility that
* acquire_fsm_free_space must be executed more than once to free
* any space (ie, thresholds must be more than doubled).
*/ */
while (spaceAvail >= fsmrel->threshold)
{
if (insert_fsm_page_entry(fsmrel, page, spaceAvail,
chunk, chunkRelIndex))
break;
/* No space, acquire some and recheck threshold */
acquire_fsm_free_space();
if (spaceAvail < fsmrel->threshold)
break;
/*
* Need to redo the lookup since our own page list may well
* have lost entries, so position is not correct anymore.
*/
if (lookup_fsm_page_entry(fsmrel, page,
&chunk, &chunkRelIndex))
elog(ERROR, "fsm_record_free_space: unexpected match");
}
} }
} }
...@@ -793,293 +1007,474 @@ fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail) ...@@ -793,293 +1007,474 @@ fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
* Look for an entry for a specific page (block number) in a FSMRelation. * Look for an entry for a specific page (block number) in a FSMRelation.
* Returns TRUE if a matching entry exists, else FALSE. * Returns TRUE if a matching entry exists, else FALSE.
* *
* The output arguments *outChunk, *outChunkRelIndex are set to indicate where * The output argument *outPageIndex is set to indicate where the entry exists
* the entry exists (if TRUE result) or could be inserted (if FALSE result). * (if TRUE result) or could be inserted (if FALSE result).
* *chunk is set to NULL if there is no place to insert, ie, the entry would
* need to be added to a new chunk.
*/ */
static bool static bool
lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
FSMChunk **outChunk, int *outChunkRelIndex) int *outPageIndex)
{ {
FSMChunk *chunk; /* Check for empty relation */
int chunkRelIndex; if (fsmrel->storedPages <= 0)
for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
{ {
int numPages = chunk->numPages; *outPageIndex = 0;
return false;
}
/* Can skip the chunk quickly if page must be after last in chunk */ /* Do binary search */
if (numPages > 0 && page <= chunk->pages[numPages - 1]) if (fsmrel->isIndex)
{
IndexFSMPageData *info;
int low,
high;
info = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
low = 0;
high = fsmrel->storedPages - 1;
while (low <= high)
{ {
for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++) int middle;
BlockNumber probe;
middle = low + (high - low) / 2;
probe = IndexFSMPageGetPageNum(info + middle);
if (probe == page)
{ {
if (page <= chunk->pages[chunkRelIndex]) *outPageIndex = middle;
{ return true;
*outChunk = chunk;
*outChunkRelIndex = chunkRelIndex;
return (page == chunk->pages[chunkRelIndex]);
}
} }
/* Should not get here, given above test */ else if (probe < page)
Assert(false); low = middle + 1;
else
high = middle - 1;
} }
*outPageIndex = low;
/* return false;
* If we are about to fall off the end, and there's space }
* available in the end chunk, return a pointer to it. else
*/ {
if (chunk->next == NULL && numPages < CHUNKPAGES) FSMPageData *info;
int low,
high;
info = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
low = 0;
high = fsmrel->storedPages - 1;
while (low <= high)
{ {
*outChunk = chunk; int middle;
*outChunkRelIndex = numPages; BlockNumber probe;
return false;
middle = low + (high - low) / 2;
probe = FSMPageGetPageNum(info + middle);
if (probe == page)
{
*outPageIndex = middle;
return true;
}
else if (probe < page)
low = middle + 1;
else
high = middle - 1;
} }
*outPageIndex = low;
return false;
} }
/*
* Adding the page would require a new chunk (or, perhaps, compaction
* of available free space --- not my problem here).
*/
*outChunk = NULL;
*outChunkRelIndex = 0;
return false;
} }
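Because each relation's entries now live in one sorted, fixed-stride array rather than a chunk list, the lookup is a textbook binary search; on a miss it reports the slot at which the page would have to be inserted to keep the array ordered. A self-contained sketch over plain block numbers:

#include <stdbool.h>

/*
 * Search a sorted array of block numbers.  Returns true and the matching
 * index, or false and the insertion position that preserves the ordering.
 */
bool
search_page(const unsigned *pages, int npages, unsigned page, int *outIndex)
{
    int low = 0;
    int high = npages - 1;

    while (low <= high)
    {
        int middle = low + (high - low) / 2;   /* overflow-safe midpoint */

        if (pages[middle] == page)
        {
            *outIndex = middle;
            return true;
        }
        else if (pages[middle] < page)
            low = middle + 1;
        else
            high = middle - 1;
    }
    *outIndex = low;
    return false;
}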
/* /*
* Insert a new page entry into a FSMRelation's list at given position * Re-pack the FSM storage arena, dropping data if necessary to meet the
* (chunk == NULL implies at end). * current allocation target for each relation. At conclusion, all available
* * space in the arena will be coalesced at the end.
* If there is no space available to insert the entry, return FALSE.
*/ */
static bool static void
insert_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail, compact_fsm_storage(void)
FSMChunk *chunk, int chunkRelIndex)
{ {
/* Outer loop handles retry after compacting rel's page list */ int nextChunkIndex = 0;
for (;;) FSMRelation *fsmrel;
for (fsmrel = FreeSpaceMap->firstRel;
fsmrel != NULL;
fsmrel = fsmrel->nextPhysical)
{ {
if (fsmrel->numPages >= fsmrel->numChunks * CHUNKPAGES) int newAlloc;
int newAllocPages;
int newChunkIndex;
int oldChunkIndex;
int curChunks;
char *newLocation;
char *oldLocation;
/*
* Calculate target allocation, make sure we don't overrun due to
* roundoff error
*/
newAlloc = fsm_calc_target_allocation(fsm_calc_request(fsmrel));
if (newAlloc > FreeSpaceMap->totalChunks - nextChunkIndex)
newAlloc = FreeSpaceMap->totalChunks - nextChunkIndex;
if (fsmrel->isIndex)
newAllocPages = newAlloc * INDEXCHUNKPAGES;
else
newAllocPages = newAlloc * CHUNKPAGES;
newChunkIndex = nextChunkIndex;
nextChunkIndex += newAlloc;
/*
* Determine current size, current and new locations
*/
curChunks = fsm_current_chunks(fsmrel);
oldChunkIndex = fsmrel->firstChunk;
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
/*
* It's possible that we have to move data down, not up, if the
* allocations of previous rels expanded. This should mean that
* our allocation expanded too (or at least got no worse), and
* ditto for later rels. So there should be room --- but we might
* have to push down following rels to make it. We don't want to
* do the push more than once, so pack everything against the
* end of the arena if so.
*/
if (newChunkIndex > oldChunkIndex)
{ {
/* No free space within chunk list, so need another chunk */ int limitChunkIndex;
if (!append_fsm_chunk(fsmrel))
return false; /* can't do it */ if (newAllocPages < fsmrel->storedPages)
if (chunk == NULL) elog(PANIC, "compact_fsm_storage: can't juggle and compress too");
if (fsmrel->nextPhysical != NULL)
limitChunkIndex = fsmrel->nextPhysical->firstChunk;
else
limitChunkIndex = FreeSpaceMap->totalChunks;
if (newChunkIndex + curChunks > limitChunkIndex)
{ {
/* Original search found that new page belongs at end */ /* need to push down additional rels */
chunk = fsmrel->lastChunk; push_fsm_rels_after(fsmrel);
chunkRelIndex = 0; /* recheck for safety */
if (fsmrel->nextPhysical != NULL)
limitChunkIndex = fsmrel->nextPhysical->firstChunk;
else
limitChunkIndex = FreeSpaceMap->totalChunks;
if (newChunkIndex + curChunks > limitChunkIndex)
elog(PANIC, "compact_fsm_storage: insufficient room");
} }
memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
} }
else if (newAllocPages < fsmrel->storedPages)
/*
* Try to insert it the easy way, ie, just move down subsequent
* data
*/
if (chunk &&
push_fsm_page_entry(page, spaceAvail, chunk, chunkRelIndex))
{ {
fsmrel->numPages++; /*
fsmrel->nextPage++; /* don't return same page twice running */ * Need to compress the page data. For an index, "compression"
return true; * just means dropping excess pages; otherwise we try to keep
* the ones with the most space.
*/
if (fsmrel->isIndex)
{
fsmrel->storedPages = newAllocPages;
/* may need to move data */
if (newChunkIndex != oldChunkIndex)
memmove(newLocation, oldLocation, newAlloc * CHUNKBYTES);
}
else
{
pack_existing_pages((FSMPageData *) newLocation,
newAllocPages,
(FSMPageData *) oldLocation,
fsmrel->storedPages);
fsmrel->storedPages = newAllocPages;
}
} }
else if (newChunkIndex != oldChunkIndex)
/* {
* There is space available, but evidently it's before the place /*
* where the page entry needs to go. Compact the list and try * No compression needed, but must copy the data up
* again. This will require us to redo the search for the */
* appropriate place. Furthermore, compact_fsm_page_list deletes memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
* empty end chunks, so we may need to repeat the action of }
* grabbing a new end chunk. fsmrel->firstChunk = newChunkIndex;
*/
compact_fsm_page_list(fsmrel);
if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
elog(ERROR, "insert_fsm_page_entry: entry already exists!");
} }
Assert(nextChunkIndex <= FreeSpaceMap->totalChunks);
FreeSpaceMap->usedChunks = nextChunkIndex;
} }
/* /*
* Add one chunk to a FSMRelation's chunk list, if possible. * Push all FSMRels physically after afterRel to the end of the storage arena.
* *
* Returns TRUE if successful, FALSE if no space available. Note that on * We sometimes have to do this when deletion or truncation of a relation
* success, the new chunk is easily accessible via fsmrel->lastChunk. * causes the allocations of remaining rels to expand markedly. We must
* temporarily push existing data down to the end so that we can move it
* back up in an orderly fashion.
*/ */
static bool static void
append_fsm_chunk(FSMRelation *fsmrel) push_fsm_rels_after(FSMRelation *afterRel)
{ {
FSMChunk *newChunk; int nextChunkIndex = FreeSpaceMap->totalChunks;
FSMRelation *fsmrel;
/* Remove a chunk from the freelist */
if ((newChunk = FreeSpaceMap->freeChunks) == NULL)
return false; /* can't do it */
FreeSpaceMap->freeChunks = newChunk->next;
FreeSpaceMap->numFreeChunks--;
/* Initialize chunk to empty */
newChunk->next = NULL;
newChunk->numPages = 0;
/* Link it into FSMRelation */ FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
if (fsmrel->relChunks == NULL)
fsmrel->relChunks = newChunk;
else
fsmrel->lastChunk->next = newChunk;
fsmrel->lastChunk = newChunk;
fsmrel->numChunks++;
return true; for (fsmrel = FreeSpaceMap->lastRel;
fsmrel != NULL;
fsmrel = fsmrel->priorPhysical)
{
int chunkCount;
int newChunkIndex;
int oldChunkIndex;
char *newLocation;
char *oldLocation;
if (fsmrel == afterRel)
break;
chunkCount = fsm_current_chunks(fsmrel);
nextChunkIndex -= chunkCount;
newChunkIndex = nextChunkIndex;
oldChunkIndex = fsmrel->firstChunk;
if (newChunkIndex < oldChunkIndex)
{
/* trouble... */
elog(PANIC, "push_fsm_rels_after: out of room");
}
else if (newChunkIndex > oldChunkIndex)
{
/* need to move it */
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
memmove(newLocation, oldLocation, chunkCount * CHUNKBYTES);
fsmrel->firstChunk = newChunkIndex;
}
}
Assert(nextChunkIndex >= 0);
} }
/* /*
* Auxiliary routine for insert_fsm_page_entry: try to push entries to the * Pack a set of per-page freespace data into a smaller amount of space.
* right to insert at chunk/chunkRelIndex. Return TRUE if successful. *
* Note that the FSMRelation's own fields are not updated. * The method is to compute a low-resolution histogram of the free space
* amounts, then determine which histogram bin contains the break point.
* We then keep all pages above that bin, none below it, and just enough
* of the pages in that bin to fill the output area exactly.
*/ */
static bool #define HISTOGRAM_BINS 64
push_fsm_page_entry(BlockNumber page, Size spaceAvail,
FSMChunk *chunk, int chunkRelIndex) static void
pack_incoming_pages(FSMPageData *newLocation, int newPages,
PageFreeSpaceInfo *pageSpaces, int nPages)
{ {
int i; int histogram[HISTOGRAM_BINS];
int above,
binct,
i;
Size thresholdL,
thresholdU;
Assert(newPages < nPages); /* else I shouldn't have been called */
/* Build histogram */
MemSet(histogram, 0, sizeof(histogram));
for (i = 0; i < nPages; i++)
{
Size avail = pageSpaces[i].avail;
if (chunk->numPages >= CHUNKPAGES) if (avail >= BLCKSZ)
elog(ERROR, "pack_incoming_pages: bogus freespace amount");
avail /= (BLCKSZ/HISTOGRAM_BINS);
histogram[avail]++;
}
/* Find the breakpoint bin */
above = 0;
for (i = HISTOGRAM_BINS-1; i >= 0; i--)
{ {
if (chunk->next == NULL) int sum = above + histogram[i];
return false; /* no space */
/* try to push chunk's last item to next chunk */ if (sum > newPages)
if (!push_fsm_page_entry(chunk->pages[CHUNKPAGES - 1], break;
chunk->bytes[CHUNKPAGES - 1], above = sum;
chunk->next, 0))
return false;
/* successfully pushed it */
chunk->numPages--;
} }
for (i = chunk->numPages; i > chunkRelIndex; i--) Assert(i >= 0);
thresholdL = i * BLCKSZ/HISTOGRAM_BINS; /* low bound of bp bin */
thresholdU = (i+1) * BLCKSZ/HISTOGRAM_BINS; /* hi bound */
binct = newPages - above; /* number to take from bp bin */
/* And copy the appropriate data */
for (i = 0; i < nPages; i++)
{ {
chunk->pages[i] = chunk->pages[i - 1]; BlockNumber page = pageSpaces[i].blkno;
chunk->bytes[i] = chunk->bytes[i - 1]; Size avail = pageSpaces[i].avail;
/* Check caller provides sorted data */
if (i > 0 && page <= pageSpaces[i-1].blkno)
elog(ERROR, "RecordIndexFreeSpace: data not in page order");
/* Save this page? */
if (avail >= thresholdU ||
(avail >= thresholdL && (--binct >= 0)))
{
FSMPageSetPageNum(newLocation, page);
FSMPageSetSpace(newLocation, avail);
newLocation++;
newPages--;
}
} }
chunk->numPages++; Assert(newPages == 0);
chunk->pages[chunkRelIndex] = page;
chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
return true;
} }
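The histogram trick described above picks the pages with the most free space without sorting: one pass bins the amounts into 64 coarse buckets, a scan from the top finds the bucket containing the cutoff, and a final pass keeps every page above that bucket plus just enough pages from it to fill the output exactly. A self-contained sketch over bare free-space amounts (MAXAVAIL stands in for BLCKSZ):

#include <assert.h>
#include <string.h>

#define BINS     64
#define MAXAVAIL 8192   /* illustrative stand-in for BLCKSZ */

/* Copy the newN largest of the nOld amounts into out[], preserving input order. */
void
keep_best(const unsigned *avail, int nOld, unsigned *out, int newN)
{
    int histogram[BINS];
    int above = 0;
    int binct;
    int bin;
    int i;

    assert(newN < nOld);
    memset(histogram, 0, sizeof(histogram));
    for (i = 0; i < nOld; i++)
    {
        assert(avail[i] < MAXAVAIL);
        histogram[avail[i] / (MAXAVAIL / BINS)]++;
    }

    /* Find the breakpoint bin: the first bin (from the top) that overfills newN */
    for (bin = BINS - 1; bin >= 0; bin--)
    {
        if (above + histogram[bin] > newN)
            break;
        above += histogram[bin];
    }
    binct = newN - above;   /* how many entries to take from the breakpoint bin */

    for (i = 0; i < nOld; i++)
    {
        unsigned a = avail[i];

        if (a >= (unsigned) ((bin + 1) * (MAXAVAIL / BINS)) ||
            (a >= (unsigned) (bin * (MAXAVAIL / BINS)) && --binct >= 0))
            *out++ = a;
    }
}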
/* /*
* Delete one page entry from a FSMRelation's list. Compact free space * Pack a set of per-page freespace data into a smaller amount of space.
* in the list, but only if a chunk can be returned to the freelist. *
* This is algorithmically identical to pack_incoming_pages(), but accepts
* a different input representation. Also, we assume the input data has
* previously been checked for validity (size in bounds, pages in order).
*
* Note: it is possible for the source and destination arrays to overlap.
* The caller is responsible for making sure newLocation is at lower addresses
* so that we can copy data moving forward in the arrays without problem.
*/ */
static void static void
delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, int chunkRelIndex) pack_existing_pages(FSMPageData *newLocation, int newPages,
FSMPageData *oldLocation, int oldPages)
{ {
int i, int histogram[HISTOGRAM_BINS];
lim; int above,
binct,
i;
Size thresholdL,
thresholdU;
Assert(newPages < oldPages); /* else I shouldn't have been called */
/* Build histogram */
MemSet(histogram, 0, sizeof(histogram));
for (i = 0; i < oldPages; i++)
{
Size avail = FSMPageGetSpace(oldLocation + i);
/* Shouldn't happen, but test to protect against stack clobber */
if (avail >= BLCKSZ)
elog(ERROR, "pack_existing_pages: bogus freespace amount");
avail /= (BLCKSZ/HISTOGRAM_BINS);
histogram[avail]++;
}
/* Find the breakpoint bin */
above = 0;
for (i = HISTOGRAM_BINS-1; i >= 0; i--)
{
int sum = above + histogram[i];
Assert(chunk && chunkRelIndex >= 0 && chunkRelIndex < chunk->numPages); if (sum > newPages)
/* Compact out space in this chunk */ break;
lim = --chunk->numPages; above = sum;
for (i = chunkRelIndex; i < lim; i++) }
Assert(i >= 0);
thresholdL = i * BLCKSZ/HISTOGRAM_BINS; /* low bound of bp bin */
thresholdU = (i+1) * BLCKSZ/HISTOGRAM_BINS; /* hi bound */
binct = newPages - above; /* number to take from bp bin */
/* And copy the appropriate data */
for (i = 0; i < oldPages; i++)
{ {
chunk->pages[i] = chunk->pages[i + 1]; BlockNumber page = FSMPageGetPageNum(oldLocation + i);
chunk->bytes[i] = chunk->bytes[i + 1]; Size avail = FSMPageGetSpace(oldLocation + i);
/* Save this page? */
if (avail >= thresholdU ||
(avail >= thresholdL && (--binct >= 0)))
{
FSMPageSetPageNum(newLocation, page);
FSMPageSetSpace(newLocation, avail);
newLocation++;
newPages--;
}
} }
/* Compact the whole list if a chunk can be freed */ Assert(newPages == 0);
fsmrel->numPages--;
if (fsmrel->numPages <= (fsmrel->numChunks - 1) * CHUNKPAGES)
compact_fsm_page_list(fsmrel);
} }
/* /*
* Remove any pages with free space less than the current threshold for the * Calculate number of chunks "requested" by a rel.
* FSMRelation, compact out free slots in non-last chunks, and return any *
* completely freed chunks to the freelist. * Rel's lastPageCount and isIndex settings must be up-to-date when called.
*
* See notes at top of file for details.
*/ */
static void static int
compact_fsm_page_list(FSMRelation *fsmrel) fsm_calc_request(FSMRelation *fsmrel)
{ {
Size threshold = fsmrel->threshold; int chunkCount;
FSMChunk *srcChunk,
*dstChunk;
int srcIndex,
dstIndex,
dstPages,
dstChunkCnt;
srcChunk = dstChunk = fsmrel->relChunks;
srcIndex = dstIndex = 0;
dstPages = 0; /* total pages kept */
dstChunkCnt = 1; /* includes current dstChunk */
while (srcChunk != NULL)
{
int srcPages = srcChunk->numPages;
while (srcIndex < srcPages) /* Convert page count to chunk count */
{ if (fsmrel->isIndex)
if ((Size) srcChunk->bytes[srcIndex] >= threshold) chunkCount = (fsmrel->lastPageCount - 1) / INDEXCHUNKPAGES + 1;
{ else
if (dstIndex >= CHUNKPAGES) chunkCount = (fsmrel->lastPageCount - 1) / CHUNKPAGES + 1;
{ /* "Request" is anything beyond our one guaranteed chunk */
/* if (chunkCount <= 0)
* At this point srcChunk must be pointing to a later return 0;
* chunk, so it's OK to overwrite dstChunk->numPages. else
*/ return chunkCount - 1;
dstChunk->numPages = dstIndex; }
dstChunk = dstChunk->next;
dstChunkCnt++; /*
dstIndex = 0; * Calculate target allocation (number of chunks) for a rel
} *
dstChunk->pages[dstIndex] = srcChunk->pages[srcIndex]; * Parameter is the result from fsm_calc_request(). The global sumRequests
dstChunk->bytes[dstIndex] = srcChunk->bytes[srcIndex]; * and numRels totals must be up-to-date already.
dstIndex++; *
dstPages++; * See notes at top of file for details.
} */
srcIndex++; static int
} fsm_calc_target_allocation(int myRequest)
srcChunk = srcChunk->next; {
srcIndex = 0; double spareChunks;
} int extra;
if (dstPages == 0) spareChunks = FreeSpaceMap->totalChunks - FreeSpaceMap->numRels;
Assert(spareChunks > 0);
if (spareChunks >= FreeSpaceMap->sumRequests)
{ {
/* No chunks to be kept at all */ /* We aren't oversubscribed, so allocate exactly the request */
fsmrel->nextPage = 0; extra = myRequest;
fsmrel->numPages = 0;
fsmrel->numChunks = 0;
fsmrel->relChunks = NULL;
fsmrel->lastChunk = NULL;
free_chunk_chain(dstChunk);
} }
else else
{ {
/* we deliberately don't change nextPage here */ extra = (int) rint(spareChunks * myRequest / FreeSpaceMap->sumRequests);
fsmrel->numPages = dstPages; if (extra < 0) /* shouldn't happen, but make sure */
fsmrel->numChunks = dstChunkCnt; extra = 0;
dstChunk->numPages = dstIndex;
free_chunk_chain(dstChunk->next);
dstChunk->next = NULL;
fsmrel->lastChunk = dstChunk;
} }
return 1 + extra;
} }
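The two helpers above implement the proportional sharing described in the commit message: each relation is guaranteed one chunk, its request is whatever it would need beyond that (a ceiling division of its page count by the chunk capacity, minus one), and when the arena is oversubscribed the spare chunks are split pro rata across the requests. A self-contained restatement with the globals passed as parameters (names are illustrative):

#include <math.h>

/*
 * Chunks to allot to one relation: 1 guaranteed chunk, plus either its full
 * request (if the arena is not oversubscribed) or a proportional share of
 * the spare chunks (if it is).
 */
int
target_allocation(int totalChunks, int numRels, double sumRequests, int myRequest)
{
    double spareChunks = totalChunks - numRels;   /* left over after the guarantees */
    int    extra;

    if (spareChunks >= sumRequests)
        extra = myRequest;
    else
        extra = (int) rint(spareChunks * myRequest / sumRequests);
    if (extra < 0)
        extra = 0;
    return 1 + extra;
}

For example, with 1250 total chunks, 50 relations, and summed requests of 3000 chunks (made-up numbers), there are 1200 spare chunks, so a relation requesting 60 chunks is allotted 1 + rint(1200 * 60 / 3000) = 25 chunks.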
/* /*
* Acquire some free space by raising the thresholds of all FSMRelations. * Calculate number of chunks actually used to store current data
*
* Note there is no guarantee as to how much space will be freed by a single
* invocation; caller may repeat if necessary.
*/ */
static void static int
acquire_fsm_free_space(void) fsm_current_chunks(FSMRelation *fsmrel)
{ {
FSMRelation *fsmrel; int chunkCount;
/* Convert page count to chunk count */
if (fsmrel->isIndex)
chunkCount = (fsmrel->storedPages - 1) / INDEXCHUNKPAGES + 1;
else
chunkCount = (fsmrel->storedPages - 1) / CHUNKPAGES + 1;
/* Make sure storedPages==0 produces right answer */
if (chunkCount < 0)
chunkCount = 0;
return chunkCount;
}
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel) /*
* Calculate current actual allocation (number of chunks) for a rel
*/
static int
fsm_current_allocation(FSMRelation *fsmrel)
{
if (fsmrel->nextPhysical != NULL)
{
return fsmrel->nextPhysical->firstChunk - fsmrel->firstChunk;
}
else if (fsmrel == FreeSpaceMap->lastRel)
{ {
fsmrel->threshold *= 2; return FreeSpaceMap->usedChunks - fsmrel->firstChunk;
/* Limit thresholds to BLCKSZ so they can't get ridiculously large */ }
if (fsmrel->threshold > BLCKSZ) else
fsmrel->threshold = BLCKSZ; {
/* Release any pages that don't meet the new threshold */ /* it's not in the storage-order list */
compact_fsm_page_list(fsmrel); Assert(fsmrel->firstChunk < 0 && fsmrel->storedPages == 0);
return 0;
} }
} }
...@@ -1097,55 +1492,54 @@ DumpFreeSpace(void) ...@@ -1097,55 +1492,54 @@ DumpFreeSpace(void)
FSMRelation *fsmrel; FSMRelation *fsmrel;
FSMRelation *prevrel = NULL; FSMRelation *prevrel = NULL;
int relNum = 0; int relNum = 0;
FSMChunk *chunk;
int chunkRelIndex;
int nChunks;
int nPages; int nPages;
for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel) for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage)
{ {
relNum++; relNum++;
fprintf(stderr, "Map %d: rel %u/%u useCount %d threshold %u nextPage %d\nMap= ", fprintf(stderr, "Map %d: rel %u/%u isIndex %d avgRequest %u lastPageCount %d nextPage %d\nMap= ",
relNum, fsmrel->key.tblNode, fsmrel->key.relNode, relNum, fsmrel->key.tblNode, fsmrel->key.relNode,
fsmrel->useCount, fsmrel->threshold, fsmrel->nextPage); (int) fsmrel->isIndex, fsmrel->avgRequest,
nChunks = nPages = 0; fsmrel->lastPageCount, fsmrel->nextPage);
for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next) if (fsmrel->isIndex)
{
IndexFSMPageData *page;
page = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
{
fprintf(stderr, " %u",
IndexFSMPageGetPageNum(page));
page++;
}
}
else
{ {
int numPages = chunk->numPages; FSMPageData *page;
nChunks++; page = (FSMPageData *)
for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++) (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
{ {
nPages++;
fprintf(stderr, " %u:%u", fprintf(stderr, " %u:%u",
chunk->pages[chunkRelIndex], FSMPageGetPageNum(page),
chunk->bytes[chunkRelIndex]); FSMPageGetSpace(page));
page++;
} }
} }
fprintf(stderr, "\n"); fprintf(stderr, "\n");
/* Cross-check local counters and list links */ /* Cross-check list links */
if (nPages != fsmrel->numPages) if (prevrel != fsmrel->priorUsage)
fprintf(stderr, "DumpFreeSpace: %d pages in rel, but numPages = %d\n",
nPages, fsmrel->numPages);
if (nChunks != fsmrel->numChunks)
fprintf(stderr, "DumpFreeSpace: %d chunks in rel, but numChunks = %d\n",
nChunks, fsmrel->numChunks);
if (prevrel != fsmrel->priorRel)
fprintf(stderr, "DumpFreeSpace: broken list links\n"); fprintf(stderr, "DumpFreeSpace: broken list links\n");
prevrel = fsmrel; prevrel = fsmrel;
} }
if (prevrel != FreeSpaceMap->relListTail) if (prevrel != FreeSpaceMap->usageListTail)
fprintf(stderr, "DumpFreeSpace: broken list links\n"); fprintf(stderr, "DumpFreeSpace: broken list links\n");
/* Cross-check global counters */ /* Cross-check global counters */
if (relNum != FreeSpaceMap->numRels) if (relNum != FreeSpaceMap->numRels)
fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n", fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
relNum, FreeSpaceMap->numRels); relNum, FreeSpaceMap->numRels);
nChunks = 0;
for (chunk = FreeSpaceMap->freeChunks; chunk; chunk = chunk->next)
nChunks++;
if (nChunks != FreeSpaceMap->numFreeChunks)
fprintf(stderr, "DumpFreeSpace: %d chunks in list, but numFreeChunks = %d\n",
nChunks, FreeSpaceMap->numFreeChunks);
} }
#endif /* FREESPACE_DEBUG */ #endif /* FREESPACE_DEBUG */
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.61 2002/09/20 19:56:01 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.62 2003/03/04 21:51:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -410,7 +410,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks) ...@@ -410,7 +410,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
* for the about-to-be-deleted blocks. We want to be sure it * for the about-to-be-deleted blocks. We want to be sure it
* won't return bogus block numbers later on. * won't return bogus block numbers later on.
*/ */
MultiRecordFreeSpace(&reln->rd_node, nblocks, 0, NULL); FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);
newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks); newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
if (newblks == InvalidBlockNumber) if (newblks == InvalidBlockNumber)
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* command, configuration file, and command line options. * command, configuration file, and command line options.
* See src/backend/utils/misc/README for more information. * See src/backend/utils/misc/README for more information.
* *
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.115 2003/02/23 23:27:21 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.116 2003/03/04 21:51:21 tgl Exp $
* *
* Copyright 2000 by PostgreSQL Global Development Group * Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut <peter_e@gmx.net>. * Written by Peter Eisentraut <peter_e@gmx.net>.
...@@ -644,11 +644,11 @@ static struct config_int ...@@ -644,11 +644,11 @@ static struct config_int
{ {
{"max_fsm_relations", PGC_POSTMASTER}, &MaxFSMRelations, {"max_fsm_relations", PGC_POSTMASTER}, &MaxFSMRelations,
1000, 10, INT_MAX, NULL, NULL 1000, 100, INT_MAX, NULL, NULL
}, },
{ {
{"max_fsm_pages", PGC_POSTMASTER}, &MaxFSMPages, {"max_fsm_pages", PGC_POSTMASTER}, &MaxFSMPages,
10000, 1000, INT_MAX, NULL, NULL 20000, 1000, INT_MAX, NULL, NULL
}, },
{ {
......
...@@ -48,10 +48,11 @@ ...@@ -48,10 +48,11 @@
# Shared Memory Size # Shared Memory Size
# #
#shared_buffers = 64 # min max_connections*2 or 16, 8KB each #shared_buffers = 64 # min max_connections*2 or 16, 8KB each
#max_fsm_relations = 1000 # min 10, fsm is free space map, ~40 bytes
#max_fsm_pages = 10000 # min 1000, fsm is free space map, ~6 bytes
#max_locks_per_transaction = 64 # min 10 #max_locks_per_transaction = 64 # min 10
#wal_buffers = 8 # min 4, typically 8KB each #wal_buffers = 8 # min 4, typically 8KB each
# fsm = free space map
#max_fsm_relations = 1000 # min 100, ~50 bytes each
#max_fsm_pages = 20000 # min max_fsm_relations*16, 6 bytes each
# #
# Non-shared Memory Sizes # Non-shared Memory Sizes
......
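As a rough worked example of what the new defaults cost in shared memory, using the per-slot figures quoted in the sample file above:

    20000 page slots x  6 bytes = 120000 bytes (~117 kB)
     1000 rel slots  x ~50 bytes =  50000 bytes (~49 kB)

so the default map takes on the order of 170 kB. The new floor of max_fsm_relations * 16 on max_fsm_pages presumably reflects the one chunk of page slots guaranteed to every tracked relation (assuming a 16-slot chunk, which this excerpt does not show).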
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: freespace.h,v 1.8 2002/09/20 19:56:01 tgl Exp $ * $Id: freespace.h,v 1.9 2003/03/04 21:51:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -28,6 +28,7 @@ typedef struct PageFreeSpaceInfo ...@@ -28,6 +28,7 @@ typedef struct PageFreeSpaceInfo
} PageFreeSpaceInfo; } PageFreeSpaceInfo;
/* GUC variables */
extern int MaxFSMRelations; extern int MaxFSMRelations;
extern int MaxFSMPages; extern int MaxFSMPages;
...@@ -39,19 +40,26 @@ extern void InitFreeSpaceMap(void); ...@@ -39,19 +40,26 @@ extern void InitFreeSpaceMap(void);
extern int FreeSpaceShmemSize(void); extern int FreeSpaceShmemSize(void);
extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded); extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded);
extern void RecordFreeSpace(RelFileNode *rel, BlockNumber page,
Size spaceAvail);
extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel, extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
BlockNumber oldPage, BlockNumber oldPage,
Size oldSpaceAvail, Size oldSpaceAvail,
Size spaceNeeded); Size spaceNeeded);
extern void MultiRecordFreeSpace(RelFileNode *rel, extern Size GetAvgFSMRequestSize(RelFileNode *rel);
BlockNumber minPage, extern void RecordRelationFreeSpace(RelFileNode *rel,
int nPages, int nPages,
PageFreeSpaceInfo *pageSpaces); PageFreeSpaceInfo *pageSpaces);
extern BlockNumber GetFreeIndexPage(RelFileNode *rel);
extern void RecordIndexFreeSpace(RelFileNode *rel,
int nPages,
BlockNumber *pages);
extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks);
extern void FreeSpaceMapForgetRel(RelFileNode *rel); extern void FreeSpaceMapForgetRel(RelFileNode *rel);
extern void FreeSpaceMapForgetDatabase(Oid dbid); extern void FreeSpaceMapForgetDatabase(Oid dbid);
extern void PrintFreeSpaceMapStatistics(int elevel);
#ifdef FREESPACE_DEBUG #ifdef FREESPACE_DEBUG
extern void DumpFreeSpace(void); extern void DumpFreeSpace(void);
#endif #endif
......
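A hedged sketch of how a vacuum-style caller might drive the revised interface above (it compiles only inside the backend tree against this header; block numbers and byte counts are made up, and entries are supplied in ascending block-number order as the packing code expects):

#include "storage/freespace.h"

void
example_fsm_usage(RelFileNode *heap, RelFileNode *index)
{
    /* Free space found by a full scan of the heap, in block-number order */
    PageFreeSpaceInfo pageSpaces[3] = {
        {.blkno = 2, .avail = 512},
        {.blkno = 5, .avail = 4000},
        {.blkno = 9, .avail = 7200},
    };
    BlockNumber freePages[2] = {3, 8};   /* completely-empty index pages */
    BlockNumber heapTarget;
    BlockNumber indexTarget;

    RecordRelationFreeSpace(heap, 3, pageSpaces);
    RecordIndexFreeSpace(index, 2, freePages);

    /* Later: ask for a heap page with at least 300 recorded free bytes ... */
    heapTarget = GetPageWithFreeSpace(heap, 300);
    /* ... or a recycled index page (lowest-numbered first); either call
     * returns InvalidBlockNumber if nothing suitable is recorded. */
    indexTarget = GetFreeIndexPage(index);
    (void) heapTarget;
    (void) indexTarget;
}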