Commit a2367650 authored by Amit Kapila

Revert "Avoid creation of the free space map for small heap relations."

This reverts commit ac88d296.
parent ac88d296
CREATE EXTENSION pageinspect; CREATE EXTENSION pageinspect;
CREATE TABLE test_rel_forks (a int); CREATE TABLE test1 (a int, b int);
-- Make sure there are enough blocks in the heap for the FSM to be created. INSERT INTO test1 VALUES (16777217, 131584);
INSERT INTO test_rel_forks SELECT i from generate_series(1,1000) i; VACUUM test1; -- set up FSM
-- set up FSM and VM
VACUUM test_rel_forks;
-- The page contents can vary, so just test that it can be read -- The page contents can vary, so just test that it can be read
-- successfully, but don't keep the output. -- successfully, but don't keep the output.
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0; SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
main_0 main_0
-------- --------
8192 8192
(1 row) (1 row)
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100; SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
ERROR: block number 100 is out of range for relation "test_rel_forks" ERROR: block number 1 is out of range for relation "test1"
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0; SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
fsm_0 fsm_0
------- -------
8192 8192
(1 row) (1 row)
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10; SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
ERROR: block number 10 is out of range for relation "test_rel_forks" fsm_1
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0; -------
8192
(1 row)
SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
vm_0 vm_0
------ ------
8192 8192
(1 row) (1 row)
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1; SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
ERROR: block number 1 is out of range for relation "test_rel_forks" ERROR: block number 1 is out of range for relation "test1"
SELECT octet_length(get_raw_page('xxx', 'main', 0)); SELECT octet_length(get_raw_page('xxx', 'main', 0));
ERROR: relation "xxx" does not exist ERROR: relation "xxx" does not exist
SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0)); SELECT octet_length(get_raw_page('test1', 'xxx', 0));
ERROR: invalid fork name ERROR: invalid fork name
HINT: Valid fork names are "main", "fsm", "vm", and "init". HINT: Valid fork names are "main", "fsm", "vm", and "init".
SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0)); SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
fsm_page_contents
-------------------
0: 147 +
1: 147 +
3: 147 +
7: 147 +
15: 147 +
31: 147 +
63: 147 +
127: 147 +
255: 147 +
511: 147 +
1023: 147 +
2047: 147 +
4095: 147 +
fp_next_slot: 0 +
(1 row)
SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
?column? ?column?
---------- ----------
t t
(1 row) (1 row)
DROP TABLE test_rel_forks;
CREATE TABLE test1 (a int, b int);
INSERT INTO test1 VALUES (16777217, 131584);
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
pagesize | version pagesize | version
----------+--------- ----------+---------
...@@ -83,6 +62,26 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi ...@@ -83,6 +62,26 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
{"\\x01000001","\\x00020200"} {"\\x01000001","\\x00020200"}
(1 row) (1 row)
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
fsm_page_contents
-------------------
0: 254 +
1: 254 +
3: 254 +
7: 254 +
15: 254 +
31: 254 +
63: 254 +
127: 254 +
255: 254 +
511: 254 +
1023: 254 +
2047: 254 +
4095: 254 +
fp_next_slot: 0 +
(1 row)
DROP TABLE test1; DROP TABLE test1;
-- check that using any of these functions with a partitioned table or index -- check that using any of these functions with a partitioned table or index
-- would fail -- would fail
......
CREATE EXTENSION pageinspect; CREATE EXTENSION pageinspect;
CREATE TABLE test_rel_forks (a int); CREATE TABLE test1 (a int, b int);
-- Make sure there are enough blocks in the heap for the FSM to be created. INSERT INTO test1 VALUES (16777217, 131584);
INSERT INTO test_rel_forks SELECT i from generate_series(1,1000) i;
-- set up FSM and VM VACUUM test1; -- set up FSM
VACUUM test_rel_forks;
-- The page contents can vary, so just test that it can be read -- The page contents can vary, so just test that it can be read
-- successfully, but don't keep the output. -- successfully, but don't keep the output.
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0; SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100; SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0; SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10; SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0; SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1; SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
SELECT octet_length(get_raw_page('xxx', 'main', 0)); SELECT octet_length(get_raw_page('xxx', 'main', 0));
SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0)); SELECT octet_length(get_raw_page('test1', 'xxx', 0));
SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
DROP TABLE test_rel_forks; SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
CREATE TABLE test1 (a int, b int);
INSERT INTO test1 VALUES (16777217, 131584);
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
...@@ -38,6 +29,8 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_ ...@@ -38,6 +29,8 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_
SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits) SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
FROM heap_page_items(get_raw_page('test1', 0)); FROM heap_page_items(get_raw_page('test1', 0));
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
DROP TABLE test1; DROP TABLE test1;
-- check that using any of these functions with a partitioned table or index -- check that using any of these functions with a partitioned table or index
......
...@@ -590,13 +590,12 @@ tuple would otherwise be too big. ...@@ -590,13 +590,12 @@ tuple would otherwise be too big.
<indexterm><primary>FSM</primary><see>Free Space Map</see></indexterm> <indexterm><primary>FSM</primary><see>Free Space Map</see></indexterm>
<para> <para>
Each heap relation, unless it is very small, and each index relation, except Each heap and index relation, except for hash indexes, has a Free Space Map
for hash indexes, has a Free Space Map (FSM) to keep track of available (FSM) to keep track of available space in the relation. It's stored
space in the relation. It's stored alongside the main relation data in a alongside the main relation data in a separate relation fork, named after the
separate relation fork, named after the filenode number of the relation, plus filenode number of the relation, plus a <literal>_fsm</literal> suffix. For example,
a <literal>_fsm</literal> suffix. For example, if the filenode of a relation if the filenode of a relation is 12345, the FSM is stored in a file called
is 12345, the FSM is stored in a file called <filename>12345_fsm</filename>, <filename>12345_fsm</filename>, in the same directory as the main relation file.
in the same directory as the main relation file.
</para> </para>
<para> <para>
......
...@@ -1150,7 +1150,7 @@ terminate_brin_buildstate(BrinBuildState *state) ...@@ -1150,7 +1150,7 @@ terminate_brin_buildstate(BrinBuildState *state)
freespace = PageGetFreeSpace(page); freespace = PageGetFreeSpace(page);
blk = BufferGetBlockNumber(state->bs_currentInsertBuf); blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
ReleaseBuffer(state->bs_currentInsertBuf); ReleaseBuffer(state->bs_currentInsertBuf);
RecordPageWithFreeSpace(state->bs_irel, blk, freespace, InvalidBlockNumber); RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1); FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
} }
......
...@@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, ...@@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
if (extended) if (extended)
{ {
RecordPageWithFreeSpace(idxrel, newblk, freespace, InvalidBlockNumber); RecordPageWithFreeSpace(idxrel, newblk, freespace);
FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1); FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
} }
...@@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, ...@@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
if (extended) if (extended)
{ {
RecordPageWithFreeSpace(idxrel, blk, freespace, InvalidBlockNumber); RecordPageWithFreeSpace(idxrel, blk, freespace);
FreeSpaceMapVacuumRange(idxrel, blk, blk + 1); FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
} }
...@@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf) ...@@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
/* Measure free space and record it */ /* Measure free space and record it */
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf), RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
br_page_get_freespace(page), InvalidBlockNumber); br_page_get_freespace(page));
} }
/* /*
...@@ -703,7 +703,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, ...@@ -703,7 +703,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
/* Choose initial target page, re-using existing target if known */ /* Choose initial target page, re-using existing target if known */
newblk = RelationGetTargetBlock(irel); newblk = RelationGetTargetBlock(irel);
if (newblk == InvalidBlockNumber) if (newblk == InvalidBlockNumber)
newblk = GetPageWithFreeSpace(irel, itemsz, true); newblk = GetPageWithFreeSpace(irel, itemsz);
/* /*
* Loop until we find a page with sufficient free space. By the time we * Loop until we find a page with sufficient free space. By the time we
...@@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer) ...@@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
* pages whose FSM records were forgotten in a crash. * pages whose FSM records were forgotten in a crash.
*/ */
RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer), RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
br_page_get_freespace(page), InvalidBlockNumber); br_page_get_freespace(page));
} }
......
...@@ -239,14 +239,8 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) ...@@ -239,14 +239,8 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
* Immediately update the bottom level of the FSM. This has a good * Immediately update the bottom level of the FSM. This has a good
* chance of making this page visible to other concurrently inserting * chance of making this page visible to other concurrently inserting
* backends, and we want that to happen without delay. * backends, and we want that to happen without delay.
*
* Since we know the table will end up with extraBlocks additional
* pages, we pass the final number to avoid possible unnecessary
* system calls and to make sure the FSM is created when we add the
* first new page.
*/ */
RecordPageWithFreeSpace(relation, blockNum, freespace, RecordPageWithFreeSpace(relation, blockNum, freespace);
firstBlock + extraBlocks);
} }
while (--extraBlocks > 0); while (--extraBlocks > 0);
...@@ -383,9 +377,20 @@ RelationGetBufferForTuple(Relation relation, Size len, ...@@ -383,9 +377,20 @@ RelationGetBufferForTuple(Relation relation, Size len,
* We have no cached target page, so ask the FSM for an initial * We have no cached target page, so ask the FSM for an initial
* target. * target.
*/ */
targetBlock = GetPageWithFreeSpace(relation, targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
len + saveFreeSpace,
false); /*
* If the FSM knows nothing of the rel, try the last page before we
* give up and extend. This avoids one-tuple-per-page syndrome during
* bootstrapping or in a recently-started system.
*/
if (targetBlock == InvalidBlockNumber)
{
BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
if (nblocks > 0)
targetBlock = nblocks - 1;
}
} }
loop: loop:
...@@ -479,14 +484,6 @@ loop: ...@@ -479,14 +484,6 @@ loop:
{ {
/* use this page as future insert target, too */ /* use this page as future insert target, too */
RelationSetTargetBlock(relation, targetBlock); RelationSetTargetBlock(relation, targetBlock);
/*
* In case we used an in-memory map of available blocks, reset it
* for next use.
*/
if (targetBlock < HEAP_FSM_CREATION_THRESHOLD)
FSMClearLocalMap();
return buffer; return buffer;
} }
...@@ -546,12 +543,9 @@ loop: ...@@ -546,12 +543,9 @@ loop:
/* /*
* Check if some other backend has extended a block for us while * Check if some other backend has extended a block for us while
* we were waiting on the lock. We only check the FSM -- if there * we were waiting on the lock.
* isn't one we don't recheck the number of blocks.
*/ */
targetBlock = GetPageWithFreeSpace(relation, targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
len + saveFreeSpace,
true);
/* /*
* If some other waiter has already extended the relation, we * If some other waiter has already extended the relation, we
...@@ -631,12 +625,5 @@ loop: ...@@ -631,12 +625,5 @@ loop:
*/ */
RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer)); RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
/*
* In case we used an in-memory map of available blocks, reset it for next
* use. We do this unconditionally since after relation extension we
* can't skip this based on the targetBlock.
*/
FSMClearLocalMap();
return buffer; return buffer;
} }
...@@ -153,7 +153,7 @@ static BufferAccessStrategy vac_strategy; ...@@ -153,7 +153,7 @@ static BufferAccessStrategy vac_strategy;
static void lazy_scan_heap(Relation onerel, int options, static void lazy_scan_heap(Relation onerel, int options,
LVRelStats *vacrelstats, Relation *Irel, int nindexes, LVRelStats *vacrelstats, Relation *Irel, int nindexes,
bool aggressive); bool aggressive);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
static void lazy_vacuum_index(Relation indrel, static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats, IndexBulkDeleteResult **stats,
...@@ -758,7 +758,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -758,7 +758,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
pgstat_progress_update_multi_param(2, hvp_index, hvp_val); pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
/* Remove tuples from heap */ /* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats, nblocks); lazy_vacuum_heap(onerel, vacrelstats);
/* /*
* Forget the now-vacuumed tuples, and press on, but be careful * Forget the now-vacuumed tuples, and press on, but be careful
...@@ -896,7 +896,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -896,7 +896,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
MarkBufferDirty(buf); MarkBufferDirty(buf);
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks); RecordPageWithFreeSpace(onerel, blkno, freespace);
continue; continue;
} }
...@@ -935,7 +935,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -935,7 +935,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
} }
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks); RecordPageWithFreeSpace(onerel, blkno, freespace);
continue; continue;
} }
...@@ -1332,7 +1332,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -1332,7 +1332,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
* taken if there are no indexes.) * taken if there are no indexes.)
*/ */
if (vacrelstats->num_dead_tuples == prev_dead_count) if (vacrelstats->num_dead_tuples == prev_dead_count)
RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks); RecordPageWithFreeSpace(onerel, blkno, freespace);
} }
/* report that everything is scanned and vacuumed */ /* report that everything is scanned and vacuumed */
...@@ -1394,7 +1394,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -1394,7 +1394,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
/* Remove tuples from heap */ /* Remove tuples from heap */
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_VACUUM_HEAP); PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
lazy_vacuum_heap(onerel, vacrelstats, nblocks); lazy_vacuum_heap(onerel, vacrelstats);
vacrelstats->num_index_scans++; vacrelstats->num_index_scans++;
} }
...@@ -1465,10 +1465,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, ...@@ -1465,10 +1465,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
* Note: the reason for doing this as a second pass is we cannot remove * Note: the reason for doing this as a second pass is we cannot remove
* the tuples until we've removed their index entries, and we want to * the tuples until we've removed their index entries, and we want to
* process index entry removal in batches as large as possible. * process index entry removal in batches as large as possible.
* Note: nblocks is passed as an optimization for RecordPageWithFreeSpace().
*/ */
static void static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks) lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{ {
int tupindex; int tupindex;
int npages; int npages;
...@@ -1505,7 +1504,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks) ...@@ -1505,7 +1504,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
freespace = PageGetHeapFreeSpace(page); freespace = PageGetHeapFreeSpace(page);
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, tblk, freespace, nblocks); RecordPageWithFreeSpace(onerel, tblk, freespace);
npages++; npages++;
} }
......
...@@ -48,7 +48,6 @@ ...@@ -48,7 +48,6 @@
#include "replication/walsender.h" #include "replication/walsender.h"
#include "storage/condition_variable.h" #include "storage/condition_variable.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/freespace.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/predicate.h" #include "storage/predicate.h"
#include "storage/proc.h" #include "storage/proc.h"
...@@ -2494,12 +2493,6 @@ AbortTransaction(void) ...@@ -2494,12 +2493,6 @@ AbortTransaction(void)
pgstat_report_wait_end(); pgstat_report_wait_end();
pgstat_progress_end_command(); pgstat_progress_end_command();
/*
* In case we aborted during RelationGetBufferForTuple(), clear the local
* map of heap pages.
*/
FSMClearLocalMap();
/* Clean up buffer I/O and buffer context locks, too */ /* Clean up buffer I/O and buffer context locks, too */
AbortBufferIO(); AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
...@@ -4721,13 +4714,6 @@ AbortSubTransaction(void) ...@@ -4721,13 +4714,6 @@ AbortSubTransaction(void)
pgstat_report_wait_end(); pgstat_report_wait_end();
pgstat_progress_end_command(); pgstat_progress_end_command();
/*
* In case we aborted during RelationGetBufferForTuple(), clear the local
* map of heap pages.
*/
FSMClearLocalMap();
AbortBufferIO(); AbortBufferIO();
UnlockBuffers(); UnlockBuffers();
......
...@@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page ...@@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page
exists and the relation must be extended by one page. As of PostgreSQL 8.4 exists and the relation must be extended by one page. As of PostgreSQL 8.4
each relation has its own, extensible free space map stored in a separate each relation has its own, extensible free space map stored in a separate
"fork" of its relation. This eliminates the disadvantages of the former "fork" of its relation. This eliminates the disadvantages of the former
fixed-size FSM. There are two exceptions: fixed-size FSM.
1. Hash indexes never have a FSM.
2. For very small tables, a 3-page relation fork would be relatively large
and wasteful, so to save space we refrain from creating the FSM if the
heap has HEAP_FSM_CREATION_THRESHOLD pages or fewer.
To locate free space in the latter case, we simply try pages directly without
knowing ahead of time how much free space they have. To maintain good
performance, we create a local in-memory map of pages to try, and only mark
every other page as available. For example, in a 3-page heap, the local map
would look like:
ANAN
0123
Pages 0 and 2 are marked "available", and page 1 as "not available".
Page 3 is beyond the end of the relation, so is likewise marked "not
available". First we try page 2, and if that doesn't have sufficient free
space we try page 0 before giving up and extending the relation. There may
be some wasted free space on block 1, but if the relation extends to 4 pages:
NANA
0123
We not only have the new page 3 at our disposal, we can now check page 1
for free space as well.
Once the FSM is created for a heap we don't remove it even if somebody deletes
all the rows from the corresponding relation. We don't think it is a useful
optimization as it is quite likely that relation will again grow to the same
size.
FSM data structure
------------------
It is important to keep the map small so that it can be searched rapidly. It is important to keep the map small so that it can be searched rapidly.
Therefore, we don't attempt to record the exact free space on a page. Therefore, we don't attempt to record the exact free space on a page.
...@@ -226,3 +192,5 @@ TODO ...@@ -226,3 +192,5 @@ TODO
---- ----
- fastroot to avoid traversing upper nodes with just 1 child - fastroot to avoid traversing upper nodes with just 1 child
- use a different system for tables that fit into one FSM page, with a
mechanism to switch to the real thing as it grows.
This diff is collapsed.
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
BlockNumber BlockNumber
GetFreeIndexPage(Relation rel) GetFreeIndexPage(Relation rel)
{ {
BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2, true); BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2);
if (blkno != InvalidBlockNumber) if (blkno != InvalidBlockNumber)
RecordUsedIndexPage(rel, blkno); RecordUsedIndexPage(rel, blkno);
...@@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel) ...@@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel)
void void
RecordFreeIndexPage(Relation rel, BlockNumber freeBlock) RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
{ {
RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber); RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
} }
...@@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock) ...@@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
void void
RecordUsedIndexPage(Relation rel, BlockNumber usedBlock) RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
{ {
RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber); RecordPageWithFreeSpace(rel, usedBlock, 0);
} }
/* /*
......
...@@ -18,20 +18,15 @@ ...@@ -18,20 +18,15 @@
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
#include "utils/relcache.h" #include "utils/relcache.h"
/* Only create the FSM if the heap has greater than this many blocks */
#define HEAP_FSM_CREATION_THRESHOLD 4
/* prototypes for public functions in freespace.c */ /* prototypes for public functions in freespace.c */
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk); extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded, extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
bool check_fsm_only);
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
BlockNumber oldPage, BlockNumber oldPage,
Size oldSpaceAvail, Size oldSpaceAvail,
Size spaceNeeded); Size spaceNeeded);
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
Size spaceAvail, BlockNumber nblocks); Size spaceAvail);
extern void FSMClearLocalMap(void);
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
Size spaceAvail); Size spaceAvail);
......
--
-- Free Space Map test
--
CREATE TABLE fsm_check_size (num int, str text);
-- Fill 3 blocks with as many large records as will fit
-- No FSM
INSERT INTO fsm_check_size SELECT i, rpad('', 1024, 'a')
FROM generate_series(1,7*3) i;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
heap_size | fsm_size
-----------+----------
24576 | 0
(1 row)
-- Clear some space on block 0
DELETE FROM fsm_check_size WHERE num <= 5;
VACUUM fsm_check_size;
-- Insert small record in block 2 to set the cached smgr targetBlock
INSERT INTO fsm_check_size VALUES(99, 'b');
-- Insert large record and make sure it goes in block 0 rather than
-- causing the relation to extend
INSERT INTO fsm_check_size VALUES (101, rpad('', 1024, 'a'));
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
heap_size | fsm_size
-----------+----------
24576 | 0
(1 row)
-- Extend table with enough blocks to exceed the FSM threshold
-- FSM is created and extended to 3 blocks
INSERT INTO fsm_check_size SELECT i, 'c' FROM generate_series(200,1200) i;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
fsm_size
----------
24576
(1 row)
-- Truncate heap to 1 block
-- No change in FSM
DELETE FROM fsm_check_size WHERE num > 7;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
fsm_size
----------
24576
(1 row)
-- Truncate heap to 0 blocks
-- FSM now truncated to 2 blocks
DELETE FROM fsm_check_size;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
fsm_size
----------
16384
(1 row)
-- Add long random string to extend TOAST table to 1 block
INSERT INTO fsm_check_size
VALUES(0, (SELECT string_agg(md5(chr(i)), '')
FROM generate_series(1,100) i));
VACUUM fsm_check_size;
SELECT pg_relation_size(reltoastrelid, 'main') AS toast_size,
pg_relation_size(reltoastrelid, 'fsm') AS toast_fsm_size
FROM pg_class WHERE relname = 'fsm_check_size';
toast_size | toast_fsm_size
------------+----------------
8192 | 0
(1 row)
DROP TABLE fsm_check_size;
...@@ -68,12 +68,6 @@ test: create_aggregate create_function_3 create_cast constraints triggers inheri ...@@ -68,12 +68,6 @@ test: create_aggregate create_function_3 create_cast constraints triggers inheri
# ---------- # ----------
test: sanity_check test: sanity_check
# ----------
# fsm does a delete followed by vacuum, and running it in parallel can prevent
# removal of rows.
# ----------
test: fsm
# ---------- # ----------
# Believe it or not, select creates a table, subsequent # Believe it or not, select creates a table, subsequent
# tests need. # tests need.
......
...@@ -80,7 +80,6 @@ test: roleattributes ...@@ -80,7 +80,6 @@ test: roleattributes
test: create_am test: create_am
test: hash_func test: hash_func
test: sanity_check test: sanity_check
test: fsm
test: errors test: errors
test: select test: select
test: select_into test: select_into
......
--
-- Free Space Map test
--
CREATE TABLE fsm_check_size (num int, str text);
-- Fill 3 blocks with as many large records as will fit
-- No FSM
INSERT INTO fsm_check_size SELECT i, rpad('', 1024, 'a')
FROM generate_series(1,7*3) i;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
-- Clear some space on block 0
DELETE FROM fsm_check_size WHERE num <= 5;
VACUUM fsm_check_size;
-- Insert small record in block 2 to set the cached smgr targetBlock
INSERT INTO fsm_check_size VALUES(99, 'b');
-- Insert large record and make sure it goes in block 0 rather than
-- causing the relation to extend
INSERT INTO fsm_check_size VALUES (101, rpad('', 1024, 'a'));
SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
-- Extend table with enough blocks to exceed the FSM threshold
-- FSM is created and extended to 3 blocks
INSERT INTO fsm_check_size SELECT i, 'c' FROM generate_series(200,1200) i;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
-- Truncate heap to 1 block
-- No change in FSM
DELETE FROM fsm_check_size WHERE num > 7;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
-- Truncate heap to 0 blocks
-- FSM now truncated to 2 blocks
DELETE FROM fsm_check_size;
VACUUM fsm_check_size;
SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
-- Add long random string to extend TOAST table to 1 block
INSERT INTO fsm_check_size
VALUES(0, (SELECT string_agg(md5(chr(i)), '')
FROM generate_series(1,100) i));
VACUUM fsm_check_size;
SELECT pg_relation_size(reltoastrelid, 'main') AS toast_size,
pg_relation_size(reltoastrelid, 'fsm') AS toast_fsm_size
FROM pg_class WHERE relname = 'fsm_check_size';
DROP TABLE fsm_check_size;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment