Revert "Avoid the creation of the free space map for small heap relations".

This feature used a process-local map to track the first few blocks in the relation. The map was reset each time we got a block with enough free space. It was discussed that it would be better to track this map on a per-relation basis in the relcache and then invalidate it whenever vacuum frees up some space in a page or when the FSM is created. The new design would be better in terms of both API design and performance. List of commits reverted, in reverse chronological order: 06c8a509 Improve code comments in b0eaa4c5. 13e8643b During pg_upgrade, conditionally skip transfer of FSMs. 6f918159 Add more tests for FSM. 9c32e4c3 Clear the local map when not used. 29d108cd Update the documentation for FSM behavior. 08ecdfe7 Make FSM test portable. b0eaa4c5 Avoid creation of the free space map for small heap relations. Discussion: https://postgr.es/m/20190416180452.3pm6uegx54iitbt5@alap3.anarazel.de

Revert "Avoid the creation of the free space map for small heap relations".
This feature used a process-local map to track the first few blocks in the relation. The map was reset each time we got a block with enough free space. It was discussed that it would be better to track this map on a per-relation basis in the relcache and then invalidate it whenever vacuum frees up some space in a page or when the FSM is created. The new design would be better in terms of both API design and performance. List of commits reverted, in reverse chronological order: 06c8a509 Improve code comments in b0eaa4c5. 13e8643b During pg_upgrade, conditionally skip transfer of FSMs. 6f918159 Add more tests for FSM. 9c32e4c3 Clear the local map when not used. 29d108cd Update the documentation for FSM behavior. 08ecdfe7 Make FSM test portable. b0eaa4c5 Avoid creation of the free space map for small heap relations. Discussion: https://postgr.es/m/20190416180452.3pm6uegx54iitbt5@alap3.anarazel.de
7db0cde6 · Amit Kapila · af82f95a · 7db0cde6 · 7db0cde6 · 7db0cde6
Commit 7db0cde6 authored May 07, 2019 by Amit Kapila
23 changed files
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
 CREATE EXTENSION pageinspect;
-CREATE TABLE test_rel_forks (a int);
+CREATE TABLE test1 (a int, b int);
-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test1 VALUES (16777217, 131584);
-INSERT INTO test_rel_forks SELECT i from generate_series(1,2000) i;
+VACUUM test1;  -- set up FSM
-- set up FSM and VM
-VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
 main_0 
 --------
   8192
 (1 row)
-SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 100 is out of range for relation "test_rel_forks"
+ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
 fsm_0 
 -------
  8192
 (1 row)
-SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 20)) AS fsm_20;
+SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
-ERROR:  block number 20 is out of range for relation "test_rel_forks"
+ fsm_1 
-SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
+-------
+  8192
+(1 row)
+SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
 vm_0 
 ------
 8192
 (1 row)
-SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test_rel_forks"
+ERROR:  block number 1 is out of range for relation "test1"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
+SELECT octet_length(get_raw_page('test1', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm", and "init".
-EXPLAIN (costs off, analyze on, timing off, summary off) SELECT * FROM
+SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
-        fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
-                         QUERY PLAN                         
------------------------------------------------------------
- Function Scan on fsm_page_contents (actual rows=1 loops=1)
-(1 row)
-SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
 ?column? 
 ----------
 t
 (1 row)
-DROP TABLE test_rel_forks;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
 pagesize | version 
 ----------+---------
@@ -70,6 +62,26 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
 {"\\x01000001","\\x00020200"}
 (1 row)
+SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
+ fsm_page_contents 
+-------------------
+ 0: 254           +
+ 1: 254           +
+ 3: 254           +
+ 7: 254           +
+ 15: 254          +
+ 31: 254          +
+ 63: 254          +
+ 127: 254         +
+ 255: 254         +
+ 511: 254         +
+ 1023: 254        +
+ 2047: 254        +
+ 4095: 254        +
+ fp_next_slot: 0  +
+(1 row)
 DROP TABLE test1;
 -- check that using any of these functions with a partitioned table or index
 -- would fail

--- a/contrib/pageinspect/sql/page.sql
+++ b/contrib/pageinspect/sql/page.sql
 CREATE EXTENSION pageinspect;
-CREATE TABLE test_rel_forks (a int);
+CREATE TABLE test1 (a int, b int);
-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test1 VALUES (16777217, 131584);
-INSERT INTO test_rel_forks SELECT i from generate_series(1,2000) i;
-- set up FSM and VM
+VACUUM test1;  -- set up FSM
-VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
-SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
-SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 20)) AS fsm_20;
+SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
-SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
-SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
-SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
+SELECT octet_length(get_raw_page('test1', 'xxx', 0));
-EXPLAIN (costs off, analyze on, timing off, summary off) SELECT * FROM
-        fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
-SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
-DROP TABLE test_rel_forks;
+SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
@@ -39,6 +29,8 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_
 SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
    FROM heap_page_items(get_raw_page('test1', 0));
+SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
 DROP TABLE test1;
 -- check that using any of these functions with a partitioned table or index

--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -90,9 +90,6 @@ statapprox_heap(Relation rel, output_type *stat)
 		/*
 		 * If the page has only visible tuples, then we can find out the free
 		 * space from the FSM and move on.
-		 *
-		 * Note: If a relation has no FSM, GetRecordedFreeSpace() will report
-		 * zero free space.  This is fine for the purposes of approximation.
 		 */
 		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
 		{

--- a/doc/src/sgml/pgfreespacemap.sgml
+++ b/doc/src/sgml/pgfreespacemap.sgml
@@ -61,8 +61,6 @@
   The values stored in the free space map are not exact. They're rounded
   to precision of 1/256th of <symbol>BLCKSZ</symbol> (32 bytes with default <symbol>BLCKSZ</symbol>), and
   they're not kept fully up-to-date as tuples are inserted and updated.
-   In addition, small tables don't have a free space map, so these functions
-   will return zero even if free space is available.
  </para>
  <para>

--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -527,9 +527,7 @@ approx_free_percent  | 2.09
      bit set, then it is assumed to contain no dead tuples). For such
      pages, it derives the free space value from the free space map, and
      assumes that the rest of the space on the page is taken up by live
-      tuples. Small tables don't have a free space map, so in that case
+      tuples.
-      this function will report zero free space, likewise inflating the
-      approximate tuple length.
     </para>
     <para>

--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -812,13 +812,6 @@ psql --username=postgres --file=script.sql postgres
   is down.
  </para>
-  <para>
-   In <productname>PostgreSQL</productname> 12 and later small tables by
-   default don't have a free space map, as a space optimization.  If you are
-   upgrading a pre-12 cluster, the free space maps of small tables will
-   likewise not be transferred to the new cluster.
-  </para>
 </refsect1>
 <refsect1>

--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -598,13 +598,12 @@ tuple would otherwise be too big.
 <indexterm><primary>FSM</primary><see>Free Space Map</see></indexterm>
 <para>
-Each heap relation, unless it is very small, and each index relation, except
+Each heap and index relation, except for hash indexes, has a Free Space Map
-for hash indexes, has a Free Space Map (FSM) to keep track of available
+(FSM) to keep track of available space in the relation. It's stored
-space in the relation. It's stored alongside the main relation data in a
+alongside the main relation data in a separate relation fork, named after the
-separate relation fork, named after the filenode number of the relation, plus
+filenode number of the relation, plus a <literal>_fsm</literal> suffix. For example,
-a <literal>_fsm</literal> suffix. For example, if the filenode of a relation
+if the filenode of a relation is 12345, the FSM is stored in a file called
-is 12345, the FSM is stored in a file called <filename>12345_fsm</filename>,
+<filename>12345_fsm</filename>, in the same directory as the main relation file.
-in the same directory as the main relation file.
 </para>
 <para>

--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -1152,7 +1152,7 @@ terminate_brin_buildstate(BrinBuildState *state)
 		freespace = PageGetFreeSpace(page);
 		blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
 		ReleaseBuffer(state->bs_currentInsertBuf);
-		RecordPageWithFreeSpace(state->bs_irel, blk, freespace, InvalidBlockNumber);
+		RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
 		FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
 	}

--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -310,7 +310,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
 		if (extended)
 		{
-			RecordPageWithFreeSpace(idxrel, newblk, freespace, InvalidBlockNumber);
+			RecordPageWithFreeSpace(idxrel, newblk, freespace);
 			FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
 		}
@@ -461,7 +461,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
 	if (extended)
 	{
-		RecordPageWithFreeSpace(idxrel, blk, freespace, InvalidBlockNumber);
+		RecordPageWithFreeSpace(idxrel, blk, freespace);
 		FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
 	}
@@ -654,7 +654,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf)
 	/* Measure free space and record it */
 	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
-							br_page_get_freespace(page), InvalidBlockNumber);
+							br_page_get_freespace(page));
 }
 /*
@@ -703,7 +703,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
 	/* Choose initial target page, re-using existing target if known */
 	newblk = RelationGetTargetBlock(irel);
 	if (newblk == InvalidBlockNumber)
-		newblk = GetPageWithFreeSpace(irel, itemsz, true);
+		newblk = GetPageWithFreeSpace(irel, itemsz);
 	/*
 	 * Loop until we find a page with sufficient free space.  By the time we
@@ -895,7 +895,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
 	 * pages whose FSM records were forgotten in a crash.
 	 */
 	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
-							br_page_get_freespace(page), InvalidBlockNumber);
+							br_page_get_freespace(page));
 }

--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -246,14 +246,8 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
 		 * Immediately update the bottom level of the FSM.  This has a good
 		 * chance of making this page visible to other concurrently inserting
 		 * backends, and we want that to happen without delay.
-		 *
-		 * Since we know the table will end up with extraBlocks additional
-		 * pages, we pass the final number to avoid possible unnecessary
-		 * system calls and to make sure the FSM is created when we add the
-		 * first new page.
 		 */
-		RecordPageWithFreeSpace(relation, blockNum, freespace,
+		RecordPageWithFreeSpace(relation, blockNum, freespace);
-								firstBlock + extraBlocks);
 	}
 	while (--extraBlocks > 0);
@@ -390,9 +384,20 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		 * We have no cached target page, so ask the FSM for an initial
 		 * target.
 		 */
-		targetBlock = GetPageWithFreeSpace(relation,
+		targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
-										   len + saveFreeSpace,
-										   false);
+		/*
+		 * If the FSM knows nothing of the rel, try the last page before we
+		 * give up and extend.  This avoids one-tuple-per-page syndrome during
+		 * bootstrapping or in a recently-started system.
+		 */
+		if (targetBlock == InvalidBlockNumber)
+		{
+			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
+			if (nblocks > 0)
+				targetBlock = nblocks - 1;
+		}
 	}
 loop:
@@ -499,13 +504,6 @@ loop:
 		{
 			/* use this page as future insert target, too */
 			RelationSetTargetBlock(relation, targetBlock);
-			/*
-			 * In case we used an in-memory map of available blocks, reset it
-			 * for next use.
-			 */
-			FSMClearLocalMap();
 			return buffer;
 		}
@@ -565,12 +563,9 @@ loop:
 			/*
 			 * Check if some other backend has extended a block for us while
-			 * we were waiting on the lock.  We only check the FSM -- if there
+			 * we were waiting on the lock.
-			 * isn't one we don't recheck the number of blocks.
 			 */
-			targetBlock = GetPageWithFreeSpace(relation,
+			targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
-											   len + saveFreeSpace,
-											   true);
 			/*
 			 * If some other waiter has already extended the relation, we
@@ -675,8 +670,5 @@ loop:
 	 */
 	RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
-	/* This should already be cleared by now, but make sure it is. */
-	FSMClearLocalMap();
 	return buffer;
 }
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -153,7 +153,7 @@ static BufferAccessStrategy vac_strategy;
 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
 			   LVRelStats *vacrelstats, Relation *Irel, int nindexes,
 			   bool aggressive);
-static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks);
+static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
 static void lazy_vacuum_index(Relation indrel,
 				  IndexBulkDeleteResult **stats,
@@ -780,7 +780,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
 			/* Remove tuples from heap */
-			lazy_vacuum_heap(onerel, vacrelstats, nblocks);
+			lazy_vacuum_heap(onerel, vacrelstats);
 			/*
 			 * Forget the now-vacuumed tuples, and press on, but be careful
@@ -919,7 +919,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 					Size		freespace;
 					freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
-					RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
+					RecordPageWithFreeSpace(onerel, blkno, freespace);
 				}
 			}
 			continue;
@@ -963,7 +963,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			}
 			UnlockReleaseBuffer(buf);
-			RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
+			RecordPageWithFreeSpace(onerel, blkno, freespace);
 			continue;
 		}
@@ -1381,7 +1381,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		 * taken if there are no indexes.)
 		 */
 		if (vacrelstats->num_dead_tuples == prev_dead_count)
-			RecordPageWithFreeSpace(onerel, blkno, freespace, nblocks);
+			RecordPageWithFreeSpace(onerel, blkno, freespace);
 	}
 	/* report that everything is scanned and vacuumed */
@@ -1443,7 +1443,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		/* Remove tuples from heap */
 		pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 									 PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
-		lazy_vacuum_heap(onerel, vacrelstats, nblocks);
+		lazy_vacuum_heap(onerel, vacrelstats);
 		vacrelstats->num_index_scans++;
 	}
@@ -1517,10 +1517,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 * Note: the reason for doing this as a second pass is we cannot remove
 * the tuples until we've removed their index entries, and we want to
 * process index entry removal in batches as large as possible.
- * Note: nblocks is passed as an optimization for RecordPageWithFreeSpace().
 */
 static void
-lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
+lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 {
 	int			tupindex;
 	int			npages;
@@ -1557,7 +1556,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats, BlockNumber nblocks)
 		freespace = PageGetHeapFreeSpace(page);
 		UnlockReleaseBuffer(buf);
-		RecordPageWithFreeSpace(onerel, tblk, freespace, nblocks);
+		RecordPageWithFreeSpace(onerel, tblk, freespace);
 		npages++;
 	}

--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -48,7 +48,6 @@
 #include "replication/walsender.h"
 #include "storage/condition_variable.h"
 #include "storage/fd.h"
-#include "storage/freespace.h"
 #include "storage/lmgr.h"
 #include "storage/md.h"
 #include "storage/predicate.h"
@@ -2587,12 +2586,6 @@ AbortTransaction(void)
 	pgstat_report_wait_end();
 	pgstat_progress_end_command();
-	/*
-	 * In case we aborted during RelationGetBufferForTuple(), clear the local
-	 * map of heap pages.
-	 */
-	FSMClearLocalMap();
 	/* Clean up buffer I/O and buffer context locks, too */
 	AbortBufferIO();
 	UnlockBuffers();
@@ -4880,13 +4873,6 @@ AbortSubTransaction(void)
 	pgstat_report_wait_end();
 	pgstat_progress_end_command();
-	/*
-	 * In case we aborted during RelationGetBufferForTuple(), clear the local
-	 * map of heap pages.
-	 */
-	FSMClearLocalMap();
 	AbortBufferIO();
 	UnlockBuffers();

--- a/src/backend/storage/freespace/README
+++ b/src/backend/storage/freespace/README
@@ -8,41 +8,7 @@ free space to hold a tuple to be stored; or to determine that no such page
 exists and the relation must be extended by one page.  As of PostgreSQL 8.4
 each relation has its own, extensible free space map stored in a separate
 "fork" of its relation.  This eliminates the disadvantages of the former
-fixed-size FSM.  There are two exceptions:
+fixed-size FSM.
-1. Hash indexes never have a FSM.
-2. For very small tables, a 3-page relation fork would be relatively large
-and wasteful, so to save space we refrain from creating the FSM if the
-heap has HEAP_FSM_CREATION_THRESHOLD pages or fewer.
-To locate free space in the latter case, we simply try pages directly without
-knowing ahead of time how much free space they have.  To maintain good
-performance, we create a local in-memory map of pages to try, and only mark
-every other page as available.  For example, in a 3-page heap, the local map
-would look like:
-ANAN
-0123
-Pages 0 and 2 are marked "available", and page 1 as "not available".
-Page 3 is beyond the end of the relation, so is likewise marked "not
-available".  First we try page 2, and if that doesn't have sufficient free
-space we try page 0 before giving up and extending the relation.  There may
-be some wasted free space on block 1, but if the relation extends to 4 pages:
-NANA
-0123
-We not only have the new page 3 at our disposal, we can now check page 1
-for free space as well.
-Once the FSM is created for a heap we don't remove it even if somebody deletes
-all the rows from the corresponding relation.  We don't think it is a useful
-optimization as it is quite likely that relation will again grow to the same
-size.
-FSM data structure
------------------
 It is important to keep the map small so that it can be searched rapidly.
 Therefore, we don't attempt to record the exact free space on a page.
@@ -226,3 +192,5 @@ TODO
 ----
 - fastroot to avoid traversing upper nodes with just 1 child
+- use a different system for tables that fit into one FSM page, with a
+  mechanism to switch to the real thing as it grows.
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
--- a/src/backend/storage/freespace/indexfsm.c
+++ b/src/backend/storage/freespace/indexfsm.c
@@ -37,7 +37,7 @@
 BlockNumber
 GetFreeIndexPage(Relation rel)
 {
-	BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2, true);
+	BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ / 2);
 	if (blkno != InvalidBlockNumber)
 		RecordUsedIndexPage(rel, blkno);
@@ -51,7 +51,7 @@ GetFreeIndexPage(Relation rel)
 void
 RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 {
-	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1, InvalidBlockNumber);
+	RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
 }
@@ -61,7 +61,7 @@ RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
 void
 RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
 {
-	RecordPageWithFreeSpace(rel, usedBlock, 0, InvalidBlockNumber);
+	RecordPageWithFreeSpace(rel, usedBlock, 0);
 }
 /*

--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,8 +200,6 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
-	map->relpages = old_rel->relpages;
-	map->relkind = old_rel->relkind;
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -420,7 +418,6 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
-	char	   *relkind = NULL;
 	int			i_spclocation,
 				i_nspname,
 				i_relname,
@@ -428,9 +425,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 				i_indtable,
 				i_toastheap,
 				i_relfilenode,
-				i_reltablespace,
+				i_reltablespace;
-				i_relpages,
-				i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -499,7 +494,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
+			 "  c.relfilenode, c.reltablespace, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "      UNION ALL "
 			 "      SELECT * FROM toast_heap "
@@ -530,8 +525,6 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relname = PQfnumber(res, "relname");
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
-	i_relpages = PQfnumber(res, "relpages");
-	i_relkind = PQfnumber(res, "relkind");
 	i_spclocation = PQfnumber(res, "spclocation");
 	for (relnum = 0; relnum < ntups; relnum++)
@@ -563,11 +556,6 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		curr->relname = pg_strdup(relname);
 		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
-		curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages));
-		relkind = PQgetvalue(res, relnum, i_relkind);
-		curr->relkind = relkind[0];
 		curr->tblsp_alloc = false;
 		/* Is the tablespace oid non-default? */

--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -147,8 +147,6 @@ typedef struct
 	char	   *tablespace;		/* tablespace path; "" for cluster default */
 	bool		nsp_alloc;		/* should nspname be freed? */
 	bool		tblsp_alloc;	/* should tablespace be freed? */
-	int32		relpages;		/* # of pages -- see pg_class.h */
-	char		relkind;		/* relation kind -- see pg_class.h */
 } RelInfo;
 typedef struct
@@ -175,10 +173,6 @@ typedef struct
 	 */
 	Oid			old_relfilenode;
 	Oid			new_relfilenode;
-	int32		relpages;		/* # of pages -- see pg_class.h */
-	char		relkind;		/* relation kind -- see pg_class.h */
 	/* the rest are used only for logging and error reporting */
 	char	   *nspname;		/* namespaces */
 	char	   *relname;

--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -14,12 +14,10 @@
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
-#include "storage/freespace.h"
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
-static bool new_cluster_needs_fsm(FileNameMap *map);
 /*
@@ -176,8 +174,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				if (new_cluster_needs_fsm(&maps[mapnum]))
+				transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
-					transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
 				if (vm_crashsafe_match)
 					transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
 			}
@@ -281,61 +278,3 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 			}
 	}
 }
-/*
- * new_cluster_needs_fsm()
- *
- * Return false for small heaps if we're upgrading across PG 12, the first
- * version where small heap relations don't have FSMs by default.
- */
-static bool
-new_cluster_needs_fsm(FileNameMap *map)
-{
-	char		old_primary_file[MAXPGPATH];
-	struct stat statbuf;
-	/* fsm/vm files added in PG 8.4 */
-	Assert(GET_MAJOR_VERSION(old_cluster.major_version) >= 804);
-	if (!(GET_MAJOR_VERSION(old_cluster.major_version) <= 1100 &&
-		  GET_MAJOR_VERSION(new_cluster.major_version) >= 1200))
-		return true;
-	/* Always transfer FSMs of non-heap relations. */
-	if (map->relkind != RELKIND_RELATION &&
-		map->relkind != RELKIND_TOASTVALUE)
-		return true;
-	/*
-	 * If pg_class.relpages falsely reports that the heap is above the
-	 * threshold, we will transfer a FSM when we don't need to, but this is
-	 * harmless.
-	 */
-	if (map->relpages > HEAP_FSM_CREATION_THRESHOLD)
-		return true;
-	/* Determine path of the primary file. */
-	snprintf(old_primary_file, sizeof(old_primary_file), "%s%s/%u/%u",
-			 map->old_tablespace,
-			 map->old_tablespace_suffix,
-			 map->old_db_oid,
-			 map->old_relfilenode);
-	/*
-	 * If pg_class.relpages falsely reports that the heap is below the
-	 * threshold, a FSM would be skipped when we actually need it.  To guard
-	 * against this, we verify the size of the primary file.
-	 */
-	if (stat(old_primary_file, &statbuf) != 0)
-	{
-		pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\"): %s\n",
-				 map->nspname, map->relname, old_primary_file, strerror(errno));
-		/* Keep compiler quiet. */
-		return false;
-	}
-	else if (statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ)
-		return true;
-	else
-		return false;
-}
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -18,20 +18,15 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
-/* Only create the FSM if the heap has greater than this many blocks */
-#define HEAP_FSM_CREATION_THRESHOLD 4
 /* prototypes for public functions in freespace.c */
 extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
-extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded,
+extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
-					 bool check_fsm_only);
 extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
 							  BlockNumber oldPage,
 							  Size oldSpaceAvail,
 							  Size spaceNeeded);
 extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
-						Size spaceAvail, BlockNumber nblocks);
+						Size spaceAvail);
-extern void FSMClearLocalMap(void);
 extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 							Size spaceAvail);

--- a/src/test/regress/expected/fsm.out
+++ b/src/test/regress/expected/fsm.out
--
-- Free Space Map test
--
-SELECT current_setting('block_size')::integer AS blocksize,
-current_setting('block_size')::integer / 8 AS strsize
-\gset
-CREATE TABLE fsm_check_size (num int, str text);
-- Fill 3 blocks with one record each
-ALTER TABLE fsm_check_size SET (fillfactor=15);
-INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
-FROM generate_series(1,3) i;
-- There should be no FSM
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
-pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
- heap_nblocks | fsm_nblocks 
--------------+-------------
-            3 |           0
-(1 row)
-- The following operations are for testing the functionality of the local
-- in-memory map. In particular, we want to be able to insert into some
-- other block than the one at the end of the heap, without using a FSM.
-- Fill most of the last block
-ALTER TABLE fsm_check_size SET (fillfactor=100);
-INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
-FROM generate_series(101,105) i;
-- Make sure records can go into any block but the last one
-ALTER TABLE fsm_check_size SET (fillfactor=30);
-- Insert large record and make sure it does not cause the relation to extend
-INSERT INTO fsm_check_size VALUES (111, rpad('', :strsize, 'a'));
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
-pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
- heap_nblocks | fsm_nblocks 
--------------+-------------
-            3 |           0
-(1 row)
-- Extend table with enough blocks to exceed the FSM threshold
-DO $$
-DECLARE curtid tid;
-num int;
-BEGIN
-num = 11;
-  LOOP
-    INSERT INTO fsm_check_size VALUES (num, 'b') RETURNING ctid INTO curtid;
-    EXIT WHEN curtid >= tid '(4, 0)';
-    num = num + 1;
-  END LOOP;
-END;
-$$;
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
- fsm_nblocks 
-------------
-           3
-(1 row)
-- Add long random string to extend TOAST table to 1 block
-INSERT INTO fsm_check_size
-VALUES(0, (SELECT string_agg(md5(chr(i)), '')
-		   FROM generate_series(1, :blocksize / 100) i));
-VACUUM fsm_check_size;
-SELECT pg_relation_size(reltoastrelid, 'main') / :blocksize AS toast_nblocks,
-pg_relation_size(reltoastrelid, 'fsm') / :blocksize AS toast_fsm_nblocks
-FROM pg_class WHERE relname = 'fsm_check_size';
- toast_nblocks | toast_fsm_nblocks 
---------------+-------------------
-             1 |                 0
-(1 row)
-DROP TABLE fsm_check_size;
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -20,7 +20,7 @@ test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeri
 # strings depends on char, varchar and text
 # numerology depends on int2, int4, int8, float4, float8
 # ----------
-test: strings numerology point lseg line box path polygon circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 tstypes fsm
+test: strings numerology point lseg line box path polygon circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 tstypes
 # ----------
 # Another group of parallel tests

--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -40,7 +40,6 @@ test: inet
 test: macaddr
 test: macaddr8
 test: tstypes
-test: fsm
 test: geometry
 test: horology
 test: regex

--- a/src/test/regress/sql/fsm.sql
+++ b/src/test/regress/sql/fsm.sql
--
-- Free Space Map test
--
-SELECT current_setting('block_size')::integer AS blocksize,
-current_setting('block_size')::integer / 8 AS strsize
-\gset
-CREATE TABLE fsm_check_size (num int, str text);
-- Fill 3 blocks with one record each
-ALTER TABLE fsm_check_size SET (fillfactor=15);
-INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
-FROM generate_series(1,3) i;
-- There should be no FSM
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
-pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
-- The following operations are for testing the functionality of the local
-- in-memory map. In particular, we want to be able to insert into some
-- other block than the one at the end of the heap, without using a FSM.
-- Fill most of the last block
-ALTER TABLE fsm_check_size SET (fillfactor=100);
-INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
-FROM generate_series(101,105) i;
-- Make sure records can go into any block but the last one
-ALTER TABLE fsm_check_size SET (fillfactor=30);
-- Insert large record and make sure it does not cause the relation to extend
-INSERT INTO fsm_check_size VALUES (111, rpad('', :strsize, 'a'));
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
-pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
-- Extend table with enough blocks to exceed the FSM threshold
-DO $$
-DECLARE curtid tid;
-num int;
-BEGIN
-num = 11;
-  LOOP
-    INSERT INTO fsm_check_size VALUES (num, 'b') RETURNING ctid INTO curtid;
-    EXIT WHEN curtid >= tid '(4, 0)';
-    num = num + 1;
-  END LOOP;
-END;
-$$;
-VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
-- Add long random string to extend TOAST table to 1 block
-INSERT INTO fsm_check_size
-VALUES(0, (SELECT string_agg(md5(chr(i)), '')
-		   FROM generate_series(1, :blocksize / 100) i));
-VACUUM fsm_check_size;
-SELECT pg_relation_size(reltoastrelid, 'main') / :blocksize AS toast_nblocks,
-pg_relation_size(reltoastrelid, 'fsm') / :blocksize AS toast_fsm_nblocks
-FROM pg_class WHERE relname = 'fsm_check_size';
-DROP TABLE fsm_check_size;