Commit 15c121b3 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Rewrite the FSM. Instead of relying on a fixed-size shared memory segment, the
free space information is now stored in a dedicated FSM relation fork for each
relation (except for hash indexes, which don't use the FSM).

This eliminates the max_fsm_relations and max_fsm_pages GUC options; all traces
of them are removed from the backend, initdb, and documentation.

Rewrite contrib/pg_freespacemap to match the new FSM implementation. Also
introduce a new variant of the get_raw_page(text, int4, int4) function in
contrib/pageinspect that lets you read pages from any relation fork, and
a new fsm_page_contents() function to inspect the new FSM pages.
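
A quick usage sketch of the new contrib interfaces (hypothetical table name
"foo", for illustration only; fsm_page_contents and get_raw_page live in
pageinspect, pg_freespace in pg_freespacemap):

    -- dump the first page of foo's FSM fork (fork number 1)
    SELECT fsm_page_contents(get_raw_page('foo', 1, 0));
    -- the two-argument form still reads from the main fork (fork number 0)
    SELECT * FROM page_header(get_raw_page('foo', 0));
    -- show the free space recorded for each block of foo
    SELECT * FROM pg_freespace('foo');
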
parent 2dbc0ca9
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
# #
# pageinspect Makefile # pageinspect Makefile
# #
# $PostgreSQL: pgsql/contrib/pageinspect/Makefile,v 1.3 2007/11/10 23:59:51 momjian Exp $ # $PostgreSQL: pgsql/contrib/pageinspect/Makefile,v 1.4 2008/09/30 10:52:09 heikki Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
MODULE_big = pageinspect MODULE_big = pageinspect
OBJS = rawpage.o heapfuncs.o btreefuncs.o OBJS = rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
DATA_built = pageinspect.sql DATA_built = pageinspect.sql
DATA = uninstall_pageinspect.sql DATA = uninstall_pageinspect.sql
......
/*-------------------------------------------------------------------------
*
* fsmfuncs.c
* Functions to investigate FSM pages
*
* These functions are restricted to superusers for fear of introducing
* security holes if the input checking isn't as water-tight as it should be.
* You'd need to be superuser to obtain a raw page image anyway, so
* there's hardly any use case for using these without superuser rights.
*
* Copyright (c) 2007-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/contrib/pageinspect/fsmfuncs.c,v 1.1 2008/09/30 10:52:09 heikki Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "lib/stringinfo.h"
#include "storage/fsm_internals.h"
#include "utils/builtins.h"
#include "miscadmin.h"
#include "funcapi.h"
Datum fsm_page_contents(PG_FUNCTION_ARGS);
/*
* Dumps the contents of an FSM page.
*/
PG_FUNCTION_INFO_V1(fsm_page_contents);
Datum
fsm_page_contents(PG_FUNCTION_ARGS)
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
int raw_page_size;
StringInfoData sinfo;
FSMPage fsmpage;
int i;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to use raw page functions"))));
raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
fsmpage = (FSMPage) PageGetContents(VARDATA(raw_page));
initStringInfo(&sinfo);
for (i = 0; i < NodesPerPage; i++)
{
if (fsmpage->fp_nodes[i] != 0)
appendStringInfo(&sinfo, "%d: %d\n", i, fsmpage->fp_nodes[i]);
}
appendStringInfo(&sinfo, "fp_next_slot: %d\n", fsmpage->fp_next_slot);
PG_RETURN_TEXT_P(cstring_to_text(sinfo.data));
}
/* $PostgreSQL: pgsql/contrib/pageinspect/pageinspect.sql.in,v 1.4 2007/11/13 04:24:28 momjian Exp $ */ /* $PostgreSQL: pgsql/contrib/pageinspect/pageinspect.sql.in,v 1.5 2008/09/30 10:52:09 heikki Exp $ */
-- Adjust this setting to control where the objects get created. -- Adjust this setting to control where the objects get created.
SET search_path = public; SET search_path = public;
...@@ -6,11 +6,16 @@ SET search_path = public; ...@@ -6,11 +6,16 @@ SET search_path = public;
-- --
-- get_raw_page() -- get_raw_page()
-- --
CREATE OR REPLACE FUNCTION get_raw_page(text, int4) CREATE OR REPLACE FUNCTION get_raw_page(text, int4, int4)
RETURNS bytea RETURNS bytea
AS 'MODULE_PATHNAME', 'get_raw_page' AS 'MODULE_PATHNAME', 'get_raw_page'
LANGUAGE C STRICT; LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION get_raw_page(text, int4)
RETURNS bytea
AS $$ SELECT get_raw_page($1, 0, $2); $$
LANGUAGE SQL STRICT;
-- --
-- page_header() -- page_header()
-- --
...@@ -92,3 +97,11 @@ CREATE OR REPLACE FUNCTION bt_page_items(IN relname text, IN blkno int4, ...@@ -92,3 +97,11 @@ CREATE OR REPLACE FUNCTION bt_page_items(IN relname text, IN blkno int4,
RETURNS SETOF record RETURNS SETOF record
AS 'MODULE_PATHNAME', 'bt_page_items' AS 'MODULE_PATHNAME', 'bt_page_items'
LANGUAGE C STRICT; LANGUAGE C STRICT;
--
-- fsm_page_contents()
--
CREATE OR REPLACE FUNCTION fsm_page_contents(IN page bytea)
RETURNS text
AS 'MODULE_PATHNAME', 'fsm_page_contents'
LANGUAGE C STRICT;
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Copyright (c) 2007-2008, PostgreSQL Global Development Group * Copyright (c) 2007-2008, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.6 2008/05/12 00:00:43 alvherre Exp $ * $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.7 2008/09/30 10:52:09 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -41,7 +41,8 @@ Datum ...@@ -41,7 +41,8 @@ Datum
get_raw_page(PG_FUNCTION_ARGS) get_raw_page(PG_FUNCTION_ARGS)
{ {
text *relname = PG_GETARG_TEXT_P(0); text *relname = PG_GETARG_TEXT_P(0);
uint32 blkno = PG_GETARG_UINT32(1); uint32 forknum = PG_GETARG_UINT32(1);
uint32 blkno = PG_GETARG_UINT32(2);
Relation rel; Relation rel;
RangeVar *relrv; RangeVar *relrv;
...@@ -54,6 +55,11 @@ get_raw_page(PG_FUNCTION_ARGS) ...@@ -54,6 +55,11 @@ get_raw_page(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to use raw functions")))); (errmsg("must be superuser to use raw functions"))));
if (forknum > MAX_FORKNUM)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid fork number")));
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = relation_openrv(relrv, AccessShareLock); rel = relation_openrv(relrv, AccessShareLock);
...@@ -80,7 +86,7 @@ get_raw_page(PG_FUNCTION_ARGS) ...@@ -80,7 +86,7 @@ get_raw_page(PG_FUNCTION_ARGS)
/* Take a verbatim copy of the page */ /* Take a verbatim copy of the page */
buf = ReadBuffer(rel, blkno); buf = ReadBufferWithFork(rel, forknum, blkno);
LockBuffer(buf, BUFFER_LOCK_SHARE); LockBuffer(buf, BUFFER_LOCK_SHARE);
memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ);
......
/* $PostgreSQL: pgsql/contrib/pg_freespacemap/pg_freespacemap.sql.in,v 1.8 2007/11/13 04:24:28 momjian Exp $ */ /* $PostgreSQL: pgsql/contrib/pg_freespacemap/pg_freespacemap.sql.in,v 1.9 2008/09/30 10:52:09 heikki Exp $ */
-- Adjust this setting to control where the objects get created. -- Adjust this setting to control where the objects get created.
SET search_path = public; SET search_path = public;
-- Register the functions. -- Register the C function.
CREATE OR REPLACE FUNCTION pg_freespacemap_pages() CREATE OR REPLACE FUNCTION pg_freespace(regclass, int4)
RETURNS SETOF RECORD RETURNS int2
AS 'MODULE_PATHNAME', 'pg_freespacemap_pages' AS 'MODULE_PATHNAME', 'pg_freespace'
LANGUAGE C; LANGUAGE C;
CREATE OR REPLACE FUNCTION pg_freespacemap_relations() -- pg_freespace shows the recorded space avail at each block in a relation
CREATE OR REPLACE FUNCTION
pg_freespace(rel regclass, blkno OUT int4, avail OUT int2)
RETURNS SETOF RECORD RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'pg_freespacemap_relations' AS $$
LANGUAGE C; SELECT blkno::int4, pg_freespace($1, blkno::int4) AS avail
FROM generate_series(0, pg_relation_size($1) / current_setting('block_size')::bigint - 1) AS blkno;
$$
LANGUAGE SQL;
-- Create views for convenient access.
CREATE VIEW pg_freespacemap_pages AS
SELECT P.* FROM pg_freespacemap_pages() AS P
(reltablespace oid,
reldatabase oid,
relfilenode oid,
relblocknumber bigint,
bytes integer);
CREATE VIEW pg_freespacemap_relations AS
SELECT P.* FROM pg_freespacemap_relations() AS P
(reltablespace oid,
reldatabase oid,
relfilenode oid,
avgrequest integer,
interestingpages integer,
storedpages integer,
nextpage integer);
-- Don't want these to be available to public. -- Don't want these to be available to public.
REVOKE ALL ON FUNCTION pg_freespacemap_pages() FROM PUBLIC; REVOKE ALL ON FUNCTION pg_freespace(regclass, int4) FROM PUBLIC;
REVOKE ALL ON pg_freespacemap_pages FROM PUBLIC; REVOKE ALL ON FUNCTION pg_freespace(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_freespacemap_relations() FROM PUBLIC;
REVOKE ALL ON pg_freespacemap_relations FROM PUBLIC;
<!-- $PostgreSQL: pgsql/doc/src/sgml/acronyms.sgml,v 1.5 2008/03/18 16:05:07 mha Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/acronyms.sgml,v 1.6 2008/09/30 10:52:09 heikki Exp $ -->
<appendix id="acronyms"> <appendix id="acronyms">
<title>Acronyms</title> <title>Acronyms</title>
...@@ -216,7 +216,7 @@ ...@@ -216,7 +216,7 @@
<term><acronym>FSM</acronym></term> <term><acronym>FSM</acronym></term>
<listitem> <listitem>
<para> <para>
<link linkend="runtime-config-resource-fsm">Free Space Map</link> <link linkend="storage-fsm">Free Space Map</link>
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.190 2008/08/25 19:03:37 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.191 2008/09/30 10:52:09 heikki Exp $ -->
<chapter Id="runtime-config"> <chapter Id="runtime-config">
<title>Server Configuration</title> <title>Server Configuration</title>
...@@ -896,80 +896,6 @@ SET ENABLE_SEQSCAN TO OFF; ...@@ -896,80 +896,6 @@ SET ENABLE_SEQSCAN TO OFF;
</varlistentry> </varlistentry>
</variablelist> </variablelist>
</sect2>
<sect2 id="runtime-config-resource-fsm">
<title>Free Space Map</title>
<indexterm>
<primary>free space map</primary>
</indexterm>
<para>
These parameters control the size of the shared <firstterm>free space
map</> (<acronym>FSM</>), which tracks the locations of unused space in the database.
An undersized free space map can cause the database to consume
increasing amounts of disk space over time, because free space that
is not in the map cannot be re-used; instead <productname>PostgreSQL</>
will request more disk space from the operating system when it needs
to store new data.
The last few lines displayed by a database-wide <command>VACUUM VERBOSE</>
command can help in determining if the current settings are adequate.
A <literal>NOTICE</> message is also printed during such an operation
if the current settings are too low.
</para>
<para>
Increasing these parameters might cause <productname>PostgreSQL</>
to request more <systemitem class="osname">System V</> shared
memory than your operating system's default configuration
allows. See <xref linkend="sysvipc"> for information on how to
adjust those parameters, if necessary.
</para>
<variablelist>
<varlistentry id="guc-max-fsm-pages" xreflabel="max_fsm_pages">
<term><varname>max_fsm_pages</varname> (<type>integer</type>)</term>
<indexterm>
<primary><varname>max_fsm_pages</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Sets the maximum number of disk pages for which free space will
be tracked in the shared free-space map. Six bytes of shared memory
are consumed for each page slot. This setting must be at least
16 * <varname>max_fsm_relations</varname>. The default is chosen
by <application>initdb</> depending on the amount of available memory,
and can range from 20k to 200k pages.
This parameter can only be set at server start.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-max-fsm-relations" xreflabel="max_fsm_relations">
<term><varname>max_fsm_relations</varname> (<type>integer</type>)</term>
<indexterm>
<primary><varname>max_fsm_relations</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Sets the maximum number of relations (tables and indexes) for which
free space will be tracked in the shared free-space map. Roughly
seventy bytes of shared memory are consumed for each slot.
The default is one thousand relations.
This parameter can only be set at server start.
</para>
</listitem>
</varlistentry>
</variablelist>
<note>
<para>
See the <xref linkend="sql-vacuum" endterm="sql-vacuum-title">
command for information on setting this parameter.
</para>
</note>
</sect2> </sect2>
<sect2 id="runtime-config-resource-kernel"> <sect2 id="runtime-config-resource-kernel">
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/pageinspect.sgml,v 1.3 2007/12/10 05:32:51 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/pageinspect.sgml,v 1.4 2008/09/30 10:52:09 heikki Exp $ -->
<sect1 id="pageinspect"> <sect1 id="pageinspect">
<title>pageinspect</title> <title>pageinspect</title>
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
<variablelist> <variablelist>
<varlistentry> <varlistentry>
<term> <term>
<function>get_raw_page(text, int) returns bytea</function> <function>get_raw_page(relname text, forknum int, blkno int) returns bytea</function>
</term> </term>
<listitem> <listitem>
...@@ -27,13 +27,28 @@ ...@@ -27,13 +27,28 @@
<function>get_raw_page</function> reads the specified block of the named <function>get_raw_page</function> reads the specified block of the named
table and returns a copy as a <type>bytea</> value. This allows a table and returns a copy as a <type>bytea</> value. This allows a
single time-consistent copy of the block to be obtained. single time-consistent copy of the block to be obtained.
<literal>forknum</literal> should be 0 for the main data fork, or 1 for
the FSM.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term> <term>
<function>page_header(bytea) returns record</function> <function>get_raw_page(relname text, blkno int) returns bytea</function>
</term>
<listitem>
<para>
A shorthand for the above, reading from the main fork. Equivalent to
<literal>get_raw_page(relname, 0, blkno)</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<function>page_header(page bytea) returns record</function>
</term> </term>
<listitem> <listitem>
...@@ -63,7 +78,7 @@ test=# SELECT * FROM page_header(get_raw_page('pg_class', 0)); ...@@ -63,7 +78,7 @@ test=# SELECT * FROM page_header(get_raw_page('pg_class', 0));
<varlistentry> <varlistentry>
<term> <term>
<function>heap_page_items(bytea) returns setof record</function> <function>heap_page_items(page bytea) returns setof record</function>
</term> </term>
<listitem> <listitem>
...@@ -90,7 +105,7 @@ test=# SELECT * FROM heap_page_items(get_raw_page('pg_class', 0)); ...@@ -90,7 +105,7 @@ test=# SELECT * FROM heap_page_items(get_raw_page('pg_class', 0));
<varlistentry> <varlistentry>
<term> <term>
<function>bt_metap(text) returns record</function> <function>bt_metap(relname text) returns record</function>
</term> </term>
<listitem> <listitem>
...@@ -113,7 +128,7 @@ fastlevel | 0 ...@@ -113,7 +128,7 @@ fastlevel | 0
<varlistentry> <varlistentry>
<term> <term>
<function>bt_page_stats(text, int) returns record</function> <function>bt_page_stats(relname text, blkno int) returns record</function>
</term> </term>
<listitem> <listitem>
...@@ -141,7 +156,7 @@ btpo_flags | 3 ...@@ -141,7 +156,7 @@ btpo_flags | 3
<varlistentry> <varlistentry>
<term> <term>
<function>bt_page_items(text, int) returns setof record</function> <function>bt_page_items(relname text, blkno int) returns setof record</function>
</term> </term>
<listitem> <listitem>
...@@ -164,6 +179,26 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1); ...@@ -164,6 +179,26 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
</programlisting> </programlisting>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term>
<function>fsm_page_contents(page bytea) returns text</function>
</term>
<listitem>
<para>
<function>fsm_page_contents</function> shows the internal node structure
of an FSM page. The output is a multi-line string, with one line per
node in the binary tree within the page. Only those nodes that are not
zero are printed. The so-called "next" pointer, which points to the
next slot to be returned from the page, is also printed.
</para>
<para>
See <filename>src/backend/storage/freespace/README</> for more
information on the structure of an FSM page.
</para>
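<para>
A minimal usage sketch, assuming a table <literal>foo</literal> whose free
space map has already been populated (for example by <command>VACUUM</>);
the output lists only the nonzero nodes plus the next-slot pointer:
</para>
<programlisting>
test=# SELECT fsm_page_contents(get_raw_page('foo', 1, 0));
</programlisting>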
</listitem>
</varlistentry>
</variablelist> </variablelist>
</sect2> </sect2>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/pgfreespacemap.sgml,v 2.2 2007/12/10 05:32:51 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/pgfreespacemap.sgml,v 2.3 2008/09/30 10:52:09 heikki Exp $ -->
<sect1 id="pgfreespacemap"> <sect1 id="pgfreespacemap">
<title>pg_freespacemap</title> <title>pg_freespacemap</title>
...@@ -9,183 +9,66 @@ ...@@ -9,183 +9,66 @@
<para> <para>
The <filename>pg_freespacemap</> module provides a means for examining the The <filename>pg_freespacemap</> module provides a means for examining the
free space map (FSM). It provides two C functions: free space map (FSM). It provides a function called
<function>pg_freespacemap_relations</function> and <function>pg_freespace</function>, or two overloaded functions, to be
<function>pg_freespacemap_pages</function> that each return a set of precise. The functions show the value recorded in the free space map for
records, plus two views <structname>pg_freespacemap_relations</structname> a given page, or for all pages in the relation.
and <structname>pg_freespacemap_pages</structname> that wrap the functions
for convenient use.
</para> </para>
<para> <para>
By default public access is revoked from the functions and views, just in By default public access is revoked from the functions, just in case
case there are security issues lurking. there are security issues lurking.
</para> </para>
<sect2> <sect2>
<title>The <filename>pg_freespacemap</> views</title> <title>Functions</title>
<variablelist>
<varlistentry>
<term>
<function>pg_freespace(rel regclass, blkno int4) returns int2</function>
</term>
<listitem>
<para>
Returns the amount of free space on the page of the relation specified
by <literal>blkno</>, according to the FSM.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<function>pg_freespace(rel regclass, blkno OUT int4, avail OUT int2) returns setof record</function>
</term>
<listitem>
<para>
Displays the amount of free space on each page of the relation,
according to the FSM. A set of <literal>(blkno int4, avail int2)</>
tuples is returned, one tuple for each page in the relation.
</para>
</listitem>
</varlistentry>
</variablelist>
<para> <para>
The definitions of the columns exposed by the views are: The values stored in the free space map are not exact. They're rounded
</para> to precision of 1/256th of BLCKSZ (32 bytes with default BLCKSZ), and
they're not kept fully up-to-date as tuples are inserted and updated.
<table>
<title><structname>pg_freespacemap_relations</> Columns</title>
<tgroup cols="4">
<thead>
<row>
<entry>Name</entry>
<entry>Type</entry>
<entry>References</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry><structfield>reltablespace</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_tablespace.oid</literal></entry>
<entry>Tablespace OID of the relation</entry>
</row>
<row>
<entry><structfield>reldatabase</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_database.oid</literal></entry>
<entry>Database OID of the relation</entry>
</row>
<row>
<entry><structfield>relfilenode</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_class.relfilenode</literal></entry>
<entry>Relfilenode of the relation</entry>
</row>
<row>
<entry><structfield>avgrequest</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
<entry>Moving average of free space requests (NULL for indexes)</entry>
</row>
<row>
<entry><structfield>interestingpages</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
<entry>Count of pages last reported as containing useful free space</entry>
</row>
<row>
<entry><structfield>storedpages</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
<entry>Count of pages actually stored in free space map</entry>
</row>
<row>
<entry><structfield>nextpage</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
<entry>Page index (from 0) to start next search at</entry>
</row>
</tbody>
</tgroup>
</table>
<table>
<title><structname>pg_freespacemap_pages</> Columns</title>
<tgroup cols="4">
<thead>
<row>
<entry>Name</entry>
<entry>Type</entry>
<entry>References</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry><structfield>reltablespace</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_tablespace.oid</literal></entry>
<entry>Tablespace OID of the relation</entry>
</row>
<row>
<entry><structfield>reldatabase</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_database.oid</literal></entry>
<entry>Database OID of the relation</entry>
</row>
<row>
<entry><structfield>relfilenode</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal>pg_class.relfilenode</literal></entry>
<entry>Relfilenode of the relation</entry>
</row>
<row>
<entry><structfield>relblocknumber</structfield></entry>
<entry><type>bigint</type></entry>
<entry></entry>
<entry>Page number within the relation</entry>
</row>
<row>
<entry><structfield>bytes</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
<entry>Free bytes in the page, or NULL for an index page (see below)</entry>
</row>
</tbody>
</tgroup>
</table>
<para>
For <structname>pg_freespacemap_relations</structname>, there is one row
for each relation in the free space map.
<structfield>storedpages</structfield> is the number of pages actually
stored in the map, while <structfield>interestingpages</structfield> is the
number of pages the last <command>VACUUM</> thought had useful amounts of
free space.
</para>
<para>
If <structfield>storedpages</structfield> is consistently less than
<structfield>interestingpages</> then it'd be a good idea to increase
<varname>max_fsm_pages</varname>. Also, if the number of rows in
<structname>pg_freespacemap_relations</structname> is close to
<varname>max_fsm_relations</varname>, then you should consider increasing
<varname>max_fsm_relations</varname>.
</para>
<para>
For <structname>pg_freespacemap_pages</structname>, there is one row for
each page in the free space map. The number of rows for a relation will
match the <structfield>storedpages</structfield> column in
<structname>pg_freespacemap_relations</structname>.
</para> </para>
<para> <para>
For indexes, what is tracked is entirely-unused pages, rather than free For indexes, what is tracked is entirely-unused pages, rather than free
space within pages. Therefore, the average request size and free bytes space within pages. Therefore, the values are not meaningful, just
within a page are not meaningful, and are shown as NULL. whether a page is full or empty.
</para> </para>
<para> <para>
Because the map is shared by all the databases, there will normally be NOTE: The interface was changed in version 8.4, to reflect the new FSM
entries for relations not belonging to the current database. This means implementation introduced in the same version.
that there may not be matching join rows in <structname>pg_class</> for
some rows, or that there could even be incorrect joins. If you are
trying to join against <structname>pg_class</>, it's a good idea to
restrict the join to rows having <structfield>reldatabase</> equal to
the current database's OID or zero.
</para>
<para>
When either of the views is accessed, internal free space map locks are
taken for long enough to copy all the state data that the view will display.
This ensures that the views produce a consistent set of results, while not
blocking normal activity longer than necessary. Nonetheless there
could be some impact on database performance if they are read often.
</para> </para>
</sect2> </sect2>
...@@ -193,45 +76,37 @@ ...@@ -193,45 +76,37 @@
<title>Sample output</title> <title>Sample output</title>
<programlisting> <programlisting>
regression=# SELECT c.relname, r.avgrequest, r.interestingpages, r.storedpages postgres=# SELECT * FROM pg_freespace('foo');
FROM pg_freespacemap_relations r INNER JOIN pg_class c blkno | avail
ON r.relfilenode = c.relfilenode AND -------+-------
r.reldatabase IN (0, (SELECT oid FROM pg_database 0 | 0
WHERE datname = current_database())) 1 | 0
ORDER BY r.storedpages DESC LIMIT 10; 2 | 0
relname | avgrequest | interestingpages | storedpages 3 | 32
---------------------------------+------------+------------------+------------- 4 | 704
onek | 256 | 109 | 109 5 | 704
pg_attribute | 167 | 93 | 93 6 | 704
pg_class | 191 | 49 | 49 7 | 1216
pg_attribute_relid_attnam_index | | 48 | 48 8 | 704
onek2 | 256 | 37 | 37 9 | 704
pg_depend | 95 | 26 | 26 10 | 704
pg_type | 199 | 16 | 16 11 | 704
pg_rewrite | 1011 | 13 | 13 12 | 704
pg_class_relname_nsp_index | | 10 | 10 13 | 704
pg_proc | 302 | 8 | 8 14 | 704
(10 rows) 15 | 704
16 | 704
regression=# SELECT c.relname, p.relblocknumber, p.bytes 17 | 704
FROM pg_freespacemap_pages p INNER JOIN pg_class c 18 | 704
ON p.relfilenode = c.relfilenode AND 19 | 3648
p.reldatabase IN (0, (SELECT oid FROM pg_database (20 rows)
WHERE datname = current_database()))
ORDER BY c.relname LIMIT 10; postgres=# SELECT * FROM pg_freespace('foo', 7);
relname | relblocknumber | bytes pg_freespace
--------------+----------------+------- --------------
a_star | 0 | 8040 1216
abstime_tbl | 0 | 7908 (1 row)
aggtest | 0 | 8008
altinhoid | 0 | 8128
altstartwith | 0 | 8128
arrtest | 0 | 7172
b_star | 0 | 7976
box_tbl | 0 | 7912
bt_f8_heap | 54 | 7728
bt_i4_heap | 49 | 8008
(10 rows)
</programlisting> </programlisting>
</sect2> </sect2>
...@@ -239,7 +114,9 @@ regression=# SELECT c.relname, p.relblocknumber, p.bytes ...@@ -239,7 +114,9 @@ regression=# SELECT c.relname, p.relblocknumber, p.bytes
<title>Author</title> <title>Author</title>
<para> <para>
Mark Kirkwood <email>markir@paradise.net.nz</email> Original version by Mark Kirkwood <email>markir@paradise.net.nz</email>.
Rewritten in version 8.4 to suit the new FSM implementation by Heikki
Linnakangas <email>heikki@enterprisedb.com</email>.
</para> </para>
</sect2> </sect2>
......
<!-- <!--
$PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.51 2008/02/03 16:24:08 tgl Exp $ $PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.52 2008/09/30 10:52:10 heikki Exp $
PostgreSQL documentation PostgreSQL documentation
--> -->
...@@ -96,11 +96,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER"> ...@@ -96,11 +96,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">
<term><literal>VERBOSE</literal></term> <term><literal>VERBOSE</literal></term>
<listitem> <listitem>
<para> <para>
Prints a detailed vacuum activity report for each table. Can be used Prints a detailed vacuum activity report for each table.
to help determine appropriate settings for
<xref linkend="guc-max-fsm-pages">,
<xref linkend="guc-max-fsm-relations">, and
<xref linkend="guc-default-statistics-target">.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/release.sgml,v 1.585 2008/09/17 20:57:35 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/release.sgml,v 1.586 2008/09/30 10:52:09 heikki Exp $ -->
<!-- <!--
Typical markup: Typical markup:
...@@ -6004,8 +6004,7 @@ current_date &lt; 2017-11-17 ...@@ -6004,8 +6004,7 @@ current_date &lt; 2017-11-17
<para> <para>
Increase default values for <link Increase default values for <link
linkend="guc-shared-buffers"><varname>shared_buffers</></link> linkend="guc-shared-buffers"><varname>shared_buffers</></link>
and <link and <varname>max_fsm_pages</>
linkend="guc-max-fsm-pages"><varname>max_fsm_pages</></link>
(Andrew) (Andrew)
</para> </para>
</listitem> </listitem>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.417 2008/09/23 09:20:34 heikki Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.418 2008/09/30 10:52:10 heikki Exp $ -->
<chapter Id="runtime"> <chapter Id="runtime">
<title>Operating System Environment</title> <title>Operating System Environment</title>
...@@ -1117,16 +1117,6 @@ set semsys:seminfo_semmsl=32 ...@@ -1117,16 +1117,6 @@ set semsys:seminfo_semmsl=32
<entry>8200 (assuming 8 kB <symbol>XLOG_BLCKSZ</>)</entry> <entry>8200 (assuming 8 kB <symbol>XLOG_BLCKSZ</>)</entry>
</row> </row>
<row>
<entry><xref linkend="guc-max-fsm-relations"></>
<entry>70</>
</row>
<row>
<entry><xref linkend="guc-max-fsm-pages"></>
<entry>6</>
</row>
<row> <row>
<entry>Fixed space requirements</> <entry>Fixed space requirements</>
<entry>770 kB</entry> <entry>770 kB</entry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.24 2008/08/05 12:09:30 mha Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.25 2008/09/30 10:52:10 heikki Exp $ -->
<chapter id="storage"> <chapter id="storage">
...@@ -130,7 +130,12 @@ there. ...@@ -130,7 +130,12 @@ there.
<para> <para>
Each table and index is stored in a separate file, named after the table Each table and index is stored in a separate file, named after the table
or index's <firstterm>filenode</> number, which can be found in or index's <firstterm>filenode</> number, which can be found in
<structname>pg_class</>.<structfield>relfilenode</>. <structname>pg_class</>.<structfield>relfilenode</>. In addition to the
main file (a.k.a. the main fork), a <firstterm>free space map</> (see
<xref linkend="storage-fsm">), which stores information about the free space
available in the relation, is stored in a file named after the filenode
number, with the <literal>_1</> suffix. For example, if the table's filenode
number is 12345, the FSM file is named <filename>12345_1</>.
</para> </para>
<caution> <caution>
...@@ -367,6 +372,48 @@ comparison table, in which all the HTML pages were cut down to 7 kB to fit. ...@@ -367,6 +372,48 @@ comparison table, in which all the HTML pages were cut down to 7 kB to fit.
</sect1> </sect1>
<sect1 id="storage-fsm">
<title>Free Space Map</title>
<indexterm>
<primary>Free Space Map</primary>
</indexterm>
<indexterm><primary>FSM</><see>Free Space Map</></indexterm>
<para>
A Free Space Map is stored with every heap and index relation, except for
hash indexes, to keep track of available space in the relation. It's stored
alongside the main relation data, in a separate FSM relation fork, named after
the relfilenode of the relation but with a <literal>_1</> suffix. For example,
if the relfilenode of a relation is 12345, the FSM is stored in a file called
<filename>12345_1</>, in the same directory as the main relation file.
</para>
<para>
The Free Space Map is organized as a tree of <acronym>FSM</> pages. The
bottom level <acronym>FSM</> pages store the free space available on every
heap (or index) page, using one byte to represent each heap page. The upper
levels aggregate information from the lower levels.
</para>
<para>
Within each <acronym>FSM</> page is a binary tree, stored in an array with
one byte per node. Each leaf node represents a heap page, or a lower level
<acronym>FSM</> page. In each non-leaf node, the higher of its children's
values is stored. The maximum value in the leaf nodes is therefore stored
at the root.
</para>
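<para>
For illustration (a much smaller tree than a real page holds), an FSM page
whose four leaf nodes hold the values 3, 7, 4 and 2 would store these values
in its node array; a search for a value of at least 5 can tell from the root
alone that a sufficiently empty page exists somewhere below it:
</para>
<programlisting>
            7                root (maximum of the whole page)
        7       4            non-leaf nodes (maximum of their children)
      3   7   4   2          leaf nodes (one per heap page)
</programlisting>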
<para>
See <filename>src/backend/storage/freespace/README</> for more details on
how the <acronym>FSM</> is structured, and how it's updated and searched.
The <xref linkend="pgfreespacemap"> contrib module can be used to view the
information stored in free space maps.
</para>
</sect1>
<sect1 id="storage-page-layout"> <sect1 id="storage-page-layout">
<title>Database Page Layout</title> <title>Database Page Layout</title>
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.14 2008/07/11 21:06:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.15 2008/09/30 10:52:10 heikki Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "catalog/index.h" #include "catalog/index.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -283,6 +284,9 @@ ginbuild(PG_FUNCTION_ARGS) ...@@ -283,6 +284,9 @@ ginbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data", elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index)); RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
initGinState(&buildstate.ginstate, index); initGinState(&buildstate.ginstate, index);
/* initialize the root page */ /* initialize the root page */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.16 2008/07/11 21:06:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.17 2008/09/30 10:52:10 heikki Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
void void
...@@ -151,7 +152,7 @@ GinNewBuffer(Relation index) ...@@ -151,7 +152,7 @@ GinNewBuffer(Relation index)
/* First, try to get a page from FSM */ /* First, try to get a page from FSM */
for (;;) for (;;)
{ {
BlockNumber blkno = GetFreeIndexPage(&index->rd_node); BlockNumber blkno = GetFreeIndexPage(index);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
break; break;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.21 2008/07/11 21:06:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.22 2008/09/30 10:52:10 heikki Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
typedef struct typedef struct
...@@ -678,10 +679,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -678,10 +679,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
bool needLock; bool needLock;
BlockNumber npages, BlockNumber npages,
blkno; blkno;
BlockNumber totFreePages, BlockNumber totFreePages;
nFreePages,
*freePages,
maxFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO, BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO; lastFilledBlock = GIN_ROOT_BLKNO;
...@@ -711,12 +709,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -711,12 +709,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
if (needLock) if (needLock)
UnlockRelationForExtension(index, ExclusiveLock); UnlockRelationForExtension(index, ExclusiveLock);
maxFreePages = npages; totFreePages = 0;
if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages;
totFreePages = nFreePages = 0;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
{ {
...@@ -731,8 +724,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -731,8 +724,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
if (GinPageIsDeleted(page)) if (GinPageIsDeleted(page))
{ {
if (nFreePages < maxFreePages) RecordFreeIndexPage(index, blkno);
freePages[nFreePages++] = blkno;
totFreePages++; totFreePages++;
} }
else else
...@@ -742,25 +734,16 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -742,25 +734,16 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
} }
lastBlock = npages - 1; lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0) if (info->vacuum_full && lastBlock > lastFilledBlock)
{ {
/* try to truncate index */ /* try to truncate index */
int i; FreeSpaceMapTruncateRel(index, lastFilledBlock + 1);
RelationTruncate(index, lastFilledBlock + 1);
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
totFreePages = nFreePages = i;
break;
}
if (lastBlock > lastFilledBlock)
RelationTruncate(index, lastFilledBlock + 1);
stats->pages_removed = lastBlock - lastFilledBlock; stats->pages_removed = lastBlock - lastFilledBlock;
totFreePages = totFreePages - stats->pages_removed;
} }
RecordIndexFreeSpace(&index->rd_node, totFreePages, nFreePages, freePages);
stats->pages_free = totFreePages; stats->pages_free = totFreePages;
if (needLock) if (needLock)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.151 2008/06/12 09:12:29 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.152 2008/09/30 10:52:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "catalog/index.h" #include "catalog/index.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h" #include "utils/memutils.h"
const XLogRecPtr XLogRecPtrForTemp = {1, 1}; const XLogRecPtr XLogRecPtrForTemp = {1, 1};
...@@ -102,6 +103,9 @@ gistbuild(PG_FUNCTION_ARGS) ...@@ -102,6 +103,9 @@ gistbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data", elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index)); RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
/* no locking is needed */ /* no locking is needed */
initGISTstate(&buildstate.giststate, index); initGISTstate(&buildstate.giststate, index);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.30 2008/07/13 20:45:46 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.31 2008/09/30 10:52:10 heikki Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "access/gist_private.h" #include "access/gist_private.h"
#include "access/reloptions.h" #include "access/reloptions.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "utils/rel.h" #include "utils/rel.h"
...@@ -617,7 +618,7 @@ gistNewBuffer(Relation r) ...@@ -617,7 +618,7 @@ gistNewBuffer(Relation r)
/* First, try to get a page from FSM */ /* First, try to get a page from FSM */
for (;;) for (;;)
{ {
BlockNumber blkno = GetFreeIndexPage(&r->rd_node); BlockNumber blkno = GetFreeIndexPage(r);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
break; /* nothing left in FSM */ break; /* nothing left in FSM */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.36 2008/06/12 09:12:30 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.37 2008/09/30 10:52:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -518,10 +519,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -518,10 +519,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
Relation rel = info->index; Relation rel = info->index;
BlockNumber npages, BlockNumber npages,
blkno; blkno;
BlockNumber totFreePages, BlockNumber totFreePages;
nFreePages,
*freePages,
maxFreePages;
BlockNumber lastBlock = GIST_ROOT_BLKNO, BlockNumber lastBlock = GIST_ROOT_BLKNO,
lastFilledBlock = GIST_ROOT_BLKNO; lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock; bool needLock;
...@@ -589,13 +587,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -589,13 +587,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
if (needLock) if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock); UnlockRelationForExtension(rel, ExclusiveLock);
maxFreePages = npages; totFreePages = 0;
if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages;
totFreePages = nFreePages = 0;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++) for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
{ {
Buffer buffer; Buffer buffer;
...@@ -609,9 +601,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -609,9 +601,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
if (PageIsNew(page) || GistPageIsDeleted(page)) if (PageIsNew(page) || GistPageIsDeleted(page))
{ {
if (nFreePages < maxFreePages)
freePages[nFreePages++] = blkno;
totFreePages++; totFreePages++;
RecordFreeIndexPage(rel, blkno);
} }
else else
lastFilledBlock = blkno; lastFilledBlock = blkno;
...@@ -619,25 +610,15 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) ...@@ -619,25 +610,15 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
} }
lastBlock = npages - 1; lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0) if (info->vacuum_full && lastFilledBlock < lastBlock)
{ /* try to truncate index */ { /* try to truncate index */
int i; FreeSpaceMapTruncateRel(rel, lastFilledBlock + 1);
RelationTruncate(rel, lastFilledBlock + 1);
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
totFreePages = nFreePages = i;
break;
}
if (lastBlock > lastFilledBlock)
RelationTruncate(rel, lastFilledBlock + 1);
stats->std.pages_removed = lastBlock - lastFilledBlock; stats->std.pages_removed = lastBlock - lastFilledBlock;
totFreePages = totFreePages - stats->std.pages_removed;
} }
RecordIndexFreeSpace(&rel->rd_node, totFreePages, nFreePages, freePages);
pfree(freePages);
/* return statistics */ /* return statistics */
stats->std.pages_free = totFreePages; stats->std.pages_free = totFreePages;
if (needLock) if (needLock)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.263 2008/09/11 14:01:09 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.264 2008/09/30 10:52:10 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -4721,6 +4721,9 @@ heap_sync(Relation rel) ...@@ -4721,6 +4721,9 @@ heap_sync(Relation rel)
/* FlushRelationBuffers will have opened rd_smgr */ /* FlushRelationBuffers will have opened rd_smgr */
smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM); smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM);
/* sync FSM as well */
smgrimmedsync(rel->rd_smgr, FSM_FORKNUM);
/* toast heap, if any */ /* toast heap, if any */
if (OidIsValid(rel->rd_rel->reltoastrelid)) if (OidIsValid(rel->rd_rel->reltoastrelid))
{ {
...@@ -4729,6 +4732,7 @@ heap_sync(Relation rel) ...@@ -4729,6 +4732,7 @@ heap_sync(Relation rel)
toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock); toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
FlushRelationBuffers(toastrel); FlushRelationBuffers(toastrel);
smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM); smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM);
smgrimmedsync(toastrel->rd_smgr, FSM_FORKNUM);
heap_close(toastrel, AccessShareLock); heap_close(toastrel, AccessShareLock);
} }
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.72 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -163,8 +163,7 @@ RelationGetBufferForTuple(Relation relation, Size len, ...@@ -163,8 +163,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* We have no cached target page, so ask the FSM for an initial * We have no cached target page, so ask the FSM for an initial
* target. * target.
*/ */
targetBlock = GetPageWithFreeSpace(&relation->rd_node, targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
len + saveFreeSpace);
/* /*
* If the FSM knows nothing of the rel, try the last page before we * If the FSM knows nothing of the rel, try the last page before we
...@@ -250,7 +249,7 @@ RelationGetBufferForTuple(Relation relation, Size len, ...@@ -250,7 +249,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
* Update FSM as to condition of this page, and ask for another page * Update FSM as to condition of this page, and ask for another page
* to try. * to try.
*/ */
targetBlock = RecordAndGetPageWithFreeSpace(&relation->rd_node, targetBlock = RecordAndGetPageWithFreeSpace(relation,
targetBlock, targetBlock,
pageFreeSpace, pageFreeSpace,
len + saveFreeSpace); len + saveFreeSpace);
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.110 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.111 2008/09/30 10:52:10 heikki Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "utils/inval.h" #include "utils/inval.h"
#include "utils/snapmgr.h" #include "utils/snapmgr.h"
...@@ -501,7 +502,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) ...@@ -501,7 +502,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
*/ */
for (;;) for (;;)
{ {
blkno = GetFreeIndexPage(&rel->rd_node); blkno = GetFreeIndexPage(rel);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
break; break;
buf = ReadBuffer(rel, blkno); buf = ReadBuffer(rel, blkno);
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.161 2008/06/19 00:46:03 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.162 2008/09/30 10:52:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -56,9 +57,7 @@ typedef struct ...@@ -56,9 +57,7 @@ typedef struct
IndexBulkDeleteCallback callback; IndexBulkDeleteCallback callback;
void *callback_state; void *callback_state;
BTCycleId cycleid; BTCycleId cycleid;
BlockNumber *freePages; BlockNumber lastUsedPage;
int nFreePages; /* number of entries in freePages[] */
int maxFreePages; /* allocated size of freePages[] */
BlockNumber totFreePages; /* true total # of free pages */ BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext; MemoryContext pagedelcontext;
} BTVacState; } BTVacState;
...@@ -110,6 +109,9 @@ btbuild(PG_FUNCTION_ARGS) ...@@ -110,6 +109,9 @@ btbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data", elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index)); RelationGetRelationName(index));
/* Initialize FSM */
InitIndexFreeSpaceMap(index);
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false); buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);
/* /*
...@@ -623,9 +625,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, ...@@ -623,9 +625,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
vstate.callback = callback; vstate.callback = callback;
vstate.callback_state = callback_state; vstate.callback_state = callback_state;
vstate.cycleid = cycleid; vstate.cycleid = cycleid;
vstate.freePages = NULL; /* temporarily */ vstate.lastUsedPage = BTREE_METAPAGE;
vstate.nFreePages = 0;
vstate.maxFreePages = 0;
vstate.totFreePages = 0; vstate.totFreePages = 0;
/* Create a temporary memory context to run _bt_pagedel in */ /* Create a temporary memory context to run _bt_pagedel in */
...@@ -670,17 +670,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, ...@@ -670,17 +670,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (needLock) if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock); UnlockRelationForExtension(rel, ExclusiveLock);
/* Allocate freePages after we read num_pages the first time */
if (vstate.freePages == NULL)
{
/* No point in remembering more than MaxFSMPages pages */
vstate.maxFreePages = MaxFSMPages;
if ((BlockNumber) vstate.maxFreePages > num_pages)
vstate.maxFreePages = (int) num_pages;
vstate.freePages = (BlockNumber *)
palloc(vstate.maxFreePages * sizeof(BlockNumber));
}
/* Quit if we've scanned the whole relation */ /* Quit if we've scanned the whole relation */
if (blkno >= num_pages) if (blkno >= num_pages)
break; break;
...@@ -697,42 +686,22 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, ...@@ -697,42 +686,22 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
* acquiring exclusive lock on the index and then rechecking all the * acquiring exclusive lock on the index and then rechecking all the
* pages; doesn't seem worth it. * pages; doesn't seem worth it.
*/ */
if (info->vacuum_full && vstate.nFreePages > 0) if (info->vacuum_full && vstate.lastUsedPage < num_pages - 1)
{ {
BlockNumber new_pages = num_pages; BlockNumber new_pages = vstate.lastUsedPage + 1;
while (vstate.nFreePages > 0 &&
vstate.freePages[vstate.nFreePages - 1] == new_pages - 1)
{
new_pages--;
stats->pages_deleted--;
vstate.nFreePages--;
vstate.totFreePages = vstate.nFreePages; /* can't be more */
}
if (new_pages != num_pages)
{
/*
* Okay to truncate.
*/
RelationTruncate(rel, new_pages);
/* update statistics */ /*
stats->pages_removed += num_pages - new_pages; * Okay to truncate.
*/
FreeSpaceMapTruncateRel(rel, new_pages);
RelationTruncate(rel, new_pages);
num_pages = new_pages; /* update statistics */
} stats->pages_removed += num_pages - new_pages;
vstate.totFreePages -= (num_pages - new_pages);
num_pages = new_pages;
} }
/*
* Update the shared Free Space Map with the info we now have about free
* pages in the index, discarding any old info the map may have. We do not
* need to sort the page numbers; they're in order already.
*/
RecordIndexFreeSpace(&rel->rd_node, vstate.totFreePages,
vstate.nFreePages, vstate.freePages);
pfree(vstate.freePages);
MemoryContextDelete(vstate.pagedelcontext); MemoryContextDelete(vstate.pagedelcontext);
/* update statistics */ /* update statistics */
...@@ -788,8 +757,7 @@ restart: ...@@ -788,8 +757,7 @@ restart:
/* /*
* If we are recursing, the only case we want to do anything with is a * If we are recursing, the only case we want to do anything with is a
* live leaf page having the current vacuum cycle ID. Any other state * live leaf page having the current vacuum cycle ID. Any other state
* implies we already saw the page (eg, deleted it as being empty). In * implies we already saw the page (eg, deleted it as being empty).
* particular, we don't want to risk adding it to freePages twice.
*/ */
if (blkno != orig_blkno) if (blkno != orig_blkno)
{ {
...@@ -803,12 +771,15 @@ restart: ...@@ -803,12 +771,15 @@ restart:
} }
} }
/* If the page is in use, update lastUsedPage */
if (!_bt_page_recyclable(page) && vstate->lastUsedPage < blkno)
vstate->lastUsedPage = blkno;
/* Page is valid, see what to do with it */ /* Page is valid, see what to do with it */
if (_bt_page_recyclable(page)) if (_bt_page_recyclable(page))
{ {
/* Okay to recycle this page */ /* Okay to recycle this page */
if (vstate->nFreePages < vstate->maxFreePages) RecordFreeIndexPage(rel, blkno);
vstate->freePages[vstate->nFreePages++] = blkno;
vstate->totFreePages++; vstate->totFreePages++;
stats->pages_deleted++; stats->pages_deleted++;
} }
...@@ -944,8 +915,7 @@ restart: ...@@ -944,8 +915,7 @@ restart:
*/ */
if (ndel && info->vacuum_full) if (ndel && info->vacuum_full)
{ {
if (vstate->nFreePages < vstate->maxFreePages) RecordFreeIndexPage(rel, blkno);
vstate->freePages[vstate->nFreePages++] = blkno;
vstate->totFreePages++; vstate->totFreePages++;
} }
......
...@@ -52,12 +52,14 @@ ...@@ -52,12 +52,14 @@
* we log the completed index pages to WAL if and only if WAL archiving is * we log the completed index pages to WAL if and only if WAL archiving is
* active. * active.
* *
* This code isn't concerned with the FSM at all. The caller is responsible
* for initializing it.
* *
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.117 2008/08/11 11:05:10 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.118 2008/09/30 10:52:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Resource managers definition * Resource managers definition
* *
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.25 2006/11/05 22:42:07 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.26 2008/09/30 10:52:11 heikki Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
#include "commands/sequence.h" #include "commands/sequence.h"
#include "commands/tablespace.h" #include "commands/tablespace.h"
#include "storage/freespace.h"
#include "storage/smgr.h" #include "storage/smgr.h"
...@@ -30,7 +31,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = { ...@@ -30,7 +31,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL}, {"Database", dbase_redo, dbase_desc, NULL, NULL, NULL},
{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL}, {"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL},
{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL}, {"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL},
{"Reserved 7", NULL, NULL, NULL, NULL, NULL}, {"FreeSpaceMap", fsm_redo, fsm_desc, NULL, NULL, NULL},
{"Reserved 8", NULL, NULL, NULL, NULL, NULL}, {"Reserved 8", NULL, NULL, NULL, NULL, NULL},
{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL}, {"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL},
{"Heap", heap_redo, heap_desc, NULL, NULL, NULL}, {"Heap", heap_redo, heap_desc, NULL, NULL, NULL},
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.59 2008/09/30 10:52:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -359,6 +359,7 @@ CreateFakeRelcacheEntry(RelFileNode rnode) ...@@ -359,6 +359,7 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
rel->rd_lockInfo.lockRelId.relId = rnode.relNode; rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
rel->rd_targblock = InvalidBlockNumber; rel->rd_targblock = InvalidBlockNumber;
rel->rd_fsm_nblocks_cache = InvalidBlockNumber;
rel->rd_smgr = NULL; rel->rd_smgr = NULL;
return rel; return rel;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.245 2008/09/01 20:42:43 tgl Exp $ * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.246 2008/09/30 10:52:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "postmaster/walwriter.h" #include "postmaster/walwriter.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "tcop/tcopprot.h" #include "tcop/tcopprot.h"
...@@ -419,7 +418,6 @@ AuxiliaryProcessMain(int argc, char *argv[]) ...@@ -419,7 +418,6 @@ AuxiliaryProcessMain(int argc, char *argv[])
case StartupProcess: case StartupProcess:
bootstrap_signals(); bootstrap_signals();
StartupXLOG(); StartupXLOG();
LoadFreeSpaceMap();
BuildFlatFiles(false); BuildFlatFiles(false);
proc_exit(0); /* startup done */ proc_exit(0); /* startup done */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.339 2008/08/28 23:09:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.340 2008/09/30 10:52:12 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "parser/parse_expr.h" #include "parser/parse_expr.h"
#include "parser/parse_relation.h" #include "parser/parse_relation.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
...@@ -294,14 +295,22 @@ heap_create(const char *relname, ...@@ -294,14 +295,22 @@ heap_create(const char *relname,
/* /*
* Have the storage manager create the relation's disk file, if needed. * Have the storage manager create the relation's disk file, if needed.
* *
* We only create storage for the main fork here. The caller is * We create storage for the main fork here, and also for the FSM for a
* responsible for creating any additional forks if needed. * heap or toast relation. The caller is responsible for creating any
* additional forks if needed.
*/ */
if (create_storage) if (create_storage)
{ {
Assert(rel->rd_smgr == NULL); Assert(rel->rd_smgr == NULL);
RelationOpenSmgr(rel); RelationOpenSmgr(rel);
smgrcreate(rel->rd_smgr, MAIN_FORKNUM, rel->rd_istemp, false); smgrcreate(rel->rd_smgr, MAIN_FORKNUM, rel->rd_istemp, false);
/*
* For a real heap, create FSM fork as well. Indexams are
* responsible for creating any extra forks themselves.
*/
if (relkind == RELKIND_RELATION || relkind == RELKIND_TOASTVALUE)
smgrcreate(rel->rd_smgr, FSM_FORKNUM, rel->rd_istemp, false);
} }
return rel; return rel;
...@@ -2256,7 +2265,11 @@ RelationTruncateIndexes(Relation heapRelation) ...@@ -2256,7 +2265,11 @@ RelationTruncateIndexes(Relation heapRelation)
/* Fetch info needed for index_build */ /* Fetch info needed for index_build */
indexInfo = BuildIndexInfo(currentIndex); indexInfo = BuildIndexInfo(currentIndex);
/* Now truncate the actual file (and discard buffers) */ /*
* Now truncate the actual file (and discard buffers). The indexam
* is responsible for truncating the FSM in index_build(), if
* applicable.
*/
RelationTruncate(currentIndex, 0); RelationTruncate(currentIndex, 0);
/* Initialize the index and rebuild */ /* Initialize the index and rebuild */
...@@ -2310,7 +2323,8 @@ heap_truncate(List *relids) ...@@ -2310,7 +2323,8 @@ heap_truncate(List *relids)
{ {
Relation rel = lfirst(cell); Relation rel = lfirst(cell);
/* Truncate the actual file (and discard buffers) */ /* Truncate the FSM and actual file (and discard buffers) */
FreeSpaceMapTruncateRel(rel, 0);
RelationTruncate(rel, 0); RelationTruncate(rel, 0);
/* If this relation has indexes, truncate the indexes too */ /* If this relation has indexes, truncate the indexes too */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.304 2008/09/15 18:43:41 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.305 2008/09/30 10:52:12 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -920,7 +920,7 @@ index_drop(Oid indexId) ...@@ -920,7 +920,7 @@ index_drop(Oid indexId)
RelationOpenSmgr(userIndexRelation); RelationOpenSmgr(userIndexRelation);
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(userIndexRelation->rd_smgr, forknum)) if (smgrexists(userIndexRelation->rd_smgr, forknum))
smgrscheduleunlink(userIndexRelation->rd_smgr, forknum, smgrscheduleunlink(userIndexRelation->rd_smgr, forknum,
userIndexRelation->rd_istemp); userIndexRelation->rd_istemp);
RelationCloseSmgr(userIndexRelation); RelationCloseSmgr(userIndexRelation);
...@@ -1322,7 +1322,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid) ...@@ -1322,7 +1322,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
/* /*
* ... and create storage for corresponding forks in the new relfilenode. * ... and create storage for corresponding forks in the new relfilenode.
* *
* NOTE: any conflict in relfilenode value will be caught here * NOTE: any conflict in relfilenode value will be caught here
*/ */
newrnode = relation->rd_node; newrnode = relation->rd_node;
newrnode.relNode = newrelfilenode; newrnode.relNode = newrelfilenode;
...@@ -1331,6 +1331,14 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid) ...@@ -1331,6 +1331,14 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
/* Create the main fork, like heap_create() does */ /* Create the main fork, like heap_create() does */
smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false); smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false);
/*
* For a heap, create FSM fork as well. Indexams are responsible for
* creating any extra forks themselves.
*/
if (relation->rd_rel->relkind == RELKIND_RELATION ||
relation->rd_rel->relkind == RELKIND_TOASTVALUE)
smgrcreate(srel, FSM_FORKNUM, relation->rd_istemp, false);
/* schedule unlinking old files */ /* schedule unlinking old files */
for (i = 0; i <= MAX_FORKNUM; i++) for (i = 0; i <= MAX_FORKNUM; i++)
{ {
...@@ -2310,7 +2318,10 @@ reindex_index(Oid indexId) ...@@ -2310,7 +2318,10 @@ reindex_index(Oid indexId)
if (inplace) if (inplace)
{ {
/* Truncate the actual file (and discard buffers) */ /*
* Truncate the actual file (and discard buffers). The indexam
* is responsible for truncating the FSM, if applicable
*/
RelationTruncate(iRel, 0); RelationTruncate(iRel, 0);
} }
else else
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.212 2008/09/23 10:58:03 heikki Exp $ * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.213 2008/09/30 10:52:12 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/procarray.h" #include "storage/procarray.h"
#include "storage/smgr.h" #include "storage/smgr.h"
...@@ -796,11 +795,6 @@ dropdb(const char *dbname, bool missing_ok) ...@@ -796,11 +795,6 @@ dropdb(const char *dbname, bool missing_ok)
*/ */
DropDatabaseBuffers(db_id); DropDatabaseBuffers(db_id);
/*
* Also, clean out any entries in the shared free space map.
*/
FreeSpaceMapForgetDatabase(db_id);
/* /*
* Tell the stats collector to forget it immediately, too. * Tell the stats collector to forget it immediately, too.
*/ */
...@@ -1640,9 +1634,6 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -1640,9 +1634,6 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
/* Drop pages for this database that are in the shared buffer cache */ /* Drop pages for this database that are in the shared buffer cache */
DropDatabaseBuffers(xlrec->db_id); DropDatabaseBuffers(xlrec->db_id);
/* Also, clean out any entries in the shared free space map */
FreeSpaceMapForgetDatabase(xlrec->db_id);
/* Also, clean out any fsync requests that might be pending in md.c */ /* Also, clean out any fsync requests that might be pending in md.c */
ForgetDatabaseFsyncRequests(xlrec->db_id); ForgetDatabaseFsyncRequests(xlrec->db_id);
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.377 2008/09/11 14:01:09 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.378 2008/09/30 10:52:12 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -505,14 +505,6 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, ...@@ -505,14 +505,6 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
* (autovacuum.c does this for itself.) * (autovacuum.c does this for itself.)
*/ */
vac_update_datfrozenxid(); vac_update_datfrozenxid();
/*
* If it was a database-wide VACUUM, print FSM usage statistics (we
* don't make you be superuser to see these). We suppress this in
* autovacuum, too.
*/
if (all_rels)
PrintFreeSpaceMapStatistics(elevel);
} }
/* /*
...@@ -1272,8 +1264,9 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) ...@@ -1272,8 +1264,9 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
} }
} }
/* update shared free space map with final free space info */	/* update the free space map with final free space info, and vacuum it */
vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages); vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages);
FreeSpaceMapVacuum(onerel);
/* update statistics in pg_class */ /* update statistics in pg_class */
vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages, vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages,
...@@ -2849,6 +2842,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, ...@@ -2849,6 +2842,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
/* Truncate relation, if needed */ /* Truncate relation, if needed */
if (blkno < nblocks) if (blkno < nblocks)
{ {
FreeSpaceMapTruncateRel(onerel, blkno);
RelationTruncate(onerel, blkno); RelationTruncate(onerel, blkno);
vacrelstats->rel_pages = blkno; /* set new number of blocks */ vacrelstats->rel_pages = blkno; /* set new number of blocks */
} }
...@@ -3243,6 +3237,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) ...@@ -3243,6 +3237,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
(errmsg("\"%s\": truncated %u to %u pages", (errmsg("\"%s\": truncated %u to %u pages",
RelationGetRelationName(onerel), RelationGetRelationName(onerel),
vacrelstats->rel_pages, relblocks))); vacrelstats->rel_pages, relblocks)));
FreeSpaceMapTruncateRel(onerel, relblocks);
RelationTruncate(onerel, relblocks); RelationTruncate(onerel, relblocks);
vacrelstats->rel_pages = relblocks; /* set new number of blocks */ vacrelstats->rel_pages = relblocks; /* set new number of blocks */
} }
...@@ -3475,8 +3470,8 @@ tid_reaped(ItemPointer itemptr, void *state) ...@@ -3475,8 +3470,8 @@ tid_reaped(ItemPointer itemptr, void *state)
} }
/* /*
* Update the shared Free Space Map with the info we now have about * Update the Free Space Map with the info we now have about free space in
* free space in the relation, discarding any old info the map may have. * the relation.
*/ */
static void static void
vac_update_fsm(Relation onerel, VacPageList fraged_pages, vac_update_fsm(Relation onerel, VacPageList fraged_pages,
...@@ -3484,26 +3479,8 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages, ...@@ -3484,26 +3479,8 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
{ {
int nPages = fraged_pages->num_pages; int nPages = fraged_pages->num_pages;
VacPage *pagedesc = fraged_pages->pagedesc; VacPage *pagedesc = fraged_pages->pagedesc;
Size threshold;
FSMPageData *pageSpaces;
int outPages;
int i; int i;
/*
* We only report pages with free space at least equal to the average
* request size --- this avoids cluttering FSM with uselessly-small bits
* of space. Although FSM would discard pages with little free space
* anyway, it's important to do this prefiltering because (a) it reduces
* the time spent holding the FSM lock in RecordRelationFreeSpace, and (b)
* FSM uses the number of pages reported as a statistic for guiding space
* management. If we didn't threshold our reports the same way
* vacuumlazy.c does, we'd be skewing that statistic.
*/
threshold = GetAvgFSMRequestSize(&onerel->rd_node);
pageSpaces = (FSMPageData *) palloc(nPages * sizeof(FSMPageData));
outPages = 0;
for (i = 0; i < nPages; i++) for (i = 0; i < nPages; i++)
{ {
/* /*
...@@ -3514,17 +3491,9 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages, ...@@ -3514,17 +3491,9 @@ vac_update_fsm(Relation onerel, VacPageList fraged_pages,
if (pagedesc[i]->blkno >= rel_pages) if (pagedesc[i]->blkno >= rel_pages)
break; break;
if (pagedesc[i]->free >= threshold) RecordPageWithFreeSpace(onerel, pagedesc[i]->blkno, pagedesc[i]->free);
{
FSMPageSetPageNum(&pageSpaces[outPages], pagedesc[i]->blkno);
FSMPageSetSpace(&pageSpaces[outPages], pagedesc[i]->free);
outPages++;
}
} }
RecordRelationFreeSpace(&onerel->rd_node, outPages, outPages, pageSpaces);
pfree(pageSpaces);
} }
/* Copy a VacPage structure */ /* Copy a VacPage structure */
......
...@@ -18,15 +18,6 @@ ...@@ -18,15 +18,6 @@
* index cleanup and page compaction, then resume the heap scan with an empty * index cleanup and page compaction, then resume the heap scan with an empty
* TID array. * TID array.
* *
* We can limit the storage for page free space to MaxFSMPages entries,
* since that's the most the free space map will be willing to remember
* anyway. If the relation has fewer than that many pages with free space,
* life is easy: just build an array of per-page info. If it has more,
* we store the free space info as a heap ordered by amount of free space,
* so that we can discard the pages with least free space to ensure we never
* have more than MaxFSMPages entries in all. The surviving page entries
* are passed to the free space map at conclusion of the scan.
*
* If we're processing a table with no indexes, we can just vacuum each page * If we're processing a table with no indexes, we can just vacuum each page
* as we go; there's no need to save up multiple tuples to minimize the number * as we go; there's no need to save up multiple tuples to minimize the number
* of index scans performed. So we don't use maintenance_work_mem memory for * of index scans performed. So we don't use maintenance_work_mem memory for
...@@ -38,7 +29,7 @@ ...@@ -38,7 +29,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.107 2008/05/12 00:00:48 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.108 2008/09/30 10:52:12 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -90,19 +81,11 @@ typedef struct LVRelStats ...@@ -90,19 +81,11 @@ typedef struct LVRelStats
BlockNumber pages_removed; BlockNumber pages_removed;
double tuples_deleted; double tuples_deleted;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
Size threshold; /* minimum interesting free space */
/* List of TIDs of tuples we intend to delete */ /* List of TIDs of tuples we intend to delete */
/* NB: this list is ordered by TID address */ /* NB: this list is ordered by TID address */
int num_dead_tuples; /* current # of entries */ int num_dead_tuples; /* current # of entries */
int max_dead_tuples; /* # slots allocated in array */ int max_dead_tuples; /* # slots allocated in array */
ItemPointer dead_tuples; /* array of ItemPointerData */ ItemPointer dead_tuples; /* array of ItemPointerData */
/* Array or heap of per-page info about free space */
/* We use a simple array until it fills up, then convert to heap */
bool fs_is_heap; /* are we using heap organization? */
int num_free_pages; /* current # of entries */
int max_free_pages; /* # slots allocated in array */
FSMPageData *free_pages; /* array or heap of blkno/avail */
BlockNumber tot_free_pages; /* total pages with >= threshold space */
int num_index_scans; int num_index_scans;
} LVRelStats; } LVRelStats;
...@@ -134,12 +117,8 @@ static BlockNumber count_nondeletable_pages(Relation onerel, ...@@ -134,12 +117,8 @@ static BlockNumber count_nondeletable_pages(Relation onerel,
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats, static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr); ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
BlockNumber page, Size avail);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int vac_cmp_itemptr(const void *left, const void *right); static int vac_cmp_itemptr(const void *left, const void *right);
static int vac_cmp_page_spaces(const void *left, const void *right);
/* /*
...@@ -180,10 +159,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -180,10 +159,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
/* Set threshold for interesting free space = average request size */
/* XXX should we scale it up or down? Adjust vacuum.c too, if so */
vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);
vacrelstats->num_index_scans = 0; vacrelstats->num_index_scans = 0;
/* Open all indexes of the relation */ /* Open all indexes of the relation */
...@@ -207,18 +182,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -207,18 +182,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
lazy_truncate_heap(onerel, vacrelstats); lazy_truncate_heap(onerel, vacrelstats);
/* Update shared free space map with final free space info */ /* Vacuum the Free Space Map */
lazy_update_fsm(onerel, vacrelstats); FreeSpaceMapVacuum(onerel);
if (vacrelstats->tot_free_pages > MaxFSMPages)
ereport(WARNING,
(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel)),
/* Only suggest VACUUM FULL if > 20% free */
(vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));
/* Update statistics in pg_class */ /* Update statistics in pg_class */
vac_update_relstats(RelationGetRelid(onerel), vac_update_relstats(RelationGetRelid(onerel),
...@@ -313,6 +278,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -313,6 +278,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
int prev_dead_count; int prev_dead_count;
OffsetNumber frozen[MaxOffsetNumber]; OffsetNumber frozen[MaxOffsetNumber];
int nfrozen; int nfrozen;
Size freespace;
vacuum_delay_point(); vacuum_delay_point();
...@@ -375,20 +341,21 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -375,20 +341,21 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
relname, blkno))); relname, blkno)));
PageInit(page, BufferGetPageSize(buf), 0); PageInit(page, BufferGetPageSize(buf), 0);
empty_pages++; empty_pages++;
lazy_record_free_space(vacrelstats, blkno,
PageGetHeapFreeSpace(page));
} }
freespace = PageGetHeapFreeSpace(page);
MarkBufferDirty(buf); MarkBufferDirty(buf);
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace);
continue; continue;
} }
if (PageIsEmpty(page)) if (PageIsEmpty(page))
{ {
empty_pages++; empty_pages++;
lazy_record_free_space(vacrelstats, blkno, freespace = PageGetHeapFreeSpace(page);
PageGetHeapFreeSpace(page));
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace);
continue; continue;
} }
...@@ -556,6 +523,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -556,6 +523,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacuumed_pages++; vacuumed_pages++;
} }
freespace = PageGetHeapFreeSpace(page);
/* Remember the location of the last page with nonremovable tuples */
if (hastup)
vacrelstats->nonempty_pages = blkno + 1;
UnlockReleaseBuffer(buf);
/* /*
* If we remembered any tuples for deletion, then the page will be * If we remembered any tuples for deletion, then the page will be
* visited again by lazy_vacuum_heap, which will compute and record * visited again by lazy_vacuum_heap, which will compute and record
...@@ -564,16 +539,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -564,16 +539,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* taken if there are no indexes.) * taken if there are no indexes.)
*/ */
if (vacrelstats->num_dead_tuples == prev_dead_count) if (vacrelstats->num_dead_tuples == prev_dead_count)
{ RecordPageWithFreeSpace(onerel, blkno, freespace);
lazy_record_free_space(vacrelstats, blkno,
PageGetHeapFreeSpace(page));
}
/* Remember the location of the last page with nonremovable tuples */
if (hastup)
vacrelstats->nonempty_pages = blkno + 1;
UnlockReleaseBuffer(buf);
} }
/* save stats for use later */ /* save stats for use later */
...@@ -611,12 +577,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -611,12 +577,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
tups_vacuumed, num_tuples, nblocks), tups_vacuumed, num_tuples, nblocks),
errdetail("%.0f dead row versions cannot be removed yet.\n" errdetail("%.0f dead row versions cannot be removed yet.\n"
"There were %.0f unused item pointers.\n" "There were %.0f unused item pointers.\n"
"%u pages contain useful free space.\n"
"%u pages are entirely empty.\n" "%u pages are entirely empty.\n"
"%s.", "%s.",
nkeep, nkeep,
nunused, nunused,
vacrelstats->tot_free_pages,
empty_pages, empty_pages,
pg_rusage_show(&ru0)))); pg_rusage_show(&ru0))));
} }
...@@ -649,6 +613,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -649,6 +613,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
BlockNumber tblk; BlockNumber tblk;
Buffer buf; Buffer buf;
Page page; Page page;
Size freespace;
vacuum_delay_point(); vacuum_delay_point();
...@@ -656,11 +621,13 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -656,11 +621,13 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy); buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
LockBufferForCleanup(buf); LockBufferForCleanup(buf);
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats); tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
/* Now that we've compacted the page, record its available space */ /* Now that we've compacted the page, record its available space */
page = BufferGetPage(buf); page = BufferGetPage(buf);
lazy_record_free_space(vacrelstats, tblk, freespace = PageGetHeapFreeSpace(page);
PageGetHeapFreeSpace(page));
UnlockReleaseBuffer(buf); UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, tblk, freespace);
npages++; npages++;
} }
...@@ -816,10 +783,6 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -816,10 +783,6 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{ {
BlockNumber old_rel_pages = vacrelstats->rel_pages; BlockNumber old_rel_pages = vacrelstats->rel_pages;
BlockNumber new_rel_pages; BlockNumber new_rel_pages;
FSMPageData *pageSpaces;
int n;
int i,
j;
PGRUsage ru0; PGRUsage ru0;
pg_rusage_init(&ru0); pg_rusage_init(&ru0);
...@@ -865,6 +828,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -865,6 +828,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
/* /*
* Okay to truncate. * Okay to truncate.
*/ */
FreeSpaceMapTruncateRel(onerel, new_rel_pages);
RelationTruncate(onerel, new_rel_pages); RelationTruncate(onerel, new_rel_pages);
/* /*
...@@ -875,34 +839,6 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -875,34 +839,6 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
* the table again. * the table again.
*/ */
/*
* Drop free-space info for removed blocks; these must not get entered
* into the FSM!
*/
pageSpaces = vacrelstats->free_pages;
n = vacrelstats->num_free_pages;
j = 0;
for (i = 0; i < n; i++)
{
if (FSMPageGetPageNum(&pageSpaces[i]) < new_rel_pages)
{
pageSpaces[j] = pageSpaces[i];
j++;
}
}
vacrelstats->num_free_pages = j;
/*
* If tot_free_pages was more than num_free_pages, we can't tell for sure
* what its correct value is now, because we don't know which of the
* forgotten pages are getting truncated. Conservatively set it equal to
* num_free_pages.
*/
vacrelstats->tot_free_pages = j;
/* We destroyed the heap ordering, so mark array unordered */
vacrelstats->fs_is_heap = false;
/* update statistics */ /* update statistics */
vacrelstats->rel_pages = new_rel_pages; vacrelstats->rel_pages = new_rel_pages;
vacrelstats->pages_removed = old_rel_pages - new_rel_pages; vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
...@@ -1005,7 +941,6 @@ static void ...@@ -1005,7 +941,6 @@ static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{ {
long maxtuples; long maxtuples;
int maxpages;
if (vacrelstats->hasindex) if (vacrelstats->hasindex)
{ {
...@@ -1029,19 +964,6 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) ...@@ -1029,19 +964,6 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
vacrelstats->max_dead_tuples = (int) maxtuples; vacrelstats->max_dead_tuples = (int) maxtuples;
vacrelstats->dead_tuples = (ItemPointer) vacrelstats->dead_tuples = (ItemPointer)
palloc(maxtuples * sizeof(ItemPointerData)); palloc(maxtuples * sizeof(ItemPointerData));
maxpages = MaxFSMPages;
maxpages = Min(maxpages, MaxAllocSize / sizeof(FSMPageData));
/* No need to allocate more pages than the relation has blocks */
if (relblocks < (BlockNumber) maxpages)
maxpages = (int) relblocks;
vacrelstats->fs_is_heap = false;
vacrelstats->num_free_pages = 0;
vacrelstats->max_free_pages = maxpages;
vacrelstats->free_pages = (FSMPageData *)
palloc(maxpages * sizeof(FSMPageData));
vacrelstats->tot_free_pages = 0;
} }
/* /*
...@@ -1063,127 +985,6 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats, ...@@ -1063,127 +985,6 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats,
} }
} }
/*
* lazy_record_free_space - remember free space on one page
*/
static void
lazy_record_free_space(LVRelStats *vacrelstats,
BlockNumber page,
Size avail)
{
FSMPageData *pageSpaces;
int n;
/*
* A page with less than stats->threshold free space will be forgotten
* immediately, and never passed to the free space map. Removing the
* uselessly small entries early saves cycles, and in particular reduces
* the amount of time we spend holding the FSM lock when we finally call
* RecordRelationFreeSpace. Since the FSM will probably drop pages with
* little free space anyway, there's no point in making this really small.
*
* XXX Is it worth trying to measure average tuple size, and using that to
* adjust the threshold? Would be worthwhile if FSM has no stats yet for
* this relation. But changing the threshold as we scan the rel might
* lead to bizarre behavior, too. Also, it's probably better if vacuum.c
* has the same thresholding behavior as we do here.
*/
if (avail < vacrelstats->threshold)
return;
/* Count all pages over threshold, even if not enough space in array */
vacrelstats->tot_free_pages++;
/* Copy pointers to local variables for notational simplicity */
pageSpaces = vacrelstats->free_pages;
n = vacrelstats->max_free_pages;
/* If we haven't filled the array yet, just keep adding entries */
if (vacrelstats->num_free_pages < n)
{
FSMPageSetPageNum(&pageSpaces[vacrelstats->num_free_pages], page);
FSMPageSetSpace(&pageSpaces[vacrelstats->num_free_pages], avail);
vacrelstats->num_free_pages++;
return;
}
/*----------
* The rest of this routine works with "heap" organization of the
* free space arrays, wherein we maintain the heap property
* avail[(j-1) div 2] <= avail[j] for 0 < j < n.
* In particular, the zero'th element always has the smallest available
* space and can be discarded to make room for a new page with more space.
* See Knuth's discussion of heap-based priority queues, sec 5.2.3;
* but note he uses 1-origin array subscripts, not 0-origin.
*----------
*/
/* If we haven't yet converted the array to heap organization, do it */
if (!vacrelstats->fs_is_heap)
{
/*
* Scan backwards through the array, "sift-up" each value into its
* correct position. We can start the scan at n/2-1 since each entry
* above that position has no children to worry about.
*/
int l = n / 2;
while (--l >= 0)
{
BlockNumber R = FSMPageGetPageNum(&pageSpaces[l]);
Size K = FSMPageGetSpace(&pageSpaces[l]);
int i; /* i is where the "hole" is */
i = l;
for (;;)
{
int j = 2 * i + 1;
if (j >= n)
break;
if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
j++;
if (K <= FSMPageGetSpace(&pageSpaces[j]))
break;
pageSpaces[i] = pageSpaces[j];
i = j;
}
FSMPageSetPageNum(&pageSpaces[i], R);
FSMPageSetSpace(&pageSpaces[i], K);
}
vacrelstats->fs_is_heap = true;
}
/* If new page has more than zero'th entry, insert it into heap */
if (avail > FSMPageGetSpace(&pageSpaces[0]))
{
/*
* Notionally, we replace the zero'th entry with the new data, and
* then sift-up to maintain the heap property. Physically, the new
* data doesn't get stored into the arrays until we find the right
* location for it.
*/
int i = 0; /* i is where the "hole" is */
for (;;)
{
int j = 2 * i + 1;
if (j >= n)
break;
if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
j++;
if (avail <= FSMPageGetSpace(&pageSpaces[j]))
break;
pageSpaces[i] = pageSpaces[j];
i = j;
}
FSMPageSetPageNum(&pageSpaces[i], page);
FSMPageSetSpace(&pageSpaces[i], avail);
}
}
/* /*
* lazy_tid_reaped() -- is a particular tid deletable? * lazy_tid_reaped() -- is a particular tid deletable?
* *
...@@ -1206,27 +1007,6 @@ lazy_tid_reaped(ItemPointer itemptr, void *state) ...@@ -1206,27 +1007,6 @@ lazy_tid_reaped(ItemPointer itemptr, void *state)
return (res != NULL); return (res != NULL);
} }
/*
* Update the shared Free Space Map with the info we now have about
* free space in the relation, discarding any old info the map may have.
*/
static void
lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
{
FSMPageData *pageSpaces = vacrelstats->free_pages;
int nPages = vacrelstats->num_free_pages;
/*
* Sort data into order, as required by RecordRelationFreeSpace.
*/
if (nPages > 1)
qsort(pageSpaces, nPages, sizeof(FSMPageData),
vac_cmp_page_spaces);
RecordRelationFreeSpace(&onerel->rd_node, vacrelstats->tot_free_pages,
nPages, pageSpaces);
}
/* /*
* Comparator routines for use with qsort() and bsearch(). * Comparator routines for use with qsort() and bsearch().
*/ */
...@@ -1256,18 +1036,3 @@ vac_cmp_itemptr(const void *left, const void *right) ...@@ -1256,18 +1036,3 @@ vac_cmp_itemptr(const void *left, const void *right)
return 0; return 0;
} }
static int
vac_cmp_page_spaces(const void *left, const void *right)
{
FSMPageData *linfo = (FSMPageData *) left;
FSMPageData *rinfo = (FSMPageData *) right;
BlockNumber lblkno = FSMPageGetPageNum(linfo);
BlockNumber rblkno = FSMPageGetPageNum(rinfo);
if (lblkno < rblkno)
return -1;
else if (lblkno > rblkno)
return 1;
return 0;
}
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.51 2008/08/11 11:05:11 heikki Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.52 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -55,7 +55,6 @@ ...@@ -55,7 +55,6 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
...@@ -398,7 +397,6 @@ BackgroundWriterMain(void) ...@@ -398,7 +397,6 @@ BackgroundWriterMain(void)
ExitOnAnyError = true; ExitOnAnyError = true;
/* Close down the database */ /* Close down the database */
ShutdownXLOG(0, 0); ShutdownXLOG(0, 0);
DumpFreeSpaceMap(0, 0);
/* Normal exit from the bgwriter is here */ /* Normal exit from the bgwriter is here */
proc_exit(0); /* done */ proc_exit(0); /* done */
} }
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# Makefile for storage/freespace # Makefile for storage/freespace
# #
# IDENTIFICATION # IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/storage/freespace/Makefile,v 1.4 2008/02/19 10:30:08 petere Exp $ # $PostgreSQL: pgsql/src/backend/storage/freespace/Makefile,v 1.5 2008/09/30 10:52:13 heikki Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
...@@ -12,6 +12,6 @@ subdir = src/backend/storage/freespace ...@@ -12,6 +12,6 @@ subdir = src/backend/storage/freespace
top_builddir = ../../../.. top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = freespace.o OBJS = freespace.o fsmpage.o indexfsm.o
include $(top_srcdir)/src/backend/common.mk include $(top_srcdir)/src/backend/common.mk
$PostgreSQL: pgsql/src/backend/storage/freespace/README,v 1.1 2008/09/30 10:52:13 heikki Exp $
Free Space Map
--------------
The purpose of the free space map is to quickly locate a page with enough
free space to hold a tuple to be stored; or to determine that no such page
exists and the relation must be extended by one page. As of PostgreSQL 8.4
each relation has its own, extensible free space map stored in a separate
"fork" of its relation. This eliminates the disadvantages of the former
fixed-size FSM.
It is important to keep the map small so that it can be searched rapidly.
Therefore, we don't attempt to record the exact free space on a page.
We allocate one map byte to each page, allowing us to record free space
at a granularity of 1/256th of a page. Another way to say it is that
the stored value is the free space divided by BLCKSZ/256 (rounding down).
We assume that the free space must always be less than BLCKSZ, since
all pages have some overhead; so the maximum map value is 255.
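
As a purely illustrative sketch of that encoding (the real conversion is
fsm_space_avail_to_cat() in freespace.c, which additionally treats the topmost
category specially), with the default BLCKSZ of 8192 each map byte covers
32 bytes of free space:

    #define EXAMPLE_BLCKSZ 8192

    /* Illustrative only: map an amount of free space to a one-byte value. */
    static unsigned char
    example_avail_to_byte(unsigned int avail)
    {
        unsigned int val = avail / (EXAMPLE_BLCKSZ / 256); /* 32-byte steps */

        return (val > 255) ? 255 : (unsigned char) val;    /* 1000 bytes -> 31 */
    }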
To assist in fast searching, the map isn't simply an array of per-page
entries, but has a tree structure above those entries. There is a tree
structure of pages, and a tree structure within each page, as described
below.
FSM page structure
------------------
Within each FSM page, we use a binary tree structure where leaf nodes store
the amount of free space on heap pages (or lower level FSM pages, see
"Higher-level structure" below), with one leaf node per heap page. A non-leaf
node stores the max amount of free space on any of its children.
For example:
4
4 2
3 4 0 2 <- This level represents heap pages
We need two basic operations: search and update.
To search for a page with X amount of free space, traverse down the tree
along a path where n >= X, until you hit the bottom. If both children of a
node satisfy the condition, you can pick either one arbitrarily.
To update the amount of free space on a page to X, first update the leaf node
corresponding to the heap page, then "bubble up" the change to upper nodes,
by walking up to each parent and recomputing its value as the max of its
two children. Repeat until reaching the root or a parent whose value
doesn't change.
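
A minimal sketch of that update, assuming the implicit-array layout described
below (parent of node i at (i - 1) / 2, children at 2i + 1 and 2i + 2) and a
perfect tree with no missing leaves; the real code is fsm_set_avail() in
fsmpage.c, which operates on the page's fp_nodes array:

    /* Illustrative only: set one leaf node and bubble the change upwards. */
    static void
    example_set_avail(unsigned char *nodes, int leafindex, unsigned char value)
    {
        int     i = leafindex;          /* array index of the leaf node */

        nodes[i] = value;
        while (i > 0)
        {
            int             parent = (i - 1) / 2;
            unsigned char   lchild = nodes[2 * parent + 1];
            unsigned char   rchild = nodes[2 * parent + 2];
            unsigned char   max = (lchild > rchild) ? lchild : rchild;

            if (nodes[parent] == max)
                break;                  /* nothing changes further up */
            nodes[parent] = max;
            i = parent;
        }
    }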
This data structure has a couple of nice properties:
- to discover that there is no page with X bytes of free space, you only
need to look at the root node
- by varying which child to traverse to in the search algorithm, when you have
  a choice, you can implement various strategies, like preferring pages closer
  to a given page, or spreading the load across the table.
Higher-level routines that use FSM pages access them through the fsm_set_avail()
and fsm_search_avail() functions. The interface to those functions hides the
page's internal tree structure, treating the FSM page as a black box that has
a certain number of "slots" for storing free space information. (However,
the higher routines have to be aware of the tree structure of the whole map.)
The binary tree is stored on each FSM page as an array. Because the page
header takes some space on a page, the binary tree isn't perfect. That is,
a few right-most leaf nodes are missing, and there are some useless non-leaf
nodes at the right. So the tree looks something like this:
0
1 2
3 4 5 6
7 8 9 A B
where the numbers denote each node's position in the array. Note that the
tree is guaranteed complete above the leaf level; only some leaf nodes are
missing. This is reflected in the number of usable "slots" per page not
being an exact power of 2.
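
Under the same assumptions (perfect tree, children of node i at 2i + 1 and
2i + 2), the in-page part of the search sketched earlier boils down to the
following; the real fsm_search_avail() in fsmpage.c additionally copes with
the missing right-most leaves and the fp_next_slot hint described next:

    /* Illustrative only: return the array index of a leaf with enough space,
     * or -1 if this page has none. */
    static int
    example_search_avail(const unsigned char *nodes, int nnodes,
                         unsigned char needed)
    {
        int     i = 0;                  /* start at the root node */

        if (nodes[0] < needed)
            return -1;                  /* the root already says there is none */

        while (2 * i + 1 < nnodes)      /* descend until a leaf is reached */
        {
            int     left = 2 * i + 1;

            /* Either child with a large enough value will do; prefer the left. */
            i = (nodes[left] >= needed) ? left : left + 1;
        }
        return i;
    }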
A FSM page also has a next slot pointer, fp_next_slot, that determines where
to start the next search for free space within that page. The reason for that
is to spread out the pages that are returned by FSM searches. When several
backends are concurrently inserting into a relation, contention can be avoided
by having them insert into different pages. But it is also desirable to fill
up pages in sequential order, to get the benefit of OS prefetching and batched
writes. The FSM is responsible for making that happen, and the next slot
pointer helps provide the desired behavior.
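
To picture the hint's use, a caller-side sketch (example_search_from() is a
hypothetical helper; the real logic is folded into fsm_search_avail()):

    /* Illustrative only: start from the hint and remember where to continue. */
    slot = example_search_from(fsmpage, fsmpage->fp_next_slot, needed);
    if (slot >= 0)
        fsmpage->fp_next_slot = slot + 1;   /* just a hint; updated even under a
                                             * shared lock, see Locking below */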
Higher-level structure
----------------------
To scale up the data structure described above beyond a single page, we
maintain a similar tree-structure across pages. Leaf nodes in higher level
pages correspond to lower level FSM pages. The root node within each page
has the same value as the corresponding leaf node on its parent page.
The root page is always stored at physical block 0.
For example, assuming each FSM page can hold information about 4 pages (in
reality, it holds (BLCKSZ - headers) / 2, or ~4000 with default BLCKSZ),
we get a disk layout like this:
0 <-- page 0 at level 2 (root page)
0 <-- page 0 at level 1
0 <-- page 0 at level 0
1 <-- page 1 at level 0
2 <-- ...
3
1 <-- page 1 at level 1
4
5
6
7
2
8
9
10
11
3
12
13
14
15
where the numbers are page numbers *at that level*, starting from 0.
To find the physical block # corresponding to leaf page n, we need to
count the number of leaf and upper-level pages preceding page n.
This turns out to be
y = n + (n / F + 1) + (n / F^2 + 1) + ... + 1
where F is the fanout (4 in the above example). The first term n is the number
of preceding leaf pages, the second term is the number of pages at level 1,
and so forth.
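
A minimal sketch of that calculation for the constant-height, three-level tree
(the real fsm_logical_to_physical() in freespace.c works from an FSMAddress,
with SlotsPerFSMPage as the fanout):

    /* Illustrative only: physical block number of leaf page n, given fanout F
     * and the number of levels above the leaf level (2 for a 3-level tree). */
    static unsigned int
    example_leaf_to_physical(unsigned int n, unsigned int F, int upper_levels)
    {
        unsigned int y = n;             /* the n preceding leaf pages */
        unsigned int t = n;
        int          lvl;

        for (lvl = 1; lvl <= upper_levels; lvl++)
        {
            t /= F;                     /* preceding pages at this level ... */
            y += t + 1;                 /* ... plus the one that covers page n */
        }
        return y;
    }

With F = 4 as in the diagram above, leaf page 4 maps to physical block 7:
block 0 holds the root, block 1 the first level-1 page, blocks 2-5 the leaf
pages 0-3, and block 6 the second level-1 page.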
To keep things simple, the tree is always constant height. To cover the
maximum relation size of 2^32-1 blocks, three levels is enough with the default
BLCKSZ (4000^3 > 2^32).
Addressing
----------
The higher-level routines operate on "logical" addresses, consisting of
- level,
- logical page number, and
- slot (if applicable)
Bottom level FSM pages have level of 0, the level above that 1, and root 2.
As in the diagram above, logical page number is the page number at that level,
starting from 0.
Locking
-------
When traversing down to search for free space, only one page is locked at a
time: the parent page is released before locking the child. If the child page
is concurrently modified, and there is no longer free space on the child page
when you land on it, you need to start from scratch (after correcting the
parent page, so that you don't get into an infinite loop).
We use shared buffer locks when searching, but exclusive buffer lock when
updating a page. However, the next slot search pointer is updated during
searches even though we have only a shared lock. fp_next_slot is just a hint
and we can easily reset it if it gets corrupted; so it seems better to accept
some risk of that type than to pay the overhead of exclusive locking.
Recovery
--------
The FSM is not explicitly WAL-logged. Instead, we rely on a bunch of
self-correcting measures to repair possible corruption.
First of all, whenever a value is set on an FSM page, the root node of the
page is compared against the new value after the change has been bubbled up.
It should be greater than or equal to the value just set, or we
have a corrupted page, with a parent somewhere with too small a value.
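
For illustration, that first check amounts to something like the following
after the bubble-up (example_rebuild_fsm_page() is a hypothetical helper; the
real code recomputes the page's upper nodes from its leaf nodes):

    /* Illustrative only: a parent was left too small, so fix the whole page. */
    if (fsmpage->fp_nodes[0] < new_value)      /* root must cover the new leaf */
        example_rebuild_fsm_page(fsmpage);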
Secondly, we can detect corrupted pages while searching, as we traverse down
the tree. That check will notice if a parent node is set to too high a value.
In both cases, the upper nodes on the page are immediately rebuilt, fixing
the corruption.
Vacuum updates all the bottom level pages with the correct amount of free space
on the heap pages, fixing any outdated values there. After the heap and
index passes are done, FreeSpaceMapVacuum is called, and the FSM tree is
scanned in depth-first order. This fixes any discrepancies between upper
and lower level FSM pages.
TODO
----
- fastroot to avoid traversing upper nodes with just 1 child
- use a different system for tables that fit into one FSM page, with a
mechanism to switch to the real thing as it grows.
...@@ -8,245 +8,123 @@ ...@@ -8,245 +8,123 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.60 2008/03/10 02:04:09 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.61 2008/09/30 10:52:13 heikki Exp $
* *
* *
* NOTES: * NOTES:
* *
* The only really interesting aspect of this code is the heuristics for * Free Space Map keeps track of the amount of free space on pages, and
* deciding how much information we can afford to keep about each relation, * allows quickly searching for a page with enough free space. The FSM is
* given that we have a limited amount of workspace in shared memory. * stored in a dedicated relation fork of all heap relations, and those
* These currently work as follows: * index access methods that need it (see also indexfsm.c). See README for
* * more information.
* The number of distinct relations tracked is limited by a configuration
* variable (MaxFSMRelations). When this would be exceeded, we discard the
* least recently used relation. A doubly-linked list with move-to-front
* behavior keeps track of which relation is least recently used.
*
* For each known relation, we track the average request size given to
* GetPageWithFreeSpace() as well as the most recent number of pages reported
* to RecordRelationFreeSpace(). The average request size is not directly
* used in this module, but we expect VACUUM to use it to filter out
* uninteresting amounts of space before calling RecordRelationFreeSpace().
* The sum of the RRFS page counts is thus the total number of "interesting"
* pages that we would like to track; this is called DesiredFSMPages.
*
* The number of pages actually tracked is limited by a configuration variable
* (MaxFSMPages). When this is less than DesiredFSMPages, each relation
* gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages.
* We discard pages with less free space to reach this target.
*
* Actually, our space allocation is done in "chunks" of CHUNKPAGES pages,
* with each relation guaranteed at least one chunk. This reduces thrashing
* of the storage allocations when there are small changes in the RRFS page
* counts from one VACUUM to the next. (XXX it might also be worthwhile to
* impose some kind of moving-average smoothing on the RRFS page counts?)
*
* So the actual arithmetic is: for each relation compute myRequest as the
* number of chunks needed to hold its RRFS page count (not counting the
* first, guaranteed chunk); compute sumRequests as the sum of these values
* over all relations; then for each relation figure its target allocation
* as
* 1 + round(spareChunks * myRequest / sumRequests)
* where spareChunks = totalChunks - numRels is the number of chunks we have
* a choice what to do with. We round off these numbers because truncating
* all of them would waste significant space. But because of roundoff, it's
* possible for the last few relations to get less space than they should;
* the target allocation must be checked against remaining available space.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include <limits.h> #include "access/htup.h"
#include <math.h> #include "access/xlogutils.h"
#include <unistd.h> #include "storage/bufpage.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/fsm_internals.h"
#include "storage/lmgr.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/smgr.h"
#include "utils/rel.h"
#include "utils/inval.h"
#include "miscadmin.h"
/*
/*---------- * We use just one byte to store the amount of free space on a page, so we
* During database shutdown, we store the contents of FSM into a disk file, * divide the amount of free space a page can have into 256 different
* which is re-read during startup. This way we don't have a startup * categories. The highest category, 255, represents a page with at least
* transient condition where FSM isn't really functioning. * MaxFSMRequestSize bytes of free space, and the second highest category
* represents the range from 254 * FSM_CAT_STEP, inclusive, to
* MaxFSMRequestSize, exclusive.
* *
* The file format is: * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
* label "FSM\0" * default 8k BLCKSZ, and that MaxFSMRequestSize is 24 bytes, the categories
* endian constant 0x01020304 for detecting endianness problems * look like this
* version# *
* numRels *
* -- for each rel, in *reverse* usage order: * Range Category
* relfilenode * 0 - 31 0
* isIndex * 32 - 63 1
* avgRequest * ... ... ...
* interestingPages * 8096 - 8127 253
* storedPages * 8128 - 8163 254
* arena data array of storedPages FSMPageData or IndexFSMPageData * 8164 - 8192 255
*---------- *
* The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
* isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
* bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
* bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
* completely empty page, that would mean that we could never satisfy a
* request of exactly MaxFSMRequestSize bytes.
*/ */
#define FSM_CATEGORIES 256
/* Name of FSM cache file (relative to $PGDATA) */ #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
#define FSM_CACHE_FILENAME "global/pg_fsm.cache" #define MaxFSMRequestSize MaxHeapTupleSize
/* Fixed values in header */
#define FSM_CACHE_LABEL "FSM"
#define FSM_CACHE_ENDIAN 0x01020304
#define FSM_CACHE_VERSION 20030305
/* File header layout */
typedef struct FsmCacheFileHeader
{
char label[4];
uint32 endian;
uint32 version;
int32 numRels;
} FsmCacheFileHeader;
/* Per-relation header */
typedef struct FsmCacheRelHeader
{
RelFileNode key; /* hash key (must be first) */
bool isIndex; /* if true, we store only page numbers */
uint32 avgRequest; /* moving average of space requests */
BlockNumber interestingPages; /* # of pages with useful free space */
int32 storedPages; /* # of pages stored in arena */
} FsmCacheRelHeader;
int MaxFSMRelations; /* these are set by guc.c */
int MaxFSMPages;
static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
static HTAB *FreeSpaceMapRelHash; /* points to (what used to be)
* FSMHeader->relHash */
static void CheckFreeSpaceMapStatistics(int elevel, int numRels,
double needed);
static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
static FSMRelation *create_fsm_rel(RelFileNode *rel);
static void delete_fsm_rel(FSMRelation *fsmrel);
static int realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages,
bool isIndex);
static void link_fsm_rel_usage(FSMRelation *fsmrel);
static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
static void link_fsm_rel_storage(FSMRelation *fsmrel);
static void unlink_fsm_rel_storage(FSMRelation *fsmrel);
static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
static BlockNumber find_index_free_space(FSMRelation *fsmrel);
static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
Size spaceAvail);
static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
int *outPageIndex);
static void compact_fsm_storage(void);
static void push_fsm_rels_after(FSMRelation *afterRel);
static void pack_incoming_pages(FSMPageData *newLocation, int newPages,
FSMPageData *pageSpaces, int nPages);
static void pack_existing_pages(FSMPageData *newLocation, int newPages,
FSMPageData *oldLocation, int oldPages);
static int fsm_calc_request(FSMRelation *fsmrel);
static int fsm_calc_request_unclamped(FSMRelation *fsmrel);
static int fsm_calc_target_allocation(int myRequest);
static int fsm_current_chunks(FSMRelation *fsmrel);
static int fsm_current_allocation(FSMRelation *fsmrel);
/* /*
* Exported routines * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
* and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
* 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
* this means that 4096 bytes is the smallest BLCKSZ that we can get away
* with a 3-level tree, and 512 is the smallest we support.
*/ */
#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
#define FSM_BOTTOM_LEVEL 0
/* /*
* InitFreeSpaceMap -- Initialize the freespace module. * The internal FSM routines work on a logical addressing scheme. Each
* * level of the tree can be thought of as a separately addressable file.
* This must be called once during shared memory initialization.
* It builds the empty free space map table. FreeSpaceLock must also be
* initialized at some point, but is not touched here --- we assume there is
* no need for locking, since only the calling process can be accessing shared
* memory as yet.
*/ */
void typedef struct
InitFreeSpaceMap(void)
{ {
HASHCTL info; int level; /* level */
int nchunks; int logpageno; /* page number within the level */
bool found; } FSMAddress;
/* Create table header */
FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header",
sizeof(FSMHeader),
&found);
if (FreeSpaceMap == NULL)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("insufficient shared memory for free space map")));
if (!found)
MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
/* Create hashtable for FSMRelations */
info.keysize = sizeof(RelFileNode);
info.entrysize = sizeof(FSMRelation);
info.hash = tag_hash;
FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash",
MaxFSMRelations + 1,
MaxFSMRelations + 1,
&info,
(HASH_ELEM | HASH_FUNCTION));
if (!FreeSpaceMapRelHash)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("insufficient shared memory for free space map")));
if (found)
return;
/* Allocate page-storage arena */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
/* This check ensures spareChunks will be greater than zero */
if (nchunks <= MaxFSMRelations)
ereport(FATAL,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("max_fsm_pages must exceed max_fsm_relations * %d",
CHUNKPAGES)));
FreeSpaceMap->arena = (char *) ShmemAlloc((Size) nchunks * CHUNKBYTES);
if (FreeSpaceMap->arena == NULL)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("insufficient shared memory for free space map")));
FreeSpaceMap->totalChunks = nchunks;
FreeSpaceMap->usedChunks = 0;
FreeSpaceMap->sumRequests = 0;
}
/* /* Address of the root page. */
* Estimate amount of shmem space needed for FSM. static const FSMAddress FSM_ROOT_ADDRESS = { FSM_ROOT_LEVEL, 0 };
*/
Size /* XLOG record types */
FreeSpaceShmemSize(void) #define XLOG_FSM_TRUNCATE 0x00 /* truncate */
typedef struct
{ {
Size size; RelFileNode node; /* truncated relation */
int nchunks; BlockNumber nheapblocks; /* new number of blocks in the heap */
} xl_fsm_truncate;
/* table header */ /* functions to navigate the tree */
size = MAXALIGN(sizeof(FSMHeader)); static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
static BlockNumber fsm_logical_to_physical(FSMAddress addr);
/* hash table, including the FSMRelation objects */ static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
size = add_size(size, hash_estimate_size(MaxFSMRelations + 1, static void fsm_extend(Relation rel, BlockNumber nfsmblocks);
sizeof(FSMRelation)));
/* page-storage arena */ /* functions to convert amount of free space to a FSM category */
nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; static uint8 fsm_space_avail_to_cat(Size avail);
size = add_size(size, mul_size(nchunks, CHUNKBYTES)); static uint8 fsm_space_needed_to_cat(Size needed);
static Size fsm_space_cat_to_avail(uint8 cat);
/* workhorse functions for various operations */
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
uint8 newValue, uint8 minValue);
static BlockNumber fsm_search(Relation rel, uint8 min_cat);
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
return size;
} /******** Public API ********/
/* /*
* GetPageWithFreeSpace - try to find a page in the given relation with * GetPageWithFreeSpace - try to find a page in the given relation with
...@@ -262,1608 +140,668 @@ FreeSpaceShmemSize(void) ...@@ -262,1608 +140,668 @@ FreeSpaceShmemSize(void)
* extend the relation. * extend the relation.
*/ */
BlockNumber BlockNumber
GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded) GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
{ {
FSMRelation *fsmrel; uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
BlockNumber freepage; return fsm_search(rel, min_cat);
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/*
* We always add a rel to the hashtable when it is inquired about.
*/
fsmrel = create_fsm_rel(rel);
/*
* Update the moving average of space requests. This code implements an
* exponential moving average with an equivalent period of about 63
* requests. Ignore silly requests, however, to ensure that the average
* stays sane.
*/
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{
int cur_avg = (int) fsmrel->avgRequest;
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->avgRequest = (Size) cur_avg;
}
freepage = find_free_space(fsmrel, spaceNeeded);
LWLockRelease(FreeSpaceLock);
return freepage;
} }
/* /*
* RecordAndGetPageWithFreeSpace - update info about a page and try again. * RecordAndGetPageWithFreeSpace - update info about a page and try again.
* *
* We provide this combo form, instead of a separate Record operation, * We provide this combo form to save some locking overhead, compared to
* to save one lock and hash table lookup cycle. * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
* also some effort to return a page close to the old page; if there's a
* page with enough free space on the same FSM page where the old page
* is located, it is preferred.
*/ */
BlockNumber BlockNumber
RecordAndGetPageWithFreeSpace(RelFileNode *rel, RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Size oldSpaceAvail,
Size spaceNeeded)
{ {
FSMRelation *fsmrel; int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
BlockNumber freepage; int search_cat = fsm_space_needed_to_cat(spaceNeeded);
FSMAddress addr;
uint16 slot;
int search_slot;
/* Sanity check: ensure spaceAvail will fit into OffsetNumber */ /* Get the location of the FSM byte representing the heap block */
AssertArg(oldSpaceAvail < BLCKSZ); addr = fsm_get_location(oldPage, &slot);
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
/* /*
* We always add a rel to the hashtable when it is inquired about. * If fsm_set_and_search found a suitable new block, return that.
* Otherwise, search as usual.
*/ */
fsmrel = create_fsm_rel(rel); if (search_slot != -1)
return fsm_get_heap_blk(addr, search_slot);
/* Do the Record */
fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
/*
* Update the moving average of space requests, same as in
* GetPageWithFreeSpace.
*/
if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
{
int cur_avg = (int) fsmrel->avgRequest;
cur_avg += ((int) spaceNeeded - cur_avg) / 32;
fsmrel->avgRequest = (Size) cur_avg;
}
/* Do the Get */
freepage = find_free_space(fsmrel, spaceNeeded);
LWLockRelease(FreeSpaceLock);
return freepage;
}
/*
* GetAvgFSMRequestSize - get average FSM request size for a relation.
*
* If the relation is not known to FSM, return a default value.
*/
Size
GetAvgFSMRequestSize(RelFileNode *rel)
{
Size result;
FSMRelation *fsmrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
result = fsmrel->avgRequest;
else else
result = INITIAL_AVERAGE; return fsm_search(rel, search_cat);
LWLockRelease(FreeSpaceLock);
return result;
} }
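To make the call pattern concrete, here is a simplified, purely in-memory model (not backend code: free space is tracked in plain bytes, with no categories, locking, or relation extension). The caller keeps reporting the space it actually found on the page it was handed and asks for another candidate in the same call, which is the pattern the combo form above is meant to serve:

#include <stdio.h>

#define NPAGES 4

/* toy stand-in for the FSM: free bytes remembered for each heap page */
static unsigned freebytes[NPAGES] = {40, 500, 120, 900};

static int
get_page_with_free_space(unsigned needed)
{
	int		i;

	for (i = 0; i < NPAGES; i++)
		if (freebytes[i] >= needed)
			return i;
	return -1;
}

/* record what was really found on oldpage, then search again in one step */
static int
record_and_get_page_with_free_space(int oldpage, unsigned oldavail,
									unsigned needed)
{
	freebytes[oldpage] = oldavail;
	return get_page_with_free_space(needed);
}

int
main(void)
{
	unsigned	needed = 300;
	int			page = get_page_with_free_space(needed);

	while (page != -1)
	{
		/* pretend the recorded value for page 1 was optimistic */
		unsigned	actually_free = (page == 1) ? 80 : freebytes[page];

		if (actually_free >= needed)
		{
			printf("inserting %u bytes on page %d\n", needed, page);
			return 0;
		}
		page = record_and_get_page_with_free_space(page, actually_free, needed);
	}
	printf("no page found; extend the relation\n");
	return 0;
}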
/* /*
* RecordRelationFreeSpace - record available-space info about a relation. * RecordPageWithFreeSpace - update info about a page.
*
* Any pre-existing info about the relation is assumed obsolete and discarded.
*
* interestingPages is the total number of pages in the relation that have
* at least threshold free space; nPages is the number actually reported in
* pageSpaces[] (may be less --- in particular, callers typically clamp their
* space usage to MaxFSMPages).
* *
* The given pageSpaces[] array must be sorted in order by blkno. Note that * Note that if the new spaceAvail value is higher than the old value stored
* the FSM is at liberty to discard some or all of the data. * in the FSM, the space might not become visible to searchers until the next
* FreeSpaceMapVacuum call, which updates the upper level pages.
*/ */
void void
RecordRelationFreeSpace(RelFileNode *rel, RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
BlockNumber interestingPages,
int nPages,
FSMPageData *pageSpaces)
{ {
FSMRelation *fsmrel; int new_cat = fsm_space_avail_to_cat(spaceAvail);
FSMAddress addr;
uint16 slot;
/* Limit nPages to something sane */ /* Get the location of the FSM byte representing the heap block */
if (nPages < 0) addr = fsm_get_location(heapBlk, &slot);
nPages = 0;
else if (nPages > MaxFSMPages)
nPages = MaxFSMPages;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); fsm_set_and_search(rel, addr, slot, new_cat, 0);
/*
* Note we don't record info about a relation unless there's already an
* FSM entry for it, implying someone has done GetPageWithFreeSpace for
* it. Inactive rels thus will not clutter the map simply by being
* vacuumed.
*/
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int curAlloc;
int curAllocPages;
FSMPageData *newLocation;
curAlloc = realloc_fsm_rel(fsmrel, interestingPages, false);
curAllocPages = curAlloc * CHUNKPAGES;
/*
* If the data fits in our current allocation, just copy it; otherwise
* must compress.
*/
newLocation = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
if (nPages <= curAllocPages)
{
int i;
for (i = 0; i < nPages; i++)
{
BlockNumber page = FSMPageGetPageNum(&pageSpaces[i]);
/* Check caller provides sorted data */
if (i > 0 && page <= FSMPageGetPageNum(&pageSpaces[i - 1]))
elog(ERROR, "free-space data is not in page order");
*newLocation = pageSpaces[i];
newLocation++;
}
fsmrel->storedPages = nPages;
}
else
{
pack_incoming_pages(newLocation, curAllocPages,
pageSpaces, nPages);
fsmrel->storedPages = curAllocPages;
}
}
LWLockRelease(FreeSpaceLock);
} }
/* /*
* GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes * GetRecordedFreeSpace - return the amount of free space on a particular page,
* according to the FSM.
*/ */
BlockNumber Size
GetFreeIndexPage(RelFileNode *rel) GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
{ {
FSMRelation *fsmrel; FSMAddress addr;
BlockNumber freepage; uint16 slot;
Buffer buf;
uint8 cat;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* Get the location of the FSM byte representing the heap block */
addr = fsm_get_location(heapBlk, &slot);
/* buf = fsm_readbuf(rel, addr, false);
* We always add a rel to the hashtable when it is inquired about. if (!BufferIsValid(buf))
*/ return 0;
fsmrel = create_fsm_rel(rel); cat = fsm_get_avail(BufferGetPage(buf), slot);
ReleaseBuffer(buf);
freepage = find_index_free_space(fsmrel); return fsm_space_cat_to_avail(cat);
LWLockRelease(FreeSpaceLock);
return freepage;
} }
/* /*
* RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
*
* The caller must hold AccessExclusiveLock on the relation, to ensure
* that other backends receive the relcache invalidation event that this
* function sends, before accessing the FSM again.
*
* nblocks is the new size of the heap.
*/ */
void void
RecordIndexFreeSpace(RelFileNode *rel, FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
BlockNumber interestingPages,
int nPages,
BlockNumber *pages)
{ {
FSMRelation *fsmrel; BlockNumber new_nfsmblocks;
FSMAddress first_removed_address;
uint16 first_removed_slot;
Buffer buf;
/* Limit nPages to something sane */ RelationOpenSmgr(rel);
if (nPages < 0)
nPages = 0;
else if (nPages > MaxFSMPages)
nPages = MaxFSMPages;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* Get the location in the FSM of the first removed heap block */
first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
/* /*
* Note we don't record info about a relation unless there's already an * Zero out the tail of the last remaining FSM page. If the slot
* FSM entry for it, implying someone has done GetFreeIndexPage for it. * representing the first removed heap block is at a page boundary, as
* Inactive rels thus will not clutter the map simply by being vacuumed. * the first slot on the FSM page that first_removed_address points to,
* we can just truncate that page altogether.
*/ */
fsmrel = lookup_fsm_rel(rel); if (first_removed_slot > 0)
if (fsmrel)
{ {
int curAlloc; buf = fsm_readbuf(rel, first_removed_address, false);
int curAllocPages; if (!BufferIsValid(buf))
int i; return; /* nothing to do; the FSM was already smaller */
IndexFSMPageData *newLocation; LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
curAlloc = realloc_fsm_rel(fsmrel, interestingPages, true); new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
curAllocPages = curAlloc * INDEXCHUNKPAGES; }
else
{
new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
return; /* nothing to do; the FSM was already smaller */
}
/* /* Truncate the unused FSM pages */
* If the data fits in our current allocation, just copy it; otherwise smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp);
* must compress. But compression is easy: we merely forget extra
* pages.
*/
newLocation = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
if (nPages > curAllocPages)
nPages = curAllocPages;
for (i = 0; i < nPages; i++) /*
{ * FSM truncations are WAL-logged, because we must never return a block
BlockNumber page = pages[i]; * that doesn't exist in the heap, not even if we crash before the FSM
* truncation has made it to disk. smgrtruncate() writes its own WAL
* record, but that's not enough to zero out the last remaining FSM page.
* (if we didn't need to zero out anything above, we can skip this)
*/
if (!rel->rd_istemp && !InRecovery && first_removed_slot != 0)
{
xl_fsm_truncate xlrec;
XLogRecData rdata;
XLogRecPtr recptr;
/* Check caller provides sorted data */ xlrec.node = rel->rd_node;
if (i > 0 && page <= pages[i - 1]) xlrec.nheapblocks = nblocks;
elog(ERROR, "free-space data is not in page order");
IndexFSMPageSetPageNum(newLocation, page);
newLocation++;
}
fsmrel->storedPages = nPages;
}
LWLockRelease(FreeSpaceLock);
}
/* rdata.data = (char *) &xlrec;
* FreeSpaceMapTruncateRel - adjust for truncation of a relation. rdata.len = sizeof(xl_fsm_truncate);
* rdata.buffer = InvalidBuffer;
* We need to delete any stored data past the new relation length, so that rdata.next = NULL;
* we don't bogusly return removed block numbers.
*/
void
FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks)
{
FSMRelation *fsmrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); recptr = XLogInsert(RM_FREESPACE_ID, XLOG_FSM_TRUNCATE, &rdata);
fsmrel = lookup_fsm_rel(rel);
if (fsmrel)
{
int pageIndex;
/* Use lookup to locate first entry >= nblocks */ /*
(void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex); * Flush, because otherwise the truncation of the main relation
/* Delete all such entries */ * might hit the disk before the WAL record of truncating the
fsmrel->storedPages = pageIndex; * FSM is flushed. If we crashed during that window, we'd be
/* XXX should we adjust rel's interestingPages and sumRequests? */ * left with a truncated heap, without a truncated FSM.
*/
XLogFlush(recptr);
} }
LWLockRelease(FreeSpaceLock);
/*
* Need to invalidate the relcache entry, because rd_fsm_nblocks_cache
* seen by other backends is no longer valid.
*/
if (!InRecovery)
CacheInvalidateRelcache(rel);
rel->rd_fsm_nblocks_cache = new_nfsmblocks;
} }
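The branch above depends on where the first removed heap block lands within its bottom-level FSM page. A small standalone sketch of that arithmetic, using an assumed fan-out of 4 slots per FSM page (real pages hold far more):

#include <stdio.h>

#define SLOTS_PER_FSM_PAGE 4	/* tiny assumed fan-out, for readable numbers */

int
main(void)
{
	unsigned	new_heap_sizes[] = {0, 5, 8, 13};
	int			i;

	for (i = 0; i < 4; i++)
	{
		unsigned	nblocks = new_heap_sizes[i];
		unsigned	page = nblocks / SLOTS_PER_FSM_PAGE;	/* first removed FSM page */
		unsigned	slot = nblocks % SLOTS_PER_FSM_PAGE;	/* first removed slot */

		if (slot > 0)
			printf("heap truncated to %2u blocks: keep bottom-level FSM page %u, zero its slots from %u onwards\n",
				   nblocks, page, slot);
		else
			printf("heap truncated to %2u blocks: bottom-level FSM page %u and everything after it can go\n",
				   nblocks, page);
	}
	return 0;
}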
/* /*
* FreeSpaceMapForgetRel - forget all about a relation. * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
*
* This is called when a relation is deleted. Although we could just let
* the rel age out of the map, it's better to reclaim and reuse the space
* sooner.
*/ */
void void
FreeSpaceMapForgetRel(RelFileNode *rel) FreeSpaceMapVacuum(Relation rel)
{ {
FSMRelation *fsmrel; bool dummy;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /*
fsmrel = lookup_fsm_rel(rel); * Traverse the tree in depth-first order. The tree is stored physically
if (fsmrel) * in depth-first order, so this should be pretty I/O efficient.
delete_fsm_rel(fsmrel); */
LWLockRelease(FreeSpaceLock); fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
} }
/******** Internal routines ********/
/* /*
* FreeSpaceMapForgetDatabase - forget all relations of a database. * Return the category corresponding to x bytes of free space
*
* This is called during DROP DATABASE. As above, might as well reclaim
* map space sooner instead of later.
*/ */
void static uint8
FreeSpaceMapForgetDatabase(Oid dbid) fsm_space_avail_to_cat(Size avail)
{ {
FSMRelation *fsmrel, int cat;
*nextrel;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); Assert(avail < BLCKSZ);
for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = nextrel)
{
nextrel = fsmrel->nextUsage; /* in case we delete it */
if (fsmrel->key.dbNode == dbid)
delete_fsm_rel(fsmrel);
}
LWLockRelease(FreeSpaceLock);
}
/* if (avail >= MaxFSMRequestSize)
* PrintFreeSpaceMapStatistics - print statistics about FSM contents return 255;
*
* The info is sent to ereport() with the specified message level. This is
* intended for use during VACUUM.
*/
void
PrintFreeSpaceMapStatistics(int elevel)
{
FSMRelation *fsmrel;
int storedPages = 0;
double sumRequests = 0;
int numRels;
double needed;
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); cat = avail / FSM_CAT_STEP;
/* /*
* Count total space actually used, as well as the unclamped request total * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
* more.
*/ */
for (fsmrel = FreeSpaceMap->firstRel; if (cat > 254)
fsmrel != NULL; cat = 254;
fsmrel = fsmrel->nextPhysical)
{
storedPages += fsmrel->storedPages;
sumRequests += fsm_calc_request_unclamped(fsmrel);
}
/* Copy other stats before dropping lock */ return (uint8) cat;
numRels = FreeSpaceMap->numRels;
LWLockRelease(FreeSpaceLock);
/* Convert stats to actual number of page slots needed */
needed = (sumRequests + numRels) * CHUNKPAGES;
ereport(elevel,
(errmsg("free space map contains %d pages in %d relations",
storedPages, numRels),
errdetail("A total of %.0f page slots are in use (including overhead).\n"
"%.0f page slots are required to track all free space.\n"
"Current limits are: %d page slots, %d relations, using %.0f kB.",
Min(needed, MaxFSMPages),
needed,
MaxFSMPages, MaxFSMRelations,
(double) FreeSpaceShmemSize() / 1024.0)));
CheckFreeSpaceMapStatistics(NOTICE, numRels, needed);
/* Print to server logs too because is deals with a config variable. */
CheckFreeSpaceMapStatistics(LOG, numRels, needed);
} }
static void /*
CheckFreeSpaceMapStatistics(int elevel, int numRels, double needed) * Return the lower bound of the range of free space represented by given
* category.
*/
static Size
fsm_space_cat_to_avail(uint8 cat)
{ {
if (numRels == MaxFSMRelations) /* The highest category represents exactly MaxFSMRequestSize bytes. */
ereport(elevel, if (cat == 255)
(errmsg("max_fsm_relations(%d) equals the number of relations checked", return MaxFSMRequestSize;
MaxFSMRelations), else
errhint("You have at least %d relations. " return cat * FSM_CAT_STEP;
"Consider increasing the configuration parameter \"max_fsm_relations\".",
numRels)));
else if (needed > MaxFSMPages)
ereport(elevel,
(errmsg("number of page slots needed (%.0f) exceeds max_fsm_pages (%d)",
needed, MaxFSMPages),
errhint("Consider increasing the configuration parameter \"max_fsm_pages\" "
"to a value over %.0f.", needed)));
} }
/* /*
* DumpFreeSpaceMap - dump contents of FSM into a disk file for later reload * Which category does a page need to have, to accommodate x bytes of data?
* * While fsm_space_avail_to_cat() rounds down, this needs to round up.
* This is expected to be called during database shutdown, after updates to
* the FSM have stopped. We lock the FreeSpaceLock but that's purely pro
* forma --- if anyone else is still accessing FSM, there's a problem.
*/ */
void static uint8
DumpFreeSpaceMap(int code, Datum arg) fsm_space_needed_to_cat(Size needed)
{ {
FILE *fp; int cat;
FsmCacheFileHeader header;
FSMRelation *fsmrel;
/* Try to create file */
unlink(FSM_CACHE_FILENAME); /* in case it exists w/wrong permissions */
fp = AllocateFile(FSM_CACHE_FILENAME, PG_BINARY_W);
if (fp == NULL)
{
elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME);
return;
}
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
/* Write file header */
MemSet(&header, 0, sizeof(header));
strcpy(header.label, FSM_CACHE_LABEL);
header.endian = FSM_CACHE_ENDIAN;
header.version = FSM_CACHE_VERSION;
header.numRels = FreeSpaceMap->numRels;
if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header))
goto write_failed;
/* For each relation, in order from least to most recently used... */
for (fsmrel = FreeSpaceMap->usageListTail;
fsmrel != NULL;
fsmrel = fsmrel->priorUsage)
{
FsmCacheRelHeader relheader;
int nPages;
/* Write relation header */
MemSet(&relheader, 0, sizeof(relheader));
relheader.key = fsmrel->key;
relheader.isIndex = fsmrel->isIndex;
relheader.avgRequest = fsmrel->avgRequest;
relheader.interestingPages = fsmrel->interestingPages;
relheader.storedPages = fsmrel->storedPages;
if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader))
goto write_failed;
/* Write the per-page data directly from the arena */
nPages = fsmrel->storedPages;
if (nPages > 0)
{
Size len;
char *data;
if (fsmrel->isIndex)
len = nPages * sizeof(IndexFSMPageData);
else
len = nPages * sizeof(FSMPageData);
data = (char *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
if (fwrite(data, 1, len, fp) != len)
goto write_failed;
}
}
/* Clean up */
LWLockRelease(FreeSpaceLock);
if (FreeFile(fp))
{
elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME);
/* Remove busted cache file */
unlink(FSM_CACHE_FILENAME);
}
return; /* Can't ask for more space than the highest category represents */
if (needed > MaxFSMRequestSize)
elog(ERROR, "invalid FSM request size %d", needed);
write_failed: if (needed == 0)
elog(LOG, "could not write \"%s\": %m", FSM_CACHE_FILENAME); return 1;
/* Clean up */ cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
LWLockRelease(FreeSpaceLock);
FreeFile(fp); if (cat > 255)
cat = 255;
/* Remove busted cache file */ return (uint8) cat;
unlink(FSM_CACHE_FILENAME);
} }
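A standalone sketch of the one-byte category encoding these three functions implement. The constants here are assumptions picked only to make the arithmetic concrete (FSM_CAT_STEP taken as BLCKSZ / 256, and a stand-in ceiling for MaxFSMRequestSize); the rounding directions mirror the functions above:

#include <stdio.h>

#define BLCKSZ				8192				/* assumed block size */
#define FSM_CAT_STEP		(BLCKSZ / 256)		/* 32 bytes per category */
#define MAX_FSM_REQUEST		(BLCKSZ - 64)		/* stand-in for MaxFSMRequestSize */

/* rounds down: how much free space does a stored category guarantee? */
static unsigned
cat_to_avail(unsigned char cat)
{
	return (cat == 255) ? MAX_FSM_REQUEST : cat * FSM_CAT_STEP;
}

/* rounds down: which category is stored for 'avail' bytes of free space? */
static unsigned char
avail_to_cat(unsigned avail)
{
	unsigned	cat;

	if (avail >= MAX_FSM_REQUEST)
		return 255;
	cat = avail / FSM_CAT_STEP;
	return (cat > 254) ? 254 : (unsigned char) cat;
}

/*
 * rounds up: which category must a page have to fit 'needed' bytes?
 * (the real function errors out above MaxFSMRequestSize; this just clamps)
 */
static unsigned char
needed_to_cat(unsigned needed)
{
	unsigned	cat;

	if (needed == 0)
		return 1;
	cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
	return (cat > 255) ? 255 : (unsigned char) cat;
}

int
main(void)
{
	unsigned	examples[] = {0, 1, 31, 32, 100, 4000, BLCKSZ - 64};
	int			i;

	for (i = 0; i < 7; i++)
	{
		unsigned	n = examples[i];

		printf("%4u bytes: stored as category %3u (guarantees %4u), need category >= %3u to fit\n",
			   n, avail_to_cat(n), cat_to_avail(avail_to_cat(n)),
			   needed_to_cat(n));
	}
	return 0;
}

The asymmetry matters: a recorded value can under-report by up to FSM_CAT_STEP - 1 bytes, while searching with the rounded-up category guarantees that a returned page had at least the requested number of bytes free when it was last recorded.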
/* /*
* LoadFreeSpaceMap - load contents of FSM from a disk file * Returns the physical block number of an FSM page
*
* This is expected to be called during database startup, before any FSM
* updates begin. We lock the FreeSpaceLock but that's purely pro
* forma --- if anyone else is accessing FSM yet, there's a problem.
*
* Notes: no complaint is issued if no cache file is found. If the file is
* found, it is deleted after reading. Thus, if we crash without a clean
* shutdown, the next cycle of life starts with no FSM data. To do otherwise,
* we'd need to do significantly more validation in this routine, because of
* the likelihood that what is in the dump file would be out-of-date, eg
* there might be entries for deleted or truncated rels.
*/ */
void static BlockNumber
LoadFreeSpaceMap(void) fsm_logical_to_physical(FSMAddress addr)
{ {
FILE *fp; BlockNumber pages;
FsmCacheFileHeader header; int leafno;
int relno; int l;
/* Try to open file */
fp = AllocateFile(FSM_CACHE_FILENAME, PG_BINARY_R);
if (fp == NULL)
{
if (errno != ENOENT)
elog(LOG, "could not read \"%s\": %m", FSM_CACHE_FILENAME);
return;
}
LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /*
* Calculate the logical page number of the first leaf page below the
* given page.
*/
leafno = addr.logpageno;
for (l = 0; l < addr.level; l++)
leafno *= SlotsPerFSMPage;
/* Read and verify file header */ /* Count upper level nodes required to address the leaf page */
if (fread(&header, 1, sizeof(header), fp) != sizeof(header) || pages = 0;
strcmp(header.label, FSM_CACHE_LABEL) != 0 || for (l = 0; l < FSM_TREE_DEPTH; l++)
header.endian != FSM_CACHE_ENDIAN ||
header.version != FSM_CACHE_VERSION ||
header.numRels < 0)
{ {
elog(LOG, "bogus file header in \"%s\"", FSM_CACHE_FILENAME); pages += leafno + 1;
goto read_failed; leafno /= SlotsPerFSMPage;
} }
/* For each relation, in order from least to most recently used... */ /*
for (relno = 0; relno < header.numRels; relno++) * If the page we were asked for wasn't at the bottom level, subtract
{ * the additional lower level pages we counted above.
FsmCacheRelHeader relheader; */
Size len; pages -= addr.level;
char *data;
FSMRelation *fsmrel;
int nPages;
int curAlloc;
int curAllocPages;
/* Read and verify relation header, as best we can */
if (fread(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader) ||
(relheader.isIndex != false && relheader.isIndex != true) ||
relheader.avgRequest >= BLCKSZ ||
relheader.storedPages < 0)
{
elog(LOG, "bogus rel header in \"%s\"", FSM_CACHE_FILENAME);
goto read_failed;
}
/* Read the per-page data */
nPages = relheader.storedPages;
if (relheader.isIndex)
len = nPages * sizeof(IndexFSMPageData);
else
len = nPages * sizeof(FSMPageData);
data = (char *) palloc(len);
if (fread(data, 1, len, fp) != len)
{
elog(LOG, "premature EOF in \"%s\"", FSM_CACHE_FILENAME);
pfree(data);
goto read_failed;
}
/*
* Okay, create the FSM entry and insert data into it. Since the rels
* were stored in reverse usage order, at the end of the loop they
* will be correctly usage-ordered in memory; and if MaxFSMRelations
* is less than it used to be, we will correctly drop the least
* recently used ones.
*/
fsmrel = create_fsm_rel(&relheader.key);
fsmrel->avgRequest = relheader.avgRequest;
curAlloc = realloc_fsm_rel(fsmrel, relheader.interestingPages,
relheader.isIndex);
if (relheader.isIndex)
{
IndexFSMPageData *newLocation;
curAllocPages = curAlloc * INDEXCHUNKPAGES;
/*
* If the data fits in our current allocation, just copy it;
* otherwise must compress. But compression is easy: we merely
* forget extra pages.
*/
newLocation = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
if (nPages > curAllocPages)
nPages = curAllocPages;
memcpy(newLocation, data, nPages * sizeof(IndexFSMPageData));
fsmrel->storedPages = nPages;
}
else
{
FSMPageData *newLocation;
curAllocPages = curAlloc * CHUNKPAGES;
/*
* If the data fits in our current allocation, just copy it;
* otherwise must compress.
*/
newLocation = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
if (nPages <= curAllocPages)
{
memcpy(newLocation, data, nPages * sizeof(FSMPageData));
fsmrel->storedPages = nPages;
}
else
{
pack_existing_pages(newLocation, curAllocPages,
(FSMPageData *) data, nPages);
fsmrel->storedPages = curAllocPages;
}
}
pfree(data);
}
read_failed:
/* Clean up */
LWLockRelease(FreeSpaceLock);
FreeFile(fp);
/* Remove cache file before it can become stale; see notes above */ /* Turn the page count into 0-based block number */
unlink(FSM_CACHE_FILENAME); return pages - 1;
} }
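The same arithmetic as fsm_logical_to_physical() above, runnable in isolation with a toy fan-out of 4 slots per FSM page so that the resulting depth-first layout (root first, then each level-1 page followed by its leaves) is easy to check by hand:

#include <stdio.h>

#define SLOTS_PER_FSM_PAGE	4	/* tiny assumed fan-out; real pages hold thousands of slots */
#define FSM_TREE_DEPTH		3
#define FSM_ROOT_LEVEL		(FSM_TREE_DEPTH - 1)

typedef struct
{
	int		level;
	int		logpageno;
} FSMAddress;

static int
logical_to_physical(FSMAddress addr)
{
	int		leafno = addr.logpageno;
	int		pages = 0;
	int		l;

	/* logical number of the first leaf page underneath the given page */
	for (l = 0; l < addr.level; l++)
		leafno *= SLOTS_PER_FSM_PAGE;

	/* at every level, count the pages up to and including that leaf's ancestor */
	for (l = 0; l < FSM_TREE_DEPTH; l++)
	{
		pages += leafno + 1;
		leafno /= SLOTS_PER_FSM_PAGE;
	}

	/* a non-leaf address over-counted one page per level below it */
	pages -= addr.level;

	return pages - 1;			/* 0-based physical block number */
}

int
main(void)
{
	FSMAddress	probes[] = {
		{FSM_ROOT_LEVEL, 0}, {1, 0}, {0, 0}, {0, 3}, {1, 1}, {0, 4}, {0, 7}
	};
	int			i;

	for (i = 0; i < 7; i++)
		printf("level %d, logical page %d -> physical block %d\n",
			   probes[i].level, probes[i].logpageno,
			   logical_to_physical(probes[i]));
	return 0;
}

With a fan-out of 4 this prints blocks 0, 1, 2, 5, 6, 7 and 10: the root is block 0, the first level-1 page and its four leaves occupy blocks 1 to 5, the second level-1 page starts at block 6, and so on. The block number of any node can therefore be computed from its logical address alone, with no separate metadata.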
/*
* Internal routines. These all assume the caller holds the FreeSpaceLock.
*/
/* /*
* Lookup a relation in the hash table. If not present, return NULL. * Return the FSM location corresponding to given heap block.
*
* The relation's position in the LRU list is not changed.
*/ */
static FSMRelation * static FSMAddress
lookup_fsm_rel(RelFileNode *rel) fsm_get_location(BlockNumber heapblk, uint16 *slot)
{ {
FSMRelation *fsmrel; FSMAddress addr;
fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash, addr.level = FSM_BOTTOM_LEVEL;
(void *) rel, addr.logpageno = heapblk / SlotsPerFSMPage;
HASH_FIND, *slot = heapblk % SlotsPerFSMPage;
NULL);
if (!fsmrel)
return NULL;
return fsmrel; return addr;
} }
/* /*
* Lookup a relation in the hash table, creating an entry if not present. * Return the heap block number corresponding to given location in the FSM.
*
* On successful lookup, the relation is moved to the front of the LRU list.
*/ */
static FSMRelation * static BlockNumber
create_fsm_rel(RelFileNode *rel) fsm_get_heap_blk(FSMAddress addr, uint16 slot)
{ {
FSMRelation *fsmrel; Assert(addr.level == FSM_BOTTOM_LEVEL);
bool found; return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
(void *) rel,
HASH_ENTER,
&found);
if (!found)
{
/* New hashtable entry, initialize it (hash_search set the key) */
fsmrel->isIndex = false; /* until we learn different */
fsmrel->avgRequest = INITIAL_AVERAGE;
fsmrel->interestingPages = 0;
fsmrel->firstChunk = -1; /* no space allocated */
fsmrel->storedPages = 0;
fsmrel->nextPage = 0;
/* Discard lowest-priority existing rel, if we are over limit */
if (FreeSpaceMap->numRels >= MaxFSMRelations)
delete_fsm_rel(FreeSpaceMap->usageListTail);
/* Add new entry at front of LRU list */
link_fsm_rel_usage(fsmrel);
fsmrel->nextPhysical = NULL; /* not in physical-storage list */
fsmrel->priorPhysical = NULL;
FreeSpaceMap->numRels++;
/* sumRequests is unchanged because request must be zero */
}
else
{
/* Existing entry, move to front of LRU list */
if (fsmrel->priorUsage != NULL)
{
unlink_fsm_rel_usage(fsmrel);
link_fsm_rel_usage(fsmrel);
}
}
return fsmrel;
} }
/* /*
* Remove an existing FSMRelation entry. * Given a logical address of a child page, get the logical page number of
* the parent, and the slot within the parent corresponding to the child.
*/ */
static void static FSMAddress
delete_fsm_rel(FSMRelation *fsmrel) fsm_get_parent(FSMAddress child, uint16 *slot)
{ {
FSMRelation *result; FSMAddress parent;
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
unlink_fsm_rel_usage(fsmrel);
unlink_fsm_rel_storage(fsmrel);
FreeSpaceMap->numRels--;
result = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
(void *) &(fsmrel->key),
HASH_REMOVE,
NULL);
if (!result)
elog(ERROR, "FreeSpaceMap hashtable corrupted");
}
/* Assert(child.level < FSM_ROOT_LEVEL);
* Reallocate space for a FSMRelation.
*
* This is shared code for RecordRelationFreeSpace and RecordIndexFreeSpace.
* The return value is the actual new allocation, in chunks.
*/
static int
realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages,
bool isIndex)
{
int myRequest;
int myAlloc;
int curAlloc;
/* parent.level = child.level + 1;
* Delete any existing entries, and update request status. parent.logpageno = child.logpageno / SlotsPerFSMPage;
*/ *slot = child.logpageno % SlotsPerFSMPage;
fsmrel->storedPages = 0;
FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
fsmrel->interestingPages = interestingPages;
fsmrel->isIndex = isIndex;
myRequest = fsm_calc_request(fsmrel);
FreeSpaceMap->sumRequests += myRequest;
myAlloc = fsm_calc_target_allocation(myRequest);
/* return parent;
* Need to reallocate space if (a) my target allocation is more than my
* current allocation, AND (b) my actual immediate need (myRequest+1
* chunks) is more than my current allocation. Otherwise just store the
* new data in-place.
*/
curAlloc = fsm_current_allocation(fsmrel);
if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && interestingPages > 0)
{
/* Remove entry from storage list, and compact */
unlink_fsm_rel_storage(fsmrel);
compact_fsm_storage();
/* Reattach to end of storage list */
link_fsm_rel_storage(fsmrel);
/* And allocate storage */
fsmrel->firstChunk = FreeSpaceMap->usedChunks;
FreeSpaceMap->usedChunks += myAlloc;
curAlloc = myAlloc;
/* Watch out for roundoff error */
if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
{
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
}
}
return curAlloc;
} }
/* /*
* Link a FSMRelation into the LRU list (always at the head). * Given a logical address of a parent page, and a slot number get the
* logical address of the corresponding child page.
*/ */
static void static FSMAddress
link_fsm_rel_usage(FSMRelation *fsmrel) fsm_get_child(FSMAddress parent, uint16 slot)
{ {
fsmrel->priorUsage = NULL; FSMAddress child;
fsmrel->nextUsage = FreeSpaceMap->usageList;
FreeSpaceMap->usageList = fsmrel;
if (fsmrel->nextUsage != NULL)
fsmrel->nextUsage->priorUsage = fsmrel;
else
FreeSpaceMap->usageListTail = fsmrel;
}
/* Assert(parent.level > FSM_BOTTOM_LEVEL);
* Delink a FSMRelation from the LRU list.
*/
static void
unlink_fsm_rel_usage(FSMRelation *fsmrel)
{
if (fsmrel->priorUsage != NULL)
fsmrel->priorUsage->nextUsage = fsmrel->nextUsage;
else
FreeSpaceMap->usageList = fsmrel->nextUsage;
if (fsmrel->nextUsage != NULL)
fsmrel->nextUsage->priorUsage = fsmrel->priorUsage;
else
FreeSpaceMap->usageListTail = fsmrel->priorUsage;
/* child.level = parent.level - 1;
* We don't bother resetting fsmrel's links, since it's about to be child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
* deleted or relinked at the head.
*/
}
/* return child;
* Link a FSMRelation into the storage-order list (always at the tail).
*/
static void
link_fsm_rel_storage(FSMRelation *fsmrel)
{
fsmrel->nextPhysical = NULL;
fsmrel->priorPhysical = FreeSpaceMap->lastRel;
if (FreeSpaceMap->lastRel != NULL)
FreeSpaceMap->lastRel->nextPhysical = fsmrel;
else
FreeSpaceMap->firstRel = fsmrel;
FreeSpaceMap->lastRel = fsmrel;
} }
/* /*
* Delink a FSMRelation from the storage-order list, if it's in it. * Read a FSM page.
*
* If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
* true, the FSM file is extended.
*/ */
static void static Buffer
unlink_fsm_rel_storage(FSMRelation *fsmrel) fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
{ {
if (fsmrel->priorPhysical != NULL || FreeSpaceMap->firstRel == fsmrel) BlockNumber blkno = fsm_logical_to_physical(addr);
RelationOpenSmgr(rel);
if (rel->rd_fsm_nblocks_cache == InvalidBlockNumber ||
rel->rd_fsm_nblocks_cache <= blkno)
rel->rd_fsm_nblocks_cache = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
if (blkno >= rel->rd_fsm_nblocks_cache)
{ {
if (fsmrel->priorPhysical != NULL) if (extend)
fsmrel->priorPhysical->nextPhysical = fsmrel->nextPhysical; fsm_extend(rel, blkno + 1);
else else
FreeSpaceMap->firstRel = fsmrel->nextPhysical; return InvalidBuffer;
if (fsmrel->nextPhysical != NULL)
fsmrel->nextPhysical->priorPhysical = fsmrel->priorPhysical;
else
FreeSpaceMap->lastRel = fsmrel->priorPhysical;
} }
/* mark as not in list, since we may not put it back immediately */ return ReadBufferWithFork(rel, FSM_FORKNUM, blkno);
fsmrel->nextPhysical = NULL;
fsmrel->priorPhysical = NULL;
/* Also mark it as having no storage */
fsmrel->firstChunk = -1;
fsmrel->storedPages = 0;
} }
/* /*
* Look to see if a page with at least the specified amount of space is * Ensure that the FSM fork is at least n_fsmblocks long, extending
* available in the given FSMRelation. If so, return its page number, * it if necessary with empty pages. And by empty, I mean pages filled
* and advance the nextPage counter so that the next inquiry will return * with zeros, meaning there's no free space.
* a different page if possible; also update the entry to show that the
* requested space is not available anymore. Return InvalidBlockNumber
* if no success.
*/ */
static BlockNumber static void
find_free_space(FSMRelation *fsmrel, Size spaceNeeded) fsm_extend(Relation rel, BlockNumber n_fsmblocks)
{ {
FSMPageData *info; BlockNumber n_fsmblocks_now;
int pagesToCheck, /* outer loop counter */ Page pg;
pageIndex; /* current page index */
if (fsmrel->isIndex)
elog(ERROR, "find_free_space called for an index relation");
info = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
pageIndex = fsmrel->nextPage;
/* Last operation may have left nextPage pointing past end */
if (pageIndex >= fsmrel->storedPages)
pageIndex = 0;
for (pagesToCheck = fsmrel->storedPages; pagesToCheck > 0; pagesToCheck--)
{
FSMPageData *page = info + pageIndex;
Size spaceAvail = FSMPageGetSpace(page);
/* Check this page */
if (spaceAvail >= spaceNeeded)
{
/*
* Found what we want --- adjust the entry, and update nextPage.
*/
FSMPageSetSpace(page, spaceAvail - spaceNeeded);
fsmrel->nextPage = pageIndex + 1;
return FSMPageGetPageNum(page);
}
/* Advance pageIndex, wrapping around if needed */
if (++pageIndex >= fsmrel->storedPages)
pageIndex = 0;
}
return InvalidBlockNumber; /* nothing found */ pg = (Page) palloc(BLCKSZ);
} PageInit(pg, BLCKSZ, 0);
/*
* As above, but for index case --- we only deal in whole pages.
*/
static BlockNumber
find_index_free_space(FSMRelation *fsmrel)
{
IndexFSMPageData *info;
BlockNumber result;
/* /*
* If isIndex isn't set, it could be that RecordIndexFreeSpace() has never * We use the relation extension lock to lock out other backends
* yet been called on this relation, and we're still looking at the * trying to extend the FSM at the same time. It also locks out
* default setting from create_fsm_rel(). If so, just act as though * extension of the main fork, unnecessarily, but extending the
* there's no space. * FSM happens seldom enough that it doesn't seem worthwhile to
* have a separate lock tag type for it.
*
* Note that another backend might have extended the relation
* before we get the lock.
*/ */
if (!fsmrel->isIndex) LockRelationForExtension(rel, ExclusiveLock);
n_fsmblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
while (n_fsmblocks_now < n_fsmblocks)
{ {
if (fsmrel->storedPages == 0) smgrextend(rel->rd_smgr, FSM_FORKNUM, n_fsmblocks_now,
return InvalidBlockNumber; (char *) pg, rel->rd_istemp);
elog(ERROR, "find_index_free_space called for a non-index relation"); n_fsmblocks_now++;
} }
/* UnlockRelationForExtension(rel, ExclusiveLock);
* For indexes, there's no need for the nextPage state variable; we just
* remove and return the first available page. (We could save cycles here
* by returning the last page, but it seems better to encourage re-use of
* lower-numbered pages.)
*/
if (fsmrel->storedPages <= 0)
return InvalidBlockNumber; /* no pages available */
info = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
result = IndexFSMPageGetPageNum(info);
fsmrel->storedPages--;
memmove(info, info + 1, fsmrel->storedPages * sizeof(IndexFSMPageData));
return result;
}
/*
* fsm_record_free_space - guts of RecordFreeSpace operation (now only
* provided as part of RecordAndGetPageWithFreeSpace).
*/
static void
fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
{
int pageIndex;
if (fsmrel->isIndex) pfree(pg);
elog(ERROR, "fsm_record_free_space called for an index relation");
if (lookup_fsm_page_entry(fsmrel, page, &pageIndex))
{
/* Found an existing entry for page; update it */
FSMPageData *info;
info = (FSMPageData *) /* update the cache with the up-to-date size */
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); rel->rd_fsm_nblocks_cache = n_fsmblocks_now;
info += pageIndex;
FSMPageSetSpace(info, spaceAvail);
}
else
{
/*
* No existing entry; ignore the call. We used to add the page to the
* FSM --- but in practice, if the page hasn't got enough space to
* satisfy the caller who's kicking it back to us, then it's probably
* uninteresting to everyone else as well.
*/
}
} }
/* /*
* Look for an entry for a specific page (block number) in a FSMRelation. * Set value in given FSM page and slot.
* Returns TRUE if a matching entry exists, else FALSE.
* *
* The output argument *outPageIndex is set to indicate where the entry exists * If minValue > 0, the updated page is also searched for a page with at
* (if TRUE result) or could be inserted (if FALSE result). * least minValue of free space. If one is found, its slot number is
* returned, -1 otherwise.
*/ */
static bool static int
lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
int *outPageIndex) uint8 newValue, uint8 minValue)
{ {
/* Check for empty relation */ Buffer buf;
if (fsmrel->storedPages <= 0) Page page;
{ int newslot = -1;
*outPageIndex = 0;
return false;
}
/* Do binary search */ buf = fsm_readbuf(rel, addr, true);
if (fsmrel->isIndex) LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
{
IndexFSMPageData *info;
int low,
high;
info = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
low = 0;
high = fsmrel->storedPages - 1;
while (low <= high)
{
int middle;
BlockNumber probe;
middle = low + (high - low) / 2; page = BufferGetPage(buf);
probe = IndexFSMPageGetPageNum(info + middle);
if (probe == page)
{
*outPageIndex = middle;
return true;
}
else if (probe < page)
low = middle + 1;
else
high = middle - 1;
}
*outPageIndex = low;
return false;
}
else
{
FSMPageData *info;
int low,
high;
info = (FSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
low = 0;
high = fsmrel->storedPages - 1;
while (low <= high)
{
int middle;
BlockNumber probe;
middle = low + (high - low) / 2; if (fsm_set_avail(page, slot, newValue))
probe = FSMPageGetPageNum(info + middle); MarkBufferDirty(buf);
if (probe == page)
{ if (minValue != 0)
*outPageIndex = middle; {
return true; /* Search while we still hold the lock */
} newslot = fsm_search_avail(buf, minValue,
else if (probe < page) addr.level == FSM_BOTTOM_LEVEL,
low = middle + 1; true);
else
high = middle - 1;
}
*outPageIndex = low;
return false;
} }
UnlockReleaseBuffer(buf);
return newslot;
} }
/* /*
* Re-pack the FSM storage arena, dropping data if necessary to meet the * Search the tree for a heap page with at least min_cat of free space
* current allocation target for each relation. At conclusion, all available
* space in the arena will be coalesced at the end.
*/ */
static void static BlockNumber
compact_fsm_storage(void) fsm_search(Relation rel, uint8 min_cat)
{ {
int nextChunkIndex = 0; int restarts = 0;
bool did_push = false; FSMAddress addr = FSM_ROOT_ADDRESS;
FSMRelation *fsmrel;
for (fsmrel = FreeSpaceMap->firstRel; for (;;)
fsmrel != NULL;
fsmrel = fsmrel->nextPhysical)
{ {
int newAlloc; int slot;
int newAllocPages; Buffer buf;
int newChunkIndex; uint8 max_avail;
int oldChunkIndex;
int curChunks;
char *newLocation;
char *oldLocation;
/* /*
* Calculate target allocation, make sure we don't overrun due to * Read the FSM page. The root page is created if it doesn't exist
* roundoff error * yet, to save future searchers the effort of having to call
* smgrnblocks() in fsm_readbuf(), only to see that the FSM is
* completely empty.
*/ */
newAlloc = fsm_calc_target_allocation(fsm_calc_request(fsmrel)); buf = fsm_readbuf(rel, addr, (addr.level != FSM_ROOT_LEVEL));
if (newAlloc > FreeSpaceMap->totalChunks - nextChunkIndex)
newAlloc = FreeSpaceMap->totalChunks - nextChunkIndex;
if (fsmrel->isIndex)
newAllocPages = newAlloc * INDEXCHUNKPAGES;
else
newAllocPages = newAlloc * CHUNKPAGES;
/*
* Determine current size, current and new locations
*/
curChunks = fsm_current_chunks(fsmrel);
oldChunkIndex = fsmrel->firstChunk;
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
newChunkIndex = nextChunkIndex;
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
/* /* Search within the page */
* It's possible that we have to move data down, not up, if the if (BufferIsValid(buf))
* allocations of previous rels expanded. This normally means that
* our allocation expanded too (or at least got no worse), and ditto
* for later rels. So there should be room to move all our data down
* without dropping any --- but we might have to push down following
* rels to acquire the room. We don't want to do the push more than
* once, so pack everything against the end of the arena if so.
*
* In corner cases where we are on the short end of a roundoff choice
* that we were formerly on the long end of, it's possible that we
* have to move down and compress our data too. In fact, even after
* pushing down the following rels, there might not be as much space
* as we computed for this rel above --- that would imply that some
* following rel(s) are also on the losing end of roundoff choices. We
* could handle this fairly by doing the per-rel compactions
* out-of-order, but that seems like way too much complexity to deal
* with a very infrequent corner case. Instead, we simply drop pages
* from the end of the current rel's data until it fits.
*/
if (newChunkIndex > oldChunkIndex)
{ {
int limitChunkIndex; LockBuffer(buf, BUFFER_LOCK_SHARE);
slot = fsm_search_avail(buf, min_cat,
if (newAllocPages < fsmrel->storedPages) (addr.level == FSM_BOTTOM_LEVEL),
{ false);
/* move and compress --- just drop excess pages */ if (slot == -1)
fsmrel->storedPages = newAllocPages; max_avail = fsm_get_max_avail(BufferGetPage(buf));
curChunks = fsm_current_chunks(fsmrel); UnlockReleaseBuffer(buf);
} }
/* is there enough space? */ else
if (fsmrel->nextPhysical != NULL) {
limitChunkIndex = fsmrel->nextPhysical->firstChunk; slot = -1;
else max_avail = 0;
limitChunkIndex = FreeSpaceMap->totalChunks;
if (newChunkIndex + curChunks > limitChunkIndex)
{
/* not enough space, push down following rels */
if (!did_push)
{
push_fsm_rels_after(fsmrel);
did_push = true;
}
/* now is there enough space? */
if (fsmrel->nextPhysical != NULL)
limitChunkIndex = fsmrel->nextPhysical->firstChunk;
else
limitChunkIndex = FreeSpaceMap->totalChunks;
if (newChunkIndex + curChunks > limitChunkIndex)
{
/* uh-oh, forcibly cut the allocation to fit */
newAlloc = limitChunkIndex - newChunkIndex;
/*
* If newAlloc < 0 at this point, we are moving the rel's
* firstChunk into territory currently assigned to a later
* rel. This is okay so long as we do not copy any data.
* The rels will be back in nondecreasing firstChunk order
* at completion of the compaction pass.
*/
if (newAlloc < 0)
newAlloc = 0;
if (fsmrel->isIndex)
newAllocPages = newAlloc * INDEXCHUNKPAGES;
else
newAllocPages = newAlloc * CHUNKPAGES;
fsmrel->storedPages = newAllocPages;
curChunks = fsm_current_chunks(fsmrel);
}
}
memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
} }
else if (newAllocPages < fsmrel->storedPages)
if (slot != -1)
{ {
/* /*
* Need to compress the page data. For an index, "compression" * Descend the tree, or return the found block if we're at the
* just means dropping excess pages; otherwise we try to keep the * bottom.
* ones with the most space.
*/ */
if (fsmrel->isIndex) if (addr.level == FSM_BOTTOM_LEVEL)
{ return fsm_get_heap_blk(addr, slot);
fsmrel->storedPages = newAllocPages;
/* may need to move data */ addr = fsm_get_child(addr, slot);
if (newChunkIndex != oldChunkIndex)
memmove(newLocation, oldLocation, newAlloc * CHUNKBYTES);
}
else
{
pack_existing_pages((FSMPageData *) newLocation,
newAllocPages,
(FSMPageData *) oldLocation,
fsmrel->storedPages);
fsmrel->storedPages = newAllocPages;
}
} }
else if (newChunkIndex != oldChunkIndex) else if (addr.level == FSM_ROOT_LEVEL)
{ {
/* /*
* No compression needed, but must copy the data up * At the root, failure means there's no page with enough free
* space in the FSM. Give up.
*/ */
memmove(newLocation, oldLocation, curChunks * CHUNKBYTES); return InvalidBlockNumber;
} }
fsmrel->firstChunk = newChunkIndex; else
nextChunkIndex += newAlloc; {
} uint16 parentslot;
Assert(nextChunkIndex <= FreeSpaceMap->totalChunks); FSMAddress parent;
FreeSpaceMap->usedChunks = nextChunkIndex;
}
/*
* Push all FSMRels physically after afterRel to the end of the storage arena.
*
* We sometimes have to do this when deletion or truncation of a relation
* causes the allocations of remaining rels to expand markedly. We must
* temporarily push existing data down to the end so that we can move it
* back up in an orderly fashion.
*/
static void
push_fsm_rels_after(FSMRelation *afterRel)
{
int nextChunkIndex = FreeSpaceMap->totalChunks;
FSMRelation *fsmrel;
FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
for (fsmrel = FreeSpaceMap->lastRel; /*
fsmrel != NULL; * At lower level, failure can happen if the value in the upper-
fsmrel = fsmrel->priorPhysical) * level node didn't reflect the value on the lower page. Update
{ * the upper node, to avoid falling into the same trap again, and
int chunkCount; * start over.
int newChunkIndex; *
int oldChunkIndex; * There's a race condition here, if another backend updates this
char *newLocation; * page right after we release it, and gets the lock on the parent
char *oldLocation; * page before us. We'll then update the parent page with the now
* stale information we had. It's OK, because it should happen
* rarely, and will be fixed by the next vacuum.
*/
parent = fsm_get_parent(addr, &parentslot);
fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
if (fsmrel == afterRel) /*
break; * If the upper pages are badly out of date, we might need to
* loop quite a few times, updating them as we go. Any
* inconsistencies should eventually be corrected and the loop
* should end. Looping indefinitely is nevertheless scary, so
* provide an emergency valve.
*/
if (restarts++ > 10000)
return InvalidBlockNumber;
chunkCount = fsm_current_chunks(fsmrel); /* Start search all over from the root */
nextChunkIndex -= chunkCount; addr = FSM_ROOT_ADDRESS;
newChunkIndex = nextChunkIndex;
oldChunkIndex = fsmrel->firstChunk;
if (newChunkIndex < oldChunkIndex)
{
/* we're pushing down, how can it move up? */
elog(PANIC, "inconsistent entry sizes in FSM");
}
else if (newChunkIndex > oldChunkIndex)
{
/* need to move it */
newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
memmove(newLocation, oldLocation, chunkCount * CHUNKBYTES);
fsmrel->firstChunk = newChunkIndex;
} }
} }
Assert(nextChunkIndex >= 0);
} }
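The correct-and-restart behaviour is easier to see in a toy two-level model, with plain arrays standing in for buffers and locks: the root caches each leaf page's maximum category, and when that cache turns out to be stale the searcher fixes it and starts over, guarded by the same kind of emergency valve (scaled down here):

#include <stdio.h>

#define FANOUT 4

/* toy two-level FSM: root[i] caches the maximum category on leaf page i */
static unsigned char root[FANOUT];
static unsigned char leaves[FANOUT][FANOUT];

static int
page_search(const unsigned char *page, int n, unsigned char min)
{
	int		i;

	for (i = 0; i < n; i++)
		if (page[i] >= min)
			return i;
	return -1;
}

static unsigned char
page_max(const unsigned char *page, int n)
{
	unsigned char max = 0;
	int		i;

	for (i = 0; i < n; i++)
		if (page[i] > max)
			max = page[i];
	return max;
}

static int
search(unsigned char min)
{
	int		restarts = 0;

	for (;;)
	{
		int		pslot = page_search(root, FANOUT, min);
		int		lslot;

		if (pslot == -1)
			return -1;						/* nothing anywhere */

		lslot = page_search(leaves[pslot], FANOUT, min);
		if (lslot != -1)
			return pslot * FANOUT + lslot;	/* "heap block" number */

		/*
		 * The root promised more than the leaf page really has, so the
		 * cached value was stale.  Correct it and start over, with a
		 * safety valve against looping forever.
		 */
		root[pslot] = page_max(leaves[pslot], FANOUT);
		printf("stale root slot %d corrected to %u, restarting\n",
			   pslot, root[pslot]);
		if (++restarts > 10)
			return -1;
	}
}

int
main(void)
{
	static const unsigned char init[FANOUT][FANOUT] = {
		{1, 0, 2, 0}, {2, 1, 0, 2}, {0, 0, 7, 1}, {3, 0, 0, 0}
	};
	int		i,
			j;

	for (i = 0; i < FANOUT; i++)
		for (j = 0; j < FANOUT; j++)
			leaves[i][j] = init[i][j];

	/* slot 1 is deliberately stale: that leaf page holds nothing above 2 */
	root[0] = 2;
	root[1] = 9;
	root[2] = 7;
	root[3] = 3;

	printf("first block with category >= 5: %d\n", search(5));
	return 0;
}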
/* /*
* Pack a set of per-page freespace data into a smaller amount of space. * Recursive guts of FreeSpaceMapVacuum
*
* The method is to compute a low-resolution histogram of the free space
* amounts, then determine which histogram bin contains the break point.
* We then keep all pages above that bin, none below it, and just enough
* of the pages in that bin to fill the output area exactly.
*/ */
#define HISTOGRAM_BINS 64 static uint8
fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
static void
pack_incoming_pages(FSMPageData *newLocation, int newPages,
FSMPageData *pageSpaces, int nPages)
{ {
int histogram[HISTOGRAM_BINS]; Buffer buf;
int above, Page page;
binct, uint8 max_avail;
i;
Size thresholdL,
thresholdU;
Assert(newPages < nPages); /* else I shouldn't have been called */
/* Build histogram */
MemSet(histogram, 0, sizeof(histogram));
for (i = 0; i < nPages; i++)
{
Size avail = FSMPageGetSpace(&pageSpaces[i]);
if (avail >= BLCKSZ)
elog(ERROR, "bogus freespace amount");
avail /= (BLCKSZ / HISTOGRAM_BINS);
histogram[avail]++;
}
/* Find the breakpoint bin */
above = 0;
for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
{
int sum = above + histogram[i];
if (sum > newPages) /* Read the page if it exists, or return EOF */
break; buf = fsm_readbuf(rel, addr, false);
above = sum; if (!BufferIsValid(buf))
}
Assert(i >= 0);
thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */
thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
binct = newPages - above; /* number to take from bp bin */
/* And copy the appropriate data */
for (i = 0; i < nPages; i++)
{ {
BlockNumber page = FSMPageGetPageNum(&pageSpaces[i]); *eof_p = true;
Size avail = FSMPageGetSpace(&pageSpaces[i]); return 0;
/* Check caller provides sorted data */
if (i > 0 && page <= FSMPageGetPageNum(&pageSpaces[i - 1]))
elog(ERROR, "free-space data is not in page order");
/* Save this page? */
if (avail >= thresholdU ||
(avail >= thresholdL && (--binct >= 0)))
{
*newLocation = pageSpaces[i];
newLocation++;
newPages--;
}
} }
Assert(newPages == 0); else
} *eof_p = false;
/*
* Pack a set of per-page freespace data into a smaller amount of space.
*
* This is algorithmically identical to pack_incoming_pages(), but accepts
* a different input representation. Also, we assume the input data has
* previously been checked for validity (size in bounds, pages in order).
*
* Note: it is possible for the source and destination arrays to overlap.
* The caller is responsible for making sure newLocation is at lower addresses
* so that we can copy data moving forward in the arrays without problem.
*/
static void
pack_existing_pages(FSMPageData *newLocation, int newPages,
FSMPageData *oldLocation, int oldPages)
{
int histogram[HISTOGRAM_BINS];
int above,
binct,
i;
Size thresholdL,
thresholdU;
Assert(newPages < oldPages); /* else I shouldn't have been called */
/* Build histogram */
MemSet(histogram, 0, sizeof(histogram));
for (i = 0; i < oldPages; i++)
{
Size avail = FSMPageGetSpace(oldLocation + i);
/* Shouldn't happen, but test to protect against stack clobber */ page = BufferGetPage(buf);
if (avail >= BLCKSZ)
elog(ERROR, "bogus freespace amount");
avail /= (BLCKSZ / HISTOGRAM_BINS);
histogram[avail]++;
}
/* Find the breakpoint bin */
above = 0;
for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
{
int sum = above + histogram[i];
if (sum > newPages) /*
break; * Recurse into children, and fix the information stored about them
above = sum; * at this level.
} */
Assert(i >= 0); if (addr.level > FSM_BOTTOM_LEVEL)
thresholdL = i * BLCKSZ / HISTOGRAM_BINS; /* low bound of bp bin */
thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
binct = newPages - above; /* number to take from bp bin */
/* And copy the appropriate data */
for (i = 0; i < oldPages; i++)
{ {
BlockNumber page = FSMPageGetPageNum(oldLocation + i); int slot;
Size avail = FSMPageGetSpace(oldLocation + i); bool eof = false;
/* Save this page? */ for (slot = 0; slot < SlotsPerFSMPage; slot++)
if (avail >= thresholdU ||
(avail >= thresholdL && (--binct >= 0)))
{ {
FSMPageSetPageNum(newLocation, page); int child_avail;
FSMPageSetSpace(newLocation, avail);
newLocation++;
newPages--;
}
}
Assert(newPages == 0);
}
/* /* After we hit end-of-file, just clear the rest of the slots */
* Calculate number of chunks "requested" by a rel. The "request" is if (!eof)
* anything beyond the rel's one guaranteed chunk. child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
* else
* Rel's interestingPages and isIndex settings must be up-to-date when called. child_avail = 0;
*
* See notes at top of file for details.
*/
static int
fsm_calc_request(FSMRelation *fsmrel)
{
int req;
/* Convert page count to chunk count */ /* Update information about the child */
if (fsmrel->isIndex) if (fsm_get_avail(page, slot) != child_avail)
{ {
/* test to avoid unsigned underflow at zero */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (fsmrel->interestingPages <= INDEXCHUNKPAGES) fsm_set_avail(BufferGetPage(buf), slot, child_avail);
return 0; MarkBufferDirty(buf);
/* quotient will fit in int, even if interestingPages doesn't */ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES; }
} }
else
{
if (fsmrel->interestingPages <= CHUNKPAGES)
return 0;
req = (fsmrel->interestingPages - 1) / CHUNKPAGES;
} }
max_avail = fsm_get_max_avail(BufferGetPage(buf));
/* /*
* We clamp the per-relation requests to at most half the arena size; this * Reset the next slot pointer. This encourages the use of low-numbered
* is intended to prevent a single bloated relation from crowding out FSM * pages, increasing the chances that a later vacuum can truncate the
* service for every other rel. * relation.
*/ */
req = Min(req, FreeSpaceMap->totalChunks / 2); ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
return req;
}
/* ReleaseBuffer(buf);
* Same as above, but without the clamp ... this is just intended for
* reporting the total space needed to store all information.
*/
static int
fsm_calc_request_unclamped(FSMRelation *fsmrel)
{
int req;
/* Convert page count to chunk count */
if (fsmrel->isIndex)
{
/* test to avoid unsigned underflow at zero */
if (fsmrel->interestingPages <= INDEXCHUNKPAGES)
return 0;
/* quotient will fit in int, even if interestingPages doesn't */
req = (fsmrel->interestingPages - 1) / INDEXCHUNKPAGES;
}
else
{
if (fsmrel->interestingPages <= CHUNKPAGES)
return 0;
req = (fsmrel->interestingPages - 1) / CHUNKPAGES;
}
return req; return max_avail;
} }
/*
* Calculate target allocation (number of chunks) for a rel
*
* Parameter is the result from fsm_calc_request(). The global sumRequests
* and numRels totals must be up-to-date already.
*
* See notes at top of file for details.
*/
static int
fsm_calc_target_allocation(int myRequest)
{
double spareChunks;
int extra;
spareChunks = FreeSpaceMap->totalChunks - FreeSpaceMap->numRels; /****** WAL-logging ******/
Assert(spareChunks > 0);
if (spareChunks >= FreeSpaceMap->sumRequests)
{
/* We aren't oversubscribed, so allocate exactly the request */
extra = myRequest;
}
else
{
extra = (int) rint(spareChunks * myRequest / FreeSpaceMap->sumRequests);
if (extra < 0) /* shouldn't happen, but make sure */
extra = 0;
}
return 1 + extra;
}
/* void
* Calculate number of chunks actually used to store current data fsm_redo(XLogRecPtr lsn, XLogRecord *record)
*/
static int
fsm_current_chunks(FSMRelation *fsmrel)
{ {
int chunkCount; uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Make sure storedPages==0 produces right answer */
if (fsmrel->storedPages <= 0)
return 0;
/* Convert page count to chunk count */
if (fsmrel->isIndex)
chunkCount = (fsmrel->storedPages - 1) / INDEXCHUNKPAGES + 1;
else
chunkCount = (fsmrel->storedPages - 1) / CHUNKPAGES + 1;
return chunkCount;
}
/* switch (info)
* Calculate current actual allocation (number of chunks) for a rel
*/
static int
fsm_current_allocation(FSMRelation *fsmrel)
{
if (fsmrel->nextPhysical != NULL)
return fsmrel->nextPhysical->firstChunk - fsmrel->firstChunk;
else if (fsmrel == FreeSpaceMap->lastRel)
return FreeSpaceMap->usedChunks - fsmrel->firstChunk;
else
{ {
/* it's not in the storage-order list */ case XLOG_FSM_TRUNCATE:
Assert(fsmrel->firstChunk < 0 && fsmrel->storedPages == 0); {
return 0; xl_fsm_truncate *xlrec;
} Relation rel;
}
/*
* Return the FreeSpaceMap structure for examination.
*/
FSMHeader *
GetFreeSpaceMap(void)
{
return FreeSpaceMap; xlrec = (xl_fsm_truncate *) XLogRecGetData(record);
rel = CreateFakeRelcacheEntry(xlrec->node);
FreeSpaceMapTruncateRel(rel, xlrec->nheapblocks);
FreeFakeRelcacheEntry(rel);
}
break;
default:
elog(PANIC, "fsm_redo: unknown op code %u", info);
}
} }
#ifdef FREESPACE_DEBUG
/*
* Dump contents of freespace map for debugging.
*
* We assume caller holds the FreeSpaceLock, or is otherwise unconcerned
* about other processes.
*/
void void
DumpFreeSpace(void) fsm_desc(StringInfo buf, uint8 xl_info, char *rec)
{ {
FSMRelation *fsmrel; uint8 info = xl_info & ~XLR_INFO_MASK;
FSMRelation *prevrel = NULL;
int relNum = 0;
int nPages;
for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage) switch (info)
{ {
relNum++; case XLOG_FSM_TRUNCATE:
fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u interestingPages %u nextPage %d\nMap= ",
relNum,
fsmrel->key.spcNode, fsmrel->key.dbNode, fsmrel->key.relNode,
(int) fsmrel->isIndex, fsmrel->avgRequest,
fsmrel->interestingPages, fsmrel->nextPage);
if (fsmrel->isIndex)
{
IndexFSMPageData *page;
page = (IndexFSMPageData *)
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
{
fprintf(stderr, " %u",
IndexFSMPageGetPageNum(page));
page++;
}
}
else
{ {
FSMPageData *page; xl_fsm_truncate *xlrec = (xl_fsm_truncate *) rec;
page = (FSMPageData *) appendStringInfo(buf, "truncate: rel %u/%u/%u; nheapblocks %u;",
(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); xlrec->node.spcNode, xlrec->node.dbNode,
for (nPages = 0; nPages < fsmrel->storedPages; nPages++) xlrec->node.relNode, xlrec->nheapblocks);
{ break;
fprintf(stderr, " %u:%u",
FSMPageGetPageNum(page),
FSMPageGetSpace(page));
page++;
}
} }
fprintf(stderr, "\n"); default:
/* Cross-check list links */ appendStringInfo(buf, "UNKNOWN");
if (prevrel != fsmrel->priorUsage) break;
fprintf(stderr, "DumpFreeSpace: broken list links\n");
prevrel = fsmrel;
} }
if (prevrel != FreeSpaceMap->usageListTail)
fprintf(stderr, "DumpFreeSpace: broken list links\n");
/* Cross-check global counters */
if (relNum != FreeSpaceMap->numRels)
fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
relNum, FreeSpaceMap->numRels);
} }
#endif /* FREESPACE_DEBUG */
/*-------------------------------------------------------------------------
*
* fsmpage.c
* routines to search and manipulate one FSM page.
*
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/freespace/fsmpage.c,v 1.1 2008/09/30 10:52:13 heikki Exp $
*
* NOTES:
*
* The public functions in this file form an API that hides the internal
* structure of a FSM page. This allows freespace.c to treat each FSM page
 * as a black box with SlotsPerFSMPage "slots". fsm_set_avail() and
 * fsm_get_avail() let you get/set the value of a slot, and
 * fsm_search_avail() lets you search for a slot with value >= X.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/fsm_internals.h"
/* macros to navigate the tree within a page. */
#define leftchild(x) (2 * (x) + 1)
#define rightchild(x) (2 * (x) + 2)
#define parentof(x) (((x) - 1) / 2)
/* returns right sibling of x, wrapping around within the level */
static int
rightsibling(int x)
{
/*
* Move right. This might wrap around, stepping to the leftmost node at
* the next level.
*/
x++;
/*
 * Check if we stepped to the leftmost node at the next level, and correct
 * if so. The leftmost nodes at each level are of the form x = 2^level - 1, so
* check if (x + 1) is a power of two.
*/
if (((x + 1) & x) == 0)
x = parentof(x);
return x;
}
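/*
 * Illustration (not part of the patch): on the level holding nodes 3..6,
 * rightsibling(4) is simply 5.  rightsibling(6) first steps to 7, which is
 * the leftmost node of the next level (7 + 1 is a power of two), and is
 * then corrected to parentof(7) = 3, i.e. it wraps around to the leftmost
 * node of the same level.
 */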
/*
* Sets the value of a slot on page. Returns true if the page was
* modified.
*
* The caller must hold an exclusive lock on the page.
*/
bool
fsm_set_avail(Page page, int slot, uint8 value)
{
int nodeno = NonLeafNodesPerPage + slot;
FSMPage fsmpage = (FSMPage) PageGetContents(page);
uint8 oldvalue;
Assert(slot < LeafNodesPerPage);
oldvalue = fsmpage->fp_nodes[nodeno];
/* If the value hasn't changed, we don't need to do anything */
if (oldvalue == value && value <= fsmpage->fp_nodes[0])
return false;
fsmpage->fp_nodes[nodeno] = value;
/*
* Propagate up, until we hit the root or a node that doesn't
* need to be updated.
*/
do
{
uint8 newvalue = 0;
int lchild;
int rchild;
nodeno = parentof(nodeno);
lchild = leftchild(nodeno);
rchild = lchild + 1;
newvalue = fsmpage->fp_nodes[lchild];
if (rchild < NodesPerPage)
newvalue = Max(newvalue,
fsmpage->fp_nodes[rchild]);
oldvalue = fsmpage->fp_nodes[nodeno];
if (oldvalue == newvalue)
break;
fsmpage->fp_nodes[nodeno] = newvalue;
} while (nodeno > 0);
/*
 * Sanity check: if the new value is higher than the value at the top,
 * the tree is corrupt, so rebuild the whole page.
*/
if (value > fsmpage->fp_nodes[0])
fsm_rebuild_page(page);
return true;
}
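/*
 * Illustration (not part of the patch): suppose a leaf goes from 2 to 6
 * while its parent currently holds 4.  The parent is recomputed to 6 and
 * the loop climbs further up; as soon as an ancestor's value turns out to
 * be correct already (say its other subtree contains a 7), the loop stops
 * early, since nothing above it can change either.
 */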
/*
* Returns the value of given slot on page.
*
* Since this is just a read-only access of a single byte, the page doesn't
* need to be locked.
*/
uint8
fsm_get_avail(Page page, int slot)
{
FSMPage fsmpage = (FSMPage) PageGetContents(page);
return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
}
/*
* Returns the value at the root of a page.
* Since this is just a read-only access of a single byte, the page doesn't
* need to be locked.
*/
uint8
fsm_get_max_avail(Page page)
{
FSMPage fsmpage = (FSMPage) PageGetContents(page);
return fsmpage->fp_nodes[0];
}
/*
 * Searches for a slot with a category of at least minvalue. Returns the
 * slot number, or -1 if none found.
*
* The caller must hold at least a shared lock on the page, and this
* function can unlock and lock the page again in exclusive mode if it
* needs to be updated. exclusive_lock_held should be set to true if the
* caller is already holding an exclusive lock, to avoid extra work.
*
* If advancenext is false, fp_next_slot is set to point to the returned
* slot, and if it's true, to the slot next to the returned slot.
*/
int
fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
bool exclusive_lock_held)
{
Page page = BufferGetPage(buf);
FSMPage fsmpage = (FSMPage) PageGetContents(page);
int nodeno;
int target;
uint16 slot;
restart:
/*
* Check the root first, and exit quickly if there's no page with
* enough free space
*/
if (fsmpage->fp_nodes[0] < minvalue)
return -1;
/* fp_next_slot is just a hint, so check that it's sane */
target = fsmpage->fp_next_slot;
if (target < 0 || target >= LeafNodesPerPage)
target = 0;
target += NonLeafNodesPerPage;
/*
* Start the search from the target slot. At every step, move one
* node to the right, and climb up to the parent. Stop when we reach a
* node with enough free space. (note that moving to the right only
* makes a difference if we're on the right child of the parent)
*
 * The idea is to gradually expand our "search triangle", that is, all
 * nodes covered by the current node. In the beginning, just the target
 * node is included, and more nodes to the right of the target node,
 * taking wrap-around into account, are included at each step. Nodes are
* added to the search triangle in left-to-right order, starting from
* the target node. This ensures that we'll find the first suitable node
* to the right of the target node, and not some other node with enough
* free space.
*
* For example, consider this tree:
*
* 7
* 7 6
* 5 7 6 5
* 4 5 5 7 2 6 5 2
* T
*
* Imagine that target node is the node indicated by the letter T, and
* we're searching for a node with value of 6 or higher. The search
 * begins at T. At the first iteration, we move to the right, and to the
 * parent, arriving at the rightmost 5. At the 2nd iteration, we move to the
* right, wrapping around, and climb up, arriving at the 7 at the 2nd
* level. 7 satisfies our search, so we descend down to the bottom,
* following the path of sevens.
*/
nodeno = target;
while (nodeno > 0)
{
if (fsmpage->fp_nodes[nodeno] >= minvalue)
break;
/*
* Move to the right, wrapping around at the level if necessary, and
* climb up.
*/
nodeno = parentof(rightsibling(nodeno));
}
/*
* We're now at a node with enough free space, somewhere in the middle of
* the tree. Descend to the bottom, following a path with enough free
* space, preferring to move left if there's a choice.
*/
while (nodeno < NonLeafNodesPerPage)
{
int leftnodeno = leftchild(nodeno);
int rightnodeno = leftnodeno + 1;
bool leftok = (leftnodeno < NodesPerPage) &&
(fsmpage->fp_nodes[leftnodeno] >= minvalue);
bool rightok = (rightnodeno < NodesPerPage) &&
(fsmpage->fp_nodes[rightnodeno] >= minvalue);
if (leftok)
nodeno = leftnodeno;
else if (rightok)
nodeno = rightnodeno;
else
{
/*
* Oops. The parent node promised that either left or right
* child has enough space, but neither actually did. This can
* happen in case of a "torn page", IOW if we crashed earlier
* while writing the page to disk, and only part of the page
* made it to disk.
*
* Fix the corruption and restart.
*/
RelFileNode rnode;
ForkNumber forknum;
BlockNumber blknum;
BufferGetTag(buf, &rnode, &forknum, &blknum);
elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
blknum, rnode.spcNode, rnode.dbNode, rnode.relNode);
/* make sure we hold an exclusive lock */
if (!exclusive_lock_held)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
exclusive_lock_held = true;
}
fsm_rebuild_page(page);
MarkBufferDirty(buf);
goto restart;
}
}
/* We're now at the bottom level, at a node with enough space. */
slot = nodeno - NonLeafNodesPerPage;
/*
* Update the next slot pointer. Note that we do this even if we're only
* holding a shared lock, on the grounds that it's better to use a shared
* lock and get a garbled next pointer every now and then, than take the
 * concurrency hit of an exclusive lock.
*
* Wrap-around is handled at the beginning of this function.
*/
fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
return slot;
}
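To make the three public entry points concrete, here is a minimal sketch of how a caller such as freespace.c might combine them on one FSM page. It is not part of the commit: the function and variable names are invented, and it assumes the page has already been read into buf and is exclusively locked.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/fsm_internals.h"

/* Illustrative only: "buf" is already pinned and exclusively locked. */
static int
fsm_page_example(Buffer buf, int slot, uint8 newvalue, uint8 wanted)
{
	Page		page = BufferGetPage(buf);

	/* Record a new free-space category for one slot. */
	if (fsm_set_avail(page, slot, newvalue))
		MarkBufferDirty(buf);

	/* Read the same slot back through the accessor. */
	Assert(fsm_get_avail(page, slot) == newvalue);

	/*
	 * Look for any slot with category >= wanted, advancing the page's
	 * round-robin hint past the slot that is returned.
	 */
	return fsm_search_avail(buf, wanted, true, true);
}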
/*
* Sets the available space to zero for all slots numbered >= nslots.
* Returns true if the page was modified.
*/
bool
fsm_truncate_avail(Page page, int nslots)
{
FSMPage fsmpage = (FSMPage) PageGetContents(page);
uint8 *ptr;
bool changed = false;
Assert(nslots >= 0 && nslots < LeafNodesPerPage);
/* Clear all truncated leaf nodes */
ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
{
if (*ptr != 0)
changed = true;
*ptr = 0;
}
/* Fix upper nodes. */
if (changed)
fsm_rebuild_page(page);
return changed;
}
/*
* Reconstructs the upper levels of a page. Returns true if the page
* was modified.
*/
bool
fsm_rebuild_page(Page page)
{
FSMPage fsmpage = (FSMPage) PageGetContents(page);
bool changed = false;
int nodeno;
/*
 * Start from the lowest non-leaf level, at the last node, working our way
 * backwards through all non-leaf nodes at all levels, up to the root.
*/
for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
{
int lchild = leftchild(nodeno);
int rchild = lchild + 1;
uint8 newvalue = 0;
if (lchild < NodesPerPage)
newvalue = fsmpage->fp_nodes[lchild];
if (rchild < NodesPerPage)
newvalue = Max(newvalue,
fsmpage->fp_nodes[rchild]);
if (fsmpage->fp_nodes[nodeno] != newvalue)
{
fsmpage->fp_nodes[nodeno] = newvalue;
changed = true;
}
}
return changed;
}
/*-------------------------------------------------------------------------
*
* indexfsm.c
* POSTGRES free space map for quickly finding free pages in relations
*
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/freespace/indexfsm.c,v 1.1 2008/09/30 10:52:13 heikki Exp $
*
*
* NOTES:
*
* This is similar to the FSM used for heap, in freespace.c, but instead
* of tracking the amount of free space on pages, we only track whether
* pages are completely free or in-use. We use the same FSM implementation
 * as for heaps, using BLCKSZ - 1 to denote free pages and 0 for pages that
 * are in use, which is what the functions below actually store.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/smgr.h"
/*
* Exported routines
*/
/*
* InitIndexFreeSpaceMap - Create or reset the FSM fork for relation.
*/
void
InitIndexFreeSpaceMap(Relation rel)
{
/* Create FSM fork if it doesn't exist yet, or truncate it if it does */
RelationOpenSmgr(rel);
if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
smgrcreate(rel->rd_smgr, FSM_FORKNUM, rel->rd_istemp, false);
else
smgrtruncate(rel->rd_smgr, FSM_FORKNUM, 0, rel->rd_istemp);
}
/*
* GetFreeIndexPage - return a free page from the FSM
*
* As a side effect, the page is marked as used in the FSM.
*/
BlockNumber
GetFreeIndexPage(Relation rel)
{
BlockNumber blkno = GetPageWithFreeSpace(rel, BLCKSZ/2);
if (blkno != InvalidBlockNumber)
RecordUsedIndexPage(rel, blkno);
return blkno;
}
/*
* RecordFreeIndexPage - mark a page as free in the FSM
*/
void
RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
{
RecordPageWithFreeSpace(rel, freeBlock, BLCKSZ - 1);
}
/*
* RecordUsedIndexPage - mark a page as used in the FSM
*/
void
RecordUsedIndexPage(Relation rel, BlockNumber usedBlock)
{
RecordPageWithFreeSpace(rel, usedBlock, 0);
}
/*
* IndexFreeSpaceMapTruncate - adjust for truncation of a relation.
*
* We need to delete any stored data past the new relation length, so that
* we don't bogusly return removed block numbers.
*/
void
IndexFreeSpaceMapTruncate(Relation rel, BlockNumber nblocks)
{
FreeSpaceMapTruncateRel(rel, nblocks);
}
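The functions above are the whole public surface of the index FSM. The following sketch is illustrative only and not part of the commit (example_get_index_page, example_release_index_page and deadblkno are invented names); it shows the pattern an index AM might follow: ask the FSM for a recyclable page when allocating, and report emptied pages back as free.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"

/* Illustrative only: fetch a recyclable page, or extend the relation. */
static Buffer
example_get_index_page(Relation rel)
{
	BlockNumber blkno = GetFreeIndexPage(rel);

	if (blkno == InvalidBlockNumber)
		return ReadBuffer(rel, P_NEW);	/* nothing recyclable; grow the file */

	return ReadBuffer(rel, blkno);
}

/* Illustrative only: called when, say, VACUUM empties an index page. */
static void
example_release_index_page(Relation rel, BlockNumber deadblkno)
{
	RecordFreeIndexPage(rel, deadblkno);
}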
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.96 2008/05/12 00:00:50 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.97 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/pg_shmem.h" #include "storage/pg_shmem.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
...@@ -110,7 +109,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) ...@@ -110,7 +109,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, ProcArrayShmemSize()); size = add_size(size, ProcArrayShmemSize());
size = add_size(size, BackendStatusShmemSize()); size = add_size(size, BackendStatusShmemSize());
size = add_size(size, SInvalShmemSize()); size = add_size(size, SInvalShmemSize());
size = add_size(size, FreeSpaceShmemSize());
size = add_size(size, BgWriterShmemSize()); size = add_size(size, BgWriterShmemSize());
size = add_size(size, AutoVacuumShmemSize()); size = add_size(size, AutoVacuumShmemSize());
size = add_size(size, BTreeShmemSize()); size = add_size(size, BTreeShmemSize());
...@@ -203,11 +201,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) ...@@ -203,11 +201,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
*/ */
CreateSharedInvalidationState(); CreateSharedInvalidationState();
/*
* Set up free-space map
*/
InitFreeSpaceMap();
/* /*
* Set up interprocess signaling mechanisms * Set up interprocess signaling mechanisms
*/ */
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.111 2008/08/11 11:05:11 heikki Exp $ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.112 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "commands/tablespace.h" #include "commands/tablespace.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
...@@ -474,13 +473,6 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, ...@@ -474,13 +473,6 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
*/ */
DropRelFileNodeBuffers(rnode, forknum, isTemp, 0); DropRelFileNodeBuffers(rnode, forknum, isTemp, 0);
/*
* Tell the free space map to forget this relation. It won't be accessed
* any more anyway, but we may as well recycle the map space quickly.
*/
if (forknum == MAIN_FORKNUM)
FreeSpaceMapForgetRel(&rnode);
/* /*
* It'd be nice to tell the stats collector to forget it immediately, too. * It'd be nice to tell the stats collector to forget it immediately, too.
* But we can't because we don't know the OID (and in cases involving * But we can't because we don't know the OID (and in cases involving
...@@ -577,13 +569,6 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, ...@@ -577,13 +569,6 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
*/ */
DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks);
/*
* Tell the free space map to forget anything it may have stored for the
* about-to-be-deleted blocks. We want to be sure it won't return bogus
* block numbers later on.
*/
FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
/* Do the truncation */ /* Do the truncation */
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks,
isTemp); isTemp);
...@@ -905,13 +890,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -905,13 +890,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false, DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false,
xlrec->blkno); xlrec->blkno);
/*
* Tell the free space map to forget anything it may have stored for
* the about-to-be-deleted blocks. We want to be sure it won't return
* bogus block numbers later on.
*/
FreeSpaceMapTruncateRel(&reln->smgr_rnode, xlrec->blkno);
/* Do the truncation */ /* Do the truncation */
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
xlrec->forknum, xlrec->forknum,
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.556 2008/08/19 18:30:04 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.557 2008/09/30 10:52:13 heikki Exp $
* *
* NOTES * NOTES
* this is the "main" module of the postgres backend and * this is the "main" module of the postgres backend and
...@@ -57,7 +57,6 @@ ...@@ -57,7 +57,6 @@
#include "postmaster/autovacuum.h" #include "postmaster/autovacuum.h"
#include "rewrite/rewriteHandler.h" #include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/sinval.h" #include "storage/sinval.h"
...@@ -3258,13 +3257,6 @@ PostgresMain(int argc, char *argv[], const char *username) ...@@ -3258,13 +3257,6 @@ PostgresMain(int argc, char *argv[], const char *username)
StartupXLOG(); StartupXLOG();
on_shmem_exit(ShutdownXLOG, 0); on_shmem_exit(ShutdownXLOG, 0);
/*
* Read any existing FSM cache file, and register to write one out at
* exit.
*/
LoadFreeSpaceMap();
on_shmem_exit(DumpFreeSpaceMap, 0);
/* /*
* We have to build the flat file for pg_database, but not for the * We have to build the flat file for pg_database, but not for the
* user and group tables, since we won't try to do authentication. * user and group tables, since we won't try to do authentication.
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.273 2008/08/10 19:02:33 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.274 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -304,6 +304,7 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp) ...@@ -304,6 +304,7 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp)
*/ */
MemSet(relation, 0, sizeof(RelationData)); MemSet(relation, 0, sizeof(RelationData));
relation->rd_targblock = InvalidBlockNumber; relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks_cache = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */ /* make sure relation is marked as having no open file yet */
relation->rd_smgr = NULL; relation->rd_smgr = NULL;
...@@ -1364,6 +1365,7 @@ formrdesc(const char *relationName, Oid relationReltype, ...@@ -1364,6 +1365,7 @@ formrdesc(const char *relationName, Oid relationReltype,
*/ */
relation = (Relation) palloc0(sizeof(RelationData)); relation = (Relation) palloc0(sizeof(RelationData));
relation->rd_targblock = InvalidBlockNumber; relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks_cache = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */ /* make sure relation is marked as having no open file yet */
relation->rd_smgr = NULL; relation->rd_smgr = NULL;
...@@ -1652,8 +1654,9 @@ RelationReloadIndexInfo(Relation relation) ...@@ -1652,8 +1654,9 @@ RelationReloadIndexInfo(Relation relation)
heap_freetuple(pg_class_tuple); heap_freetuple(pg_class_tuple);
/* We must recalculate physical address in case it changed */ /* We must recalculate physical address in case it changed */
RelationInitPhysicalAddr(relation); RelationInitPhysicalAddr(relation);
/* Make sure targblock is reset in case rel was truncated */ /* Must reset targblock and fsm_nblocks_cache in case rel was truncated */
relation->rd_targblock = InvalidBlockNumber; relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks_cache = InvalidBlockNumber;
/* Must free any AM cached data, too */ /* Must free any AM cached data, too */
if (relation->rd_amcache) if (relation->rd_amcache)
pfree(relation->rd_amcache); pfree(relation->rd_amcache);
...@@ -1736,6 +1739,7 @@ RelationClearRelation(Relation relation, bool rebuild) ...@@ -1736,6 +1739,7 @@ RelationClearRelation(Relation relation, bool rebuild)
if (relation->rd_isnailed) if (relation->rd_isnailed)
{ {
relation->rd_targblock = InvalidBlockNumber; relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks_cache = InvalidBlockNumber;
if (relation->rd_rel->relkind == RELKIND_INDEX) if (relation->rd_rel->relkind == RELKIND_INDEX)
{ {
relation->rd_isvalid = false; /* needs to be revalidated */ relation->rd_isvalid = false; /* needs to be revalidated */
...@@ -2330,6 +2334,7 @@ RelationBuildLocalRelation(const char *relname, ...@@ -2330,6 +2334,7 @@ RelationBuildLocalRelation(const char *relname,
rel = (Relation) palloc0(sizeof(RelationData)); rel = (Relation) palloc0(sizeof(RelationData));
rel->rd_targblock = InvalidBlockNumber; rel->rd_targblock = InvalidBlockNumber;
rel->rd_fsm_nblocks_cache = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */ /* make sure relation is marked as having no open file yet */
rel->rd_smgr = NULL; rel->rd_smgr = NULL;
...@@ -3586,6 +3591,7 @@ load_relcache_init_file(void) ...@@ -3586,6 +3591,7 @@ load_relcache_init_file(void)
*/ */
rel->rd_smgr = NULL; rel->rd_smgr = NULL;
rel->rd_targblock = InvalidBlockNumber; rel->rd_targblock = InvalidBlockNumber;
rel->rd_fsm_nblocks_cache = InvalidBlockNumber;
if (rel->rd_isnailed) if (rel->rd_isnailed)
rel->rd_refcnt = 1; rel->rd_refcnt = 1;
else else
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>. * Written by Peter Eisentraut <peter_e@gmx.net>.
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.473 2008/09/23 21:12:03 mha Exp $ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.474 2008/09/30 10:52:13 heikki Exp $
* *
*-------------------------------------------------------------------- *--------------------------------------------------------------------
*/ */
...@@ -57,7 +57,6 @@ ...@@ -57,7 +57,6 @@
#include "regex/regex.h" #include "regex/regex.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/freespace.h"
#include "tcop/tcopprot.h" #include "tcop/tcopprot.h"
#include "tsearch/ts_cache.h" #include "tsearch/ts_cache.h"
#include "utils/builtins.h" #include "utils/builtins.h"
...@@ -446,8 +445,6 @@ const char *const config_group_names[] = ...@@ -446,8 +445,6 @@ const char *const config_group_names[] =
gettext_noop("Resource Usage"), gettext_noop("Resource Usage"),
/* RESOURCES_MEM */ /* RESOURCES_MEM */
gettext_noop("Resource Usage / Memory"), gettext_noop("Resource Usage / Memory"),
/* RESOURCES_FSM */
gettext_noop("Resource Usage / Free Space Map"),
/* RESOURCES_KERNEL */ /* RESOURCES_KERNEL */
gettext_noop("Resource Usage / Kernel Resources"), gettext_noop("Resource Usage / Kernel Resources"),
/* WAL */ /* WAL */
...@@ -1528,23 +1525,6 @@ static struct config_int ConfigureNamesInt[] = ...@@ -1528,23 +1525,6 @@ static struct config_int ConfigureNamesInt[] =
100000000, 0, 1000000000, NULL, NULL 100000000, 0, 1000000000, NULL, NULL
}, },
{
{"max_fsm_relations", PGC_POSTMASTER, RESOURCES_FSM,
gettext_noop("Sets the maximum number of tables and indexes for which free space is tracked."),
NULL
},
&MaxFSMRelations,
1000, 100, INT_MAX, NULL, NULL
},
{
{"max_fsm_pages", PGC_POSTMASTER, RESOURCES_FSM,
gettext_noop("Sets the maximum number of disk pages for which free space is tracked."),
NULL
},
&MaxFSMPages,
20000, 1000, INT_MAX, NULL, NULL
},
{ {
{"max_locks_per_transaction", PGC_POSTMASTER, LOCK_MANAGEMENT, {"max_locks_per_transaction", PGC_POSTMASTER, LOCK_MANAGEMENT,
gettext_noop("Sets the maximum number of locks per transaction."), gettext_noop("Sets the maximum number of locks per transaction."),
......
...@@ -114,13 +114,6 @@ ...@@ -114,13 +114,6 @@
#maintenance_work_mem = 16MB # min 1MB #maintenance_work_mem = 16MB # min 1MB
#max_stack_depth = 2MB # min 100kB #max_stack_depth = 2MB # min 100kB
# - Free Space Map -
#max_fsm_pages = 204800 # min max_fsm_relations*16, 6 bytes each
# (change requires restart)
#max_fsm_relations = 1000 # min 100, ~70 bytes each
# (change requires restart)
# - Kernel Resource Usage - # - Kernel Resource Usage -
#max_files_per_process = 1000 # min 25 #max_files_per_process = 1000 # min 25
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* Portions taken from FreeBSD. * Portions taken from FreeBSD.
* *
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.161 2008/09/23 10:58:03 heikki Exp $ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.162 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -119,7 +119,6 @@ static int output_errno = 0; ...@@ -119,7 +119,6 @@ static int output_errno = 0;
/* defaults */ /* defaults */
static int n_connections = 10; static int n_connections = 10;
static int n_buffers = 50; static int n_buffers = 50;
static int n_fsm_pages = 20000;
/* /*
* Warning messages for authentication methods * Warning messages for authentication methods
...@@ -1041,13 +1040,10 @@ static void ...@@ -1041,13 +1040,10 @@ static void
test_config_settings(void) test_config_settings(void)
{ {
/* /*
* These macros define the minimum shared_buffers we want for a given * This macro defines the minimum shared_buffers we want for a given
* max_connections value, and the max_fsm_pages setting to be used for a * max_connections value. The arrays show the settings to try.
* given shared_buffers value. The arrays show the settings to try.
*/ */
#define MIN_BUFS_FOR_CONNS(nconns) ((nconns) * 10) #define MIN_BUFS_FOR_CONNS(nconns) ((nconns) * 10)
#define FSM_FOR_BUFS(nbuffers) ((nbuffers) > 1000 ? 50 * (nbuffers) : 20000)
static const int trial_conns[] = { static const int trial_conns[] = {
100, 50, 40, 30, 20, 10 100, 50, 40, 30, 20, 10
...@@ -1065,7 +1061,6 @@ test_config_settings(void) ...@@ -1065,7 +1061,6 @@ test_config_settings(void)
status, status,
test_conns, test_conns,
test_buffs, test_buffs,
test_max_fsm,
ok_buffers = 0; ok_buffers = 0;
...@@ -1076,16 +1071,14 @@ test_config_settings(void) ...@@ -1076,16 +1071,14 @@ test_config_settings(void)
{ {
test_conns = trial_conns[i]; test_conns = trial_conns[i];
test_buffs = MIN_BUFS_FOR_CONNS(test_conns); test_buffs = MIN_BUFS_FOR_CONNS(test_conns);
test_max_fsm = FSM_FOR_BUFS(test_buffs);
snprintf(cmd, sizeof(cmd), snprintf(cmd, sizeof(cmd),
SYSTEMQUOTE "\"%s\" --boot -x0 %s " SYSTEMQUOTE "\"%s\" --boot -x0 %s "
"-c max_connections=%d " "-c max_connections=%d "
"-c shared_buffers=%d " "-c shared_buffers=%d "
"-c max_fsm_pages=%d "
"< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE, "< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE,
backend_exec, boot_options, backend_exec, boot_options,
test_conns, test_buffs, test_max_fsm, test_conns, test_buffs,
DEVNULL, DEVNULL); DEVNULL, DEVNULL);
status = system(cmd); status = system(cmd);
if (status == 0) if (status == 0)
...@@ -1100,7 +1093,7 @@ test_config_settings(void) ...@@ -1100,7 +1093,7 @@ test_config_settings(void)
printf("%d\n", n_connections); printf("%d\n", n_connections);
printf(_("selecting default shared_buffers/max_fsm_pages ... ")); printf(_("selecting default shared_buffers ... "));
fflush(stdout); fflush(stdout);
for (i = 0; i < bufslen; i++) for (i = 0; i < bufslen; i++)
...@@ -1112,28 +1105,25 @@ test_config_settings(void) ...@@ -1112,28 +1105,25 @@ test_config_settings(void)
test_buffs = ok_buffers; test_buffs = ok_buffers;
break; break;
} }
test_max_fsm = FSM_FOR_BUFS(test_buffs);
snprintf(cmd, sizeof(cmd), snprintf(cmd, sizeof(cmd),
SYSTEMQUOTE "\"%s\" --boot -x0 %s " SYSTEMQUOTE "\"%s\" --boot -x0 %s "
"-c max_connections=%d " "-c max_connections=%d "
"-c shared_buffers=%d " "-c shared_buffers=%d "
"-c max_fsm_pages=%d "
"< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE, "< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE,
backend_exec, boot_options, backend_exec, boot_options,
n_connections, test_buffs, test_max_fsm, n_connections, test_buffs,
DEVNULL, DEVNULL); DEVNULL, DEVNULL);
status = system(cmd); status = system(cmd);
if (status == 0) if (status == 0)
break; break;
} }
n_buffers = test_buffs; n_buffers = test_buffs;
n_fsm_pages = FSM_FOR_BUFS(n_buffers);
if ((n_buffers * (BLCKSZ / 1024)) % 1024 == 0) if ((n_buffers * (BLCKSZ / 1024)) % 1024 == 0)
printf("%dMB/%d\n", (n_buffers * (BLCKSZ / 1024)) / 1024, n_fsm_pages); printf("%dMB\n", (n_buffers * (BLCKSZ / 1024)) / 1024);
else else
printf("%dkB/%d\n", n_buffers * (BLCKSZ / 1024), n_fsm_pages); printf("%dkB\n", n_buffers * (BLCKSZ / 1024));
} }
/* /*
...@@ -1164,9 +1154,6 @@ setup_config(void) ...@@ -1164,9 +1154,6 @@ setup_config(void)
n_buffers * (BLCKSZ / 1024)); n_buffers * (BLCKSZ / 1024));
conflines = replace_token(conflines, "#shared_buffers = 32MB", repltok); conflines = replace_token(conflines, "#shared_buffers = 32MB", repltok);
snprintf(repltok, sizeof(repltok), "max_fsm_pages = %d", n_fsm_pages);
conflines = replace_token(conflines, "#max_fsm_pages = 204800", repltok);
#if DEF_PGPORT != 5432 #if DEF_PGPORT != 5432
snprintf(repltok, sizeof(repltok), "#port = %d", DEF_PGPORT); snprintf(repltok, sizeof(repltok), "#port = %d", DEF_PGPORT);
conflines = replace_token(conflines, "#port = 5432", repltok); conflines = replace_token(conflines, "#port = 5432", repltok);
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Resource managers definition * Resource managers definition
* *
* $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.17 2006/11/05 22:42:10 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.18 2008/09/30 10:52:13 heikki Exp $
*/ */
#ifndef RMGR_H #ifndef RMGR_H
#define RMGR_H #define RMGR_H
...@@ -23,6 +23,7 @@ typedef uint8 RmgrId; ...@@ -23,6 +23,7 @@ typedef uint8 RmgrId;
#define RM_DBASE_ID 4 #define RM_DBASE_ID 4
#define RM_TBLSPC_ID 5 #define RM_TBLSPC_ID 5
#define RM_MULTIXACT_ID 6 #define RM_MULTIXACT_ID 6
#define RM_FREESPACE_ID 7
#define RM_HEAP2_ID 9 #define RM_HEAP2_ID 9
#define RM_HEAP_ID 10 #define RM_HEAP_ID 10
#define RM_BTREE_ID 11 #define RM_BTREE_ID 11
......
...@@ -7,152 +7,32 @@ ...@@ -7,152 +7,32 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.28 2008/03/10 02:04:10 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.29 2008/09/30 10:52:13 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef FREESPACE_H_ #ifndef FREESPACE_H_
#define FREESPACE_H_ #define FREESPACE_H_
#include "storage/relfilenode.h" #include "utils/rel.h"
#include "storage/itemptr.h" #include "storage/bufpage.h"
#include "access/xlog.h"
/* prototypes for public functions in freespace.c */
/* Initial value for average-request moving average */ extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32)) extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
/*
* Number of pages and bytes per allocation chunk. Indexes can squeeze 50%
* more pages into the same space because they don't need to remember how much
* free space on each page. The nominal number of pages, CHUNKPAGES, is for
* regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be
* even so that no space is wasted in the index case.
*/
#define CHUNKPAGES 16
#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))
#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))
/*
* Typedefs and macros for items in the page-storage arena. We use the
* existing ItemPointer and BlockId data structures, which are designed
* to pack well (they should be 6 and 4 bytes apiece regardless of machine
* alignment issues). Unfortunately we can't use the ItemPointer access
* macros, because they include Asserts insisting that ip_posid != 0.
*/
typedef ItemPointerData FSMPageData;
typedef BlockIdData IndexFSMPageData;
#define FSMPageGetPageNum(ptr) \
BlockIdGetBlockNumber(&(ptr)->ip_blkid)
#define FSMPageGetSpace(ptr) \
((Size) (ptr)->ip_posid)
#define FSMPageSetPageNum(ptr, pg) \
BlockIdSet(&(ptr)->ip_blkid, pg)
#define FSMPageSetSpace(ptr, sz) \
((ptr)->ip_posid = (OffsetNumber) (sz))
#define IndexFSMPageGetPageNum(ptr) \
BlockIdGetBlockNumber(ptr)
#define IndexFSMPageSetPageNum(ptr, pg) \
BlockIdSet(ptr, pg)
/*
* Shared free-space-map objects
*
* The per-relation objects are indexed by a hash table, and are also members
* of two linked lists: one ordered by recency of usage (most recent first),
* and the other ordered by physical location of the associated storage in
* the page-info arena.
*
* Each relation owns one or more chunks of per-page storage in the "arena".
* The chunks for each relation are always consecutive, so that it can treat
* its page storage as a simple array. We further insist that its page data
* be ordered by block number, so that binary search is possible.
*
* Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
* This assumes that all processes accessing the map will have the shared
* memory segment mapped at the same place in their address space.
*/
typedef struct FSMHeader FSMHeader;
typedef struct FSMRelation FSMRelation;
/* Header for whole map */
struct FSMHeader
{
FSMRelation *usageList; /* FSMRelations in usage-recency order */
FSMRelation *usageListTail; /* tail of usage-recency list */
FSMRelation *firstRel; /* FSMRelations in arena storage order */
FSMRelation *lastRel; /* tail of storage-order list */
int numRels; /* number of FSMRelations now in use */
double sumRequests; /* sum of requested chunks over all rels */
char *arena; /* arena for page-info storage */
int totalChunks; /* total size of arena, in chunks */
int usedChunks; /* # of chunks assigned */
/* NB: there are totalChunks - usedChunks free chunks at end of arena */
};
/*
* Per-relation struct --- this is an entry in the shared hash table.
* The hash key is the RelFileNode value (hence, we look at the physical
* relation ID, not the logical ID, which is appropriate).
*/
struct FSMRelation
{
RelFileNode key; /* hash key (must be first) */
FSMRelation *nextUsage; /* next rel in usage-recency order */
FSMRelation *priorUsage; /* prior rel in usage-recency order */
FSMRelation *nextPhysical; /* next rel in arena-storage order */
FSMRelation *priorPhysical; /* prior rel in arena-storage order */
bool isIndex; /* if true, we store only page numbers */
Size avgRequest; /* moving average of space requests */
BlockNumber interestingPages; /* # of pages with useful free space */
int firstChunk; /* chunk # of my first chunk in arena */
int storedPages; /* # of pages stored in arena */
int nextPage; /* index (from 0) to start next search at */
};
/* GUC variables */
extern PGDLLIMPORT int MaxFSMRelations;
extern PGDLLIMPORT int MaxFSMPages;
/*
* function prototypes
*/
extern void InitFreeSpaceMap(void);
extern Size FreeSpaceShmemSize(void);
extern FSMHeader *GetFreeSpaceMap(void);
extern BlockNumber GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded);
extern BlockNumber RecordAndGetPageWithFreeSpace(RelFileNode *rel,
							 BlockNumber oldPage,
							 Size oldSpaceAvail,
							 Size spaceNeeded);
extern Size GetAvgFSMRequestSize(RelFileNode *rel);
extern void RecordRelationFreeSpace(RelFileNode *rel,
						BlockNumber interestingPages,
						int nPages,
						FSMPageData *pageSpaces);
extern BlockNumber GetFreeIndexPage(RelFileNode *rel);
extern void RecordIndexFreeSpace(RelFileNode *rel,
					 BlockNumber interestingPages,
					 int nPages,
					 BlockNumber *pages);
extern void FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks);
extern void FreeSpaceMapForgetRel(RelFileNode *rel);
extern void FreeSpaceMapForgetDatabase(Oid dbid);
extern void PrintFreeSpaceMapStatistics(int elevel);
extern void DumpFreeSpaceMap(int code, Datum arg);
extern void LoadFreeSpaceMap(void);

#ifdef FREESPACE_DEBUG
extern void DumpFreeSpace(void);
#endif

#include "utils/rel.h"
#include "storage/bufpage.h"
#include "access/xlog.h"

/* prototypes for public functions in freespace.c */
extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
							  BlockNumber oldPage,
							  Size oldSpaceAvail,
							  Size spaceNeeded);
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
						Size spaceAvail);
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
extern void FreeSpaceMapVacuum(Relation rel);

/* WAL prototypes */
extern void fsm_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void fsm_redo(XLogRecPtr lsn, XLogRecord *record);

#endif   /* FREESPACE_H */
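A sketch of the intended calling pattern for the new heap-side API, not part of the commit (example_find_insert_page and the helper it calls are invented): ask for a block with enough room, verify it, and feed the corrected figure back through RecordAndGetPageWithFreeSpace() when the FSM hint turns out to be stale.

#include "postgres.h"
#include "storage/block.h"
#include "storage/freespace.h"

/* Hypothetical helper: how much room is actually left on the block. */
static Size example_actual_free_space(Relation rel, BlockNumber blkno);

static BlockNumber
example_find_insert_page(Relation rel, Size needed)
{
	BlockNumber blkno = GetPageWithFreeSpace(rel, needed);

	while (blkno != InvalidBlockNumber)
	{
		Size		avail = example_actual_free_space(rel, blkno);

		if (avail >= needed)
			return blkno;

		/* The FSM was out of date; record the truth and ask again. */
		blkno = RecordAndGetPageWithFreeSpace(rel, blkno, avail, needed);
	}

	return InvalidBlockNumber;	/* caller must extend the relation */
}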
/*-------------------------------------------------------------------------
*
 * fsm_internals.h
* internal functions for free space map
*
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/fsm_internals.h,v 1.1 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef FSM_INTERNALS_H
#define FSM_INTERNALS_H
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "lib/stringinfo.h"
/*
* Structure of a FSM page. See src/backend/storage/freespace/README for
* details.
*/
typedef struct
{
/*
* fsm_search_avail() tries to spread the load of multiple backends
* by returning different pages to different backends in a round-robin
* fashion. fp_next_slot points to the next slot to be returned
* (assuming there's enough space on it for the request). It's defined
* as an int, because it's updated without an exclusive lock. uint16
* would be more appropriate, but int is more likely to be atomically
* fetchable/storable.
*/
int fp_next_slot;
/*
 * fp_nodes contains the binary tree, stored in an array. The first
* NonLeafNodesPerPage elements are upper nodes, and the following
* LeafNodesPerPage elements are leaf nodes. Unused nodes are zero.
*/
uint8 fp_nodes[1];
} FSMPageData;
typedef FSMPageData *FSMPage;
/*
* Number of non-leaf and leaf nodes, and nodes in total, on an FSM page.
* These definitions are internal to fsmpage.c.
*/
#define NodesPerPage (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
offsetof(FSMPageData, fp_nodes))
#define NonLeafNodesPerPage (BLCKSZ / 2 - 1)
#define LeafNodesPerPage (NodesPerPage - NonLeafNodesPerPage)
/*
* Number of FSM "slots" on a FSM page. This is what should be used
* outside fsmpage.c.
*/
#define SlotsPerFSMPage LeafNodesPerPage
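/*
 * Worked example (not part of the patch, and assuming the default 8 kB
 * BLCKSZ, a 24-byte page header and no extra alignment padding):
 * NodesPerPage is 8192 - 24 - 4 = 8164, NonLeafNodesPerPage is 4095, and
 * LeafNodesPerPage (= SlotsPerFSMPage) is 4069.  The leaf node for slot s
 * is fp_nodes[NonLeafNodesPerPage + s], mirroring the addressing used in
 * fsmpage.c.
 */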
/* Prototypes for functions in fsmpage.c */
extern int fsm_search_avail(Buffer buf, uint8 min_cat, bool advancenext,
bool exclusive_lock_held);
extern uint8 fsm_get_avail(Page page, int slot);
extern uint8 fsm_get_max_avail(Page page);
extern bool fsm_set_avail(Page page, int slot, uint8 value);
extern bool fsm_truncate_avail(Page page, int nslots);
extern bool fsm_rebuild_page(Page page);
#endif /* FSM_INTERNALS_H */
/*-------------------------------------------------------------------------
*
* indexfsm.h
* POSTGRES free space map for quickly finding an unused page in index
*
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/indexfsm.h,v 1.1 2008/09/30 10:52:14 heikki Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef INDEXFSM_H_
#define INDEXFSM_H_
#include "utils/rel.h"
extern void InitIndexFreeSpaceMap(Relation rel);
extern BlockNumber GetFreeIndexPage(Relation rel);
extern void RecordFreeIndexPage(Relation rel, BlockNumber page);
extern void RecordUsedIndexPage(Relation rel, BlockNumber page);
extern void IndexFreeSpaceMapTruncate(Relation rel, BlockNumber nblocks);
#endif /* INDEXFSM_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.39 2008/06/19 21:32:56 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.40 2008/09/30 10:52:14 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -35,6 +35,10 @@ ...@@ -35,6 +35,10 @@
* by allowing values not listed in the enum declaration to be assigned. * by allowing values not listed in the enum declaration to be assigned.
* The extra value MaxDynamicLWLock is there to keep the compiler from * The extra value MaxDynamicLWLock is there to keep the compiler from
* deciding that the enum can be represented as char or short ... * deciding that the enum can be represented as char or short ...
*
 * If you remove a lock, please replace it with a placeholder, as was done
 * for FreeSpaceLock below. This retains the lock numbering, which is helpful for
* DTrace and other external debugging scripts.
*/ */
typedef enum LWLockId typedef enum LWLockId
{ {
...@@ -45,7 +49,7 @@ typedef enum LWLockId ...@@ -45,7 +49,7 @@ typedef enum LWLockId
ProcArrayLock, ProcArrayLock,
SInvalReadLock, SInvalReadLock,
SInvalWriteLock, SInvalWriteLock,
FreeSpaceLock, UnusedLock1, /* FreeSpaceLock used to be here */
WALInsertLock, WALInsertLock,
WALWriteLock, WALWriteLock,
ControlFileLock, ControlFileLock,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $ * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.17 2008/09/30 10:52:14 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,11 +23,12 @@ ...@@ -23,11 +23,12 @@
typedef enum ForkNumber typedef enum ForkNumber
{ {
InvalidForkNumber = -1, InvalidForkNumber = -1,
MAIN_FORKNUM = 0 MAIN_FORKNUM = 0,
/* NOTE: change NUM_FORKS below when you add new forks */ FSM_FORKNUM
/* NOTE: change MAX_FORKNUM below when you add new forks */
} ForkNumber; } ForkNumber;
#define MAX_FORKNUM MAIN_FORKNUM #define MAX_FORKNUM FSM_FORKNUM
/* /*
* RelFileNode must provide all that we need to know to physically access * RelFileNode must provide all that we need to know to physically access
......
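With the free space map stored in its own relation fork, callers reach it through the fork-aware smgr routines using FSM_FORKNUM, the same pattern InitIndexFreeSpaceMap() uses above. A minimal sketch, not part of the commit (example_fsm_size is an invented name):

#include "postgres.h"
#include "storage/smgr.h"
#include "utils/rel.h"

/* Illustrative only: how many blocks the relation's FSM fork has, if any. */
static BlockNumber
example_fsm_size(Relation rel)
{
	RelationOpenSmgr(rel);

	/* The fork may not exist yet, e.g. right after the relation is created. */
	if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
		return 0;

	return smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
}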
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.42 2008/09/10 18:09:20 alvherre Exp $ * $PostgreSQL: pgsql/src/include/utils/guc_tables.h,v 1.43 2008/09/30 10:52:14 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -49,7 +49,6 @@ enum config_group ...@@ -49,7 +49,6 @@ enum config_group
CONN_AUTH_SECURITY, CONN_AUTH_SECURITY,
RESOURCES, RESOURCES,
RESOURCES_MEM, RESOURCES_MEM,
RESOURCES_FSM,
RESOURCES_KERNEL, RESOURCES_KERNEL,
WAL, WAL,
WAL_SETTINGS, WAL_SETTINGS,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.107 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.108 2008/09/30 10:52:14 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -195,6 +195,9 @@ typedef struct RelationData ...@@ -195,6 +195,9 @@ typedef struct RelationData
List *rd_indpred; /* index predicate tree, if any */ List *rd_indpred; /* index predicate tree, if any */
void *rd_amcache; /* available for use by index AM */ void *rd_amcache; /* available for use by index AM */
/* Cached last-seen size of the FSM */
BlockNumber rd_fsm_nblocks_cache;
/* use "struct" here to avoid needing to include pgstat.h: */ /* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData; } RelationData;
......