Commit 71d05a2c authored by Robert Haas

pg_visibility: Add pg_truncate_visibility_map function.

This requires some core changes as well so that we can properly
WAL-log the truncation.  Specifically, it changes the format of the
XLOG_SMGR_TRUNCATE WAL record, so bump XLOG_PAGE_MAGIC.

Patch by me, reviewed but not fully endorsed by Andres Freund.
parent 54f5c515
...@@ -13,5 +13,12 @@ RETURNS SETOF tid ...@@ -13,5 +13,12 @@ RETURNS SETOF tid
AS 'MODULE_PATHNAME', 'pg_check_visible' AS 'MODULE_PATHNAME', 'pg_check_visible'
LANGUAGE C STRICT; LANGUAGE C STRICT;
-- Truncate the visibility map fork of the given relation.
-- Implemented in C by pg_truncate_visibility_map in this module; returns
-- nothing.  EXECUTE is revoked from PUBLIC further down in this script.
CREATE FUNCTION pg_truncate_visibility_map(regclass)
RETURNS void
AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map'
LANGUAGE C STRICT
PARALLEL UNSAFE; -- let's not make this any more dangerous
REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC;
...@@ -57,6 +57,13 @@ RETURNS SETOF tid ...@@ -57,6 +57,13 @@ RETURNS SETOF tid
AS 'MODULE_PATHNAME', 'pg_check_visible' AS 'MODULE_PATHNAME', 'pg_check_visible'
LANGUAGE C STRICT; LANGUAGE C STRICT;
-- Truncate the visibility map fork.
-- Takes the relation as a regclass and returns nothing; the work is done by
-- the C function pg_truncate_visibility_map in this module.  EXECUTE is
-- revoked from PUBLIC further down in this script.
CREATE FUNCTION pg_truncate_visibility_map(regclass)
RETURNS void
AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map'
LANGUAGE C STRICT
PARALLEL UNSAFE; -- let's not make this any more dangerous
-- Don't want these to be available to public. -- Don't want these to be available to public.
REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC;
...@@ -65,3 +72,4 @@ REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC; ...@@ -65,3 +72,4 @@ REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC;
...@@ -11,10 +11,12 @@ ...@@ -11,10 +11,12 @@
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/visibilitymap.h" #include "access/visibilitymap.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h" #include "funcapi.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/procarray.h" #include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/rel.h" #include "utils/rel.h"
PG_MODULE_MAGIC; PG_MODULE_MAGIC;
...@@ -40,6 +42,7 @@ PG_FUNCTION_INFO_V1(pg_visibility_rel); ...@@ -40,6 +42,7 @@ PG_FUNCTION_INFO_V1(pg_visibility_rel);
PG_FUNCTION_INFO_V1(pg_visibility_map_summary); PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
PG_FUNCTION_INFO_V1(pg_check_frozen); PG_FUNCTION_INFO_V1(pg_check_frozen);
PG_FUNCTION_INFO_V1(pg_check_visible); PG_FUNCTION_INFO_V1(pg_check_visible);
PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd); static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
static vbits *collect_visibility_data(Oid relid, bool include_pd); static vbits *collect_visibility_data(Oid relid, bool include_pd);
...@@ -335,6 +338,75 @@ pg_check_visible(PG_FUNCTION_ARGS) ...@@ -335,6 +338,75 @@ pg_check_visible(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx); SRF_RETURN_DONE(funcctx);
} }
/*
 * pg_truncate_visibility_map
 *		Truncate the visibility map fork of a relation to zero blocks.
 *
 * Should a bug in the visibility map code ever make it necessary to rebuild
 * the VM, this gives users a cleaner way to do so than stopping the server
 * and deleting files by hand.
 *
 * Argument 0 is the OID of the target relation.  Only tables, materialized
 * views and TOAST tables are accepted; anything else raises an error.
 *
 * The logic is a stripped-down variant of RelationTruncate.
 */
Datum
pg_truncate_visibility_map(PG_FUNCTION_ARGS)
{
	Oid			target_oid = PG_GETARG_OID(0);
	Relation	target;

	target = relation_open(target_oid, AccessExclusiveLock);

	/* Reject every relkind other than table, matview, or TOAST table. */
	if (!(target->rd_rel->relkind == RELKIND_RELATION ||
		  target->rd_rel->relkind == RELKIND_MATVIEW ||
		  target->rd_rel->relkind == RELKIND_TOASTVALUE))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table, materialized view, or TOAST table",
						RelationGetRelationName(target))));

	/*
	 * Reset the smgr's VM block count to InvalidBlockNumber, then truncate
	 * the visibility map to zero blocks.
	 */
	RelationOpenSmgr(target);
	target->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;

	visibilitymap_truncate(target, 0);

	/* Emit a VM-only truncate record if the relation is WAL-logged. */
	if (RelationNeedsWAL(target))
	{
		xl_smgr_truncate trunc_rec;

		trunc_rec.flags = SMGR_TRUNCATE_VM;
		trunc_rec.rnode = target->rd_node;
		trunc_rec.blkno = 0;

		XLogBeginInsert();
		XLogRegisterData((char *) &trunc_rec, sizeof(trunc_rec));
		XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
	}

	/*
	 * Drop the lock immediately rather than holding it until commit.
	 *
	 * Releasing before commit would be unsafe if this truncation queued any
	 * transactional invalidation messages: between the release here and the
	 * point at which we send those messages at commit, other backends could
	 * lock the relation without having processed them.  At present, though,
	 * the only invalidation involved is a non-transactional smgr
	 * invalidation, which visibilitymap_truncate has already posted to
	 * shared memory.  Hence no race should exist.
	 *
	 * Early release is desirable because someone may need to wipe many
	 * visibility map forks in one go.  If each lock had to be kept until
	 * commit, the transaction doing so would pile up AccessExclusiveLocks on
	 * all of those relations simultaneously, which is undesirable.  If this
	 * proves unsafe after all, however, we may have no alternative...
	 */
	relation_close(target, AccessExclusiveLock);

	/* The SQL function is declared to return void. */
	PG_RETURN_VOID();
}
/* /*
* Helper function to construct whichever TupleDesc we need for a particular * Helper function to construct whichever TupleDesc we need for a particular
* call. * call.
......
...@@ -9,14 +9,16 @@ ...@@ -9,14 +9,16 @@
<para> <para>
The <filename>pg_visibility</> module provides a means for examining the The <filename>pg_visibility</> module provides a means for examining the
visibility map (VM) and page-level visibility information. visibility map (VM) and page-level visibility information. It also
provides functions to check the integrity of the visibility map and to
force it to be rebuilt.
</para> </para>
<para> <para>
These routines return information about three different bits. The Three different bits are used to store information about page-level
all-visible bit in the visibility map indicates that every tuple on visibility. The all-visible bit in the visibility map indicates that every
a given page of a relation is visible to every current transaction. The tuple on a given page of a relation is visible to every current transaction.
all-frozen bit in the visibility map indicates that every tuple on the The all-frozen bit in the visibility map indicates that every tuple on the
page is frozen; that is, no future vacuum will need to modify the page page is frozen; that is, no future vacuum will need to modify the page
until such time as a tuple is inserted, updated, deleted, or locked on until such time as a tuple is inserted, updated, deleted, or locked on
that page. The page-level <literal>PD_ALL_VISIBLE</literal> bit has the that page. The page-level <literal>PD_ALL_VISIBLE</literal> bit has the
...@@ -25,7 +27,8 @@ ...@@ -25,7 +27,8 @@
will normally agree, but the page-level bit can sometimes be set while the will normally agree, but the page-level bit can sometimes be set while the
visibility map bit is clear after a crash recovery; or they can disagree visibility map bit is clear after a crash recovery; or they can disagree
because of a change which occurs after <literal>pg_visibility</> examines because of a change which occurs after <literal>pg_visibility</> examines
the visibility map and before it examines the data page. the visibility map and before it examines the data page. Any event which
causes data corruption can also cause these bits to disagree.
</para> </para>
<para> <para>
...@@ -118,6 +121,21 @@ ...@@ -118,6 +121,21 @@
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><function>pg_truncate_visibility_map(regclass) returns void</function></term>
<listitem>
<para>
Truncates the visibility map for the given relation. This function
is only expected to be useful if you suspect that the visibility map
for the indicated relation is corrupt and wish to rebuild it. The first
<command>VACUUM</> executed on the given relation after this function
is executed will scan every page in the relation and rebuild the
visibility map.
</para>
</listitem>
</varlistentry>
</variablelist> </variablelist>
<para> <para>
......
...@@ -37,7 +37,8 @@ smgr_desc(StringInfo buf, XLogReaderState *record) ...@@ -37,7 +37,8 @@ smgr_desc(StringInfo buf, XLogReaderState *record)
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec;
char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM);
appendStringInfo(buf, "%s to %u blocks", path, xlrec->blkno); appendStringInfo(buf, "%s to %u blocks flags %d", path,
xlrec->blkno, xlrec->flags);
pfree(path); pfree(path);
} }
} }
......
...@@ -268,6 +268,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) ...@@ -268,6 +268,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
xlrec.blkno = nblocks; xlrec.blkno = nblocks;
xlrec.rnode = rel->rd_node; xlrec.rnode = rel->rd_node;
xlrec.flags = SMGR_TRUNCATE_ALL;
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xlrec)); XLogRegisterData((char *) &xlrec, sizeof(xlrec));
...@@ -522,17 +523,22 @@ smgr_redo(XLogReaderState *record) ...@@ -522,17 +523,22 @@ smgr_redo(XLogReaderState *record)
*/ */
XLogFlush(lsn); XLogFlush(lsn);
if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
{
smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
/* Also tell xlogutils.c about it */ /* Also tell xlogutils.c about it */
XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
}
/* Truncate FSM and VM too */ /* Truncate FSM and VM too */
rel = CreateFakeRelcacheEntry(xlrec->rnode); rel = CreateFakeRelcacheEntry(xlrec->rnode);
if (smgrexists(reln, FSM_FORKNUM)) if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
smgrexists(reln, FSM_FORKNUM))
FreeSpaceMapTruncateRel(rel, xlrec->blkno); FreeSpaceMapTruncateRel(rel, xlrec->blkno);
if (smgrexists(reln, VISIBILITYMAP_FORKNUM)) if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
smgrexists(reln, VISIBILITYMAP_FORKNUM))
visibilitymap_truncate(rel, xlrec->blkno); visibilitymap_truncate(rel, xlrec->blkno);
FreeFakeRelcacheEntry(rel); FreeFakeRelcacheEntry(rel);
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD091 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD092 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
......
...@@ -36,10 +36,18 @@ typedef struct xl_smgr_create ...@@ -36,10 +36,18 @@ typedef struct xl_smgr_create
ForkNumber forkNum; ForkNumber forkNum;
} xl_smgr_create; } xl_smgr_create;
/*
 * Flag bits for xl_smgr_truncate.flags.  Each bit selects one relation fork
 * that replay of a truncate record should truncate.
 */
#define SMGR_TRUNCATE_HEAP		(1 << 0)	/* main fork */
#define SMGR_TRUNCATE_VM		(1 << 1)	/* visibility map fork */
#define SMGR_TRUNCATE_FSM		(1 << 2)	/* free space map fork */
/* All three forks at once. */
#define SMGR_TRUNCATE_ALL \
	(SMGR_TRUNCATE_HEAP | SMGR_TRUNCATE_VM | SMGR_TRUNCATE_FSM)
typedef struct xl_smgr_truncate typedef struct xl_smgr_truncate
{ {
BlockNumber blkno; BlockNumber blkno;
RelFileNode rnode; RelFileNode rnode;
int flags;
} xl_smgr_truncate; } xl_smgr_truncate;
extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment