Commit 6d05086c authored by Fujii Masao's avatar Fujii Masao

Speedup truncations of relation forks.

When a relation is truncated, shared_buffers needs to be scanned
so that any buffers for the relation forks are invalidated in it.
Previously, shared_buffers was scanned for each relation forks, i.e.,
MAIN, FSM and VM, when VACUUM truncated off any empty pages
at the end of relation or TRUNCATE truncated the relation in place.
Since shared_buffers needed to be scanned multiple times,
it could take a long time to finish those commands especially
when shared_buffers was large.

This commit changes the logic so that shared_buffers is scanned only
one time for those three relation forks.

Author: Kirk Jamison
Reviewed-by: Masahiko Sawada, Thomas Munro, Alvaro Herrera, Takayuki Tsunakawa and Fujii Masao
Discussion: https://postgr.es/m/D09B13F772D2274BB348A310EE3027C64E2067@g01jpexmbkw24
parent 2e5c83ac
......@@ -383,6 +383,8 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
Relation rel;
ForkNumber fork;
BlockNumber block;
rel = relation_open(relid, AccessExclusiveLock);
......@@ -392,7 +394,12 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
RelationOpenSmgr(rel);
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
visibilitymap_truncate(rel, 0);
block = visibilitymap_prepare_truncate(rel, 0);
if (BlockNumberIsValid(block))
{
fork = VISIBILITYMAP_FORKNUM;
smgrtruncate(rel->rd_smgr, &fork, 1, &block);
}
if (RelationNeedsWAL(rel))
{
......@@ -418,7 +425,7 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
* here and when we sent the messages at our eventual commit. However,
* we're currently only sending a non-transactional smgr invalidation,
* which will have been posted to shared memory immediately from within
* visibilitymap_truncate. Therefore, there should be no race here.
* smgr_truncate. Therefore, there should be no race here.
*
* The reason why it's desirable to release the lock early here is because
* of the possibility that someone will need to use this to blow away many
......
......@@ -17,7 +17,8 @@
* visibilitymap_set - set a bit in a previously pinned page
* visibilitymap_get_status - get status of bits
* visibilitymap_count - count number of bits set in visibility map
* visibilitymap_truncate - truncate the visibility map
* visibilitymap_prepare_truncate -
* prepare for truncation of the visibility map
*
* NOTES
*
......@@ -430,16 +431,18 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
}
/*
* visibilitymap_truncate - truncate the visibility map
*
* The caller must hold AccessExclusiveLock on the relation, to ensure that
* other backends receive the smgr invalidation event that this function sends
* before they access the VM again.
* visibilitymap_prepare_truncate -
* prepare for truncation of the visibility map
*
* nheapblocks is the new size of the heap.
*
* Return the number of blocks of new visibility map.
* If it's InvalidBlockNumber, there is nothing to truncate;
* otherwise the caller is responsible for calling smgrtruncate()
* to truncate the visibility map pages.
*/
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
BlockNumber
visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
{
BlockNumber newnblocks;
......@@ -459,7 +462,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
* nothing to truncate.
*/
if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
return;
return InvalidBlockNumber;
/*
* Unless the new size is exactly at a visibility map page boundary, the
......@@ -480,7 +483,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
if (!BufferIsValid(mapBuffer))
{
/* nothing to do, the file was already smaller */
return;
return InvalidBlockNumber;
}
page = BufferGetPage(mapBuffer);
......@@ -528,20 +531,10 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
{
/* nothing to do, the file was already smaller than requested size */
return;
return InvalidBlockNumber;
}
/* Truncate the unused VM pages, and send smgr inval message */
smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);
/*
* We might as well update the local smgr_vm_nblocks setting. smgrtruncate
* sent an smgr cache inval message, which will cause other backends to
* invalidate their copy of smgr_vm_nblocks, and this one too at the next
* command boundary. But this ensures it isn't outright wrong until then.
*/
if (rel->rd_smgr)
rel->rd_smgr->smgr_vm_nblocks = newnblocks;
return newnblocks;
}
/*
......
......@@ -231,6 +231,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
{
bool fsm;
bool vm;
bool need_fsm_vacuum = false;
ForkNumber forks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
int nforks = 0;
/* Open it at the smgr level if not already done */
RelationOpenSmgr(rel);
......@@ -242,15 +246,35 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
/* Truncate the FSM first if it exists */
/* Prepare for truncation of MAIN fork of the relation */
forks[nforks] = MAIN_FORKNUM;
blocks[nforks] = nblocks;
nforks++;
/* Prepare for truncation of the FSM if it exists */
fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
if (fsm)
FreeSpaceMapTruncateRel(rel, nblocks);
{
blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
nforks++;
need_fsm_vacuum = true;
}
}
/* Truncate the visibility map too if it exists. */
/* Prepare for truncation of the visibility map too if it exists */
vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
if (vm)
visibilitymap_truncate(rel, nblocks);
{
blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
nforks++;
}
}
/*
* We WAL-log the truncation before actually truncating, which means
......@@ -290,8 +314,16 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
XLogFlush(lsn);
}
/* Do the real work */
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
/* Do the real work to truncate relation forks */
smgrtruncate(rel->rd_smgr, forks, nforks, blocks);
/*
* Update upper-level FSM pages to account for the truncation.
* This is important because the just-truncated pages were likely
* marked as all-free, and would be preferentially selected.
*/
if (need_fsm_vacuum)
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}
/*
......@@ -588,6 +620,10 @@ smgr_redo(XLogReaderState *record)
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
SMgrRelation reln;
Relation rel;
ForkNumber forks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
int nforks = 0;
bool need_fsm_vacuum = false;
reln = smgropen(xlrec->rnode, InvalidBackendId);
......@@ -616,23 +652,54 @@ smgr_redo(XLogReaderState *record)
*/
XLogFlush(lsn);
/* Prepare for truncation of MAIN fork */
if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
{
smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
forks[nforks] = MAIN_FORKNUM;
blocks[nforks] = xlrec->blkno;
nforks++;
/* Also tell xlogutils.c about it */
XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
}
/* Truncate FSM and VM too */
/* Prepare for truncation of FSM and VM too */
rel = CreateFakeRelcacheEntry(xlrec->rnode);
if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
smgrexists(reln, FSM_FORKNUM))
FreeSpaceMapTruncateRel(rel, xlrec->blkno);
{
blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
nforks++;
need_fsm_vacuum = true;
}
}
if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
smgrexists(reln, VISIBILITYMAP_FORKNUM))
visibilitymap_truncate(rel, xlrec->blkno);
{
blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
nforks++;
}
}
/* Do the real work to truncate relation forks */
if (nforks > 0)
smgrtruncate(reln, forks, nforks, blocks);
/*
* Update upper-level FSM pages to account for the truncation.
* This is important because the just-truncated pages were likely
* marked as all-free, and would be preferentially selected.
*/
if (need_fsm_vacuum)
FreeSpaceMapVacuumRange(rel, xlrec->blkno,
InvalidBlockNumber);
FreeFakeRelcacheEntry(rel);
}
......
......@@ -2901,7 +2901,7 @@ BufferGetLSNAtomic(Buffer buffer)
* DropRelFileNodeBuffers
*
* This function removes from the buffer pool all the pages of the
* specified relation fork that have block numbers >= firstDelBlock.
* specified relation forks that have block numbers >= firstDelBlock.
* (In particular, with firstDelBlock = 0, all pages are removed.)
* Dirty pages are simply dropped, without bothering to write them
* out first. Therefore, this is NOT rollback-able, and so should be
......@@ -2924,16 +2924,21 @@ BufferGetLSNAtomic(Buffer buffer)
* --------------------------------------------------------------------
*/
void
DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
BlockNumber firstDelBlock)
DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
int nforks, BlockNumber *firstDelBlock)
{
int i;
int j;
/* If it's a local relation, it's localbuf.c's problem. */
if (RelFileNodeBackendIsTemp(rnode))
{
if (rnode.backend == MyBackendId)
DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock);
{
for (j = 0; j < nforks; j++)
DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
firstDelBlock[j]);
}
return;
}
......@@ -2962,11 +2967,18 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
continue;
buf_state = LockBufHdr(bufHdr);
for (j = 0; j < nforks; j++)
{
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
bufHdr->tag.forkNum == forkNum[j] &&
bufHdr->tag.blockNum >= firstDelBlock[j])
{
InvalidateBuffer(bufHdr); /* releases spinlock */
else
break;
}
}
if (j >= nforks)
UnlockBufHdr(bufHdr, buf_state);
}
}
......
......@@ -247,16 +247,18 @@ GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
}
/*
* FreeSpaceMapTruncateRel - adjust for truncation of a relation.
*
* The caller must hold AccessExclusiveLock on the relation, to ensure that
* other backends receive the smgr invalidation event that this function sends
* before they access the FSM again.
* FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
*
* nblocks is the new size of the heap.
*
* Return the number of blocks of new FSM.
* If it's InvalidBlockNumber, there is nothing to truncate;
* otherwise the caller is responsible for calling smgrtruncate()
* to truncate the FSM pages, and FreeSpaceMapVacuumRange()
* to update upper-level pages in the FSM.
*/
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
BlockNumber
FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
{
BlockNumber new_nfsmblocks;
FSMAddress first_removed_address;
......@@ -270,7 +272,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
* truncate.
*/
if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
return;
return InvalidBlockNumber;
/* Get the location in the FSM of the first removed heap block */
first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
......@@ -285,7 +287,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
buf = fsm_readbuf(rel, first_removed_address, false);
if (!BufferIsValid(buf))
return; /* nothing to do; the FSM was already smaller */
return InvalidBlockNumber; /* nothing to do; the FSM was already smaller */
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
/* NO EREPORT(ERROR) from here till changes are logged */
......@@ -315,28 +317,10 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
return; /* nothing to do; the FSM was already smaller */
return InvalidBlockNumber; /* nothing to do; the FSM was already smaller */
}
/* Truncate the unused FSM pages, and send smgr inval message */
smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);
/*
* We might as well update the local smgr_fsm_nblocks setting.
* smgrtruncate sent an smgr cache inval message, which will cause other
* backends to invalidate their copy of smgr_fsm_nblocks, and this one too
* at the next command boundary. But this ensures it isn't outright wrong
* until then.
*/
if (rel->rd_smgr)
rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
/*
* Update upper-level FSM pages to account for the truncation. This is
* important because the just-truncated pages were likely marked as
* all-free, and would be preferentially selected.
*/
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
return new_nfsmblocks;
}
/*
......
......@@ -469,6 +469,7 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
pfree(rnodes);
}
/*
* smgrextend() -- Add a new block to a file.
*
......@@ -557,19 +558,25 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum)
}
/*
* smgrtruncate() -- Truncate supplied relation to the specified number
* of blocks
* smgrtruncate() -- Truncate the given forks of supplied relation to
* each specified numbers of blocks
*
* The truncation is done immediately, so this can't be rolled back.
*
* The caller must hold AccessExclusiveLock on the relation, to ensure that
* other backends receive the smgr invalidation event that this function sends
* before they access any forks of the relation again.
*/
void
smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
{
int i;
/*
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
* just drop them without bothering to write the contents.
*/
DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks);
DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
/*
* Send a shared-inval message to force other backends to close any smgr
......@@ -583,10 +590,24 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
*/
CacheInvalidateSmgr(reln->smgr_rnode);
/* Do the truncation */
for (i = 0; i < nforks; i++)
{
smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
/*
* Do the truncation.
*/
smgrsw[reln->smgr_which].smgr_truncate(reln, forknum, nblocks);
* We might as well update the local smgr_fsm_nblocks and
* smgr_vm_nblocks settings. The smgr cache inval message that
* this function sent will cause other backends to invalidate
* their copies of smgr_fsm_nblocks and smgr_vm_nblocks,
* and these ones too at the next command boundary.
* But these ensure they aren't outright wrong until then.
*/
if (forknum[i] == FSM_FORKNUM)
reln->smgr_fsm_nblocks = nblocks[i];
if (forknum[i] == VISIBILITYMAP_FORKNUM)
reln->smgr_vm_nblocks = nblocks[i];
}
}
/*
......
......@@ -44,6 +44,7 @@ extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
uint8 flags);
extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
extern BlockNumber visibilitymap_prepare_truncate(Relation rel,
BlockNumber nheapblocks);
#endif /* VISIBILITYMAP_H */
......@@ -190,8 +190,8 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
extern void FlushOneBuffer(Buffer buffer);
extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
ForkNumber forkNum, BlockNumber firstDelBlock);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
int nforks, BlockNumber *firstDelBlock);
extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
extern void DropDatabaseBuffers(Oid dbid);
......
......@@ -30,7 +30,8 @@ extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
Size spaceAvail);
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
extern BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel,
BlockNumber nblocks);
extern void FreeSpaceMapVacuum(Relation rel);
extern void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start,
BlockNumber end);
......
......@@ -101,8 +101,8 @@ extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
extern void smgrwriteback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum,
int nforks, BlockNumber *nblocks);
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void AtEOXact_SMgr(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment