Commit 73a076b0 authored by Peter Geoghegan's avatar Peter Geoghegan

Fix undercounting in VACUUM VERBOSE output.

The logic for determining how many nbtree pages in an index are deleted
pages sometimes undercounted pages.  Pages that were deleted by the
current VACUUM operation (as opposed to some previous VACUUM operation
whose deleted pages have yet to be reused) were sometimes overlooked.
The final count is exposed to users through VACUUM VERBOSE's "%u index
pages have been deleted" output.

btvacuumpage() avoided double-counting when _bt_pagedel() deleted more
than one page by assuming that only one page was deleted, and that the
additional deleted pages would get picked up during a future call to
btvacuumpage() by the same VACUUM operation.  _bt_pagedel() can
legitimately delete pages that the btvacuumscan() scan will not visit
again, though, so that assumption was slightly faulty.

Fix the accounting by teaching _bt_pagedel() about its caller's
requirements.  It now only reports on pages that it knows btvacuumscan()
won't visit again (including the current btvacuumpage() page), so
everything works out in the end.

This bug has been around forever.  Only backpatch to v11, though, to
keep _bt_pagedel() is sync on the branches that have today's bugfix
commit b0229f26.  Note that this commit changes the signature of
_bt_pagedel(), just like commit b0229f26.

Author: Peter Geoghegan
Reviewed-By: Masahiko Sawada
Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com
Backpatch: 11-
parent b0229f26
...@@ -38,8 +38,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf); ...@@ -38,8 +38,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
BTStack stack); BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
BlockNumber scanblkno,
bool *rightsib_empty, bool *rightsib_empty,
TransactionId *oldestBtpoXact); TransactionId *oldestBtpoXact,
uint32 *ndeleted);
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page,
OffsetNumber *deletable, int ndeletable); OffsetNumber *deletable, int ndeletable);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child, static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
...@@ -1489,7 +1491,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, ...@@ -1489,7 +1491,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
* *
* Returns the number of pages successfully deleted (zero if page cannot * Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages * be deleted now; could be more than one if parent or right sibling pages
* were deleted too). * were deleted too). Note that this does not include pages that we delete
* that the btvacuumscan scan has yet to reach; they'll get counted later
* instead.
* *
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is * Maintains *oldestBtpoXact for any pages that get deleted. Caller is
* responsible for maintaining *oldestBtpoXact in the case of pages that were * responsible for maintaining *oldestBtpoXact in the case of pages that were
...@@ -1499,15 +1503,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, ...@@ -1499,15 +1503,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
* carefully, it's better to run it in a temp context that can be reset * carefully, it's better to run it in a temp context that can be reset
* frequently. * frequently.
*/ */
int uint32
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
{ {
int ndeleted = 0; uint32 ndeleted = 0;
BlockNumber rightsib; BlockNumber rightsib;
bool rightsib_empty; bool rightsib_empty;
Page page; Page page;
BTPageOpaque opaque; BTPageOpaque opaque;
/*
* Save original leafbuf block number from caller. Only deleted blocks
* that are <= scanblkno get counted in ndeleted return value.
*/
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
/* /*
* "stack" is a search stack leading (approximately) to the target page. * "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid * It is initially NULL, but when iterating, we keep it to avoid
...@@ -1558,8 +1568,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) ...@@ -1558,8 +1568,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
if (P_ISDELETED(opaque)) if (P_ISDELETED(opaque))
ereport(LOG, ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED), (errcode(ERRCODE_INDEX_CORRUPTED),
errmsg_internal("found deleted block %u while following right link in index \"%s\"", errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
BufferGetBlockNumber(leafbuf), BufferGetBlockNumber(leafbuf),
scanblkno,
RelationGetRelationName(rel)))); RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf); _bt_relbuf(rel, leafbuf);
...@@ -1709,13 +1720,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) ...@@ -1709,13 +1720,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
while (P_ISHALFDEAD(opaque)) while (P_ISHALFDEAD(opaque))
{ {
/* Check for interrupts in _bt_unlink_halfdead_page */ /* Check for interrupts in _bt_unlink_halfdead_page */
if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty, if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
oldestBtpoXact)) &rightsib_empty, oldestBtpoXact,
&ndeleted))
{ {
/* _bt_unlink_halfdead_page failed, released buffer */ /* _bt_unlink_halfdead_page failed, released buffer */
return ndeleted; return ndeleted;
} }
ndeleted++;
} }
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque)); Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
...@@ -1974,8 +1985,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) ...@@ -1974,8 +1985,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
* to avoid having to reacquire a lock we already released). * to avoid having to reacquire a lock we already released).
*/ */
static bool static bool
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty, _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
TransactionId *oldestBtpoXact) bool *rightsib_empty, TransactionId *oldestBtpoXact,
uint32 *ndeleted)
{ {
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf); BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib; BlockNumber leafleftsib;
...@@ -2370,6 +2382,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty, ...@@ -2370,6 +2382,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact)) TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
*oldestBtpoXact = opaque->btpo.xact; *oldestBtpoXact = opaque->btpo.xact;
/*
* If btvacuumscan won't revisit this page in a future btvacuumpage call
* and count it as deleted then, we count it as deleted by current
* btvacuumpage call
*/
if (target <= scanblkno)
(*ndeleted)++;
/* /*
* Release the target, if it was not the leaf block. The leaf is always * Release the target, if it was not the leaf block. The leaf is always
* kept locked. * kept locked.
......
...@@ -1362,17 +1362,17 @@ restart: ...@@ -1362,17 +1362,17 @@ restart:
if (delete_now) if (delete_now)
{ {
MemoryContext oldcontext; MemoryContext oldcontext;
int ndel;
/* Run pagedel in a temp context to avoid memory leakage */ /* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset(vstate->pagedelcontext); MemoryContextReset(vstate->pagedelcontext);
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact); /*
* We trust the _bt_pagedel return value because it does not include
/* count only this page, else may double-count parent */ * any page that a future call here from btvacuumscan is expected to
if (ndel) * count. There will be no double-counting.
stats->pages_deleted++; */
stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */ /* pagedel released buffer, so we shouldn't */
......
...@@ -1080,7 +1080,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf, ...@@ -1080,7 +1080,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
extern void _bt_delitems_delete(Relation rel, Buffer buf, extern void _bt_delitems_delete(Relation rel, Buffer buf,
OffsetNumber *deletable, int ndeletable, OffsetNumber *deletable, int ndeletable,
Relation heapRel); Relation heapRel);
extern int _bt_pagedel(Relation rel, Buffer leafbuf, extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
TransactionId *oldestBtpoXact); TransactionId *oldestBtpoXact);
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment