Commit e89526d4 authored by Tom Lane's avatar Tom Lane

In B-tree page deletion, clean up properly after page deletion failure.

In _bt_unlink_halfdead_page(), we might fail to find an immediate left
sibling of the target page, perhaps because of corruption of the page
sibling links.  The code intends to cope with this by just abandoning
the deletion attempt; but what actually happens is that it fails outright
due to releasing the same buffer lock twice.  (And error recovery masks
a second problem, which is possible leakage of a pin on another page.)
Seems to have been introduced by careless refactoring in commit efada2b8.
Since there are multiple cases to consider, let's make releasing the buffer
lock in the failure case the responsibility of _bt_unlink_halfdead_page()
not its caller.

Also, avoid fetching the leaf page's left-link again after we've dropped
lock on the page.  This is probably harmless, but it's not exactly good
coding practice.

Per report from Kyotaro Horiguchi.  Back-patch to 9.4 where the faulty code
was introduced.

Discussion: <20160803.173116.111915228.horiguchi.kyotaro@lab.ntt.co.jp>
parent 69dc5ae4
...@@ -1284,7 +1284,7 @@ _bt_pagedel(Relation rel, Buffer buf) ...@@ -1284,7 +1284,7 @@ _bt_pagedel(Relation rel, Buffer buf)
{ {
if (!_bt_unlink_halfdead_page(rel, buf, &rightsib_empty)) if (!_bt_unlink_halfdead_page(rel, buf, &rightsib_empty))
{ {
_bt_relbuf(rel, buf); /* _bt_unlink_halfdead_page already released buffer */
return ndeleted; return ndeleted;
} }
ndeleted++; ndeleted++;
...@@ -1501,6 +1501,11 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) ...@@ -1501,6 +1501,11 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
* Returns 'false' if the page could not be unlinked (shouldn't happen). * Returns 'false' if the page could not be unlinked (shouldn't happen).
* If the (new) right sibling of the page is empty, *rightsib_empty is set * If the (new) right sibling of the page is empty, *rightsib_empty is set
* to true. * to true.
*
* Must hold pin and lock on leafbuf at entry (read or write doesn't matter).
* On success exit, we'll be holding pin and write lock. On failure exit,
* we'll release both pin and lock before returning (we define it that way
* to avoid having to reacquire a lock we already released).
*/ */
static bool static bool
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
...@@ -1543,11 +1548,13 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) ...@@ -1543,11 +1548,13 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
/* /*
* If the leaf page still has a parent pointing to it (or a chain of * If the leaf page still has a parent pointing to it (or a chain of
* parents), we don't unlink the leaf page yet, but the topmost remaining * parents), we don't unlink the leaf page yet, but the topmost remaining
* parent in the branch. * parent in the branch. Set 'target' and 'buf' to reference the page
* actually being unlinked.
*/ */
if (ItemPointerIsValid(leafhikey)) if (ItemPointerIsValid(leafhikey))
{ {
target = ItemPointerGetBlockNumber(leafhikey); target = ItemPointerGetBlockNumber(leafhikey);
Assert(target != leafblkno);
/* fetch the block number of the topmost parent's left sibling */ /* fetch the block number of the topmost parent's left sibling */
buf = _bt_getbuf(rel, target, BT_READ); buf = _bt_getbuf(rel, target, BT_READ);
...@@ -1567,7 +1574,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) ...@@ -1567,7 +1574,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
target = leafblkno; target = leafblkno;
buf = leafbuf; buf = leafbuf;
leftsib = opaque->btpo_prev; leftsib = leafleftsib;
targetlevel = 0; targetlevel = 0;
} }
...@@ -1598,8 +1605,20 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) ...@@ -1598,8 +1605,20 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
_bt_relbuf(rel, lbuf); _bt_relbuf(rel, lbuf);
if (leftsib == P_NONE) if (leftsib == P_NONE)
{ {
elog(LOG, "no left sibling (concurrent deletion?) in \"%s\"", elog(LOG, "no left sibling (concurrent deletion?) of block %u in \"%s\"",
target,
RelationGetRelationName(rel)); RelationGetRelationName(rel));
if (target != leafblkno)
{
/* we have only a pin on target, but pin+lock on leafbuf */
ReleaseBuffer(buf);
_bt_relbuf(rel, leafbuf);
}
else
{
/* we have only a pin on leafbuf */
ReleaseBuffer(leafbuf);
}
return false; return false;
} }
lbuf = _bt_getbuf(rel, leftsib, BT_WRITE); lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment