Commit dd728826 authored by Robert Haas

Fix locking problem in _hash_squeezebucket() / _hash_freeovflpage().

A bucket squeeze operation needs to lock each page of the bucket
before releasing the prior page, but the previous coding fumbled the
locking when freeing an overflow page during a bucket squeeze
operation.  Commit 6d46f478
introduced this bug.

Amit Kapila, with help from Kuntal Ghosh and Dilip Kumar, after
an initial trouble report by Jeff Janes.  Reviewed by me.  I also
fixed a problem with a comment.
parent 668dbbec
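The rule described in the commit message is the classic lock-coupling ("crabbing") pattern for walking a linked chain of pages: take the lock on the next page before giving up the lock on the one already held, so no concurrent backend can slip in between and modify or free a page not yet reached. Below is a minimal, self-contained C sketch of that pattern; it is not PostgreSQL code — the Page struct, walk_chain_coupled(), and the pthread rwlocks are illustrative stand-ins for the real buffer-manager calls (_hash_getbuf_with_strategy, _hash_relbuf, and so on).

/*
 * Lock-coupling sketch: traverse a chain of pages while always holding
 * at least one lock, acquiring the next page's lock before releasing
 * the current one.  Purely illustrative; not PostgreSQL code.
 */
#include <pthread.h>
#include <stdio.h>

#define CHAIN_LEN 4

typedef struct Page
{
    pthread_rwlock_t lock;
    int         blkno;
    struct Page *next;          /* next page in the bucket chain */
} Page;

static void
walk_chain_coupled(Page *head)
{
    Page       *cur = head;

    pthread_rwlock_wrlock(&cur->lock);
    while (cur->next != NULL)
    {
        Page       *next = cur->next;

        /* correct order: lock the next page first ... */
        pthread_rwlock_wrlock(&next->lock);
        /* ... and only then release the page we already hold */
        pthread_rwlock_unlock(&cur->lock);
        printf("moved from block %d to block %d\n", cur->blkno, next->blkno);
        cur = next;
    }
    pthread_rwlock_unlock(&cur->lock);
}

int
main(void)
{
    Page        pages[CHAIN_LEN];

    for (int i = 0; i < CHAIN_LEN; i++)
    {
        pthread_rwlock_init(&pages[i].lock, NULL);
        pages[i].blkno = i + 1;
        pages[i].next = (i + 1 < CHAIN_LEN) ? &pages[i + 1] : NULL;
    }

    walk_chain_coupled(pages);

    for (int i = 0; i < CHAIN_LEN; i++)
        pthread_rwlock_destroy(&pages[i].lock);
    return 0;
}

Built with something like cc -std=c99 -pthread lockcouple.c (the file name is arbitrary), the walk always holds at least one lock while moving along the chain. That is the invariant the pre-fix code broke: as the comment removed in the diff below notes, the old _hash_freeovflpage released the lock on wbuf and _hash_squeezebucket retook it afterwards, leaving a window in which no lock on the chain was held.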
src/backend/access/hash/hashovfl.c

@@ -377,12 +377,11 @@ _hash_firstfreebit(uint32 map)
  * NB: caller must not hold lock on metapage, nor on page, that's next to
  * ovflbuf in the bucket chain.  We don't acquire the lock on page that's
  * prior to ovflbuf in chain if it is same as wbuf because the caller already
- * has a lock on same.  This function releases the lock on wbuf and caller
- * is responsible for releasing the pin on same.
+ * has a lock on same.
  */
 BlockNumber
 _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
-                   bool wbuf_dirty, BufferAccessStrategy bstrategy)
+                   BufferAccessStrategy bstrategy)
 {
     HashMetaPage metap;
     Buffer      metabuf;
@@ -447,24 +446,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
         Assert(prevopaque->hasho_bucket == bucket);
         prevopaque->hasho_nextblkno = nextblkno;
+        MarkBufferDirty(prevbuf);
         if (prevblkno != writeblkno)
-        {
-            MarkBufferDirty(prevbuf);
             _hash_relbuf(rel, prevbuf);
-        }
-        else
-        {
-            /* ensure to mark prevbuf as dirty */
-            wbuf_dirty = true;
-        }
     }
 
-    /* write and unlock the write buffer */
-    if (wbuf_dirty)
-        _hash_chgbufaccess(rel, wbuf, HASH_WRITE, HASH_NOLOCK);
-    else
-        _hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
-
     if (BlockNumberIsValid(nextblkno))
     {
         Buffer      nextbuf = _hash_getbuf_with_strategy(rel,
@@ -783,30 +768,28 @@ _hash_squeezebucket(Relation rel,
              * Tricky point here: if our read and write pages are adjacent in the
              * bucket chain, our write lock on wbuf will conflict with
              * _hash_freeovflpage's attempt to update the sibling links of the
-             * removed page.  In that case, we don't need to lock it again and we
-             * always release the lock on wbuf in _hash_freeovflpage and then
-             * retake it again here.  This will not only simplify the code, but is
-             * required to atomically log the changes which will be helpful when
-             * we write WAL for hash indexes.
+             * removed page.  In that case, we don't need to lock it again.
              */
             rblkno = ropaque->hasho_prevblkno;
             Assert(BlockNumberIsValid(rblkno));
 
             /* free this overflow page (releases rbuf) */
-            _hash_freeovflpage(rel, rbuf, wbuf, wbuf_dirty, bstrategy);
+            _hash_freeovflpage(rel, rbuf, wbuf, bstrategy);
+
+            if (wbuf_dirty)
+                MarkBufferDirty(wbuf);
 
             /* are we freeing the page adjacent to wbuf? */
             if (rblkno == wblkno)
             {
                 /* retain the pin on primary bucket page till end of bucket scan */
-                if (wblkno != bucket_blkno)
-                    _hash_dropbuf(rel, wbuf);
+                if (wblkno == bucket_blkno)
+                    _hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
+                else
+                    _hash_relbuf(rel, wbuf);
                 return;
             }
 
-            /* lock the overflow page being written, then get the previous one */
-            _hash_chgbufaccess(rel, wbuf, HASH_NOLOCK, HASH_WRITE);
-
             rbuf = _hash_getbuf_with_strategy(rel,
                                               rblkno,
                                               HASH_WRITE,
src/include/access/hash.h

@@ -314,7 +314,7 @@ extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
 /* hashovfl.c */
 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin);
 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
-                   bool wbuf_dirty, BufferAccessStrategy bstrategy);
+                   BufferAccessStrategy bstrategy);
 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
                BlockNumber blkno, ForkNumber forkNum);
 extern void _hash_squeezebucket(Relation rel,