Commit a8b8f4db authored by Tom Lane

Clean up WAL/buffer interactions as per my recent proposal. Get rid of the
misleadingly-named WriteBuffer routine, and instead require routines that
change buffer pages to call MarkBufferDirty (which does exactly what it says).
We also require that they do so before calling XLogInsert; this takes care of
the synchronization requirement documented in SyncOneBuffer.  Note that
because bufmgr takes the buffer content lock (in shared mode) while writing
out any buffer, it doesn't matter whether MarkBufferDirty is executed before
the buffer content change is complete, so long as the content change is
completed before releasing exclusive lock on the buffer.  So it's OK to set
the dirtybit before we fill in the LSN.
This eliminates the former kluge of needing to set the dirtybit in LockBuffer.
Aside from making the code more transparent, we can also add some new
debugging assertions, in particular that the caller of MarkBufferDirty must
hold the buffer content lock, not merely a pin.
parent 89395bfa
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.130 2006/03/30 23:03:09 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.131 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -111,6 +111,9 @@ gistbuild(PG_FUNCTION_ARGS)
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
if (!index->rd_istemp)
{
XLogRecPtr recptr;
......@@ -127,8 +130,8 @@ gistbuild(PG_FUNCTION_ARGS)
}
else
PageSetLSN(page, XLogRecPtrForTemp);
LockBuffer(buffer, GIST_UNLOCK);
WriteBuffer(buffer);
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
......@@ -345,6 +348,15 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
newitup = gistSplit(state->r, state->stack->buffer, itvec, &tlen, &dist, giststate);
/*
* must mark buffers dirty before XLogInsert, even though we'll
* still be changing their opaque fields below
*/
for (ptr = dist; ptr; ptr = ptr->next)
{
MarkBufferDirty(ptr->buffer);
}
if (!state->r->rd_istemp)
{
XLogRecPtr recptr;
......@@ -354,21 +366,17 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
is_leaf, &(state->key), dist);
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr = ptr->next;
}
}
else
{
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr = ptr->next;
}
}
......@@ -379,17 +387,14 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
state->needInsertComplete = false;
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
Page page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
ptr->next->block.blkno : InvalidBlockNumber;
GistPageGetOpaque(page)->nsn = PageGetLSN(page);
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr = ptr->next;
UnlockReleaseBuffer(ptr->buffer);
}
}
else
......@@ -430,11 +435,9 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
opaque->rightlink = ourpage->next->block.blkno;
/*
* fills and write all new pages. They isn't linked into tree yet
* fill and release all new pages. They isn't linked into tree yet
*/
ptr = ourpage->next;
while (ptr)
for (ptr = ourpage->next; ptr; ptr = ptr->next)
{
page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
......@@ -443,12 +446,8 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
GistPageGetOpaque(page)->nsn = (ptr->next) ?
opaque->nsn : oldnsn;
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr = ptr->next;
UnlockReleaseBuffer(ptr->buffer);
}
WriteNoReleaseBuffer(state->stack->buffer);
}
END_CRIT_SECTION();
......@@ -460,6 +459,8 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);
MarkBufferDirty(state->stack->buffer);
oldlsn = PageGetLSN(state->stack->page);
if (!state->r->rd_istemp)
{
......@@ -489,7 +490,6 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
if (state->stack->blkno == GIST_ROOT_BLKNO)
state->needInsertComplete = false;
WriteNoReleaseBuffer(state->stack->buffer);
END_CRIT_SECTION();
......@@ -561,8 +561,7 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
* caused split non-root page is detected, go up to parent to
* choose best child
*/
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
UnlockReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent;
continue;
}
......@@ -630,8 +629,7 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
*/
/* forget buffer */
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
UnlockReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent;
continue;
......@@ -681,8 +679,7 @@ gistFindPath(Relation r, BlockNumber child)
if (GistPageIsLeaf(page))
{
/* we can safety go away, follows only leaf pages */
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return NULL;
}
......@@ -735,8 +732,7 @@ gistFindPath(Relation r, BlockNumber child)
ptr = ptr->parent;
}
top->childoffnum = i;
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return top;
}
else
......@@ -753,8 +749,7 @@ gistFindPath(Relation r, BlockNumber child)
}
}
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
top = top->next;
}
......@@ -801,8 +796,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child)
}
parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
LockBuffer(parent->buffer, GIST_UNLOCK);
ReleaseBuffer(parent->buffer);
UnlockReleaseBuffer(parent->buffer);
if (parent->blkno == InvalidBlockNumber)
/*
......@@ -881,8 +875,7 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
is_splitted = gistplacetopage(state, giststate);
/* parent locked above, so release child buffer */
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
UnlockReleaseBuffer(state->stack->buffer);
/* pop parent page from stack */
state->stack = state->stack->parent;
......@@ -1182,6 +1175,9 @@ gistSplit(Relation r,
return newtup;
}
/*
* buffer must be pinned and locked by caller
*/
void
gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key)
{
......@@ -1192,9 +1188,11 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
START_CRIT_SECTION();
GISTInitBuffer(buffer, 0); /* XXX not F_LEAF? */
GISTInitBuffer(buffer, 0);
gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
MarkBufferDirty(buffer);
if (!r->rd_istemp)
{
XLogRecPtr recptr;
......@@ -1211,8 +1209,6 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
else
PageSetLSN(page, XLogRecPtrForTemp);
WriteNoReleaseBuffer(buffer);
END_CRIT_SECTION();
}
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.17 2006/03/30 23:03:10 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.18 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -71,11 +71,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
vacuum_delay_point();
buffer = ReadBuffer(gv->index, blkno);
/*
* This is only used during VACUUM FULL, so we need not bother to lock
* individual index pages
*/
LockBuffer(buffer, GIST_EXCLUSIVE);
gistcheckpage(gv->index, buffer);
page = (Page) BufferGetPage(buffer);
maxoff = PageGetMaxOffsetNumber(page);
......@@ -183,6 +179,11 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
}
res.itup = vec;
for (ptr = dist; ptr; ptr = ptr->next)
{
MarkBufferDirty(ptr->buffer);
}
if (!gv->index->rd_istemp)
{
XLogRecPtr recptr;
......@@ -198,12 +199,10 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
xlinfo = rdata->data;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr = ptr->next;
}
pfree(xlinfo);
......@@ -211,21 +210,18 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
}
else
{
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr = ptr->next;
}
}
ptr = dist;
while (ptr)
for (ptr = dist; ptr; ptr = ptr->next)
{
/* we must keep the buffer lock on the head page */
if (BufferGetBlockNumber(ptr->buffer) != blkno)
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr = ptr->next;
ReleaseBuffer(ptr->buffer);
}
if (blkno == GIST_ROOT_BLKNO)
......@@ -297,6 +293,8 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
if (needwrite)
{
MarkBufferDirty(buffer);
if (!gv->index->rd_istemp)
{
XLogRecData *rdata;
......@@ -317,13 +315,12 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
}
else
PageSetLSN(page, XLogRecPtrForTemp);
WriteBuffer(buffer);
}
else
ReleaseBuffer(buffer);
END_CRIT_SECTION();
UnlockReleaseBuffer(buffer);
if (ncompleted && !gv->index->rd_istemp)
gistxlogInsertCompletion(gv->index->rd_node, completed, ncompleted);
......@@ -429,8 +426,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
}
else
lastFilledBlock = blkno;
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
lastBlock = npages - 1;
......@@ -569,8 +565,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page))
{
/* only the root can become non-leaf during relock */
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
/* one more check */
continue;
}
......@@ -617,6 +612,8 @@ gistbulkdelete(PG_FUNCTION_ARGS)
{
GistMarkTuplesDeleted(page);
MarkBufferDirty(buffer);
if (!rel->rd_istemp)
{
XLogRecData *rdata;
......@@ -638,7 +635,6 @@ gistbulkdelete(PG_FUNCTION_ARGS)
}
else
PageSetLSN(page, XLogRecPtrForTemp);
WriteNoReleaseBuffer(buffer);
}
END_CRIT_SECTION();
......@@ -666,8 +662,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
}
}
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
ptr = stack->next;
pfree(stack);
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.13 2006/03/30 23:03:10 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.14 2006/03/31 23:32:05 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
......@@ -192,8 +192,7 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return;
}
......@@ -236,8 +235,8 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
if (ItemPointerIsValid(&(xlrec.data->key)))
{
......@@ -313,8 +312,8 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
if (ItemPointerIsValid(&(xlrec.data->key)))
......@@ -346,8 +345,8 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
static void
......@@ -561,8 +560,8 @@ gistContinueInsert(gistIncompleteInsert *insert)
PageSetLSN(page, insert->lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/*
* XXX fall out to avoid making LOG message at bottom of routine.
......@@ -598,8 +597,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer - 1])))
{
LockBuffer(buffers[numbuffer - 1], BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffers[numbuffer - 1]);
UnlockReleaseBuffer(buffers[numbuffer - 1]);
return;
}
......@@ -685,8 +683,8 @@ gistContinueInsert(gistIncompleteInsert *insert)
PageSetLSN(pages[j], insert->lsn);
PageSetTLI(pages[j], ThisTimeLineID);
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK);
WriteBuffer(buffers[j]);
MarkBufferDirty(buffers[j]);
UnlockReleaseBuffer(buffers[j]);
}
}
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.51 2006/03/05 15:58:20 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
......@@ -146,7 +146,8 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
ovflopaque->hasho_filler = HASHO_FILL;
_hash_wrtnorelbuf(rel, ovflbuf);
MarkBufferDirty(ovflbuf);
/* logically chain overflow page to previous page */
pageopaque->hasho_nextblkno = ovflblkno;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.56 2006/03/05 15:58:20 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.57 2006/03/31 23:32:05 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
......@@ -129,22 +129,18 @@ _hash_getbuf(Relation rel, BlockNumber blkno, int access)
/*
* _hash_relbuf() -- release a locked buffer.
*
* Lock and pin (refcount) are both dropped. Note that either read or
* write lock can be dropped this way, but if we modified the buffer,
* this is NOT the right way to release a write lock.
* Lock and pin (refcount) are both dropped.
*/
void
_hash_relbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
* _hash_dropbuf() -- release an unlocked buffer.
*
* This is used to unpin a buffer on which we hold no lock. It is assumed
* that the buffer is not dirty.
* This is used to unpin a buffer on which we hold no lock.
*/
void
_hash_dropbuf(Relation rel, Buffer buf)
......@@ -159,31 +155,16 @@ _hash_dropbuf(Relation rel, Buffer buf)
* for it. It is an error to call _hash_wrtbuf() without a write lock
* and a pin on the buffer.
*
* NOTE: actually, the buffer manager just marks the shared buffer page
* dirty here; the real I/O happens later. This is okay since we are not
* relying on write ordering anyway. The WAL mechanism is responsible for
* guaranteeing correctness after a crash.
* NOTE: this routine should go away when/if hash indexes are WAL-ified.
* The correct sequence of operations is to mark the buffer dirty, then
* write the WAL record, then release the lock and pin; so marking dirty
* can't be combined with releasing.
*/
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
}
/*
* _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
* our reference or lock.
*
* It is an error to call _hash_wrtnorelbuf() without a write lock
* and a pin on the buffer.
*
* See above NOTE.
*/
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
WriteNoReleaseBuffer(buf);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
}
/*
......@@ -204,11 +185,10 @@ _hash_chgbufaccess(Relation rel,
int from_access,
int to_access)
{
if (from_access == HASH_WRITE)
MarkBufferDirty(buf);
if (from_access != HASH_NOLOCK)
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
if (from_access == HASH_WRITE)
WriteNoReleaseBuffer(buf);
if (to_access != HASH_NOLOCK)
LockBuffer(buf, to_access);
}
......
This diff is collapsed.
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.133 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.134 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -544,10 +544,13 @@ _bt_insertonpg(Relation rel,
_bt_pgaddtup(rel, page, itemsz, itup, newitemoff, "page");
MarkBufferDirty(buf);
if (BufferIsValid(metabuf))
{
metad->btm_fastroot = itup_blkno;
metad->btm_fastlevel = lpageop->btpo.level;
MarkBufferDirty(metabuf);
}
/* XLOG stuff */
......@@ -619,11 +622,11 @@ _bt_insertonpg(Relation rel,
END_CRIT_SECTION();
/* Write out the updated page and release pin/lock */
/* release pin/lock */
if (BufferIsValid(metabuf))
_bt_wrtbuf(rel, metabuf);
_bt_relbuf(rel, metabuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, buf);
}
}
......@@ -819,12 +822,21 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* Right sibling is locked, new siblings are prepared, but original page
* is not updated yet. Log changes before continuing.
*
* NO EREPORT(ERROR) till right sibling is updated.
* NO EREPORT(ERROR) till right sibling is updated. We can get away with
* not starting the critical section till here because we haven't been
* scribbling on the original page yet, and we don't care about the
* new sibling until it's linked into the btree.
*/
START_CRIT_SECTION();
MarkBufferDirty(buf);
MarkBufferDirty(rbuf);
if (!P_RIGHTMOST(ropaque))
{
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
MarkBufferDirty(sbuf);
}
/* XLOG stuff */
if (!rel->rd_istemp)
......@@ -904,16 +916,22 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* original. Note that this is not a waste of time, since we also require
* (in the page management code) that the center of a page always be
* clean, and the most efficient way to guarantee this is just to compact
* the data by reinserting it into a new left page.
* the data by reinserting it into a new left page. (XXX the latter
* comment is probably obsolete.)
*
* It's a bit weird that we don't fill in the left page till after writing
* the XLOG entry, but not really worth changing. Note that we use the
* origpage data (specifically its BTP_ROOT bit) while preparing the XLOG
* entry, so simply reshuffling the code won't do.
*/
PageRestoreTempPage(leftpage, origpage);
END_CRIT_SECTION();
/* write and release the old right sibling */
/* release the old right sibling */
if (!P_RIGHTMOST(ropaque))
_bt_wrtbuf(rel, sbuf);
_bt_relbuf(rel, sbuf);
/* split's done */
return rbuf;
......@@ -1169,9 +1187,9 @@ _bt_insert_parent(Relation rel,
/* create a new root node and update the metapage */
rootbuf = _bt_newroot(rel, buf, rbuf);
/* release the split buffers */
_bt_wrtbuf(rel, rootbuf);
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, rootbuf);
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
}
else
{
......@@ -1220,9 +1238,9 @@ _bt_insert_parent(Relation rel,
pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
/* Now we can write and unlock the children */
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
/* Now we can unlock the children */
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
/* Check for error only after writing children */
if (pbuf == InvalidBuffer)
......@@ -1370,7 +1388,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{
Buffer rootbuf;
Page lpage,
rpage,
rootpage;
BlockNumber lbkno,
rbkno;
......@@ -1387,7 +1404,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
lbkno = BufferGetBlockNumber(lbuf);
rbkno = BufferGetBlockNumber(rbuf);
lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf);
/* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
......@@ -1451,6 +1467,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
elog(PANIC, "failed to add rightkey to new root page");
pfree(new_item);
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -1483,16 +1502,12 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
PageSetTLI(rootpage, ThisTimeLineID);
PageSetLSN(metapg, recptr);
PageSetTLI(metapg, ThisTimeLineID);
PageSetLSN(lpage, recptr);
PageSetTLI(lpage, ThisTimeLineID);
PageSetLSN(rpage, recptr);
PageSetTLI(rpage, ThisTimeLineID);
}
END_CRIT_SECTION();
/* write and let go of metapage buffer */
_bt_wrtbuf(rel, metabuf);
/* done with metapage */
_bt_relbuf(rel, metabuf);
return rootbuf;
}
......
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.93 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.94 2006/03/31 23:32:05 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
......@@ -53,13 +53,16 @@ _bt_metapinit(Relation rel)
buf = ReadBuffer(rel, P_NEW);
Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
LockBuffer(buf, BT_WRITE);
pg = BufferGetPage(buf);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
_bt_initmetapage(pg, P_NONE, 0);
metad = BTPageGetMeta(pg);
/* NO ELOG(ERROR) from here till newmeta op is logged */
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
......@@ -89,7 +92,7 @@ _bt_metapinit(Relation rel)
END_CRIT_SECTION();
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
......@@ -235,6 +238,9 @@ _bt_getroot(Relation rel, int access)
metad->btm_fastroot = rootblkno;
metad->btm_fastlevel = 0;
MarkBufferDirty(rootbuf);
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -261,8 +267,6 @@ _bt_getroot(Relation rel, int access)
END_CRIT_SECTION();
_bt_wrtnorelbuf(rel, rootbuf);
/*
* swap root write lock for read lock. There is no danger of anyone
* else accessing the new root page while it's unlocked, since no one
......@@ -271,8 +275,8 @@ _bt_getroot(Relation rel, int access)
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(rootbuf, BT_READ);
/* okay, metadata is correct, write and release it */
_bt_wrtbuf(rel, metabuf);
/* okay, metadata is correct, release lock on it */
_bt_relbuf(rel, metabuf);
}
else
{
......@@ -581,49 +585,12 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
/*
* _bt_relbuf() -- release a locked buffer.
*
* Lock and pin (refcount) are both dropped. Note that either read or
* write lock can be dropped this way, but if we modified the buffer,
* this is NOT the right way to release a write lock.
* Lock and pin (refcount) are both dropped.
*/
void
_bt_relbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
}
/*
* _bt_wrtbuf() -- write a btree page to disk.
*
* This routine releases the lock held on the buffer and our refcount
* for it. It is an error to call _bt_wrtbuf() without a write lock
* and a pin on the buffer.
*
* NOTE: actually, the buffer manager just marks the shared buffer page
* dirty here; the real I/O happens later. This is okay since we are not
* relying on write ordering anyway. The WAL mechanism is responsible for
* guaranteeing correctness after a crash.
*/
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
}
/*
* _bt_wrtnorelbuf() -- write a btree page to disk, but do not release
* our reference or lock.
*
* It is an error to call _bt_wrtnorelbuf() without a write lock
* and a pin on the buffer.
*
* See above NOTE.
*/
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
WriteNoReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
}
/*
......@@ -676,9 +643,8 @@ _bt_page_recyclable(Page page)
* non-leaf page has to be done as part of an atomic action that includes
* deleting the page it points to.
*
* This routine assumes that the caller has pinned and locked the buffer,
* and will write the buffer afterwards. Also, the given itemnos *must*
* appear in increasing order in the array.
* This routine assumes that the caller has pinned and locked the buffer.
* Also, the given itemnos *must* appear in increasing order in the array.
*/
void
_bt_delitems(Relation rel, Buffer buf,
......@@ -692,6 +658,8 @@ _bt_delitems(Relation rel, Buffer buf,
/* Fix the page */
PageIndexMultiDelete(page, itemnos, nitems);
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -1053,8 +1021,16 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
{
metad->btm_fastroot = rightsib;
metad->btm_fastlevel = targetlevel;
MarkBufferDirty(metabuf);
}
/* Must mark buffers dirty before XLogInsert */
MarkBufferDirty(pbuf);
MarkBufferDirty(rbuf);
MarkBufferDirty(buf);
if (BufferIsValid(lbuf))
MarkBufferDirty(lbuf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -1143,14 +1119,14 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
END_CRIT_SECTION();
/* Write and release buffers */
/* release buffers */
if (BufferIsValid(metabuf))
_bt_wrtbuf(rel, metabuf);
_bt_wrtbuf(rel, pbuf);
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, buf);
_bt_relbuf(rel, metabuf);
_bt_relbuf(rel, pbuf);
_bt_relbuf(rel, rbuf);
_bt_relbuf(rel, buf);
if (BufferIsValid(lbuf))
_bt_wrtbuf(rel, lbuf);
_bt_relbuf(rel, lbuf);
/*
* If parent became half dead, recurse to try to delete it. Otherwise, if
......
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.142 2006/03/05 15:58:21 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.143 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -624,17 +624,12 @@ btbulkdelete(PG_FUNCTION_ARGS)
}
}
/*
* If we need to delete anything, do it and write the buffer; else
* just release the buffer.
*/
nextpage = opaque->btpo_next;
/* Apply any needed deletes */
if (ndeletable > 0)
{
_bt_delitems(rel, buf, deletable, ndeletable);
_bt_wrtbuf(rel, buf);
}
else
/* Fetch nextpage link before releasing the buffer */
nextpage = opaque->btpo_next;
_bt_relbuf(rel, buf);
/* call vacuum_delay_point while not holding any buffer lock */
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.29 2006/03/29 21:17:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.30 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -69,8 +69,7 @@ forget_matching_split(Relation reln, RelFileNode node,
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
rightblk = ItemPointerGetBlockNumber(&(itup->t_tid));
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
foreach(l, incomplete_splits)
{
......@@ -80,7 +79,8 @@ forget_matching_split(Relation reln, RelFileNode node,
rightblk == split->rightblk)
{
if (is_root != split->is_root)
elog(LOG, "forget_matching_split: fishy is_root data");
elog(LOG, "forget_matching_split: fishy is_root data (expected %d, got %d)",
split->is_root, is_root);
incomplete_splits = list_delete_ptr(incomplete_splits, split);
break; /* need not look further */
}
......@@ -143,8 +143,8 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
PageSetLSN(metapg, lsn);
PageSetTLI(metapg, ThisTimeLineID);
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
WriteBuffer(metabuf);
MarkBufferDirty(metabuf);
UnlockReleaseBuffer(metabuf);
}
static void
......@@ -185,8 +185,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
......@@ -197,8 +196,8 @@ btree_xlog_insert(bool isleaf, bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
......@@ -255,8 +254,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Right (new) sibling */
buffer = XLogReadBuffer(reln, rightsib, true);
......@@ -277,8 +276,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Fix left-link of right (next) page */
if (!(record->xl_info & XLR_BKP_BLOCK_1))
......@@ -292,8 +291,7 @@ btree_xlog_split(bool onleft, bool isroot,
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
......@@ -302,8 +300,8 @@ btree_xlog_split(bool onleft, bool isroot,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
......@@ -343,8 +341,7 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return;
}
......@@ -361,8 +358,8 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
static void
......@@ -395,8 +392,7 @@ btree_xlog_delete_page(bool ismeta,
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
......@@ -424,8 +420,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
......@@ -439,8 +435,7 @@ btree_xlog_delete_page(bool ismeta,
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
......@@ -449,8 +444,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
......@@ -466,8 +461,7 @@ btree_xlog_delete_page(bool ismeta,
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
......@@ -476,8 +470,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
}
......@@ -498,8 +492,8 @@ btree_xlog_delete_page(bool ismeta,
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* Update metapage if needed */
if (ismeta)
......@@ -544,8 +538,8 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
_bt_restore_meta(reln, lsn,
xlrec->rootblk, xlrec->level,
......
$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.4 2006/03/29 21:17:37 tgl Exp $
$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.5 2006/03/31 23:32:05 tgl Exp $
The Transaction System
----------------------
......@@ -297,7 +297,7 @@ The general schema for executing a WAL-logged action is
1. Pin and exclusive-lock the shared buffer(s) containing the data page(s)
to be modified.
2. START_CRIT_SECTION() (Any error during the next two steps must cause a
2. START_CRIT_SECTION() (Any error during the next three steps must cause a
PANIC because the shared buffers will contain unlogged changes, which we
have to ensure don't get to disk. Obviously, you should check conditions
such as whether there's enough free space on the page before you start the
......@@ -305,7 +305,10 @@ critical section.)
3. Apply the required changes to the shared buffer(s).
4. Build a WAL log record and pass it to XLogInsert(); then update the page's
4. Mark the shared buffer(s) as dirty with MarkBufferDirty(). (This must
happen before the WAL record is inserted; see notes in SyncOneBuffer().)
5. Build a WAL log record and pass it to XLogInsert(); then update the page's
LSN and TLI using the returned XLOG location. For instance,
recptr = XLogInsert(rmgr_id, info, rdata);
......@@ -313,16 +316,9 @@ LSN and TLI using the returned XLOG location. For instance,
PageSetLSN(dp, recptr);
PageSetTLI(dp, ThisTimeLineID);
5. END_CRIT_SECTION()
6. Unlock and write the buffer(s):
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
6. END_CRIT_SECTION()
(Note: WriteBuffer doesn't really "write" the buffer anymore, it just marks it
dirty and unpins it. The write will not happen until a checkpoint occurs or
the shared buffer is needed for another page.)
7. Unlock and unpin the buffer(s).
XLogInsert's "rdata" argument is an array of pointer/size items identifying
chunks of data to be written in the XLOG record, plus optional shared-buffer
......@@ -364,8 +360,8 @@ standard replay-routine pattern for this case is
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
In the case where the WAL record provides only enough information to
incrementally update the page, the rdata array *must* mention the buffer
......@@ -384,8 +380,7 @@ The standard replay-routine pattern for this case is
if (XLByteLE(lsn, PageGetLSN(page)))
{
/* changes are already applied */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
UnlockReleaseBuffer(buffer);
return;
}
......@@ -393,8 +388,8 @@ The standard replay-routine pattern for this case is
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
As noted above, for a multi-page update you need to be able to determine
which XLR_BKP_BLOCK_n flag applies to each page. If a WAL record reflects
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.230 2006/03/29 21:17:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.231 2006/03/31 23:32:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -2529,8 +2529,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
blk += BLCKSZ - bkpb.hole_length;
}
......
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.42 2006/03/29 21:17:38 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.43 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,9 +30,9 @@
*
* This is functionally comparable to ReadBuffer followed by
* LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE): you get back a pinned
* and locked buffer. (The lock is not really necessary, since we
* expect that this is only done during single-process XLOG replay,
* but in some places it simplifies sharing code with the non-XLOG case.)
* and locked buffer. (Getting the lock is not really necessary, since we
* expect that this is only used during single-process XLOG replay, but
* some subroutines such as MarkBufferDirty will complain if we don't.)
*
* If "init" is true then the caller intends to rewrite the page fully
* using the info in the XLOG record. In this case we will extend the
......@@ -74,7 +74,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
while (blkno >= lastblock)
{
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer); /* must be WriteBuffer()? */
ReleaseBuffer(buffer);
buffer = ReadBuffer(reln, P_NEW);
lastblock++;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.264 2006/03/24 23:02:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.265 2006/03/31 23:32:06 tgl Exp $
*
*
* INTERFACE ROUTINES
......@@ -1065,13 +1065,10 @@ setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid)
}
}
if (pg_class_scan)
LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
if (pg_class_scan)
{
/* Write the modified tuple in-place */
WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
MarkBufferDirty(pg_class_scan->rs_cbuf);
LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/* Send out shared cache inval if necessary */
if (!IsBootstrapProcessingMode())
CacheInvalidateHeapTuple(pg_class, tuple);
......@@ -1294,8 +1291,8 @@ UpdateStats(Oid relid, double reltuples)
LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);
rd_rel->relpages = (int32) relpages;
rd_rel->reltuples = (float4) reltuples;
MarkBufferDirty(pg_class_scan->rs_cbuf);
LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
if (!IsBootstrapProcessingMode())
CacheInvalidateHeapTuple(pg_class, tuple);
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.131 2006/03/29 21:17:38 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.132 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -249,6 +249,8 @@ DefineSequence(CreateSeqStmt *seq)
tuple->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
}
MarkBufferDirty(buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -281,8 +283,8 @@ DefineSequence(CreateSeqStmt *seq)
END_CRIT_SECTION();
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
heap_close(rel, NoLock);
}
......@@ -331,6 +333,8 @@ AlterSequence(AlterSeqStmt *stmt)
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (!seqrel->rd_istemp)
{
......@@ -358,9 +362,7 @@ AlterSequence(AlterSeqStmt *stmt)
END_CRIT_SECTION();
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
relation_close(seqrel, NoLock);
}
......@@ -550,6 +552,8 @@ nextval_internal(Oid relid)
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (logit && !seqrel->rd_istemp)
{
......@@ -587,9 +591,7 @@ nextval_internal(Oid relid)
END_CRIT_SECTION();
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
relation_close(seqrel, NoLock);
......@@ -720,6 +722,8 @@ do_setval(Oid relid, int64 next, bool iscalled)
START_CRIT_SECTION();
MarkBufferDirty(buf);
/* XLOG stuff */
if (!seqrel->rd_istemp)
{
......@@ -758,9 +762,7 @@ do_setval(Oid relid, int64 next, bool iscalled)
END_CRIT_SECTION();
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
relation_close(seqrel, NoLock);
}
......@@ -1159,8 +1161,8 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
void
......
......@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.325 2006/03/05 15:58:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.326 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -729,6 +729,8 @@ vac_update_relstats(Oid relid, BlockNumber num_pages, double num_tuples,
if (!hasindex)
pgcform->relhaspkey = false;
MarkBufferDirty(buffer);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
......@@ -739,8 +741,7 @@ vac_update_relstats(Oid relid, BlockNumber num_pages, double num_tuples,
*/
CacheInvalidateHeapTuple(rd, &rtup);
/* Write the buffer */
WriteBuffer(buffer);
ReleaseBuffer(buffer);
heap_close(rd, RowExclusiveLock);
}
......@@ -795,11 +796,12 @@ vac_update_dbstats(Oid dbid,
dbform->datvacuumxid = vacuumXID;
dbform->datfrozenxid = frozenXID;
MarkBufferDirty(scan->rs_cbuf);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/* invalidate the tuple in the cache and write the buffer */
/* invalidate the tuple in the cache so we'll see the change in cache */
CacheInvalidateHeapTuple(relation, tuple);
WriteNoReleaseBuffer(scan->rs_cbuf);
heap_endscan(scan);
......@@ -1298,6 +1300,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
relname, blkno)));
PageInit(page, BufferGetPageSize(buf), 0);
MarkBufferDirty(buf);
vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
free_space += vacpage->free;
empty_pages++;
......@@ -1305,8 +1308,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
vacpagecopy = copy_vac_page(vacpage);
vpage_insert(vacuum_pages, vacpagecopy);
vpage_insert(fraged_pages, vacpagecopy);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
continue;
}
......@@ -1321,8 +1323,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
vacpagecopy = copy_vac_page(vacpage);
vpage_insert(vacuum_pages, vacpagecopy);
vpage_insert(fraged_pages, vacpagecopy);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
continue;
}
......@@ -1527,11 +1528,9 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
else
empty_end_pages = 0;
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
if (pgchanged)
WriteBuffer(buf);
else
ReleaseBuffer(buf);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
}
pfree(vacpage);
......@@ -1682,7 +1681,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
OffsetNumber offnum,
maxoff;
bool isempty,
dowrite,
chain_tuple_moved;
vacuum_delay_point();
......@@ -1714,8 +1712,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
isempty = PageIsEmpty(page);
dowrite = false;
/* Is the page in the vacuum_pages list? */
if (blkno == last_vacuum_block)
{
......@@ -1726,7 +1722,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
vacuum_page(onerel, buf, last_vacuum_page);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
dowrite = true;
}
else
Assert(isempty);
......@@ -1884,7 +1879,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (dst_buffer != InvalidBuffer)
{
WriteBuffer(dst_buffer);
ReleaseBuffer(dst_buffer);
dst_buffer = InvalidBuffer;
}
......@@ -2148,8 +2143,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
else
keep_tuples++;
WriteBuffer(dst_buffer);
WriteBuffer(Cbuf);
ReleaseBuffer(dst_buffer);
ReleaseBuffer(Cbuf);
} /* end of move-the-tuple-chain loop */
dst_buffer = InvalidBuffer;
......@@ -2166,7 +2161,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
{
if (dst_buffer != InvalidBuffer)
{
WriteBuffer(dst_buffer);
ReleaseBuffer(dst_buffer);
dst_buffer = InvalidBuffer;
}
for (i = 0; i < num_fraged_pages; i++)
......@@ -2273,11 +2268,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
sizeof(OffsetNumber), vac_cmp_offno);
}
vpage_insert(&Nvacpagelist, copy_vac_page(vacpage));
WriteBuffer(buf);
}
else if (dowrite)
WriteBuffer(buf);
else
ReleaseBuffer(buf);
if (offnum <= maxoff)
......@@ -2290,7 +2282,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (dst_buffer != InvalidBuffer)
{
Assert(num_moved > 0);
WriteBuffer(dst_buffer);
ReleaseBuffer(dst_buffer);
}
if (num_moved > 0)
......@@ -2332,8 +2324,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
page = BufferGetPage(buf);
if (!PageIsEmpty(page))
vacuum_page(onerel, buf, *curpage);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
}
......@@ -2449,6 +2440,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
uncnt = PageRepairFragmentation(page, unused);
MarkBufferDirty(buf);
/* XLOG stuff */
if (!onerel->rd_istemp)
{
......@@ -2469,8 +2462,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
END_CRIT_SECTION();
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
/* now - free new list of reaped pages */
......@@ -2601,6 +2593,10 @@ move_chain_tuple(Relation rel,
newtup.t_data->t_ctid = *ctid;
*ctid = newtup.t_self;
MarkBufferDirty(dst_buf);
if (dst_buf != old_buf)
MarkBufferDirty(old_buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -2708,6 +2704,9 @@ move_plain_tuple(Relation rel,
old_tup->t_data->t_infomask |= HEAP_MOVED_OFF;
HeapTupleHeaderSetXvac(old_tup->t_data, myXID);
MarkBufferDirty(dst_buf);
MarkBufferDirty(old_buf);
/* XLOG stuff */
if (!rel->rd_istemp)
{
......@@ -2832,8 +2831,8 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
else
htup->t_infomask |= HEAP_XMIN_INVALID;
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
Assert((*curpage)->offsets_used == num_tuples);
checked_moved += num_tuples;
}
......@@ -2867,8 +2866,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
buf = ReadBuffer(onerel, (*vacpage)->blkno);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
vacuum_page(onerel, buf, *vacpage);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
}
}
......@@ -2889,6 +2887,8 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
/*
* vacuum_page() -- free dead tuples on a page
* and repair its fragmentation.
*
* Caller must hold pin and lock on buffer.
*/
static void
vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
......@@ -2912,6 +2912,8 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
uncnt = PageRepairFragmentation(page, unused);
MarkBufferDirty(buffer);
/* XLOG stuff */
if (!onerel->rd_istemp)
{
......
......@@ -31,7 +31,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.68 2006/03/05 15:58:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.69 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -317,8 +317,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
lazy_record_free_space(vacrelstats, blkno,
PageGetFreeSpace(page));
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
continue;
}
......@@ -327,8 +327,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
empty_pages++;
lazy_record_free_space(vacrelstats, blkno,
PageGetFreeSpace(page));
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
continue;
}
......@@ -439,12 +438,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if (hastup)
vacrelstats->nonempty_pages = blkno + 1;
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
if (pgchanged)
WriteBuffer(buf);
else
ReleaseBuffer(buf);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
}
/* save stats for use later */
......@@ -524,8 +520,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
page = BufferGetPage(buf);
lazy_record_free_space(vacrelstats, tblk,
PageGetFreeSpace(page));
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
UnlockReleaseBuffer(buf);
npages++;
}
......@@ -541,7 +536,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
* lazy_vacuum_page() -- free dead tuples on a page
* and repair its fragmentation.
*
* Caller is expected to handle reading, locking, and writing the buffer.
* Caller must hold pin and lock on the buffer.
*
* tupindex is the index in vacrelstats->dead_tuples of the first dead
* tuple for this page. We assume the rest follow sequentially.
......@@ -557,6 +552,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
ItemId itemid;
START_CRIT_SECTION();
for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
{
BlockNumber tblk;
......@@ -572,6 +568,8 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
uncnt = PageRepairFragmentation(page, unused);
MarkBufferDirty(buffer);
/* XLOG stuff */
if (!onerel->rd_istemp)
{
......@@ -871,8 +869,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
if (PageIsNew(page) || PageIsEmpty(page))
{
/* PageIsNew probably shouldn't happen... */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
continue;
}
......@@ -928,9 +925,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
}
} /* scan along page */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
/* Done scanning if we found a tuple here */
if (hastup)
......
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.8 2005/03/04 20:21:06 tgl Exp $
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.9 2006/03/31 23:32:06 tgl Exp $
Notes about shared buffer access rules
--------------------------------------
......@@ -12,19 +12,18 @@ the relation. Relation-level locks are not discussed here.)
Pins: one must "hold a pin on" a buffer (increment its reference count)
before being allowed to do anything at all with it. An unpinned buffer is
subject to being reclaimed and reused for a different page at any instant,
so touching it is unsafe. Typically a pin is acquired via ReadBuffer and
released via WriteBuffer (if one modified the page) or ReleaseBuffer (if not).
It is OK and indeed common for a single backend to pin a page more than
once concurrently; the buffer manager handles this efficiently. It is
considered OK to hold a pin for long intervals --- for example, sequential
scans hold a pin on the current page until done processing all the tuples
on the page, which could be quite a while if the scan is the outer scan of
a join. Similarly, btree index scans hold a pin on the current index page.
This is OK because normal operations never wait for a page's pin count to
drop to zero. (Anything that might need to do such a wait is instead
handled by waiting to obtain the relation-level lock, which is why you'd
better hold one first.) Pins may not be held across transaction
boundaries, however.
so touching it is unsafe. Normally a pin is acquired via ReadBuffer and
released via ReleaseBuffer. It is OK and indeed common for a single
backend to pin a page more than once concurrently; the buffer manager
handles this efficiently. It is considered OK to hold a pin for long
intervals --- for example, sequential scans hold a pin on the current page
until done processing all the tuples on the page, which could be quite a
while if the scan is the outer scan of a join. Similarly, btree index
scans hold a pin on the current index page. This is OK because normal
operations never wait for a page's pin count to drop to zero. (Anything
that might need to do such a wait is instead handled by waiting to obtain
the relation-level lock, which is why you'd better hold one first.) Pins
may not be held across transaction boundaries, however.
Buffer content locks: there are two kinds of buffer lock, shared and exclusive,
which act just as you'd expect: multiple backends can hold shared locks on
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.205 2006/03/29 21:17:39 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.206 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -17,13 +17,10 @@
* and pin it so that no one can destroy it while this process
* is using it.
*
* ReleaseBuffer() -- unpin the buffer
* ReleaseBuffer() -- unpin a buffer
*
* WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
* but don't unpin. The disk IO is delayed until buffer
* replacement.
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
* The disk write is delayed until buffer replacement or checkpoint.
*
* BufferSync() -- flush all dirty buffers in the buffer pool.
*
......@@ -101,7 +98,6 @@ static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
static void AtProcExit_Buffers(int code, Datum arg);
static void write_buffer(Buffer buffer, bool unpin);
/*
......@@ -634,11 +630,16 @@ retry:
}
/*
* write_buffer -- common functionality for
* WriteBuffer and WriteNoReleaseBuffer
* MarkBufferDirty
*
* Marks buffer contents as dirty (actual write happens later).
*
* Buffer must be pinned and exclusive-locked. (If caller does not hold
* exclusive lock, then somebody could be in process of writing the buffer,
* leading to risk of bad data written to disk.)
*/
static void
write_buffer(Buffer buffer, bool unpin)
void
MarkBufferDirty(Buffer buffer)
{
volatile BufferDesc *bufHdr;
......@@ -647,13 +648,15 @@ write_buffer(Buffer buffer, bool unpin)
if (BufferIsLocal(buffer))
{
WriteLocalBuffer(buffer, unpin);
MarkLocalBufferDirty(buffer);
return;
}
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
/* unfortunately we can't check if the lock is held exclusively */
Assert(LWLockHeldByMe(bufHdr->content_lock));
LockBufHdr(bufHdr);
......@@ -668,35 +671,6 @@ write_buffer(Buffer buffer, bool unpin)
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
if (unpin)
UnpinBuffer(bufHdr, true, true);
}
/*
* WriteBuffer
*
* Marks buffer contents as dirty (actual write happens later).
*
* Assume that buffer is pinned. Assume that reln is valid.
*
* Side Effects:
* Pin count is decremented.
*/
void
WriteBuffer(Buffer buffer)
{
write_buffer(buffer, true);
}
/*
* WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
* when the operation is complete.
*/
void
WriteNoReleaseBuffer(Buffer buffer)
{
write_buffer(buffer, false);
}
/*
......@@ -1617,8 +1591,7 @@ FlushRelationBuffers(Relation rel)
}
/*
* ReleaseBuffer -- remove the pin on a buffer without
* marking it dirty.
* ReleaseBuffer -- release the pin on a buffer
*/
void
ReleaseBuffer(Buffer buffer)
......@@ -1651,6 +1624,18 @@ ReleaseBuffer(Buffer buffer)
UnpinBuffer(bufHdr, false, true);
}
/*
* UnlockReleaseBuffer -- release the content lock and pin on a buffer
*
* This is just a shorthand for a common combination: drop the buffer's
* content lock with LockBuffer(buffer, BUFFER_LOCK_UNLOCK), then give up
* the pin with ReleaseBuffer(buffer). The lock is released first, while
* the pin is still held.
*
* This routine does not itself mark the buffer dirty; a caller that has
* modified the page should already have called MarkBufferDirty().
*
* NOTE(review): presumably the caller must hold both a pin and the content
* lock on entry --- confirm against LockBuffer and ReleaseBuffer.
*/
void
UnlockReleaseBuffer(Buffer buffer)
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
}
/*
* IncrBufferRefCount
* Increment the pin count on a buffer that we have *already* pinned
......@@ -1676,20 +1661,13 @@ IncrBufferRefCount(Buffer buffer)
*
* Mark a buffer dirty when we have updated tuple commit-status bits in it.
*
* This is essentially the same as WriteNoReleaseBuffer. We preserve the
* distinction as a way of documenting that the caller has not made a critical
* data change --- the status-bit update could be redone by someone else just
* as easily. Therefore, no WAL log record need be generated, whereas calls
* to WriteNoReleaseBuffer really ought to be associated with a WAL-entry-
* creating action.
*
* This routine might get called many times on the same page, if we are making
* the first scan after commit of an xact that added/deleted many tuples.
* So, be as quick as we can if the buffer is already dirty. We do this by
* not acquiring spinlock if it looks like the status bits are already OK.
* (Note it is okay if someone else clears BM_JUST_DIRTIED immediately after
* we look, because the buffer content update is already done and will be
* reflected in the I/O.)
* This is essentially the same as MarkBufferDirty, except that the caller
* might have only share-lock instead of exclusive-lock on the buffer's
* content lock. We preserve the distinction mainly as a way of documenting
* that the caller has not made a critical data change --- the status-bit
* update could be redone by someone else just as easily. Therefore, no WAL
* log record need be generated, whereas calls to MarkBufferDirty really ought
* to be associated with a WAL-entry-creating action.
*/
void
SetBufferCommitInfoNeedsSave(Buffer buffer)
......@@ -1701,19 +1679,32 @@ SetBufferCommitInfoNeedsSave(Buffer buffer)
if (BufferIsLocal(buffer))
{
WriteLocalBuffer(buffer, false);
MarkLocalBufferDirty(buffer);
return;
}
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
/* here, either share or exclusive lock is OK */
Assert(LWLockHeldByMe(bufHdr->content_lock));
/*
* This routine might get called many times on the same page, if we are
* making the first scan after commit of an xact that added/deleted many
* tuples. So, be as quick as we can if the buffer is already dirty. We
* do this by not acquiring spinlock if it looks like the status bits are
* already OK. (Note it is okay if someone else clears BM_JUST_DIRTIED
* immediately after we look, because the buffer content update is already
* done and will be reflected in the I/O.)
*/
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
LockBufHdr(bufHdr);
Assert(bufHdr->refcount > 0);
if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(bufHdr);
}
......@@ -1767,7 +1758,7 @@ LockBuffer(Buffer buffer, int mode)
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
return;
return; /* local buffers need no lock */
buf = &(BufferDescriptors[buffer - 1]);
......@@ -1776,19 +1767,7 @@ LockBuffer(Buffer buffer, int mode)
else if (mode == BUFFER_LOCK_SHARE)
LWLockAcquire(buf->content_lock, LW_SHARED);
else if (mode == BUFFER_LOCK_EXCLUSIVE)
{
LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
/*
* This is not the best place to mark buffer dirty (eg indices do not
* always change buffer they lock in excl mode). But please remember
* that it's critical to set dirty bit *before* logging changes with
* XLogInsert() - see comments in SyncOneBuffer().
*/
LockBufHdr(buf);
buf->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(buf);
}
else
elog(ERROR, "unrecognized buffer lock mode: %d", mode);
}
......@@ -1809,21 +1788,7 @@ ConditionalLockBuffer(Buffer buffer)
buf = &(BufferDescriptors[buffer - 1]);
if (LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE))
{
/*
* This is not the best place to mark buffer dirty (eg indices do not
* always change buffer they lock in excl mode). But please remember
* that it's critical to set dirty bit *before* logging changes with
* XLogInsert() - see comments in SyncOneBuffer().
*/
LockBufHdr(buf);
buf->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnlockBufHdr(buf);
return true;
}
return false;
return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
}
/*
......
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.73 2006/03/05 15:58:36 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.74 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -209,11 +209,11 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
}
/*
* WriteLocalBuffer -
* writes out a local buffer (actually, just marks it dirty)
* MarkLocalBufferDirty -
* mark a local buffer dirty
*/
void
WriteLocalBuffer(Buffer buffer, bool release)
MarkLocalBufferDirty(Buffer buffer)
{
int bufid;
BufferDesc *bufHdr;
......@@ -221,7 +221,7 @@ WriteLocalBuffer(Buffer buffer, bool release)
Assert(BufferIsLocal(buffer));
#ifdef LBDEBUG
fprintf(stderr, "LB WRITE %d\n", buffer);
fprintf(stderr, "LB DIRTY %d\n", buffer);
#endif
bufid = -(buffer + 1);
......@@ -230,15 +230,6 @@ WriteLocalBuffer(Buffer buffer, bool release)
bufHdr = &LocalBufferDescriptors[bufid];
bufHdr->flags |= BM_DIRTY;
if (release)
{
LocalRefCount[bufid]--;
if (LocalRefCount[bufid] == 0 &&
bufHdr->usage_count < BM_MAX_USAGE_COUNT)
bufHdr->usage_count++;
ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
}
}
/*
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/hash.h,v 1.67 2006/03/24 04:32:13 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/hash.h,v 1.68 2006/03/31 23:32:06 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
......@@ -278,7 +278,6 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _hash_relbuf(Relation rel, Buffer buf);
extern void _hash_dropbuf(Relation rel, Buffer buf);
extern void _hash_wrtbuf(Relation rel, Buffer buf);
extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
int to_access);
extern void _hash_metapinit(Relation rel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.93 2006/03/24 04:32:13 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.94 2006/03/31 23:32:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -419,8 +419,6 @@ extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern bool _bt_page_recyclable(Page page);
extern void _bt_delitems(Relation rel, Buffer buf,
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.85 2006/03/05 15:58:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.86 2006/03/31 23:32:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -189,7 +189,7 @@ extern void BufTableDelete(BufferTag *tagPtr);
/* localbuf.c */
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
extern void WriteLocalBuffer(Buffer buffer, bool release);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode,
BlockNumber firstDelBlock);
extern void AtEOXact_LocalBuffers(bool isCommit);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.99 2006/03/29 21:17:39 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.100 2006/03/31 23:32:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -115,9 +115,9 @@ extern DLLIMPORT int32 *LocalRefCount;
*/
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
extern void IncrBufferRefCount(Buffer buffer);
extern void WriteBuffer(Buffer buffer);
extern void WriteNoReleaseBuffer(Buffer buffer);
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
BlockNumber blockNum);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment