Commit 81c8c244 authored by Vadim B. Mikheev

No more #ifdef XLOG.

parent b16516b8
......@@ -6,7 +6,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.66 2000/11/21 21:15:53 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.67 2000/11/30 08:46:20 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,9 +23,7 @@
#include "miscadmin.h"
#include "utils/syscache.h"
#ifdef XLOG
#include "access/xlogutils.h"
#endif
/* non-export function prototypes */
static InsertIndexResult gistdoinsert(Relation r, IndexTuple itup,
......@@ -1348,7 +1346,6 @@ int_range_out(INTRANGE *r)
#endif /* defined GISTDEBUG */
#ifdef XLOG
void
gist_redo(XLogRecPtr lsn, XLogRecord *record)
{
......@@ -1365,4 +1362,3 @@ void
gist_desc(char *buf, uint8 xl_info, char* rec)
{
}
#endif
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.45 2000/11/21 21:15:54 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.46 2000/11/30 08:46:20 vadim Exp $
*
* NOTES
* This file contains only the public interface routines.
......@@ -27,9 +27,7 @@
bool BuildingHash = false;
#ifdef XLOG
#include "access/xlogutils.h"
#endif
/*
......@@ -482,7 +480,6 @@ hashdelete(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
#ifdef XLOG
void
hash_redo(XLogRecPtr lsn, XLogRecord *record)
{
......@@ -499,4 +496,3 @@ void
hash_desc(char *buf, uint8 xl_info, char* rec)
{
}
#endif
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.96 2000/11/21 21:15:54 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.97 2000/11/30 08:46:20 vadim Exp $
*
*
* INTERFACE ROUTINES
......@@ -86,7 +86,6 @@
#include "utils/inval.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "access/xlogutils.h"
XLogRecPtr log_heap_move(Relation reln, ItemPointerData from, HeapTuple newtup);
......@@ -99,8 +98,6 @@ static XLogRecPtr log_heap_update(Relation reln, ItemPointerData from,
static void HeapPageCleanup(Buffer buffer);
#endif
/* ----------------------------------------------------------------
* heap support routines
......@@ -1370,7 +1367,6 @@ heap_insert(Relation relation, HeapTuple tup)
/* NO ELOG(ERROR) from here till changes are logged */
RelationPutHeapTuple(relation, buffer, tup);
#ifdef XLOG
/* XLOG stuff */
{
xl_heap_insert xlrec;
......@@ -1392,7 +1388,6 @@ heap_insert(Relation relation, HeapTuple tup)
PageSetLSN(BufferGetPage(buffer), recptr);
PageSetSUI(BufferGetPage(buffer), ThisStartUpID);
}
#endif
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
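With the #ifdef/#endif pair gone, heap_insert always runs the XLOG block above: change the page, emit the record, stamp the page with the record's LSN, and only then unlock. A condensed sketch of that order (the XLogInsert arguments are an assumption modeled on the xact.c call later in this commit, since the actual call sits in the elided lines):
/*
 *	RelationPutHeapTuple(relation, buffer, tup);	-- modify the page
 *	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INSERT,
 *						(char *) &xlrec, SizeOfHeapInsert,
 *						(char *) tup->t_data, tup->t_len);
 *	PageSetLSN(BufferGetPage(buffer), recptr);	-- page may not reach disk
 *	PageSetSUI(BufferGetPage(buffer), ThisStartUpID);	-- before this record does
 *	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 */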
......@@ -1485,7 +1480,6 @@ l1:
return result;
}
#ifdef XLOG
/* XLOG stuff */
{
xl_heap_delete xlrec;
......@@ -1500,7 +1494,6 @@ l1:
PageSetLSN(dp, recptr);
PageSetSUI(dp, ThisStartUpID);
}
#endif
/* store transaction information of xact deleting the tuple */
TransactionIdStore(GetCurrentTransactionId(), &(tp.t_data->t_xmax));
......@@ -1638,7 +1631,6 @@ l2:
newbuf = buffer;
else
{
#ifdef XLOG
/*
* We have to unlock old tuple buffer before extending table
* file but have to keep lock on the old tuple. To avoid second
......@@ -1650,7 +1642,7 @@ l2:
_locked_tuple_.node = relation->rd_node;
_locked_tuple_.tid = *otid;
XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple_);
#endif
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
oldtup.t_data->t_cmax = GetCurrentCommandId();
oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
......@@ -1677,15 +1669,12 @@ l2:
else
{
oldtup.t_data->t_infomask &= ~HEAP_XMAX_UNLOGGED;
#ifdef XLOG
XactPopRollback();
#endif
}
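XactPushRollback and XactPopRollback (defined in xact.c later in this commit) bracket the window where the old tuple is already marked as being updated but the change is not yet WAL-logged; on abort inside that window, the registered callback unlocks the tuple. A sketch of the pairing, using the identifiers above:
/*
 *	_locked_tuple_.node = relation->rd_node;
 *	_locked_tuple_.tid = *otid;
 *	XactPushRollback(_heap_unlock_tuple, (void *) &_locked_tuple_);
 *	... unlock old buffer, extend the file, log the update ...
 *	XactPopRollback();		-- window closed, undo action no longer needed
 */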
/* record address of new tuple in t_ctid of old one */
oldtup.t_data->t_ctid = newtup->t_self;
#ifdef XLOG
/* XLOG stuff */
{
XLogRecPtr recptr = log_heap_update(relation,
......@@ -1699,7 +1688,6 @@ l2:
PageSetLSN(BufferGetPage(buffer), recptr);
PageSetSUI(BufferGetPage(buffer), ThisStartUpID);
}
#endif
if (newbuf != buffer)
{
......@@ -1791,13 +1779,11 @@ l3:
return result;
}
#ifdef XLOG
/*
* XLOG stuff: no logging is required as long as we have no
* savepoints. For savepoints a private log could be used...
*/
((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID;
#endif
/* store transaction information of xact marking the tuple */
TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax));
......@@ -1984,8 +1970,6 @@ heap_restrpos(HeapScanDesc scan)
}
}
#ifdef XLOG
static XLogRecPtr
log_heap_update(Relation reln, ItemPointerData from,
HeapTuple newtup, bool move)
......@@ -2634,5 +2618,3 @@ heap_desc(char *buf, uint8 xl_info, char* rec)
else
strcat(buf, "UNKNOWN");
}
#endif /* XLOG */
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.68 2000/11/16 05:50:58 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.69 2000/11/30 08:46:21 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -61,9 +61,7 @@ static void _bt_pgaddtup(Relation rel, Page page,
static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
int keysz, ScanKey scankey);
#ifdef XLOG
static Relation _xlheapRel; /* temporary hack */
#endif
/*
* _bt_doinsert() -- Handle insertion of a single btitem in the tree.
......@@ -123,9 +121,7 @@ top:
}
}
#ifdef XLOG
_xlheapRel = heapRel; /* temporary hack */
#endif
/* do the insertion */
res = _bt_insertonpg(rel, buf, stack, natts, itup_scankey, btitem, 0);
......@@ -522,7 +518,6 @@ _bt_insertonpg(Relation rel,
}
else
{
#ifdef XLOG
/* XLOG stuff */
{
char xlbuf[sizeof(xl_btree_insert) +
......@@ -562,7 +557,7 @@ _bt_insertonpg(Relation rel,
PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID);
}
#endif
_bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page");
itup_off = newitemoff;
itup_blkno = BufferGetBlockNumber(buf);
......@@ -612,10 +607,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
rightoff;
OffsetNumber maxoff;
OffsetNumber i;
#ifdef XLOG
BTItem lhikey;
#endif
rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
origpage = BufferGetPage(buf);
......@@ -685,9 +677,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(origpage, itemid);
}
#ifdef XLOG
lhikey = item;
#endif
if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
LP_USED) == InvalidOffsetNumber)
elog(STOP, "btree: failed to add hikey to the left sibling");
......@@ -775,7 +765,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
spage = BufferGetPage(sbuf);
}
#ifdef XLOG
/*
* Right sibling is locked, new siblings are prepared, but original
* page is not updated yet. Log changes before continuing.
......@@ -860,7 +849,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
PageSetSUI(spage, ThisStartUpID);
}
}
#endif
/*
* By here, the original data page has been split into two new halves,
......@@ -1165,19 +1153,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
BTItem item;
Size itemsz;
BTItem new_item;
#ifdef XLOG
Buffer metabuf;
#endif
/* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
rootpage = BufferGetPage(rootbuf);
rootblknum = BufferGetBlockNumber(rootbuf);
#ifdef XLOG
metabuf = _bt_getbuf(rel, BTREE_METAPAGE,BT_WRITE);
#endif
/* NO ELOG(ERROR) from here till newroot op is logged */
......@@ -1237,7 +1219,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
elog(STOP, "btree: failed to add rightkey to new root page");
pfree(new_item);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_newroot xlrec;
......@@ -1267,16 +1248,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
_bt_wrtbuf(rel, metabuf);
}
#endif
/* write and let go of the new root buffer */
_bt_wrtbuf(rel, rootbuf);
#ifndef XLOG
/* update metadata page with new root block number */
_bt_metaproot(rel, rootblknum, 0);
#endif
/* update and release new sibling, and finally the old root */
_bt_wrtbuf(rel, rbuf);
_bt_wrtbuf(rel, lbuf);
......
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.41 2000/11/30 01:39:06 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.42 2000/11/30 08:46:21 vadim Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
......@@ -170,7 +170,6 @@ _bt_getroot(Relation rel, int access)
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_newroot xlrec;
......@@ -187,7 +186,6 @@ _bt_getroot(Relation rel, int access)
PageSetLSN(metapg, recptr);
PageSetSUI(metapg, ThisStartUpID);
}
#endif
metad->btm_root = rootblkno;
metad->btm_level = 1;
......@@ -403,7 +401,6 @@ _bt_pagedel(Relation rel, ItemPointer tid)
buf = _bt_getbuf(rel, blkno, BT_WRITE);
page = BufferGetPage(buf);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_delete xlrec;
......@@ -417,7 +414,6 @@ _bt_pagedel(Relation rel, ItemPointer tid)
PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID);
}
#endif
PageIndexTupleDelete(page, offno);
......
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.71 2000/11/21 21:15:55 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.72 2000/11/30 08:46:21 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -32,11 +32,8 @@ bool BuildingBtree = false; /* see comment in btbuild() */
bool FastBuild = true; /* use sort/build instead of insertion
* build */
#ifdef XLOG
#include "access/xlogutils.h"
#endif
static void _bt_restscan(IndexScanDesc scan);
/*
......@@ -733,8 +730,6 @@ _bt_restscan(IndexScanDesc scan)
}
}
#ifdef XLOG
static bool
_bt_cleanup_page(Page page, RelFileNode hnode)
{
......@@ -1529,5 +1524,3 @@ btree_desc(char *buf, uint8 xl_info, char* rec)
else
strcat(buf, "UNKNOWN");
}
#endif
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.56 2000/11/21 21:15:55 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.57 2000/11/30 08:46:21 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -22,9 +22,7 @@
#include "executor/executor.h"
#include "miscadmin.h"
#ifdef XLOG
#include "access/xlogutils.h"
#endif
typedef struct SPLITVEC
{
......@@ -1069,7 +1067,6 @@ _rtdump(Relation r)
#endif /* defined RTDEBUG */
#ifdef XLOG
void
rtree_redo(XLogRecPtr lsn, XLogRecord *record)
{
......@@ -1086,4 +1083,3 @@ void
rtree_desc(char *buf, uint8 xl_info, char* rec)
{
}
#endif
......@@ -27,4 +27,3 @@ RmgrData RmgrTable[] = {
{"Gist", gist_redo, gist_undo, gist_desc},
{"Sequence", seq_redo, seq_undo, seq_desc}
};
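This table is what recovery uses to route each WAL record back to the access method that wrote it. A hypothetical dispatch sketch, assuming RmgrData's fields follow the initializer order above (rm_name, rm_redo, rm_undo, rm_desc) and that XLogRecord carries its resource-manager id in xl_rmid:
static void
rmgr_redo_sketch(XLogRecPtr lsn, XLogRecord *record)
{
	/* index by resource-manager id, call that manager's redo routine */
	RmgrTable[record->xl_rmid].rm_redo(lsn, record);
}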
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.37 2000/11/21 21:15:57 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.38 2000/11/30 08:46:22 vadim Exp $
*
* NOTES
* This file contains the high level access-method interface to the
......@@ -424,23 +424,12 @@ InitializeTransactionLog(void)
SpinAcquire(OidGenLockId);
if (!TransactionIdDidCommit(AmiTransactionId))
{
/* ----------------
* SOMEDAY initialize the information stored in
* the headers of the log/variable relations.
* ----------------
*/
TransactionLogUpdate(AmiTransactionId, XID_COMMIT);
TransactionIdStore(AmiTransactionId, &cachedTestXid);
cachedTestXidStatus = XID_COMMIT;
#ifdef XLOG
Assert(!IsUnderPostmaster &&
ShmemVariableCache->nextXid <= FirstTransactionId);
ShmemVariableCache->nextXid = FirstTransactionId;
#else
VariableRelationPutNextXid(FirstTransactionId);
#endif
}
else if (RecoveryCheckingEnabled())
{
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.26 2000/10/28 16:20:53 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.27 2000/11/30 08:46:22 vadim Exp $
*
* NOTES
* This file contains support functions for the high
......@@ -186,9 +186,7 @@ TransBlockGetXidStatus(Block tblock,
bits8 bit2;
BitIndex offset;
#ifdef XLOG
tblock = (Block) ((char*) tblock + sizeof(XLogRecPtr));
#endif
/* ----------------
* calculate the index into the transaction data where
......@@ -231,9 +229,7 @@ TransBlockSetXidStatus(Block tblock,
Index index;
BitIndex offset;
#ifdef XLOG
tblock = (Block) ((char*) tblock + sizeof(XLogRecPtr));
#endif
/* ----------------
* calculate the index into the transaction data where
......
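Both transsup.c hunks make the same adjustment: a pg_log block now begins with an XLogRecPtr, so the two status bits kept per transaction start sizeof(XLogRecPtr) bytes into the block. A standalone model of the resulting addressing (constants and names are illustrative, not PostgreSQL code):
#include <stdio.h>
typedef struct XLogRecPtrDemo { unsigned xlogid; unsigned xrecoff; } XLogRecPtrDemo;
#define BLCKSZ       8192
#define BITS_PER_XID 2				/* two status bits per transaction */
int
main(void)
{
	unsigned long xid = 12345;
	unsigned long xids_per_block =
		(BLCKSZ - sizeof(XLogRecPtrDemo)) * 8 / BITS_PER_XID;
	unsigned long bitpos = (xid % xids_per_block) * BITS_PER_XID;
	printf("xid %lu -> block %lu, byte %lu, bit %lu\n",
		   xid, xid / xids_per_block,
		   (unsigned long) sizeof(XLogRecPtrDemo) + bitpos / 8, bitpos % 8);
	return 0;
}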
/*-------------------------------------------------------------------------
*
* varsup.c
* postgres variable relation support routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
* postgres OID & XID variables support routines
*
* Copyright (c) 2000, PostgreSQL, Inc
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.33 2000/11/20 16:47:30 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.34 2000/11/30 08:46:22 vadim Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef XLOG
#include "xlog_varsup.c"
#else
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "access/transam.h"
#include "storage/proc.h"
static void GetNewObjectIdBlock(Oid *oid_return, int oid_block_size);
static void VariableRelationGetNextOid(Oid *oid_return);
static void VariableRelationGetNextXid(TransactionId *xidP);
static void VariableRelationPutNextOid(Oid oid);
SPINLOCK OidGenLockId;
/* ---------------------
* spin lock for oid generation
* ---------------------
*/
int OidGenLockId;
extern SPINLOCK XidGenLockId;
extern void XLogPutNextOid(Oid nextOid);
/* ---------------------
* pointer to "variable cache" in shared memory (set up by shmem.c)
* ---------------------
*/
/* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache ShmemVariableCache = NULL;
/* ----------------------------------------------------------------
* variable relation query/update routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* VariableRelationGetNextXid
* --------------------------------
*/
static void
VariableRelationGetNextXid(TransactionId *xidP)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (!RelationIsValid(VariableRelation))
return;
/* ----------------
* read the variable page, get the nextXid field and
* release the buffer
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (!BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(ERROR, "VariableRelationGetNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(var->nextXidData, xidP);
ReleaseBuffer(buf);
}
/* --------------------------------
* VariableRelationPutNextXid
* --------------------------------
*/
void
VariableRelationPutNextXid(TransactionId xid)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (!RelationIsValid(VariableRelation))
return;
/* ----------------
* read the variable page, update the nextXid field and
* write the page back out to disk (with immediate write).
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (!BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(ERROR, "VariableRelationPutNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(xid, &(var->nextXidData));
FlushBuffer(buf, true, true);
}
/* --------------------------------
* VariableRelationGetNextOid
* --------------------------------
*/
static void
VariableRelationGetNextOid(Oid *oid_return)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* if the variable relation is not initialized, then we
* assume we are running at bootstrap time and so we return
* an invalid object id (this path should never be taken, probably).
* ----------------
*/
if (!RelationIsValid(VariableRelation))
{
(*oid_return) = InvalidOid;
return;
}
/* ----------------
* read the variable page, get the nextOid field and
* release the buffer
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (!BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(ERROR, "VariableRelationGetNextOid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
(*oid_return) = var->nextOid;
ReleaseBuffer(buf);
}
/* --------------------------------
* VariableRelationPutNextOid
* --------------------------------
*/
static void
VariableRelationPutNextOid(Oid oid)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (!RelationIsValid(VariableRelation))
return;
/* ----------------
* read the variable page, update the nextOid field and
* write the page back out to disk.
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (!BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(ERROR, "VariableRelationPutNextOid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
var->nextOid = oid;
WriteBuffer(buf);
}
/* ----------------------------------------------------------------
* transaction id generation support
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewTransactionId
*
* Transaction IDs are allocated via a cache in shared memory.
* Each time we need more IDs, we advance the "next XID" value
* in pg_variable by VAR_XID_PREFETCH and set the cache to
* show that many XIDs as available. Then, allocating those XIDs
* requires just a spinlock and not a buffer read/write cycle.
*
* Since the cache is shared across all backends, cached but unused
* XIDs are not lost when a backend exits, only when the postmaster
* quits or forces shared memory reinit. So we can afford to have
* a pretty big value of VAR_XID_PREFETCH.
*
* This code does not worry about initializing the transaction counter
* (see transam.c's InitializeTransactionLog() for that). We also
* ignore the possibility that the counter could someday wrap around.
* ----------------
*/
#define VAR_XID_PREFETCH 1024
void
GetNewTransactionId(TransactionId *xid)
{
/* ----------------
* during bootstrap initialization, we return the special
* bootstrap transaction id.
* ----------------
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if (AMI_OVERRIDE)
{
TransactionIdStore(AmiTransactionId, xid);
*xid = AmiTransactionId;
return;
}
SpinAcquire(OidGenLockId); /* not good for concurrency... */
if (ShmemVariableCache->xid_count == 0)
{
TransactionId nextid;
VariableRelationGetNextXid(&nextid);
TransactionIdStore(nextid, &(ShmemVariableCache->nextXid));
ShmemVariableCache->xid_count = VAR_XID_PREFETCH;
TransactionIdAdd(&nextid, VAR_XID_PREFETCH);
VariableRelationPutNextXid(nextid);
}
TransactionIdStore(ShmemVariableCache->nextXid, xid);
TransactionIdAdd(&(ShmemVariableCache->nextXid), 1);
(ShmemVariableCache->xid_count)--;
SpinAcquire(XidGenLockId);
*xid = ShmemVariableCache->nextXid;
(ShmemVariableCache->nextXid)++;
if (MyProc != (PROC *) NULL)
MyProc->xid = *xid;
SpinRelease(OidGenLockId);
SpinRelease(XidGenLockId);
}
/*
......@@ -294,30 +55,20 @@ void
ReadNewTransactionId(TransactionId *xid)
{
/* ----------------
* during bootstrap initialization, we return the special
* bootstrap transaction id.
* ----------------
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if (AMI_OVERRIDE)
{
TransactionIdStore(AmiTransactionId, xid);
*xid = AmiTransactionId;
return;
}
SpinAcquire(OidGenLockId); /* not good for concurrency... */
SpinAcquire(XidGenLockId);
*xid = ShmemVariableCache->nextXid;
SpinRelease(XidGenLockId);
/*
* Note that we don't check whether ShmemVariableCache->xid_count is
* zero. This works as long as we never call
* ReadNewTransactionId() before GetNewTransactionId().
*/
if (ShmemVariableCache->nextXid == 0)
elog(ERROR, "ReadNewTransactionId: ShmemVariableCache->nextXid is not initialized");
TransactionIdStore(ShmemVariableCache->nextXid, xid);
SpinRelease(OidGenLockId);
}
/* ----------------------------------------------------------------
......@@ -325,199 +76,67 @@ ReadNewTransactionId(TransactionId *xid)
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewObjectIdBlock
*
* This support function is used to allocate a block of object ids
* of the given size.
* ----------------
*/
static void
GetNewObjectIdBlock(Oid *oid_return, /* place to return the first new
* object id */
int oid_block_size) /* number of oids desired */
{
Oid firstfreeoid;
Oid nextoid;
/* ----------------
* Obtain exclusive access to the variable relation page
* ----------------
*/
SpinAcquire(OidGenLockId);
/* ----------------
* get the "next" oid from the variable relation
* ----------------
*/
VariableRelationGetNextOid(&firstfreeoid);
/* ----------------
* Allocate the range of OIDs to be returned to the caller.
*
* There are two things going on here.
*
* One: in a virgin database pg_variable will initially contain zeroes,
* so we will read out firstfreeoid = InvalidOid. We want to start
* allocating OIDs at BootstrapObjectIdData instead (OIDs below that
* are reserved for static assignment in the initial catalog data).
*
* Two: if a database is run long enough, the OID counter will wrap
* around. We must not generate an invalid OID when that happens,
* and it seems wise not to generate anything in the reserved range.
* Therefore we advance to BootstrapObjectIdData in this case too.
*
* The comparison here assumes that Oid is an unsigned type.
*/
nextoid = firstfreeoid + oid_block_size;
if (! OidIsValid(firstfreeoid) || nextoid < firstfreeoid)
{
/* Initialization or wraparound time, force it up to safe range */
firstfreeoid = BootstrapObjectIdData;
nextoid = firstfreeoid + oid_block_size;
}
(*oid_return) = firstfreeoid;
/* ----------------
* Update the variable relation to show the block range as used.
* ----------------
*/
VariableRelationPutNextOid(nextoid);
/* ----------------
* Relinquish our lock on the variable relation page
* ----------------
*/
SpinRelease(OidGenLockId);
}
/* ----------------
* GetNewObjectId
*
* This function allocates and parses out object ids. Like
* GetNewTransactionId(), it "prefetches" 32 object ids by
* incrementing the nextOid stored in the var relation by 32 and then
* returning these id's one at a time until they are exhausted.
* This means we reduce the number of accesses to the variable
* relation by 32 for each backend.
*
* Note: 32 has no special significance. We don't want the
* number to be too large because when the backend
* terminates, we lose the oids we cached.
*
* Question: couldn't we use a shared-memory cache just like XIDs?
* That would allow a larger interval between pg_variable updates
* without cache losses. Note, however, that we can assign an OID
* without even a spinlock from the backend-local OID cache.
* Maybe two levels of caching would be good.
* ----------------
*/
#define VAR_OID_PREFETCH 32
static int prefetched_oid_count = 0;
static Oid next_prefetched_oid;
#define VAR_OID_PREFETCH 8192
static Oid lastSeenOid = InvalidOid;
void
GetNewObjectId(Oid *oid_return) /* place to return the new object id */
GetNewObjectId(Oid *oid_return)
{
/* ----------------
* if we run out of prefetched oids, then we get some
* more before handing them out to the caller.
* ----------------
*/
SpinAcquire(OidGenLockId);
if (prefetched_oid_count == 0)
/* If we have run out of logged-ahead OIDs, log another batch */
if (ShmemVariableCache->oidCount == 0)
{
int oid_block_size = VAR_OID_PREFETCH;
/* ----------------
* Make sure pg_variable is open.
* ----------------
*/
if (!RelationIsValid(VariableRelation))
VariableRelation = heap_openr(VariableRelationName, NoLock);
/* ----------------
* get a new block of prefetched object ids.
* ----------------
*/
GetNewObjectIdBlock(&next_prefetched_oid, oid_block_size);
/* ----------------
* now reset the prefetched_oid_count.
* ----------------
*/
prefetched_oid_count = oid_block_size;
XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
}
/* ----------------
* return the next prefetched oid in the pointer passed by
* the user and decrement the prefetch count.
* ----------------
*/
if (PointerIsValid(oid_return))
(*oid_return) = next_prefetched_oid;
lastSeenOid = (*oid_return) = ShmemVariableCache->nextOid;
next_prefetched_oid++;
prefetched_oid_count--;
(ShmemVariableCache->nextOid)++;
(ShmemVariableCache->oidCount)--;
SpinRelease(OidGenLockId);
}
void
CheckMaxObjectId(Oid assigned_oid)
{
Oid temp_oid;
if (prefetched_oid_count == 0) /* make sure next/max is set, or
* reload */
GetNewObjectId(&temp_oid);
/* ----------------
* If we are below prefetched limits, do nothing
* ----------------
*/
if (assigned_oid < next_prefetched_oid)
if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid)
return;
/* ----------------
* If we are here, we are coming from a 'copy from' with oid's
*
* If we are in the prefetched oid range, just bump it up
* ----------------
*/
SpinAcquire(OidGenLockId);
if (assigned_oid < ShmemVariableCache->nextOid)
{
lastSeenOid = ShmemVariableCache->nextOid - 1;
SpinRelease(OidGenLockId);
return;
}
if (assigned_oid <= next_prefetched_oid + prefetched_oid_count - 1)
/* If we are in the logged oid range, just bump nextOid up */
if (assigned_oid <= ShmemVariableCache->nextOid +
ShmemVariableCache->oidCount - 1)
{
prefetched_oid_count -= assigned_oid - next_prefetched_oid + 1;
next_prefetched_oid = assigned_oid + 1;
ShmemVariableCache->oidCount -=
assigned_oid - ShmemVariableCache->nextOid + 1;
ShmemVariableCache->nextOid = assigned_oid + 1;
SpinRelease(OidGenLockId);
return;
}
/* ----------------
* We have exceeded the prefetch oid range
*
* We should lock the database and kill all other backends
* but we are loading oid's that we can not guarantee are unique
* anyway, so we must rely on the user
*
* We now:
* set the variable relation with the new max oid
* force the backend to reload its oid cache
*
* By reloading the oid cache, we don't have to update the variable
* relation every time when sequential OIDs are being loaded by COPY.
* ----------------
/*
* We have exceeded the logged oid range.
* We should lock the database and kill all other backends
* but we are loading OIDs that we cannot guarantee are unique
* anyway, so we must rely on the user.
*/
SpinAcquire(OidGenLockId);
VariableRelationPutNextOid(assigned_oid);
XLogPutNextOid(assigned_oid + VAR_OID_PREFETCH);
ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1;
ShmemVariableCache->nextOid = assigned_oid + 1;
SpinRelease(OidGenLockId);
prefetched_oid_count = 0; /* force reload */
GetNewObjectId(&temp_oid); /* cause target OID to be allocated */
}
#endif /* !XLOG */
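The net effect of the rewritten GetNewObjectId/CheckMaxObjectId: the pg_variable writes are gone, and instead a WAL record reserves VAR_OID_PREFETCH OIDs ahead of the shared counter; after a crash, replay restarts the counter at the logged value, so OIDs may be skipped but are never handed out twice. A standalone model of that behavior (helper and variable names are hypothetical, not PostgreSQL code):
#include <stdio.h>
#define VAR_OID_PREFETCH 8192
static unsigned nextOid = 16384;	/* models ShmemVariableCache->nextOid */
static unsigned oidCount = 0;		/* models ShmemVariableCache->oidCount */
static unsigned loggedOid;			/* models the value XLogPutNextOid wrote */
static unsigned
get_new_object_id(void)
{
	if (oidCount == 0)				/* reserved range exhausted: log more */
	{
		loggedOid = nextOid + VAR_OID_PREFETCH;
		oidCount = VAR_OID_PREFETCH;
	}
	oidCount--;
	return nextOid++;
}
int
main(void)
{
	printf("first oid %u, logged through %u\n", get_new_object_id(), loggedOid);
	printf("second oid %u, %u still reserved\n", get_new_object_id(), oidCount);
	return 0;
}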
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.85 2000/11/30 01:47:31 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.86 2000/11/30 08:46:22 vadim Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
......@@ -219,7 +219,6 @@ TransactionState CurrentTransactionState = &CurrentTransactionStateData;
int DefaultXactIsoLevel = XACT_READ_COMMITTED;
int XactIsoLevel;
#ifdef XLOG
#include "access/xlogutils.h"
int CommitDelay = 5; /* 1/200000 sec */
......@@ -227,8 +226,6 @@ int CommitDelay = 5; /* 1/200000 sec */
static void (*_RollbackFunc)(void*) = NULL;
static void *_RollbackData = NULL;
#endif
/* ----------------
* info returned when the system is disabled
*
......@@ -662,19 +659,10 @@ RecordTransactionCommit()
TransactionId xid;
int leak;
/* ----------------
* get the current transaction id
* ----------------
*/
xid = GetCurrentTransactionId();
/*
* flush the buffer manager pages. Note: if we have stable main
* memory, dirty shared buffers are not flushed plai 8/7/90
*/
leak = BufferPoolCheckLeak();
#ifdef XLOG
if (MyLastRecPtr.xrecoff != 0)
{
xl_xact_commit xlrec;
......@@ -685,7 +673,7 @@ RecordTransactionCommit()
xlrec.xtime = time(NULL);
/*
* MUST SAVE ARRAY OF RELFILENODE-s TO DROP
* SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP
*/
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT,
(char*) &xlrec, SizeOfXactCommit, NULL, 0);
......@@ -704,30 +692,6 @@ RecordTransactionCommit()
MyProc->logRec.xrecoff = 0;
}
#else
/*
* If no one shared buffer was changed by this transaction then we
* don't flush shared buffers and don't record commit status.
*/
if (SharedBufferChanged)
{
FlushBufferPool();
if (leak)
ResetBufferPool(true);
/*
* have the transaction access methods record the status of this
* transaction id in the pg_log relation.
*/
TransactionIdCommit(xid);
/*
* Now write the log info to the disk too.
*/
leak = BufferPoolCheckLeak();
FlushBufferPool();
}
#endif
if (leak)
ResetBufferPool(true);
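Stripped of the removed #else branch, RecordTransactionCommit reduces to: emit a commit record if this transaction wrote any WAL, make it durable, then mark the transaction committed in pg_log. A sketch of that order (the XLogFlush step is an assumption about the elided lines, consistent with the CommitDelay knob introduced above):
/*
 *	if (MyLastRecPtr.xrecoff != 0)		-- did this xact write WAL?
 *	{
 *		xlrec.xtime = time(NULL);
 *		recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT,
 *							(char *) &xlrec, SizeOfXactCommit, NULL, 0);
 *		XLogFlush(recptr);				-- commit record to disk first
 *		TransactionIdCommit(xid);		-- then flip the pg_log bits
 *		MyProc->logRec.xrecoff = 0;
 *	}
 */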
......@@ -815,23 +779,8 @@ AtCommit_Memory(void)
static void
RecordTransactionAbort(void)
{
TransactionId xid;
TransactionId xid = GetCurrentTransactionId();
/* ----------------
* get the current transaction id
* ----------------
*/
xid = GetCurrentTransactionId();
/*
* Have the transaction access methods record the status of this
* transaction id in the pg_log relation. We skip it if no one shared
* buffer was changed by this transaction.
*/
if (SharedBufferChanged && !TransactionIdDidCommit(xid))
TransactionIdAbort(xid);
#ifdef XLOG
if (MyLastRecPtr.xrecoff != 0)
{
xl_xact_abort xlrec;
......@@ -841,9 +790,9 @@ RecordTransactionAbort(void)
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT,
(char*) &xlrec, SizeOfXactAbort, NULL, 0);
TransactionIdAbort(xid);
MyProc->logRec.xrecoff = 0;
}
#endif
/*
* Tell bufmgr and smgr to release resources.
......@@ -1748,8 +1697,6 @@ IsTransactionBlock(void)
return false;
}
#ifdef XLOG
void
xact_redo(XLogRecPtr lsn, XLogRecord *record)
{
......@@ -1760,7 +1707,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
xl_xact_commit *xlrec = (xl_xact_commit*) XLogRecGetData(record);
TransactionIdCommit(record->xl_xid);
/* MUST REMOVE FILES OF ALL DROPPED RELATIONS */
/* SHOULD REMOVE FILES OF ALL DROPPED RELATIONS */
}
else if (info == XLOG_XACT_ABORT)
{
......@@ -1825,5 +1772,3 @@ XactPopRollback(void)
{
_RollbackFunc = NULL;
}
#endif
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.37 2000/11/30 01:47:31 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.38 2000/11/30 08:46:22 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1443,12 +1443,10 @@ void
BootStrapXLOG()
{
CheckPoint checkPoint;
#ifdef XLOG
char buffer[BLCKSZ];
bool usexistent = false;
XLogPageHeader page = (XLogPageHeader) buffer;
XLogRecord *record;
#endif
checkPoint.redo.xlogid = 0;
checkPoint.redo.xrecoff = SizeOfXLogPHD;
......@@ -1462,8 +1460,6 @@ BootStrapXLOG()
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
#ifdef XLOG
memset(buffer, 0, BLCKSZ);
page->xlp_magic = XLOG_PAGE_MAGIC;
page->xlp_info = 0;
......@@ -1488,8 +1484,6 @@ BootStrapXLOG()
close(logFile);
logFile = -1;
#endif
memset(ControlFile, 0, sizeof(ControlFileData));
ControlFile->logId = 0;
ControlFile->logSeg = 1;
......@@ -1513,14 +1507,12 @@ str_time(time_t tnow)
return buf;
}
/*
* This func must be called ONCE on system startup
*/
void
StartupXLOG()
{
#ifdef XLOG
XLogCtlInsert *Insert;
CheckPoint checkPoint;
XLogRecPtr RecPtr,
......@@ -1529,8 +1521,6 @@ StartupXLOG()
char buffer[MAXLOGRECSZ + SizeOfXLogRecord];
bool sie_saved = false;
#endif
elog(LOG, "starting up");
XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));
......@@ -1580,8 +1570,6 @@ StartupXLOG()
elog(LOG, "database system was interrupted at %s",
str_time(ControlFile->time));
#ifdef XLOG
LastRec = RecPtr = ControlFile->checkPoint;
if (!XRecOffIsValid(RecPtr.xrecoff))
elog(STOP, "Invalid checkPoint in control file");
......@@ -1602,12 +1590,7 @@ StartupXLOG()
checkPoint.nextXid, checkPoint.nextOid);
if (checkPoint.nextXid < FirstTransactionId ||
checkPoint.nextOid < BootstrapObjectIdData)
#ifdef XLOG_2
elog(STOP, "Invalid NextTransactionId/NextOid");
#else
elog(LOG, "Invalid NextTransactionId/NextOid");
#endif
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
......@@ -1751,8 +1734,6 @@ StartupXLOG()
}
InRecovery = false;
#endif /* XLOG */
ControlFile->state = DB_IN_PRODUCTION;
ControlFile->time = time(NULL);
UpdateControlFile();
......@@ -1783,9 +1764,7 @@ ShutdownXLOG()
{
elog(LOG, "shutting down");
#ifdef XLOG
CreateDummyCaches();
#endif
CreateCheckPoint(true);
elog(LOG, "database system is shut down");
......@@ -1796,7 +1775,6 @@ extern XLogRecPtr GetUndoRecPtr(void);
void
CreateCheckPoint(bool shutdown)
{
#ifdef XLOG
CheckPoint checkPoint;
XLogRecPtr recptr;
XLogCtlInsert *Insert = &XLogCtl->Insert;
......@@ -1880,12 +1858,9 @@ CreateCheckPoint(bool shutdown)
XLogFlush(recptr);
#endif /* XLOG */
SpinAcquire(ControlFileLockId);
if (shutdown)
ControlFile->state = DB_SHUTDOWNED;
#ifdef XLOG
else /* create new log file */
{
if (recptr.xrecoff % XLogSegSize >=
......@@ -1914,16 +1889,10 @@ CreateCheckPoint(bool shutdown)
_logSeg = ControlFile->logSeg - 1;
strcpy(archdir, ControlFile->archdir);
#else
ControlFile->checkPoint.xlogid = 0;
ControlFile->checkPoint.xrecoff = SizeOfXLogPHD;
#endif
ControlFile->time = time(NULL);
UpdateControlFile();
SpinRelease(ControlFileLockId);
#ifdef XLOG
/*
* Delete offline log files. Get oldest online
* log file from undo rec if it's valid.
......@@ -1948,7 +1917,6 @@ CreateCheckPoint(bool shutdown)
S_UNLOCK(&(XLogCtl->chkp_lck));
MyLastRecPtr.xrecoff = 0; /* to avoid commit record */
#endif
return;
}
......
/*-------------------------------------------------------------------------
*
* varsup.c
* postgres OID & XID variables support routines
*
* Copyright (c) 2000, PostgreSQL, Inc
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/xlog_varsup.c,v 1.1 2000/11/03 11:39:35 vadim Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/transam.h"
#include "storage/proc.h"
SPINLOCK OidGenLockId;
extern SPINLOCK XidGenLockId;
extern void XLogPutNextOid(Oid nextOid);
/* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache ShmemVariableCache = NULL;
void
GetNewTransactionId(TransactionId *xid)
{
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if (AMI_OVERRIDE)
{
*xid = AmiTransactionId;
return;
}
SpinAcquire(XidGenLockId);
*xid = ShmemVariableCache->nextXid;
(ShmemVariableCache->nextXid)++;
if (MyProc != (PROC *) NULL)
MyProc->xid = *xid;
SpinRelease(XidGenLockId);
}
/*
* Like GetNewTransactionId, this reads nextXid but does not advance it.
*/
void
ReadNewTransactionId(TransactionId *xid)
{
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if (AMI_OVERRIDE)
{
*xid = AmiTransactionId;
return;
}
SpinAcquire(XidGenLockId);
*xid = ShmemVariableCache->nextXid;
SpinRelease(XidGenLockId);
}
/* ----------------------------------------------------------------
* object id generation support
* ----------------------------------------------------------------
*/
#define VAR_OID_PREFETCH 8192
static Oid lastSeenOid = InvalidOid;
void
GetNewObjectId(Oid *oid_return)
{
SpinAcquire(OidGenLockId);
/* If we have run out of logged-ahead OIDs, log another batch */
if (ShmemVariableCache->oidCount == 0)
{
XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
}
if (PointerIsValid(oid_return))
lastSeenOid = (*oid_return) = ShmemVariableCache->nextOid;
(ShmemVariableCache->nextOid)++;
(ShmemVariableCache->oidCount)--;
SpinRelease(OidGenLockId);
}
void
CheckMaxObjectId(Oid assigned_oid)
{
if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid)
return;
SpinAcquire(OidGenLockId);
if (assigned_oid < ShmemVariableCache->nextOid)
{
lastSeenOid = ShmemVariableCache->nextOid - 1;
SpinRelease(OidGenLockId);
return;
}
/* If we are in the logged oid range, just bump nextOid up */
if (assigned_oid <= ShmemVariableCache->nextOid +
ShmemVariableCache->oidCount - 1)
{
ShmemVariableCache->oidCount -=
assigned_oid - ShmemVariableCache->nextOid + 1;
ShmemVariableCache->nextOid = assigned_oid + 1;
SpinRelease(OidGenLockId);
return;
}
/*
* We have exceeded the logged oid range.
* We should lock the database and kill all other backends
* but we are loading OIDs that we cannot guarantee are unique
* anyway, so we must rely on the user.
*/
XLogPutNextOid(assigned_oid + VAR_OID_PREFETCH);
ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1;
ShmemVariableCache->nextOid = assigned_oid + 1;
SpinRelease(OidGenLockId);
}
......@@ -10,9 +10,6 @@
*/
#include "postgres.h"
#ifdef XLOG
#include "access/xlog.h"
#include "access/transam.h"
#include "access/xact.h"
......@@ -397,5 +394,3 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
return(&(res->reldata));
}
#endif
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.69 2000/11/18 03:36:48 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.70 2000/11/30 08:46:22 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -153,13 +153,11 @@ createdb(const char *dbname, const char *dbpath,
elog(ERROR, "database path may not contain single quotes");
/* ... otherwise we'd be open to shell exploits below */
#ifdef XLOG
/* Force dirty buffers out to disk, to ensure source database is
* up-to-date for the copy. (We really only need to flush buffers
* for the source database...)
*/
BufferSync();
#endif
/*
* Close virtual file descriptors so the kernel has more available for
......@@ -255,13 +253,11 @@ createdb(const char *dbname, const char *dbpath,
/* Close pg_database, but keep lock till commit */
heap_close(pg_database_rel, NoLock);
#ifdef XLOG
/* Force dirty buffers out to disk, so that newly-connecting backends
* will see the new database in pg_database right away. (They'll see
* an uncommitted tuple, but they don't care; see GetRawDatabaseInfo.)
*/
BufferSync();
#endif
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.173 2000/11/16 22:30:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.174 2000/11/30 08:46:22 vadim Exp $
*
*-------------------------------------------------------------------------
......@@ -47,11 +47,9 @@
#include <sys/resource.h>
#endif
#ifdef XLOG
#include "access/xlog.h"
XLogRecPtr log_heap_move(Relation reln,
ItemPointerData from, HeapTuple newtup);
#endif
extern XLogRecPtr log_heap_move(Relation reln,
ItemPointerData from, HeapTuple newtup);
static MemoryContext vac_context = NULL;
......@@ -1492,7 +1490,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff);
#ifdef XLOG
{
XLogRecPtr recptr =
log_heap_move(onerel, tuple.t_self, &newtup);
......@@ -1505,7 +1502,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
PageSetLSN(ToPage, recptr);
PageSetSUI(ToPage, ThisStartUpID);
}
#endif
if (((int) destvacpage->blkno) > last_move_dest_block)
last_move_dest_block = destvacpage->blkno;
......@@ -1655,7 +1651,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
#ifdef XLOG
{
XLogRecPtr recptr =
log_heap_move(onerel, tuple.t_self, &newtup);
......@@ -1665,7 +1660,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
PageSetLSN(ToPage, recptr);
PageSetSUI(ToPage, ThisStartUpID);
}
#endif
cur_page->offsets_used++;
num_moved++;
......@@ -1786,19 +1780,12 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
if (num_moved > 0)
{
#ifdef XLOG
RecordTransactionCommit();
#else
/*
* We have to commit our tuple moves before we truncate the
* relation, but we shouldn't lose our locks. And so - quick hack:
* flush buffers and record status of current transaction as
* committed, and continue. - vadim 11/13/96
* record status of current transaction as committed, and continue.
*/
FlushBufferPool();
TransactionIdCommit(myXID);
FlushBufferPool();
#endif
RecordTransactionCommit();
}
/*
......
/*-------------------------------------------------------------------------
*
* bufmgr.c
* xlog_bufmgr.c
* buffer manager interface routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.96 2000/11/30 01:39:07 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.97 2000/11/30 08:46:23 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -31,9 +31,6 @@
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
* FlushBuffer() -- Write buffer immediately. Can unpin, or not,
* depending on parameter.
*
* BufferSync() -- flush all dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
......@@ -42,13 +39,8 @@
* freelist.c -- chooses victim for buffer replacement
* buf_table.c -- manages the buffer lookup table
*/
#include "postgres.h"
#ifdef XLOG
#include "xlog_bufmgr.c"
#else
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
......@@ -61,10 +53,11 @@
#include "storage/s_lock.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "catalog/pg_database.h"
#endif
#define BufferGetLSN(bufHdr) \
(*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
extern SPINLOCK BufMgrLock;
extern long int ReadBufferCount;
......@@ -99,9 +92,6 @@ static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr, bool bufferLockHeld);
static void SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
static void ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
static void BufferSync(void);
static int BufferReplace(BufferDesc *bufHdr);
void PrintBufferDescs(void);
......@@ -169,48 +159,6 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
return ReadBufferWithBufferLock(reln, blockNum, false);
}
/*
* is_userbuffer
*
* XXX caller must have already acquired BufMgrLock
*/
#ifdef NOT_USED
static bool
is_userbuffer(Buffer buffer)
{
BufferDesc *buf = &BufferDescriptors[buffer - 1];
if (IsSystemRelationName(buf->blind.relname))
return false;
return true;
}
#endif
#ifdef NOT_USED
Buffer
ReadBuffer_Debug(char *file,
int line,
Relation reln,
BlockNumber blockNum)
{
Buffer buffer;
buffer = ReadBufferWithBufferLock(reln, blockNum, false);
if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
{
BufferDesc *buf = &BufferDescriptors[buffer - 1];
fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
return buffer;
}
#endif
/*
* ReadBufferWithBufferLock -- does the work of
* ReadBuffer() but with the possibility that
......@@ -447,7 +395,7 @@ BufferAlloc(Relation reln,
buf->refcount = 1;
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
if (buf->flags & BM_DIRTY)
if (buf->flags & BM_DIRTY || buf->cntxDirty)
{
bool smok;
......@@ -505,18 +453,18 @@ BufferAlloc(Relation reln,
}
else
{
/*
* BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
* be setted by anyone. - vadim 01/17/97
*/
if (buf->flags & BM_JUST_DIRTIED)
{
elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing",
elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing",
buf->tag.blockNum, buf->blind.relname);
}
else
buf->flags &= ~BM_DIRTY;
buf->cntxDirty = false;
}
/*
......@@ -676,131 +624,15 @@ WriteBuffer(Buffer buffer)
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SetBufferDirtiedByMe(buffer, bufHdr);
UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock);
return TRUE;
}
#ifdef NOT_USED
void
WriteBuffer_Debug(char *file, int line, Buffer buffer)
{
WriteBuffer(buffer);
if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
{
BufferDesc *buf;
buf = &BufferDescriptors[buffer - 1];
fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
}
#endif
/*
* FlushBuffer -- like WriteBuffer, but write the page immediately,
* rather than just marking it dirty. On success return, the buffer will
* no longer be dirty.
*
* 'buffer' is known to be dirty/pinned, so there should not be a
* problem reading the BufferDesc members without the BufMgrLock
* (nobody should be able to change tags out from under us).
*
* If 'sync' is true, a synchronous write is wanted (wait for buffer to hit
* the disk). Otherwise it's sufficient to issue the kernel write call.
*
* Unpin buffer if 'release' is true.
*/
int
FlushBuffer(Buffer buffer, bool sync, bool release)
{
BufferDesc *bufHdr;
Relation bufrel;
int status;
if (BufferIsLocal(buffer))
return FlushLocalBuffer(buffer, sync, release) ? STATUS_OK : STATUS_ERROR;
if (BAD_BUFFER_ID(buffer))
return STATUS_ERROR;
Assert(PrivateRefCount[buffer - 1] > 0); /* else caller didn't pin */
bufHdr = &BufferDescriptors[buffer - 1];
bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
Assert(bufrel != (Relation) NULL);
SharedBufferChanged = true;
/* To check if block content changed while flushing. - vadim 01/17/97 */
SpinAcquire(BufMgrLock);
WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* see comments in BufferSync().
*/
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE);
if (sync)
status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
else
status = smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK);
/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
RelationDecrementReferenceCount(bufrel);
if (status == SM_FAIL)
{
elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
return STATUS_ERROR;
}
BufferFlushCount++;
SpinAcquire(BufMgrLock);
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
TerminateBufferIO(bufHdr); /* output IO finished */
/*
* If this buffer was marked by someone as DIRTY while we were
* flushing it out we must not clear shared DIRTY flag - vadim
* 01/17/97
*
* ... but we can clear BufferDirtiedByMe anyway - tgl 3/31/00
*/
if (bufHdr->flags & BM_JUST_DIRTIED)
{
elog(NOTICE, "FlushBuffer: content of block %u (%s) changed while flushing",
bufHdr->tag.blockNum, bufHdr->blind.relname);
}
else
bufHdr->flags &= ~BM_DIRTY;
ClearBufferDirtiedByMe(buffer, bufHdr);
if (release)
UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock);
return STATUS_OK;
}
/*
* WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
* when the operation is complete.
......@@ -822,8 +654,9 @@ WriteNoReleaseBuffer(Buffer buffer)
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SetBufferDirtiedByMe(buffer, bufHdr);
SpinRelease(BufMgrLock);
return STATUS_OK;
......@@ -876,307 +709,138 @@ ReleaseAndReadBuffer(Buffer buffer,
}
/*
* SetBufferDirtiedByMe -- mark a shared buffer as being dirtied by this xact
*
* This flag essentially remembers that we need to write and fsync this buffer
* before we can commit the transaction. The write might end up getting done
* by another backend, but we must do the fsync ourselves (else we could
* commit before the data actually reaches disk). We do not issue fsync
* instantly upon write; the storage manager keeps track of which files need
* to be fsync'd before commit can occur. A key aspect of this data structure
* is that we will be able to notify the storage manager that an fsync is
* needed even after another backend has done the physical write and replaced
* the buffer contents with something else!
* BufferSync -- Write all dirty buffers in the pool.
*
* NB: we must be holding the bufmgr lock at entry, and the buffer must be
* pinned so that no other backend can take it away from us.
* This is called at checkpoint time and writes out all dirty buffers.
*/
static void
SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
{
BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1];
Relation reln;
int status;
/*
* If the flag is already set, check to see whether the buffertag is
* the same. If not, some other backend already wrote the buffer data
* that we dirtied. We must tell the storage manager to make an fsync
* pending on that file before we can overwrite the old tag value.
*/
if (BufferDirtiedByMe[buffer - 1])
{
if (RelFileNodeEquals(bufHdr->tag.rnode, tagLastDirtied->rnode) &&
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
return; /* Same tag already dirtied, so no work */
#ifndef OPTIMIZE_SINGLE
SpinRelease(BufMgrLock);
#endif /* OPTIMIZE_SINGLE */
reln = RelationNodeCacheGetRelation(tagLastDirtied->rnode);
if (reln == (Relation) NULL)
{
status = smgrblindmarkdirty(DEFAULT_SMGR,
tagLastDirtied->rnode,
tagLastDirtied->blockNum);
}
else
{
Assert(RelFileNodeEquals(tagLastDirtied->rnode, reln->rd_node));
status = smgrmarkdirty(DEFAULT_SMGR, reln,
tagLastDirtied->blockNum);
/*
* drop relcache refcnt incremented by
* RelationNodeCacheGetRelation
*/
RelationDecrementReferenceCount(reln);
}
if (status == SM_FAIL)
{
elog(ERROR, "SetBufferDirtiedByMe: cannot mark %u for %s",
tagLastDirtied->blockNum,
BufferBlindLastDirtied[buffer - 1].relname);
}
#ifndef OPTIMIZE_SINGLE
SpinAcquire(BufMgrLock);
#endif /* OPTIMIZE_SINGLE */
}
*tagLastDirtied = bufHdr->tag;
BufferBlindLastDirtied[buffer - 1] = bufHdr->blind;
BufferDirtiedByMe[buffer - 1] = true;
}
/*
* ClearBufferDirtiedByMe -- mark a shared buffer as no longer needing fsync
*
* If we write out a buffer ourselves, then the storage manager will set its
* needs-fsync flag for that file automatically, and so we can clear our own
* flag that says it needs to be done later.
*
* NB: we must be holding the bufmgr lock at entry.
*/
static void
ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
{
BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1];
/*
* Do *not* clear the flag if it refers to some other buffertag than
* the data we just wrote. This is unlikely, but possible if some
* other backend replaced the buffer contents since we set our flag.
*/
if (RelFileNodeEquals(bufHdr->tag.rnode, tagLastDirtied->rnode) &&
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
BufferDirtiedByMe[buffer - 1] = false;
}
/*
* BufferSync -- Flush all dirty buffers in the pool.
*
* This is called at transaction commit time. We find all buffers
* that have been dirtied by the current xact and flush them to disk.
* We do *not* flush dirty buffers that have been dirtied by other xacts.
* (This is a substantial change from pre-7.0 behavior.)
*/
static void
void
BufferSync()
{
int i;
BufferDesc *bufHdr;
Buffer buffer;
int status;
Relation reln;
bool didwrite;
RelFileNode rnode;
XLogRecPtr recptr;
Relation reln = NULL;
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{
/* Ignore buffers that were not dirtied by me */
if (!BufferDirtiedByMe[i])
continue;
SpinAcquire(BufMgrLock);
/*
* We only need to write if the buffer is still dirty and still
* contains the same disk page that it contained when we dirtied
* it. Otherwise, someone else has already written our changes for
* us, and we need only fsync.
*
* (NOTE: it's still possible to do an unnecessary write, if other
* xacts have written and then re-dirtied the page since our last
* change to it. But that should be pretty uncommon, and there's
* no easy way to detect it anyway.)
*/
reln = NULL;
didwrite = false;
if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
if (!(bufHdr->flags & BM_VALID))
{
if (RelFileNodeEquals(bufHdr->tag.rnode, BufferTagLastDirtied[i].rnode) &&
bufHdr->tag.blockNum == BufferTagLastDirtied[i].blockNum)
{
/*
* Try to find relation for buf. This could fail, if the
* rel has been flushed from the relcache since we dirtied
* the page. That should be uncommon, so paying the extra
* cost of a blind write when it happens seems OK.
*/
if (!InRecovery)
reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
/*
* We have to pin buffer to keep anyone from stealing it
* from the buffer pool while we are flushing it or
* waiting in WaitIO. It's bad for GetFreeBuffer in
* BufferAlloc, but there is no other way to prevent
* writing into disk block data from some other buffer,
* getting smgr status of some other block and clearing
* BM_DIRTY of ... - VAdim 09/16/96
*/
PinBuffer(bufHdr);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
{
WaitIO(bufHdr, BufMgrLock);
UnpinBuffer(bufHdr);
if (bufHdr->flags & BM_IO_ERROR)
{
elog(ERROR, "BufferSync: write error %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
}
}
else
{
/*
* To check if block content changed while flushing
* (see below). - vadim 01/17/97
*/
WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* otherwise we could write a non-self-consistent page
* image to disk, which'd be bad news if the other
* transaction aborts before writing its changes.
*
* Note that we still need the BM_JUST_DIRTIED mechanism
* in case someone dirties the buffer just before we
* grab this lock or just after we release it.
*/
LockBuffer(BufferDescriptorGetBuffer(bufHdr),
BUFFER_LOCK_SHARE);
SpinRelease(BufMgrLock);
continue;
}
/*
* If we didn't have the reldesc in our local cache,
* write this page out using the 'blind write' storage
* manager routine. If we did find it, use the
* standard interface.
*/
if (reln == (Relation) NULL)
{
status = smgrblindwrt(DEFAULT_SMGR,
bufHdr->tag.rnode,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data),
true); /* must fsync */
}
else
{
status = smgrwrite(DEFAULT_SMGR, reln,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
}
/*
* Pin buffer and ensure that no one reads it from disk
*/
PinBuffer(bufHdr);
/* Synchronize with BufferAlloc */
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
/*
* Release the per-buffer readlock, reacquire BufMgrLock.
*/
LockBuffer(BufferDescriptorGetBuffer(bufHdr),
BUFFER_LOCK_UNLOCK);
buffer = BufferDescriptorGetBuffer(bufHdr);
rnode = bufHdr->tag.rnode;
SpinAcquire(BufMgrLock);
SpinRelease(BufMgrLock);
UnpinBuffer(bufHdr);
if (status == SM_FAIL)
{
bufHdr->flags |= BM_IO_ERROR;
elog(ERROR, "BufferSync: cannot write %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
}
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
TerminateBufferIO(bufHdr); /* Sync IO finished */
BufferFlushCount++;
didwrite = true;
/*
* Try to find relation for buffer
*/
reln = RelationNodeCacheGetRelation(rnode);
/*
* If this buffer was marked by someone as DIRTY while
* we were flushing it out we must not clear DIRTY
* flag - vadim 01/17/97
*
* but it is OK to clear BufferDirtiedByMe - tgl 3/31/00
*/
if (!(bufHdr->flags & BM_JUST_DIRTIED))
bufHdr->flags &= ~BM_DIRTY;
}
/*
* Protect buffer content against concurrent update
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
/* drop refcnt obtained by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL)
RelationDecrementReferenceCount(reln);
}
}
/*
* Force XLOG flush for buffer's LSN
*/
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
/*
* If we did not write the buffer (because someone else did), we
* must still fsync the file containing it, to ensure that the
* write is down to disk before we commit.
* Now it's safe to write buffer to disk
* (if needed at all -:))
*/
if (!didwrite)
SpinAcquire(BufMgrLock);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
#ifndef OPTIMIZE_SINGLE
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
#endif /* OPTIMIZE_SINGLE */
reln = RelationNodeCacheGetRelation(BufferTagLastDirtied[i].rnode);
if (reln == (Relation) NULL)
{
status = smgrblindmarkdirty(DEFAULT_SMGR,
BufferTagLastDirtied[i].rnode,
BufferTagLastDirtied[i].blockNum);
status = smgrblindwrt(DEFAULT_SMGR,
bufHdr->tag.rnode,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data),
true); /* must fsync */
}
else
{
status = smgrmarkdirty(DEFAULT_SMGR, reln,
BufferTagLastDirtied[i].blockNum);
status = smgrwrite(DEFAULT_SMGR, reln,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
}
/*
* drop relcache refcnt incremented by
* RelationNodeCacheGetRelation
*/
RelationDecrementReferenceCount(reln);
if (status == SM_FAIL) /* disk failure ?! */
elog(STOP, "BufferSync: cannot write %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
/*
* Note that it's safe to change cntxDirty here because we
* protect it from upper-level writers by share lock and from
* other bufmgr routines by BM_IO_IN_PROGRESS
*/
bufHdr->cntxDirty = false;
/*
* Release the per-buffer readlock, reacquire BufMgrLock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
BufferFlushCount++;
}
#ifndef OPTIMIZE_SINGLE
SpinAcquire(BufMgrLock);
#endif /* OPTIMIZE_SINGLE */
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
TerminateBufferIO(bufHdr); /* Sync IO finished */
/*
* If this buffer was marked by someone as DIRTY while
* we were flushing it out we must not clear DIRTY
* flag - vadim 01/17/97
*/
if (!(bufHdr->flags & BM_JUST_DIRTIED))
bufHdr->flags &= ~BM_DIRTY;
}
else
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
BufferDirtiedByMe[i] = false;
UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock);
/* drop refcnt obtained by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL)
{
RelationDecrementReferenceCount(reln);
reln = NULL;
}
}
#ifndef XLOG
LocalBufferSync();
#endif
}
}
/*
* WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
......@@ -1278,9 +942,6 @@ ResetBufferPool(bool isCommit)
SpinRelease(BufMgrLock);
}
PrivateRefCount[i] = 0;
if (!isCommit)
BufferDirtiedByMe[i] = false;
}
ResetLocalBufferPool();
......@@ -1321,16 +982,29 @@ relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
}
/* ------------------------------------------------
* FlushBufferPool
*
* flush all dirty blocks in buffer pool to disk
* FlushBufferPool
*
* Flush all dirty blocks in buffer pool to disk
* at the checkpoint time
* ------------------------------------------------
*/
void
FlushBufferPool(void)
{
BufferSync();
smgrsync();
}
/*
* At commit time we have to flush the local buffer pool only
*/
void
BufmgrCommit(void)
{
LocalBufferSync();
/*
* All files created in current transaction will be fsync-ed
*/
smgrcommit();
}
......@@ -1358,35 +1032,28 @@ BufferGetBlockNumber(Buffer buffer)
*
* Write out the buffer corresponding to 'bufHdr'
*
* This routine used to flush the data to disk (ie, force immediate fsync)
* but that's no longer necessary because BufferSync is smarter than before.
*
* BufMgrLock must be held at entry, and the buffer must be pinned.
*/
static int
BufferReplace(BufferDesc *bufHdr)
{
Relation reln;
XLogRecPtr recptr;
int status;
/*
* first try to find the reldesc in the cache, if no luck, don't
* bother to build the reldesc from scratch, just do a blind write.
*/
reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
/* To check if block content changed while flushing. - vadim 01/17/97 */
bufHdr->flags &= ~BM_JUST_DIRTIED;
SpinRelease(BufMgrLock);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* see comments in BufferSync().
* No need to lock buffer context - no one should be able to
* end ReadBuffer
*/
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE);
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
if (reln != (Relation) NULL)
{
......@@ -1401,25 +1068,15 @@ BufferReplace(BufferDesc *bufHdr)
false); /* no fsync */
}
LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK);
SpinAcquire(BufMgrLock);
/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL)
RelationDecrementReferenceCount(reln);
SpinAcquire(BufMgrLock);
if (status == SM_FAIL)
return FALSE;
/*
* If we had marked this buffer as needing to be fsync'd, we can
* forget about that, because it's now the storage manager's
* responsibility (but only if we called smgrwrite, not smgrblindwrt).
*/
if (reln != (Relation) NULL)
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
BufferFlushCount++;
return TRUE;
......@@ -1438,7 +1095,8 @@ BlockNumber
RelationGetNumberOfBlocks(Relation relation)
{
return ((relation->rd_myxactonly) ? relation->rd_nblocks :
smgrnblocks(DEFAULT_SMGR, relation));
((relation->rd_rel->relkind == RELKIND_VIEW) ? 0 :
smgrnblocks(DEFAULT_SMGR, relation)));
}
/* ---------------------------------------------------------------------
......@@ -1471,6 +1129,7 @@ DropRelationBuffers(Relation rel)
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
LocalRefCount[i] = 0;
bufHdr->tag.rnode.relNode = InvalidOid;
}
......@@ -1503,6 +1162,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* Release any refcount we may have.
......@@ -1526,20 +1186,6 @@ recheck:
*/
BufTableDelete(bufHdr);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target relation, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target relation!
*
* NOTE: we have no way to clear BufferDirtiedByMe info in other
* backends, but hopefully there are none with that bit set for
* this rel, since we hold exclusive lock on this rel.
*/
if (RelFileNodeEquals(rel->rd_node,
BufferTagLastDirtied[i - 1].rnode))
BufferDirtiedByMe[i - 1] = false;
}
SpinRelease(BufMgrLock);
......@@ -1570,6 +1216,7 @@ DropRelFileNodeBuffers(RelFileNode rnode)
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
LocalRefCount[i] = 0;
bufHdr->tag.rnode.relNode = InvalidOid;
}
......@@ -1600,6 +1247,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* Release any refcount we may have.
......@@ -1623,20 +1271,6 @@ recheck:
*/
BufTableDelete(bufHdr);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target relation, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target relation!
*
* NOTE: we have no way to clear BufferDirtiedByMe info in other
* backends, but hopefully there are none with that bit set for
* this rel, since we hold exclusive lock on this rel.
*/
if (RelFileNodeEquals(rnode,
BufferTagLastDirtied[i - 1].rnode))
BufferDirtiedByMe[i - 1] = false;
}
SpinRelease(BufMgrLock);
......@@ -1689,6 +1323,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* The thing should be free, if caller has checked that no
......@@ -1700,17 +1335,6 @@ recheck:
*/
BufTableDelete(bufHdr);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target database, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target database!
*
* (Actually, this is probably unnecessary, since I shouldn't have
* ever dirtied pages of the target database, but...)
*/
if (BufferTagLastDirtied[i - 1].rnode.tblNode == dbid)
BufferDirtiedByMe[i - 1] = false;
}
SpinRelease(BufMgrLock);
}
......@@ -1847,6 +1471,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{
int i;
BufferDesc *bufHdr;
XLogRecPtr recptr;
int status;
if (rel->rd_myxactonly)
{
......@@ -1855,22 +1481,27 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
bufHdr = &LocalBufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
if (bufHdr->flags & BM_DIRTY)
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
if (FlushBuffer(-i - 1, false, false) != STATUS_OK)
status = smgrwrite(DEFAULT_SMGR, rel,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
if (status == SM_FAIL)
{
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum);
return -1;
return(-1);
}
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
}
if (LocalRefCount[i] > 0)
{
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum, LocalRefCount[i]);
return -2;
return(-2);
}
if (bufHdr->tag.blockNum >= firstDelBlock)
{
......@@ -1887,22 +1518,57 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
bufHdr = &BufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
if (bufHdr->flags & BM_DIRTY)
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
PinBuffer(bufHdr);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
SpinRelease(BufMgrLock);
if (FlushBuffer(i + 1, false, false) != STATUS_OK)
/*
* Force XLOG flush for buffer's LSN
*/
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
/*
* Now it's safe to write buffer to disk
*/
SpinAcquire(BufMgrLock);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
SpinAcquire(BufMgrLock);
UnpinBuffer(bufHdr);
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum,
PrivateRefCount[i], bufHdr->refcount);
return -1;
status = smgrwrite(DEFAULT_SMGR, rel,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
if (status == SM_FAIL) /* disk failure ?! */
elog(STOP, "FlushRelationBuffers: cannot write %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
BufferFlushCount++;
SpinAcquire(BufMgrLock);
bufHdr->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(bufHdr);
Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
bufHdr->flags &= ~BM_DIRTY;
/*
* Note that it's safe to change cntxDirty here because
* of we protect it from upper writers by
* AccessExclusiveLock and from other bufmgr routines
* by BM_IO_IN_PROGRESS
*/
bufHdr->cntxDirty = false;
}
SpinAcquire(BufMgrLock);
UnpinBuffer(bufHdr);
}
if (!(bufHdr->flags & BM_FREE))
......@@ -2341,6 +2007,9 @@ LockBuffer(Buffer buffer, int mode)
}
buf->w_lock = true;
*buflock |= BL_W_LOCK;
buf->cntxDirty = true;
if (*buflock & BL_RI_LOCK)
{
......@@ -2458,11 +2127,11 @@ AbortBufferIO(void)
Assert(buf->flags & BM_IO_IN_PROGRESS);
SpinAcquire(BufMgrLock);
if (IsForInput)
Assert(!(buf->flags & BM_DIRTY));
Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
else
{
Assert((buf->flags & BM_DIRTY) != 0);
if ((buf->flags & BM_IO_ERROR) != 0)
Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
if (buf->flags & BM_IO_ERROR)
{
elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s",
buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
......@@ -2528,5 +2197,3 @@ MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer))
SpinRelease(BufMgrLock);
return;
}
#endif /* ! XLOG */
/*-------------------------------------------------------------------------
*
* xlog_bufmgr.c
* buffer manager interface routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.6 2000/11/30 01:39:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
*
* BufferAlloc() -- lookup a buffer in the buffer table. If
* it isn't there add it, but do not read data into memory.
* This is used when we are about to reinitialize the
* buffer, so we don't care what the current disk contents are.
* BufferAlloc() also pins the new buffer in memory.
*
* ReadBuffer() -- like BufferAlloc() but reads the data
* on a buffer cache miss.
*
* ReleaseBuffer() -- unpin the buffer
*
* WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
* but don't unpin. The disk IO is delayed until buffer
* replacement.
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
* BufferSync() -- flush all dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
*
* See other files:
* freelist.c -- chooses victim for buffer replacement
* buf_table.c -- manages the buffer lookup table
*/
#include "postgres.h"
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
#include <signal.h>
#include "executor/execdebug.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/s_lock.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "catalog/pg_database.h"
#endif
#define BufferGetLSN(bufHdr) \
(*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
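/*
 * Editor's illustrative sketch, not part of the original file: the
 * BufferGetLSN macro above works only because the page layout is assumed
 * to begin with the page's LSN, so the page pointer can be reinterpreted
 * as a pointer to the LSN.  The demo types below are simplified stand-ins
 * for the real XLogRecPtr/PageHeaderData.
 */
#include <stdio.h>
#include <stdint.h>

typedef struct { uint32_t xlogid; uint32_t xrecoff; } DemoLSN;

typedef struct
{
	DemoLSN		pd_lsn;			/* must remain the first field */
	uint16_t	pd_lower;
	uint16_t	pd_upper;
	char		pd_data[64];
} DemoPage;

#define DEMO_GET_LSN(page) (*((DemoLSN *) (page)))

int
main(void)
{
	DemoPage	page = {{1, 0x2000}, 0, 0, {0}};
	DemoLSN		lsn = DEMO_GET_LSN(&page);

	printf("page LSN = %lu/%08lX\n",
		   (unsigned long) lsn.xlogid, (unsigned long) lsn.xrecoff);
	return 0;
}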
extern SPINLOCK BufMgrLock;
extern long int ReadBufferCount;
extern long int ReadLocalBufferCount;
extern long int BufferHitCount;
extern long int LocalBufferHitCount;
extern long int BufferFlushCount;
extern long int LocalBufferFlushCount;
/*
* This flag is used to avoid disk writes for read-only transactions
* (i.e., when the transaction changed no shared buffers at all).
* We set it to true in WriteBuffer/WriteNoReleaseBuffer when
* marking a shared buffer as dirty, and back to false in xact.c
* after the transaction commits or aborts.
*/
bool SharedBufferChanged = false;
static void WaitIO(BufferDesc *buf, SPINLOCK spinlock);
static void StartBufferIO(BufferDesc *buf, bool forInput);
static void TerminateBufferIO(BufferDesc *buf);
static void ContinueBufferIO(BufferDesc *buf, bool forInput);
extern void AbortBufferIO(void);
/*
* Macro : BUFFER_IS_BROKEN
* Note that a write error doesn't mean the buffer is broken
*/
#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr, bool bufferLockHeld);
static int BufferReplace(BufferDesc *bufHdr);
void PrintBufferDescs(void);
/* ---------------------------------------------------
* RelationGetBufferWithBuffer
* see if the given buffer is what we want
* if yes, we don't need to bother the buffer manager
* ---------------------------------------------------
*/
Buffer
RelationGetBufferWithBuffer(Relation relation,
BlockNumber blockNumber,
Buffer buffer)
{
BufferDesc *bufHdr;
if (BufferIsValid(buffer))
{
if (!BufferIsLocal(buffer))
{
bufHdr = &BufferDescriptors[buffer - 1];
SpinAcquire(BufMgrLock);
if (bufHdr->tag.blockNum == blockNumber &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
{
SpinRelease(BufMgrLock);
return buffer;
}
return ReadBufferWithBufferLock(relation, blockNumber, true);
}
else
{
bufHdr = &LocalBufferDescriptors[-buffer - 1];
if (bufHdr->tag.blockNum == blockNumber &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
return buffer;
}
}
return ReadBuffer(relation, blockNumber);
}
/*
* ReadBuffer -- returns a buffer containing the requested
* block of the requested relation. If the blknum
* requested is P_NEW, extend the relation file and
* allocate a new block.
*
* Returns: the buffer number for the buffer containing
* the block read, or InvalidBuffer on an error.
*
* Assume when this function is called, that reln has been
* opened already.
*/
#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG
* defined */
/*
* ReadBuffer
*
*/
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
return ReadBufferWithBufferLock(reln, blockNum, false);
}
/*
* ReadBufferWithBufferLock -- does the work of
* ReadBuffer() but with the possibility that
* the buffer lock has already been held. This
* is yet another effort to reduce the number of
* semops in the system.
*/
static Buffer
ReadBufferWithBufferLock(Relation reln,
BlockNumber blockNum,
bool bufferLockHeld)
{
BufferDesc *bufHdr;
int extend; /* extending the file by one block */
int status;
bool found;
bool isLocalBuf;
extend = (blockNum == P_NEW);
isLocalBuf = reln->rd_myxactonly;
if (isLocalBuf)
{
ReadLocalBufferCount++;
bufHdr = LocalBufferAlloc(reln, blockNum, &found);
if (found)
LocalBufferHitCount++;
}
else
{
ReadBufferCount++;
/*
* lookup the buffer. IO_IN_PROGRESS is set if the requested
* block is not currently in memory.
*/
bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld);
if (found)
BufferHitCount++;
}
if (!bufHdr)
return InvalidBuffer;
/* if it's already in the buffer pool, we're done */
if (found)
{
/*
* This happens when a bogus buffer was returned previously and is
* floating around in the buffer pool. A routine calling this
* would want this extended.
*/
if (extend)
{
/* new buffers are zero-filled */
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
smgrextend(DEFAULT_SMGR, reln,
(char *) MAKE_PTR(bufHdr->data));
}
return BufferDescriptorGetBuffer(bufHdr);
}
/*
* if we have gotten to this point, the reln pointer must be ok and
* the relation file must be open.
*/
if (extend)
{
/* new buffers are zero-filled */
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
status = smgrextend(DEFAULT_SMGR, reln,
(char *) MAKE_PTR(bufHdr->data));
}
else
{
status = smgrread(DEFAULT_SMGR, reln, blockNum,
(char *) MAKE_PTR(bufHdr->data));
}
if (isLocalBuf)
return BufferDescriptorGetBuffer(bufHdr);
/* lock buffer manager again to update IO IN PROGRESS */
SpinAcquire(BufMgrLock);
if (status == SM_FAIL)
{
/* IO Failed. cleanup the data structures and go home */
if (!BufTableDelete(bufHdr))
{
SpinRelease(BufMgrLock);
elog(FATAL, "BufRead: buffer table broken after IO error\n");
}
/* remember that BufferAlloc() pinned the buffer */
UnpinBuffer(bufHdr);
/*
* Have to reset the flag so that anyone waiting for the buffer
* can tell that the contents are invalid.
*/
bufHdr->flags |= BM_IO_ERROR;
bufHdr->flags &= ~BM_IO_IN_PROGRESS;
}
else
{
/* IO Succeeded. clear the flags, finish buffer update */
bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
}
/* If anyone was waiting for IO to complete, wake them up now */
TerminateBufferIO(bufHdr);
SpinRelease(BufMgrLock);
if (status == SM_FAIL)
return InvalidBuffer;
return BufferDescriptorGetBuffer(bufHdr);
}
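/*
 * Editor's illustrative sketch (assumed names, not part of the original
 * file): the P_NEW path above zero-fills a BLCKSZ page image and then
 * extends the relation via smgrextend.  The same extend-with-zeroes
 * pattern on an ordinary stdio file:
 */
#include <stdio.h>
#include <string.h>

#define DEMO_BLCKSZ 8192

/* appends one zeroed block; returns its block number, or -1 on failure */
static long
demo_extend(FILE *fp)
{
	char		page[DEMO_BLCKSZ];

	memset(page, 0, sizeof(page));	/* new buffers are zero-filled */
	if (fseek(fp, 0L, SEEK_END) != 0)
		return -1;
	if (fwrite(page, sizeof(page), 1, fp) != 1)
		return -1;					/* extension failed, e.g. disk full */
	return ftell(fp) / DEMO_BLCKSZ - 1;
}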
/*
* BufferAlloc -- Get a buffer from the buffer pool but don't
* read it.
*
* Returns: descriptor for buffer
*
* When this routine returns, the BufMgrLock is guaranteed NOT to be held.
*/
static BufferDesc *
BufferAlloc(Relation reln,
BlockNumber blockNum,
bool *foundPtr,
bool bufferLockHeld)
{
BufferDesc *buf,
*buf2;
BufferTag newTag; /* identity of requested block */
bool inProgress; /* buffer undergoing IO */
bool newblock = FALSE;
/* create a new tag so we can lookup the buffer */
/* assume that the relation is already open */
if (blockNum == P_NEW)
{
newblock = TRUE;
blockNum = smgrnblocks(DEFAULT_SMGR, reln);
}
INIT_BUFFERTAG(&newTag, reln, blockNum);
if (!bufferLockHeld)
SpinAcquire(BufMgrLock);
/* see if the block is in the buffer pool already */
buf = BufTableLookup(&newTag);
if (buf != NULL)
{
/*
* Found it. Now, (a) pin the buffer so no one steals it from the
* buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
* the buffer into the buffer pool.
*/
PinBuffer(buf);
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
*foundPtr = TRUE;
if (inProgress) /* confirm end of IO */
{
WaitIO(buf, BufMgrLock);
inProgress = (buf->flags & BM_IO_IN_PROGRESS);
}
if (BUFFER_IS_BROKEN(buf))
{
/*
* I couldn't understand the following old comment. If there's
* no IO for the buffer and the buffer is BROKEN, it should be
* read again. So start a new buffer IO here.
*
* weird race condition:
*
* We were waiting for someone else to read the buffer. While we
* were waiting, the reader boof'd in some way, so the
* contents of the buffer are still invalid. By saying that
* we didn't find it, we can make the caller reinitialize the
* buffer. If two processes are waiting for this block, both
* will read the block. The second one to finish may
* overwrite any updates made by the first. (Assume higher
* level synchronization prevents this from happening).
*
* This is never going to happen, don't worry about it.
*/
*foundPtr = FALSE;
}
#ifdef BMTRACE
_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);
#endif /* BMTRACE */
if (!(*foundPtr))
StartBufferIO(buf, true);
SpinRelease(BufMgrLock);
return buf;
}
*foundPtr = FALSE;
/*
* Didn't find it in the buffer pool. We'll have to initialize a new
* buffer. First, grab one from the free list. If it's dirty, flush
* it to disk. Remember to unlock BufMgr spinlock while doing the IOs.
*/
inProgress = FALSE;
for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
{
buf = GetFreeBuffer();
/* GetFreeBuffer will abort if it can't find a free buffer */
Assert(buf);
/*
* There should be exactly one pin on the buffer after it is
* allocated -- ours. If it had a pin it wouldn't have been on
* the free list. No one else could have pinned it between
* GetFreeBuffer and here because we have the BufMgrLock.
*/
Assert(buf->refcount == 0);
buf->refcount = 1;
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
if (buf->flags & BM_DIRTY || buf->cntxDirty)
{
bool smok;
/*
* skip write error buffers
*/
if ((buf->flags & BM_IO_ERROR) != 0)
{
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
buf->refcount--;
buf = (BufferDesc *) NULL;
continue;
}
/*
* Set BM_IO_IN_PROGRESS to keep anyone from doing anything
* with the contents of the buffer while we write it out. We
* don't really care if they try to read it, but if they can
* complete a BufferAlloc on it they can then scribble into
* it, and we'd really like to avoid that while we are
* flushing the buffer. Setting this flag should block them
* in WaitIO until we're done.
*/
inProgress = TRUE;
/*
* All code paths that acquire this lock pin the buffer first;
* since no one had it pinned (it just came off the free
* list), no one else can have this lock.
*/
StartBufferIO(buf, false);
/*
* Write the buffer out, being careful to release BufMgrLock
* before starting the I/O.
*/
smok = BufferReplace(buf);
if (smok == FALSE)
{
elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s",
buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
inProgress = FALSE;
buf->flags |= BM_IO_ERROR;
buf->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(buf);
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
Assert(buf->refcount > 0);
buf->refcount--;
if (buf->refcount == 0)
{
AddBufferToFreelist(buf);
buf->flags |= BM_FREE;
}
buf = (BufferDesc *) NULL;
}
else
{
/*
* BM_JUST_DIRTIED was cleared by BufferReplace and shouldn't
* have been set by anyone since. - vadim 01/17/97
*/
if (buf->flags & BM_JUST_DIRTIED)
{
elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing",
buf->tag.blockNum, buf->blind.relname);
}
else
buf->flags &= ~BM_DIRTY;
buf->cntxDirty = false;
}
/*
* Somebody could have pinned the buffer while we were doing
* the I/O and had given up the BufMgrLock (though they would
* be waiting for us to clear the BM_IO_IN_PROGRESS flag).
* That's why this is a loop -- if so, we need to clear the
* I/O flags, remove our pin and start all over again.
*
* People may be making buffers free at any time, so there's no
* reason to think that we have an immediate disaster on our
* hands.
*/
if (buf && buf->refcount > 1)
{
inProgress = FALSE;
buf->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(buf);
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
buf->refcount--;
buf = (BufferDesc *) NULL;
}
/*
* Somebody could have allocated another buffer for the same
* block we are about to read in. While we flushed out the
* dirty buffer, we didn't hold the lock, and someone could have
* allocated another buffer for the same block. The problem is
* that we haven't yet gotten around to inserting the new tag into
* the buffer table, so we need to check here. -ay 3/95
*/
buf2 = BufTableLookup(&newTag);
if (buf2 != NULL)
{
/*
* Found it. Someone has already done what we're about to
* do. We'll just handle this as if it were found in the
* buffer pool in the first place.
*/
if (buf != NULL)
{
buf->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(buf);
/* give up the buffer since we don't need it any more */
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
Assert(buf->refcount > 0);
buf->refcount--;
if (buf->refcount == 0)
{
AddBufferToFreelist(buf);
buf->flags |= BM_FREE;
}
}
PinBuffer(buf2);
inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
*foundPtr = TRUE;
if (inProgress)
{
WaitIO(buf2, BufMgrLock);
inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
}
if (BUFFER_IS_BROKEN(buf2))
*foundPtr = FALSE;
if (!(*foundPtr))
StartBufferIO(buf2, true);
SpinRelease(BufMgrLock);
return buf2;
}
}
}
/*
* At this point we should have the sole pin on a non-dirty buffer and
* we may or may not already have the BM_IO_IN_PROGRESS flag set.
*/
/*
* Change the name of the buffer in the lookup table:
*
* Need to update the lookup table before the read starts. If someone
* comes along looking for the buffer while we are reading it in, we
* don't want them to allocate a new buffer. For the same reason, we
* didn't want to erase the buf table entry for the buffer we were
* writing back until now, either.
*/
if (!BufTableDelete(buf))
{
SpinRelease(BufMgrLock);
elog(FATAL, "buffer wasn't in the buffer table\n");
}
/* record the database name and relation name for this buffer */
strcpy(buf->blind.dbname, (DatabaseName) ? DatabaseName : "Recovery");
strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln));
INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
if (!BufTableInsert(buf))
{
SpinRelease(BufMgrLock);
elog(FATAL, "Buffer in lookup table twice \n");
}
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
* so no one fiddles with them until the read completes. If this
* routine has been called simply to allocate a buffer, no IO will be
* attempted, so the flag isn't set.
*/
if (!inProgress)
StartBufferIO(buf, true);
else
ContinueBufferIO(buf, true);
#ifdef BMTRACE
_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
#endif /* BMTRACE */
SpinRelease(BufMgrLock);
return buf;
}
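/*
 * Editor's illustrative sketch (not part of the original file): the core
 * discipline in BufferAlloc is "unlock, do the slow I/O, relock, re-check",
 * because another backend may have installed the same block while the lock
 * was down.  A minimal one-slot cache showing that re-check, with pthreads:
 */
#include <pthread.h>

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static int	cached_key = -1;
static int	cached_val = 0;

/* stands in for reading a disk block; must not run under the lock */
static int
slow_load(int key)
{
	return key * 2;
}

int
cache_get(int key)
{
	int			val;

	pthread_mutex_lock(&cache_lock);
	if (cached_key != key)
	{
		pthread_mutex_unlock(&cache_lock);	/* never hold the lock over I/O */
		val = slow_load(key);
		pthread_mutex_lock(&cache_lock);
		if (cached_key != key)	/* re-check: someone may have beaten us */
		{
			cached_key = key;
			cached_val = val;
		}
	}
	val = cached_val;
	pthread_mutex_unlock(&cache_lock);
	return val;
}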
/*
* WriteBuffer
*
* Marks buffer contents as dirty (actual write happens later).
*
* Assume that buffer is pinned. Assume that reln is
* valid.
*
* Side Effects:
* Pin count is decremented.
*/
#undef WriteBuffer
int
WriteBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
if (BufferIsLocal(buffer))
return WriteLocalBuffer(buffer, TRUE);
if (BAD_BUFFER_ID(buffer))
return FALSE;
bufHdr = &BufferDescriptors[buffer - 1];
SharedBufferChanged = true;
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock);
return TRUE;
}
/*
* WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
* when the operation is complete.
*/
int
WriteNoReleaseBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
if (BufferIsLocal(buffer))
return WriteLocalBuffer(buffer, FALSE);
if (BAD_BUFFER_ID(buffer))
return STATUS_ERROR;
bufHdr = &BufferDescriptors[buffer - 1];
SharedBufferChanged = true;
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SpinRelease(BufMgrLock);
return STATUS_OK;
}
#undef ReleaseAndReadBuffer
/*
* ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
* so that only one semop needs to be called.
*
*/
Buffer
ReleaseAndReadBuffer(Buffer buffer,
Relation relation,
BlockNumber blockNum)
{
BufferDesc *bufHdr;
Buffer retbuf;
if (BufferIsLocal(buffer))
{
Assert(LocalRefCount[-buffer - 1] > 0);
LocalRefCount[-buffer - 1]--;
}
else
{
if (BufferIsValid(buffer))
{
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
PrivateRefCount[buffer - 1]--;
if (PrivateRefCount[buffer - 1] == 0)
{
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->refcount--;
if (bufHdr->refcount == 0)
{
AddBufferToFreelist(bufHdr);
bufHdr->flags |= BM_FREE;
}
retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
return retbuf;
}
}
}
return ReadBuffer(relation, blockNum);
}
/*
* BufferSync -- Write all dirty buffers in the pool.
*
* This is called at checkpoint time and writes out all dirty buffers.
*/
void
BufferSync()
{
int i;
BufferDesc *bufHdr;
Buffer buffer;
int status;
RelFileNode rnode;
XLogRecPtr recptr;
Relation reln = NULL;
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{
SpinAcquire(BufMgrLock);
if (!(bufHdr->flags & BM_VALID))
{
SpinRelease(BufMgrLock);
continue;
}
/*
* Pin buffer and ensure that no one reads it from disk
*/
PinBuffer(bufHdr);
/* Synchronize with BufferAlloc */
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
buffer = BufferDescriptorGetBuffer(bufHdr);
rnode = bufHdr->tag.rnode;
SpinRelease(BufMgrLock);
/*
* Try to find relation for buffer
*/
reln = RelationNodeCacheGetRelation(rnode);
/*
* Protect buffer content against concurrent update
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
/*
* Force XLOG flush for buffer's LSN
*/
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
/*
* Now it's safe to write buffer to disk
* (if needed at all -:))
*/
SpinAcquire(BufMgrLock);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
if (reln == (Relation) NULL)
{
status = smgrblindwrt(DEFAULT_SMGR,
bufHdr->tag.rnode,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data),
true); /* must fsync */
}
else
{
status = smgrwrite(DEFAULT_SMGR, reln,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
}
if (status == SM_FAIL) /* disk failure ?! */
elog(STOP, "BufferSync: cannot write %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
/*
* Note that it's safe to change cntxDirty here because we
* protect it from upper-level writers by share lock and from
* other bufmgr routines by BM_IO_IN_PROGRESS
*/
bufHdr->cntxDirty = false;
/*
* Release the per-buffer readlock, reacquire BufMgrLock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
BufferFlushCount++;
SpinAcquire(BufMgrLock);
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
TerminateBufferIO(bufHdr); /* Sync IO finished */
/*
* If this buffer was marked by someone as DIRTY while
* we were flushing it out we must not clear DIRTY
* flag - vadim 01/17/97
*/
if (!(bufHdr->flags & BM_JUST_DIRTIED))
bufHdr->flags &= ~BM_DIRTY;
}
else
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
UnpinBuffer(bufHdr);
SpinRelease(BufMgrLock);
/* drop refcnt obtained by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL)
{
RelationDecrementReferenceCount(reln);
reln = NULL;
}
}
}
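/*
 * Editor's illustrative sketch (not part of the original file): BufferSync
 * enforces the write-ahead-log rule -- XLOG must be flushed at least up to
 * the page's LSN before the page itself may be written.  The ordering,
 * reduced to hypothetical stand-ins for XLogFlush and smgrwrite:
 */
typedef unsigned long DemoRecPtr;

static DemoRecPtr demo_log_flushed_upto = 0;

static void
demo_log_flush(DemoRecPtr upto)
{
	if (upto > demo_log_flushed_upto)
		demo_log_flushed_upto = upto;	/* the WAL fsync would happen here */
}

static void
demo_write_page(DemoRecPtr page_lsn, const char *page)
{
	/* WAL rule: the log record covering this page must be durable first */
	demo_log_flush(page_lsn);
	/* ... only now is the equivalent of smgrwrite() safe ... */
	(void) page;
}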
/*
* WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
*
* Should be entered with buffer manager spinlock held; releases it before
* waiting and re-acquires it afterwards.
*/
static void
WaitIO(BufferDesc *buf, SPINLOCK spinlock)
{
/*
* Changed to wait until there's no IO - Inoue 01/13/2000
*/
while ((buf->flags & BM_IO_IN_PROGRESS) != 0)
{
SpinRelease(spinlock);
S_LOCK(&(buf->io_in_progress_lock));
S_UNLOCK(&(buf->io_in_progress_lock));
SpinAcquire(spinlock);
}
}
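/*
 * Editor's illustrative sketch (not part of the original file): WaitIO
 * cycles the per-buffer io_in_progress spinlock purely as a sleep/wakeup
 * device, re-testing the flag on every iteration.  The same loop expressed
 * with a pthread condition variable, the more conventional idiom:
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t demo_io_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t demo_io_done = PTHREAD_COND_INITIALIZER;
static bool demo_io_in_progress = false;

void
demo_wait_io(void)
{
	pthread_mutex_lock(&demo_io_mutex);
	while (demo_io_in_progress)		/* always re-test after waking */
		pthread_cond_wait(&demo_io_done, &demo_io_mutex);
	pthread_mutex_unlock(&demo_io_mutex);
}

void
demo_finish_io(void)
{
	pthread_mutex_lock(&demo_io_mutex);
	demo_io_in_progress = false;
	pthread_cond_broadcast(&demo_io_done);	/* wake every waiter */
	pthread_mutex_unlock(&demo_io_mutex);
}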
long NDirectFileRead; /* some I/O's are direct file access.
* bypass bufmgr */
long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
void
PrintBufferUsage(FILE *statfp)
{
float hitrate;
float localhitrate;
if (ReadBufferCount == 0)
hitrate = 0.0;
else
hitrate = (float) BufferHitCount * 100.0 / ReadBufferCount;
if (ReadLocalBufferCount == 0)
localhitrate = 0.0;
else
localhitrate = (float) LocalBufferHitCount * 100.0 / ReadLocalBufferCount;
fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate);
fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate);
fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n",
NDirectFileRead, NDirectFileWrite);
}
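/*
 * Editor's illustrative sketch (not part of the original file): the
 * hit-rate arithmetic above, isolated.  Physical reads are ReadBufferCount
 * minus BufferHitCount, and the zero guard matters right after stats reset.
 */
static float
demo_hit_rate(long reads, long hits)
{
	if (reads == 0)
		return 0.0f;	/* no reads yet: avoid dividing by zero */
	return (float) hits * 100.0f / (float) reads;
}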
void
ResetBufferUsage()
{
BufferHitCount = 0;
ReadBufferCount = 0;
BufferFlushCount = 0;
LocalBufferHitCount = 0;
ReadLocalBufferCount = 0;
LocalBufferFlushCount = 0;
NDirectFileRead = 0;
NDirectFileWrite = 0;
}
/* ----------------------------------------------
* ResetBufferPool
*
* This routine is supposed to be called when a transaction aborts.
* It will release all the buffer pins held by the transaction.
* Currently, we also call it during commit if BufferPoolCheckLeak
* detected a problem --- in that case, isCommit is TRUE, and we
* only clean up buffer pin counts.
*
* During abort, we also forget any pending fsync requests. Dirtied buffers
* will still get written, eventually, but there will be no fsync for them.
*
* ----------------------------------------------
*/
void
ResetBufferPool(bool isCommit)
{
int i;
for (i = 0; i < NBuffers; i++)
{
if (PrivateRefCount[i] != 0)
{
BufferDesc *buf = &BufferDescriptors[i];
SpinAcquire(BufMgrLock);
Assert(buf->refcount > 0);
buf->refcount--;
if (buf->refcount == 0)
{
AddBufferToFreelist(buf);
buf->flags |= BM_FREE;
}
SpinRelease(BufMgrLock);
}
PrivateRefCount[i] = 0;
}
ResetLocalBufferPool();
if (!isCommit)
smgrabort();
}
/* -----------------------------------------------
* BufferPoolCheckLeak
*
* check if there is a buffer leak
*
* -----------------------------------------------
*/
int
BufferPoolCheckLeak()
{
int i;
int result = 0;
for (i = 1; i <= NBuffers; i++)
{
if (PrivateRefCount[i - 1] != 0)
{
BufferDesc *buf = &(BufferDescriptors[i - 1]);
elog(NOTICE,
"Buffer Leak: [%03d] (freeNext=%ld, freePrev=%ld, \
relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
i - 1, buf->freeNext, buf->freePrev,
buf->blind.relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i - 1]);
result = 1;
}
}
return result;
}
/* ------------------------------------------------
* FlushBufferPool
*
* Flush all dirty blocks in buffer pool to disk
* at the checkpoint time
* ------------------------------------------------
*/
void
FlushBufferPool(void)
{
BufferSync();
smgrsync();
}
/*
* At commit time we have to flush the local buffer pool only
*/
void
BufmgrCommit(void)
{
LocalBufferSync();
/*
* All files created in current transaction will be fsync-ed
*/
smgrcommit();
}
/*
* BufferGetBlockNumber
* Returns the block number associated with a buffer.
*
* Note:
* Assumes that the buffer is valid.
*/
BlockNumber
BufferGetBlockNumber(Buffer buffer)
{
Assert(BufferIsValid(buffer));
/* XXX should be a critical section */
if (BufferIsLocal(buffer))
return LocalBufferDescriptors[-buffer - 1].tag.blockNum;
else
return BufferDescriptors[buffer - 1].tag.blockNum;
}
/*
* BufferReplace
*
* Write out the buffer corresponding to 'bufHdr'
*
* BufMgrLock must be held at entry, and the buffer must be pinned.
*/
static int
BufferReplace(BufferDesc *bufHdr)
{
Relation reln;
XLogRecPtr recptr;
int status;
/* To check if block content changed while flushing. - vadim 01/17/97 */
bufHdr->flags &= ~BM_JUST_DIRTIED;
SpinRelease(BufMgrLock);
/*
* No need to lock buffer context - no one should be able to
* end ReadBuffer
*/
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
if (reln != (Relation) NULL)
{
status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
}
else
{
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data),
false); /* no fsync */
}
/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
if (reln != (Relation) NULL)
RelationDecrementReferenceCount(reln);
SpinAcquire(BufMgrLock);
if (status == SM_FAIL)
return FALSE;
BufferFlushCount++;
return TRUE;
}
/*
* RelationGetNumberOfBlocks
* Returns the number of blocks in the given relation.
*
* Note:
* XXX may fail for huge relations.
* XXX should be elsewhere.
* XXX maybe should be hidden
*/
BlockNumber
RelationGetNumberOfBlocks(Relation relation)
{
return ((relation->rd_myxactonly) ? relation->rd_nblocks :
((relation->rd_rel->relkind == RELKIND_VIEW) ? 0 :
smgrnblocks(DEFAULT_SMGR, relation)));
}
/* ---------------------------------------------------------------------
* DropRelationBuffers
*
* This function removes all the buffered pages for a relation
* from the buffer pool. Dirty pages are simply dropped, without
* bothering to write them out first. This is NOT rollback-able,
* and so should be used only with extreme caution!
*
* We assume that the caller holds an exclusive lock on the relation,
* which should assure that no new buffers will be acquired for the rel
* meanwhile.
*
* XXX currently it sequentially searches the buffer pool, should be
* changed to more clever ways of searching.
* --------------------------------------------------------------------
*/
void
DropRelationBuffers(Relation rel)
{
int i;
BufferDesc *bufHdr;
if (rel->rd_myxactonly)
{
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
LocalRefCount[i] = 0;
bufHdr->tag.rnode.relNode = InvalidOid;
}
}
return;
}
SpinAcquire(BufMgrLock);
for (i = 1; i <= NBuffers; i++)
{
bufHdr = &BufferDescriptors[i - 1];
recheck:
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
/*
* If there is I/O in progress, better wait till it's done;
* don't want to delete the relation out from under someone
* who's just trying to flush the buffer!
*/
if (bufHdr->flags & BM_IO_IN_PROGRESS)
{
WaitIO(bufHdr, BufMgrLock);
/*
* By now, the buffer very possibly belongs to some other
* rel, so check again before proceeding.
*/
goto recheck;
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* Release any refcount we may have.
*
* This is very probably dead code, and if it isn't then it's
* probably wrong. I added the Assert to find out --- tgl
* 11/99.
*/
if (!(bufHdr->flags & BM_FREE))
{
/* Assert checks that buffer will actually get freed! */
Assert(PrivateRefCount[i - 1] == 1 &&
bufHdr->refcount == 1);
/* ReleaseBuffer expects we do not hold the lock at entry */
SpinRelease(BufMgrLock);
ReleaseBuffer(i);
SpinAcquire(BufMgrLock);
}
/*
* And mark the buffer as no longer occupied by this rel.
*/
BufTableDelete(bufHdr);
}
}
SpinRelease(BufMgrLock);
}
/* ---------------------------------------------------------------------
* DropRelFileNodeBuffers
*
* This is the same as DropRelationBuffers, except that the target
* relation is specified by RelFileNode.
*
* This is NOT rollback-able. One legitimate use is to clear the
* buffer cache of buffers for a relation that is being deleted
* during transaction abort.
* --------------------------------------------------------------------
*/
void
DropRelFileNodeBuffers(RelFileNode rnode)
{
int i;
BufferDesc *bufHdr;
/* We have to search both local and shared buffers... */
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
LocalRefCount[i] = 0;
bufHdr->tag.rnode.relNode = InvalidOid;
}
}
SpinAcquire(BufMgrLock);
for (i = 1; i <= NBuffers; i++)
{
bufHdr = &BufferDescriptors[i - 1];
recheck:
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
{
/*
* If there is I/O in progress, better wait till it's done;
* don't want to delete the relation out from under someone
* who's just trying to flush the buffer!
*/
if (bufHdr->flags & BM_IO_IN_PROGRESS)
{
WaitIO(bufHdr, BufMgrLock);
/*
* By now, the buffer very possibly belongs to some other
* rel, so check again before proceeding.
*/
goto recheck;
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* Release any refcount we may have.
*
* This is very probably dead code, and if it isn't then it's
* probably wrong. I added the Assert to find out --- tgl
* 11/99.
*/
if (!(bufHdr->flags & BM_FREE))
{
/* Assert checks that buffer will actually get freed! */
Assert(PrivateRefCount[i - 1] == 1 &&
bufHdr->refcount == 1);
/* ReleaseBuffer expects we do not hold the lock at entry */
SpinRelease(BufMgrLock);
ReleaseBuffer(i);
SpinAcquire(BufMgrLock);
}
/*
* And mark the buffer as no longer occupied by this rel.
*/
BufTableDelete(bufHdr);
}
}
SpinRelease(BufMgrLock);
}
/* ---------------------------------------------------------------------
* DropBuffers
*
* This function removes all the buffers in the buffer cache for a
* particular database. Dirty pages are simply dropped, without
* bothering to write them out first. This is used when we destroy a
* database, to avoid trying to flush data to disk when the directory
* tree no longer exists. Implementation is pretty similar to
* DropRelationBuffers() which is for destroying just one relation.
* --------------------------------------------------------------------
*/
void
DropBuffers(Oid dbid)
{
int i;
BufferDesc *bufHdr;
SpinAcquire(BufMgrLock);
for (i = 1; i <= NBuffers; i++)
{
bufHdr = &BufferDescriptors[i - 1];
recheck:
/*
* We know that currently the database OID is the tblNode, but
* this will probably change in the future, at which point this
* func will be used to drop tablespace buffers as well.
*/
if (bufHdr->tag.rnode.tblNode == dbid)
{
/*
* If there is I/O in progress, better wait till it's done;
* don't want to delete the database out from under someone
* who's just trying to flush the buffer!
*/
if (bufHdr->flags & BM_IO_IN_PROGRESS)
{
WaitIO(bufHdr, BufMgrLock);
/*
* By now, the buffer very possibly belongs to some other
* DB, so check again before proceeding.
*/
goto recheck;
}
/* Now we can do what we came for */
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
/*
* The thing should be free, if caller has checked that no
* backends are running in that database.
*/
Assert(bufHdr->flags & BM_FREE);
/*
* And mark the buffer as no longer occupied by this page.
*/
BufTableDelete(bufHdr);
}
}
SpinRelease(BufMgrLock);
}
/* -----------------------------------------------------------------
* PrintBufferDescs
*
* this function prints all the buffer descriptors, for debugging
* use only.
* -----------------------------------------------------------------
*/
void
PrintBufferDescs()
{
int i;
BufferDesc *buf = BufferDescriptors;
if (IsUnderPostmaster)
{
SpinAcquire(BufMgrLock);
for (i = 0; i < NBuffers; ++i, ++buf)
{
elog(DEBUG, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
blockNum=%d, flags=0x%x, refcount=%d %ld)",
i, buf->freeNext, buf->freePrev,
buf->blind.relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]);
}
SpinRelease(BufMgrLock);
}
else
{
/* interactive backend */
for (i = 0; i < NBuffers; ++i, ++buf)
{
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n",
i, buf->blind.relname, buf->tag.blockNum,
buf->flags, buf->refcount, PrivateRefCount[i]);
}
}
}
void
PrintPinnedBufs()
{
int i;
BufferDesc *buf = BufferDescriptors;
SpinAcquire(BufMgrLock);
for (i = 0; i < NBuffers; ++i, ++buf)
{
if (PrivateRefCount[i] > 0)
elog(NOTICE, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
blockNum=%d, flags=0x%x, refcount=%d %ld)\n",
i, buf->freeNext, buf->freePrev, buf->blind.relname,
buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]);
}
SpinRelease(BufMgrLock);
}
/*
* BufferPoolBlowaway
*
* this routine is solely for the purpose of experiments -- sometimes
* you may want to blow away whatever is left from the past in the buffer
* pool and start measuring some performance with a clean, empty buffer
* pool.
*/
#ifdef NOT_USED
void
BufferPoolBlowaway()
{
int i;
BufferSync();
for (i = 1; i <= NBuffers; i++)
{
if (BufferIsValid(i))
{
while (BufferIsValid(i))
ReleaseBuffer(i);
}
BufTableDelete(&BufferDescriptors[i - 1]);
}
}
#endif
/* ---------------------------------------------------------------------
* FlushRelationBuffers
*
* This function writes all dirty pages of a relation out to disk.
* Furthermore, pages that have blocknumber >= firstDelBlock are
* actually removed from the buffer pool. An error code is returned
* if we fail to dump a dirty buffer or if we find one of
* the target pages is pinned into the cache.
*
* This is called by DROP TABLE to clear buffers for the relation
* from the buffer pool. Note that we must write dirty buffers,
* rather than just dropping the changes, because our transaction
* might abort later on; we want to roll back safely in that case.
*
* This is also called by VACUUM before truncating the relation to the
* given number of blocks. It might seem unnecessary for VACUUM to
* write dirty pages before firstDelBlock, since VACUUM should already
* have committed its changes. However, it is possible for there still
* to be dirty pages: if some page had unwritten on-row tuple status
* updates from a prior transaction, and VACUUM had no additional
* changes to make to that page, then VACUUM won't have written it.
* This is harmless in most cases but will break pg_upgrade, which
* relies on VACUUM to ensure that *all* tuples have correct on-row
* status. So, we check and flush all dirty pages of the rel
* regardless of block number.
*
* In all cases, the caller should be holding AccessExclusiveLock on
* the target relation to ensure that no other backend is busy reading
* more blocks of the relation (or might do so before we commit).
*
* Formerly, we considered it an error condition if we found dirty
* buffers here. However, since BufferSync no longer forces out all
* dirty buffers at every xact commit, it's possible for dirty buffers
* to still be present in the cache due to failure of an earlier
* transaction. So, must flush dirty buffers without complaint.
*
* Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
*
* XXX currently it sequentially searches the buffer pool, should be
* changed to more clever ways of searching.
* --------------------------------------------------------------------
*/
int
FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{
int i;
BufferDesc *bufHdr;
XLogRecPtr recptr;
int status;
if (rel->rd_myxactonly)
{
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
status = smgrwrite(DEFAULT_SMGR, rel,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
if (status == SM_FAIL)
{
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum);
return(-1);
}
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
}
if (LocalRefCount[i] > 0)
{
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum, LocalRefCount[i]);
return(-2);
}
if (bufHdr->tag.blockNum >= firstDelBlock)
{
bufHdr->tag.rnode.relNode = InvalidOid;
}
}
}
return 0;
}
SpinAcquire(BufMgrLock);
for (i = 0; i < NBuffers; i++)
{
bufHdr = &BufferDescriptors[i];
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
PinBuffer(bufHdr);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
SpinRelease(BufMgrLock);
/*
* Force XLOG flush for buffer's LSN
*/
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
/*
* Now it's safe to write buffer to disk
*/
SpinAcquire(BufMgrLock);
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr, BufMgrLock);
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
bufHdr->flags &= ~BM_JUST_DIRTIED;
StartBufferIO(bufHdr, false); /* output IO start */
SpinRelease(BufMgrLock);
status = smgrwrite(DEFAULT_SMGR, rel,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
if (status == SM_FAIL) /* disk failure ?! */
elog(STOP, "FlushRelationBuffers: cannot write %u for %s",
bufHdr->tag.blockNum, bufHdr->blind.relname);
BufferFlushCount++;
SpinAcquire(BufMgrLock);
bufHdr->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(bufHdr);
Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
bufHdr->flags &= ~BM_DIRTY;
/*
* Note that it's safe to change cntxDirty here because we
* protect it from upper-level writers by
* AccessExclusiveLock and from other bufmgr routines
* by BM_IO_IN_PROGRESS
*/
bufHdr->cntxDirty = false;
}
UnpinBuffer(bufHdr);
}
if (!(bufHdr->flags & BM_FREE))
{
SpinRelease(BufMgrLock);
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum,
PrivateRefCount[i], bufHdr->refcount);
return -2;
}
if (bufHdr->tag.blockNum >= firstDelBlock)
{
BufTableDelete(bufHdr);
}
}
}
SpinRelease(BufMgrLock);
return 0;
}
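/*
 * Editor's illustrative sketch of how a caller would honor the
 * 0 / -1 / -2 contract documented above; this hypothetical caller and its
 * messages are the editor's, not part of the original file.
 */
static void
demo_prepare_truncate(Relation my_rel, BlockNumber new_nblocks)
{
	int			rc = FlushRelationBuffers(my_rel, new_nblocks);

	if (rc == -1)
		elog(ERROR, "could not flush dirty buffer for %s",
			 RelationGetRelationName(my_rel));
	else if (rc == -2)
		elog(ERROR, "relation %s still has referenced buffers",
			 RelationGetRelationName(my_rel));
	/* rc == 0: now safe to drop/truncate blocks >= new_nblocks */
}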
#undef ReleaseBuffer
/*
* ReleaseBuffer -- remove the pin on a buffer without
* marking it dirty.
*
*/
int
ReleaseBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
if (BufferIsLocal(buffer))
{
Assert(LocalRefCount[-buffer - 1] > 0);
LocalRefCount[-buffer - 1]--;
return STATUS_OK;
}
if (BAD_BUFFER_ID(buffer))
return STATUS_ERROR;
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
PrivateRefCount[buffer - 1]--;
if (PrivateRefCount[buffer - 1] == 0)
{
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->refcount--;
if (bufHdr->refcount == 0)
{
AddBufferToFreelist(bufHdr);
bufHdr->flags |= BM_FREE;
}
SpinRelease(BufMgrLock);
}
return STATUS_OK;
}
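/*
 * Editor's illustrative sketch (not part of the original file): the
 * two-level pin counting used above.  A backend-local count absorbs
 * repeated pin/unpin traffic; the shared count, which requires the lock,
 * is touched only on the first pin and the last unpin.  GCC-style
 * __thread stands in for the per-backend PrivateRefCount array.
 */
#include <pthread.h>

static pthread_mutex_t demo_shared_lock = PTHREAD_MUTEX_INITIALIZER;
static int	demo_shared_refcount = 0;		/* shared across backends */
static __thread int demo_private_refcount = 0;	/* this backend only */

void
demo_pin(void)
{
	if (demo_private_refcount++ == 0)
	{
		pthread_mutex_lock(&demo_shared_lock);
		demo_shared_refcount++;
		pthread_mutex_unlock(&demo_shared_lock);
	}
}

void
demo_unpin(void)
{
	if (--demo_private_refcount == 0)
	{
		pthread_mutex_lock(&demo_shared_lock);
		demo_shared_refcount--;	/* at zero the buffer may be recycled */
		pthread_mutex_unlock(&demo_shared_lock);
	}
}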
#ifdef NOT_USED
void
IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
{
IncrBufferRefCount(buffer);
if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
{
BufferDesc *buf = &BufferDescriptors[buffer - 1];
fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
}
#endif
#ifdef NOT_USED
void
ReleaseBuffer_Debug(char *file, int line, Buffer buffer)
{
ReleaseBuffer(buffer);
if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
{
BufferDesc *buf = &BufferDescriptors[buffer - 1];
fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
}
#endif
#ifdef NOT_USED
int
ReleaseAndReadBuffer_Debug(char *file,
int line,
Buffer buffer,
Relation relation,
BlockNumber blockNum)
{
bool bufferValid;
Buffer b;
bufferValid = BufferIsValid(buffer);
b = ReleaseAndReadBuffer(buffer, relation, blockNum);
if (ShowPinTrace && bufferValid && BufferIsLocal(buffer)
&& is_userbuffer(buffer))
{
BufferDesc *buf = &BufferDescriptors[buffer - 1];
fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
buffer, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[buffer - 1], file, line);
}
if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
{
BufferDesc *buf = &BufferDescriptors[b - 1];
fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d\n",
b, buf->blind.relname, buf->tag.blockNum,
PrivateRefCount[b - 1], file, line);
}
return b;
}
#endif
#ifdef BMTRACE
/*
* trace allocations and deallocations in a circular buffer in
* shared memory. check the buffer before doing the allocation,
* and die if there's anything fishy.
*/
_bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType)
{
long start,
cur;
bmtrace *tb;
start = *CurTraceBuf;
if (start > 0)
cur = start - 1;
else
cur = BMT_LIMIT - 1;
for (;;)
{
tb = &TraceBuf[cur];
if (tb->bmt_op != BMT_NOTUSED)
{
if (tb->bmt_buf == bufNo)
{
if ((tb->bmt_op == BMT_DEALLOC)
|| (tb->bmt_dbid == dbId && tb->bmt_relid == relId
&& tb->bmt_blkno == blkNo))
goto okay;
/* die holding the buffer lock */
_bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
}
}
if (cur == start)
goto okay;
if (cur == 0)
cur = BMT_LIMIT - 1;
else
cur--;
}
okay:
tb = &TraceBuf[start];
tb->bmt_pid = MyProcPid;
tb->bmt_buf = bufNo;
tb->bmt_dbid = dbId;
tb->bmt_relid = relId;
tb->bmt_blkno = blkNo;
tb->bmt_op = allocType;
*CurTraceBuf = (start + 1) % BMT_LIMIT;
}
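/*
 * Editor's illustrative sketch (not part of the original file): the ring
 * index arithmetic _bm_trace depends on -- advance the cursor with a
 * modular increment, walk backwards with an explicit wrap at zero.
 */
#define DEMO_LIMIT 8

static int
demo_advance(int cur)
{
	return (cur + 1) % DEMO_LIMIT;	/* forward, wrapping past the end */
}

static int
demo_retreat(int cur)
{
	return (cur == 0) ? DEMO_LIMIT - 1 : cur - 1;	/* backward, wrapping at 0 */
}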
_bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
int allocType, long start, long cur)
{
FILE *fp;
bmtrace *tb;
int i;
tb = &TraceBuf[cur];
if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL)
elog(FATAL, "buffer alloc trace error and can't open log file");
fprintf(fp, "buffer alloc trace detected the following error:\n\n");
fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n",
bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
(tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));
fprintf(fp, "the trace buffer contains:\n");
i = start;
for (;;)
{
tb = &TraceBuf[i];
if (tb->bmt_op != BMT_NOTUSED)
{
fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%u,%d> ",
i, (i == cur ? " ---> " : "\t"),
tb->bmt_pid, tb->bmt_buf,
tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);
switch (tb->bmt_op)
{
case BMT_ALLOCFND:
fprintf(fp, "allocate (found)\n");
break;
case BMT_ALLOCNOTFND:
fprintf(fp, "allocate (not found)\n");
break;
case BMT_DEALLOC:
fprintf(fp, "deallocate\n");
break;
default:
fprintf(fp, "unknown op type %d\n", tb->bmt_op);
break;
}
}
i = (i + 1) % BMT_LIMIT;
if (i == start)
break;
}
fprintf(fp, "\noperation causing error:\n");
fprintf(fp, "\tpid %d buf %d for <%d,%u,%d> ",
getpid(), bufNo, dbId, relId, blkNo);
switch (allocType)
{
case BMT_ALLOCFND:
fprintf(fp, "allocate (found)\n");
break;
case BMT_ALLOCNOTFND:
fprintf(fp, "allocate (not found)\n");
break;
case BMT_DEALLOC:
fprintf(fp, "deallocate\n");
break;
default:
fprintf(fp, "unknown op type %d\n", allocType);
break;
}
FreeFile(fp);
kill(getpid(), SIGILL);
}
#endif /* BMTRACE */
/*
* SetBufferCommitInfoNeedsSave
*
* Mark a buffer dirty when we have updated tuple commit-status bits in it.
*
* This is similar to WriteNoReleaseBuffer, except that we do not set
* SharedBufferChanged or BufferDirtiedByMe, because we have not made a
* critical change that has to be flushed to disk before xact commit --- the
* status-bit update could be redone by someone else just as easily. The
* buffer will be marked dirty, but it will not be written to disk until
* there is another reason to write it.
*
* This routine might get called many times on the same page, if we are making
* the first scan after commit of an xact that added/deleted many tuples.
* So, be as quick as we can if the buffer is already dirty.
*/
void
SetBufferCommitInfoNeedsSave(Buffer buffer)
{
BufferDesc *bufHdr;
if (BufferIsLocal(buffer))
return;
if (BAD_BUFFER_ID(buffer))
return;
bufHdr = &BufferDescriptors[buffer - 1];
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
SpinRelease(BufMgrLock);
}
}
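/*
 * Editor's illustrative sketch (not part of the original file): the fast
 * path above peeks at the flag word without the lock and only locks to set
 * the bits.  The stale read is tolerable because the worst case is one
 * unnecessary lock acquisition, never a lost update.  (Strictly portable
 * code today would make the unlocked peek an atomic load.)
 */
#include <pthread.h>

#define DEMO_DIRTY			0x01
#define DEMO_JUST_DIRTIED	0x02

static pthread_mutex_t demo_flag_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned demo_flag_word = 0;

void
demo_mark_dirty(void)
{
	/* unlocked peek: skip the lock if both bits are already visible */
	if ((demo_flag_word & (DEMO_DIRTY | DEMO_JUST_DIRTIED)) !=
		(DEMO_DIRTY | DEMO_JUST_DIRTIED))
	{
		pthread_mutex_lock(&demo_flag_lock);
		demo_flag_word |= (DEMO_DIRTY | DEMO_JUST_DIRTIED);
		pthread_mutex_unlock(&demo_flag_lock);
	}
}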
void
UnlockBuffers()
{
BufferDesc *buf;
int i;
for (i = 0; i < NBuffers; i++)
{
if (BufferLocks[i] == 0)
continue;
Assert(BufferIsValid(i + 1));
buf = &(BufferDescriptors[i]);
S_LOCK(&(buf->cntx_lock));
if (BufferLocks[i] & BL_R_LOCK)
{
Assert(buf->r_locks > 0);
(buf->r_locks)--;
}
if (BufferLocks[i] & BL_RI_LOCK)
{
/*
* Someone else could have removed our RI lock while acquiring
* the W lock. This is possible if we came here from elog(ERROR)
* in IpcSemaphore{Lock|Unlock}(WaitCLSemId), so we don't
* do Assert(buf->ri_lock) here.
*/
buf->ri_lock = false;
}
if (BufferLocks[i] & BL_W_LOCK)
{
Assert(buf->w_lock);
buf->w_lock = false;
}
S_UNLOCK(&(buf->cntx_lock));
BufferLocks[i] = 0;
}
}
void
LockBuffer(Buffer buffer, int mode)
{
BufferDesc *buf;
bits8 *buflock;
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
return;
buf = &(BufferDescriptors[buffer - 1]);
buflock = &(BufferLocks[buffer - 1]);
S_LOCK(&(buf->cntx_lock));
if (mode == BUFFER_LOCK_UNLOCK)
{
if (*buflock & BL_R_LOCK)
{
Assert(buf->r_locks > 0);
Assert(!(buf->w_lock));
Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK)));
(buf->r_locks)--;
*buflock &= ~BL_R_LOCK;
}
else if (*buflock & BL_W_LOCK)
{
Assert(buf->w_lock);
Assert(buf->r_locks == 0);
Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK)));
buf->w_lock = false;
*buflock &= ~BL_W_LOCK;
}
else
elog(ERROR, "UNLockBuffer: buffer %lu is not locked", buffer);
}
else if (mode == BUFFER_LOCK_SHARE)
{
unsigned i = 0;
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->ri_lock || buf->w_lock)
{
S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock));
}
(buf->r_locks)++;
*buflock |= BL_R_LOCK;
}
else if (mode == BUFFER_LOCK_EXCLUSIVE)
{
unsigned i = 0;
Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
while (buf->r_locks > 0 || buf->w_lock)
{
if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK))
{
/*
* Our RI lock might be removed by concurrent W lock
* acquiring (see what we do with RI locks below when our
* own W acquiring succeeded) and so we set RI lock again
* if we already did this.
*/
*buflock |= BL_RI_LOCK;
buf->ri_lock = true;
}
S_UNLOCK(&(buf->cntx_lock));
s_lock_sleep(i++);
S_LOCK(&(buf->cntx_lock));
}
buf->w_lock = true;
*buflock |= BL_W_LOCK;
buf->cntxDirty = true;
if (*buflock & BL_RI_LOCK)
{
/*
* It's possible that we remove RI locks acquired by other
* would-be W lockers here, but they'll take care of it
* themselves.
*/
buf->ri_lock = false;
*buflock &= ~BL_RI_LOCK;
}
}
else
elog(ERROR, "LockBuffer: unknown lock mode %d", mode);
S_UNLOCK(&(buf->cntx_lock));
}
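The comments above describe the protocol piecemeal; a single-threaded, standalone model of the r_locks / ri_lock / w_lock state machine (the spinlock, sleeping, and per-backend BufferLocks bookkeeping are elided, so this is a sketch of the shape, not the backend's code):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct { int r_locks; bool ri_lock; bool w_lock; } Ctx;

static bool try_share(Ctx *c)       /* BUFFER_LOCK_SHARE */
{
    if (c->ri_lock || c->w_lock)
        return false;               /* reader backs off and sleeps */
    c->r_locks++;
    return true;
}

static bool try_exclusive(Ctx *c)   /* BUFFER_LOCK_EXCLUSIVE */
{
    if (c->r_locks > 0 || c->w_lock)
    {
        if (c->r_locks > 3)
            c->ri_lock = true;      /* read-intent: hold off new readers */
        return false;
    }
    c->w_lock = true;
    c->ri_lock = false;             /* our RI lock, if any, has done its job */
    return true;
}

int main(void)
{
    Ctx c = {0, false, false};
    assert(try_share(&c) && try_share(&c)); /* two readers coexist */
    assert(!try_exclusive(&c));             /* writer must wait for them */
    c.r_locks = 0;                          /* readers unlock */
    assert(try_exclusive(&c));              /* writer gets in */
    assert(!try_share(&c));                 /* and excludes new readers */
    puts("ok");
    return 0;
}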
/*
* Functions for IO error handling
*
* Note: We assume that nested buffer IO never occurs,
* i.e. at most one io_in_progress spinlock is held
* per proc.
*/
static BufferDesc *InProgressBuf = (BufferDesc *) NULL;
static bool IsForInput;
/*
* Function: StartBufferIO
* (Assumptions)
* My process is executing no IO
* BufMgrLock is held
* BM_IO_IN_PROGRESS mask is not set for the buffer
* The buffer is Pinned
*
*/
static void
StartBufferIO(BufferDesc *buf, bool forInput)
{
Assert(!InProgressBuf);
Assert(!(buf->flags & BM_IO_IN_PROGRESS));
buf->flags |= BM_IO_IN_PROGRESS;
/*
* There used to be
*
* Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
*
* here, but that's wrong because of the way WaitIO works: someone else
* waiting for the I/O to complete will succeed in grabbing the lock
* for a few instructions, and if we context-swap back to here the
* Assert could fail. Tiny window for failure, but I've seen it
* happen -- tgl
*/
S_LOCK(&(buf->io_in_progress_lock));
InProgressBuf = buf;
IsForInput = forInput;
}
/*
* Function: TerminateBufferIO
* (Assumptions)
* My process is executing IO for the buffer
* BufMgrLock is held
* The buffer is Pinned
*
*/
static void
TerminateBufferIO(BufferDesc *buf)
{
Assert(buf == InProgressBuf);
S_UNLOCK(&(buf->io_in_progress_lock));
InProgressBuf = (BufferDesc *) 0;
}
/*
* Function: ContinueBufferIO
* (Assumptions)
* My process is executing IO for the buffer
* BufMgrLock is held
* The buffer is Pinned
*
*/
static void
ContinueBufferIO(BufferDesc *buf, bool forInput)
{
Assert(buf == InProgressBuf);
Assert(buf->flags & BM_IO_IN_PROGRESS);
IsForInput = forInput;
}
#ifdef NOT_USED
void
InitBufferIO(void)
{
InProgressBuf = (BufferDesc *) 0;
}
#endif
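A hedged, schematic fragment (not compilable on its own) showing the bracketing those assumptions imply for a physical read; the ordering is the point, the details are simplified:

    /* BufMgrLock held, buffer pinned, no IO in progress: */
    StartBufferIO(buf, true);           /* sets BM_IO_IN_PROGRESS, takes
                                         * io_in_progress_lock */
    SpinRelease(BufMgrLock);            /* never hold BufMgrLock across IO */
    status = smgrread(DEFAULT_SMGR, reln, buf->tag.blockNum,
                      (char *) MAKE_PTR(buf->data));
    SpinAcquire(BufMgrLock);
    buf->flags &= ~BM_IO_IN_PROGRESS;   /* before any WaitIO waiter looks */
    TerminateBufferIO(buf);             /* drops io_in_progress_lock */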
/*
* This function is called from ProcReleaseSpins().
* BufMgrLock isn't held when this function is called.
* BM_IO_ERROR is always set. If BM_IO_ERROR was already
* set in the output case, this routine would kill all
* backends and reset the postmaster.
*/
void
AbortBufferIO(void)
{
BufferDesc *buf = InProgressBuf;
if (buf)
{
Assert(buf->flags & BM_IO_IN_PROGRESS);
SpinAcquire(BufMgrLock);
if (IsForInput)
Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
else
{
Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
if (buf->flags & BM_IO_ERROR)
{
elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s",
buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
}
buf->flags |= BM_DIRTY;
}
buf->flags |= BM_IO_ERROR;
buf->flags &= ~BM_IO_IN_PROGRESS;
TerminateBufferIO(buf);
SpinRelease(BufMgrLock);
}
}
/*
* Clean up a buffer or mark it for cleanup. The buffer can be
* cleaned up immediately only if it's pinned just once.
*
* NOTE: buffer must be excl locked.
*/
void
MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer))
{
BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
if (PrivateRefCount[buffer - 1] > 1)
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
PrivateRefCount[buffer - 1]--;
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->CleanupFunc = CleanupFunc;
SpinRelease(BufMgrLock);
return;
}
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
if (bufHdr->refcount == 1)
{
SpinRelease(BufMgrLock);
CleanupFunc(buffer);
CleanupFunc = NULL; /* so the common exit path below stores NULL */
}
else
SpinRelease(BufMgrLock);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
PrivateRefCount[buffer - 1]--;
SpinAcquire(BufMgrLock);
Assert(bufHdr->refcount > 0);
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->CleanupFunc = CleanupFunc;
bufHdr->refcount--;
if (bufHdr->refcount == 0)
{
AddBufferToFreelist(bufHdr);
bufHdr->flags |= BM_FREE;
}
SpinRelease(BufMgrLock);
return;
}
/*-------------------------------------------------------------------------
*
* xlog_localbuf.c
* local buffer manager. Fast buffer manager for temporary tables
* or special cases when the operation is not visible to other backends.
*
* When a relation is being created, the descriptor will have rd_islocal
* set to indicate that the local buffer manager should be used. During
* the same transaction in which the relation is created, any inserts or
* selects from the newly created relation will use the local buffer
* pool. rd_islocal is reset at the end of the transaction (commit/abort).
* This is useful for queries like SELECT INTO TABLE and CREATE INDEX.
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994-5, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_localbuf.c,v 1.2 2000/11/30 01:39:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
#include <signal.h>
#include "executor/execdebug.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
extern long int LocalBufferFlushCount;
int NLocBuffer = 64;
BufferDesc *LocalBufferDescriptors = NULL;
Block *LocalBufferBlockPointers = NULL;
long *LocalRefCount = NULL;
static int nextFreeLocalBuf = 0;
/*#define LBDEBUG*/
/*
* LocalBufferAlloc -
* allocate a local buffer. We do round robin allocation for now.
*/
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
{
int i;
BufferDesc *bufHdr = (BufferDesc *) NULL;
if (blockNum == P_NEW)
{
blockNum = reln->rd_nblocks;
reln->rd_nblocks++;
}
/* a low tech search for now -- not optimized for scans */
for (i = 0; i < NLocBuffer; i++)
{
if (LocalBufferDescriptors[i].tag.rnode.relNode ==
reln->rd_node.relNode &&
LocalBufferDescriptors[i].tag.blockNum == blockNum)
{
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
RelationGetRelid(reln), blockNum, -i - 1);
#endif
LocalRefCount[i]++;
*foundPtr = TRUE;
return &LocalBufferDescriptors[i];
}
}
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
RelationGetRelid(reln), blockNum, -nextFreeLocalBuf - 1);
#endif
/* need to get a new buffer (round robin for now) */
for (i = 0; i < NLocBuffer; i++)
{
int b = (nextFreeLocalBuf + i) % NLocBuffer;
if (LocalRefCount[b] == 0)
{
bufHdr = &LocalBufferDescriptors[b];
LocalRefCount[b]++;
nextFreeLocalBuf = (b + 1) % NLocBuffer;
break;
}
}
if (bufHdr == NULL)
elog(ERROR, "no empty local buffer.");
/*
* this buffer is not referenced but it might still be dirty (the last
* transaction to touch it doesn't need its contents but has not
* flushed it). if that's the case, write it out before reusing it!
*/
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
Assert(bufrel != NULL);
/* flush this page */
smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
/*
* drop relcache refcount incremented by
* RelationNodeCacheGetRelation
*/
RelationDecrementReferenceCount(bufrel);
}
/*
* it's all ours now.
*
* We don't need tblNode currently, but I think we will in the
* future, when we give up rel->rd_fd to the fmgr cache.
*/
bufHdr->tag.rnode = reln->rd_node;
bufHdr->tag.blockNum = blockNum;
bufHdr->flags &= ~BM_DIRTY;
bufHdr->cntxDirty = false;
/*
* lazy memory allocation: allocate space on first use of a buffer.
*/
if (bufHdr->data == (SHMEM_OFFSET) 0)
{
char *data = (char *) malloc(BLCKSZ);
if (data == NULL)
elog(FATAL, "Out of memory in LocalBufferAlloc");
/*
* This is a bit of a hack: bufHdr->data needs to be a shmem offset
* for consistency with the shared-buffer case, so make it one
* even though it's not really a valid shmem offset.
*/
bufHdr->data = MAKE_OFFSET(data);
/*
* Set pointer for use by BufferGetBlock() macro.
*/
LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data;
}
*foundPtr = FALSE;
return bufHdr;
}
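The victim search above is plain round-robin; a standalone model of just that loop (sizes and state are illustrative, not the backend's):

#include <stdio.h>

#define NLOCBUFFER 8

static long refcount[NLOCBUFFER];
static int  nextFree = 0;

static int pick_victim(void)
{
    for (int i = 0; i < NLOCBUFFER; i++)
    {
        int b = (nextFree + i) % NLOCBUFFER;
        if (refcount[b] == 0)
        {
            refcount[b]++;
            nextFree = (b + 1) % NLOCBUFFER;
            return b;
        }
    }
    return -1;  /* elog(ERROR, "no empty local buffer.") in the real code */
}

int main(void)
{
    refcount[0] = refcount[1] = 1;          /* slots 0 and 1 are pinned */
    printf("victim = %d\n", pick_victim()); /* prints 2 */
    printf("victim = %d\n", pick_victim()); /* prints 3: cursor advanced */
    return 0;
}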
/*
* WriteLocalBuffer -
* writes out a local buffer
*/
int
WriteLocalBuffer(Buffer buffer, bool release)
{
int bufid;
Assert(BufferIsLocal(buffer));
#ifdef LBDEBUG
fprintf(stderr, "LB WRITE %d\n", buffer);
#endif
bufid = -(buffer + 1);
LocalBufferDescriptors[bufid].flags |= BM_DIRTY;
if (release)
{
Assert(LocalRefCount[bufid] > 0);
LocalRefCount[bufid]--;
}
return true;
}
/*
* InitLocalBuffer -
* init the local buffer cache. Since most queries (esp. multi-user ones)
* don't involve local buffers, we delay allocating actual memory for the
* buffer until we need it.
*/
void
InitLocalBuffer(void)
{
int i;
/*
* these aren't going away. I'm not gonna use palloc.
*/
LocalBufferDescriptors =
(BufferDesc *) calloc(NLocBuffer, sizeof(BufferDesc));
LocalBufferBlockPointers = (Block *) calloc(NLocBuffer, sizeof(Block));
LocalRefCount = (long *) calloc(NLocBuffer, sizeof(long));
nextFreeLocalBuf = 0;
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
/*
* negative to indicate local buffer. This is tricky: shared
* buffers start with 0. We have to start with -2. (Note that the
* routine BufferDescriptorGetBuffer adds 1 to buf_id so our first
* buffer id is -1.)
*/
buf->buf_id = -i - 2;
}
}
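The negative-numbering trick above round-trips cleanly; a standalone check of the arithmetic, using only the relations stated in the comment, in WriteLocalBuffer, and in LocalBufferAlloc's block-pointer indexing:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    for (int i = 0; i < 4; i++)
    {
        int buf_id = -i - 2;        /* what InitLocalBuffer stores */
        int buffer = buf_id + 1;    /* BufferDescriptorGetBuffer adds 1 */
        int slot   = -(buffer + 1); /* recovery step from WriteLocalBuffer */
        assert(slot == i);
        assert(-(buf_id + 2) == i); /* LocalBufferBlockPointers index */
        printf("i=%d buf_id=%d buffer=%d\n", i, buf_id, buffer);
    }
    return 0;
}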
/*
* LocalBufferSync
*
* Flush all dirty buffers in the local buffer cache at commit time.
* Since the buffer cache is only used for keeping relations visible
* during a transaction, we will not need these buffers again.
*
* Note that we have to *flush* local buffers because they are not
* visible to checkpoint makers. But we can skip the XLOG flush check.
*/
void
LocalBufferSync(void)
{
int i;
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
Relation bufrel;
if (buf->flags & BM_DIRTY || buf->cntxDirty)
{
#ifdef LBDEBUG
fprintf(stderr, "LB SYNC %d\n", -i - 1);
#endif
bufrel = RelationNodeCacheGetRelation(buf->tag.rnode);
Assert(bufrel != NULL);
smgrwrite(DEFAULT_SMGR, bufrel, buf->tag.blockNum,
(char *) MAKE_PTR(buf->data));
smgrmarkdirty(DEFAULT_SMGR, bufrel, buf->tag.blockNum);
LocalBufferFlushCount++;
/* drop relcache refcount from RelationNodeCacheGetRelation */
RelationDecrementReferenceCount(bufrel);
buf->flags &= ~BM_DIRTY;
buf->cntxDirty = false;
}
}
MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
}
void
ResetLocalBufferPool(void)
{
int i;
for (i = 0; i < NLocBuffer; i++)
{
BufferDesc *buf = &LocalBufferDescriptors[i];
buf->tag.rnode.relNode = InvalidOid;
buf->flags &= ~BM_DIRTY;
buf->cntxDirty = false;
}
MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
nextFreeLocalBuf = 0;
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.67 2000/11/23 01:08:57 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.68 2000/11/30 08:46:23 vadim Exp $
*
* NOTES:
*
......@@ -192,20 +192,6 @@ static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode);
static char *filepath(char *filename);
static long pg_nofile(void);
#ifndef XLOG
/*
* pg_fsync --- same as fsync except does nothing if -F switch was given
*/
int
pg_fsync(int fd)
{
if (enableFsync)
return fsync(fd);
else
return 0;
}
#endif
/*
* BasicOpenFile --- same as open(2) except can free other FDs if needed
*
......@@ -665,7 +651,6 @@ fileNameOpenFile(FileName fileName,
vfdP->fileFlags = fileFlags & ~(O_TRUNC | O_EXCL);
vfdP->fileMode = fileMode;
vfdP->seekPos = 0;
#ifdef XLOG
/*
* Have to fsync the file on commit. An alternative would be to log
* file creation and fsync the log before the actual file creation.
......@@ -673,7 +658,6 @@ fileNameOpenFile(FileName fileName,
if (fileFlags & O_CREAT)
vfdP->fdstate = FD_DIRTY;
else
#endif
vfdP->fdstate = 0x0;
return file;
......@@ -832,13 +816,7 @@ FileWrite(File file, char *buffer, int amount)
FileAccess(file);
returnCode = write(VfdCache[file].fd, buffer, amount);
if (returnCode > 0)
{
VfdCache[file].seekPos += returnCode;
#ifndef XLOG
/* mark the file as needing fsync */
VfdCache[file].fdstate |= FD_DIRTY;
#endif
}
else
VfdCache[file].seekPos = FileUnknownPos;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.79 2000/11/10 03:53:45 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.80 2000/11/30 08:46:24 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -569,14 +569,6 @@ mdblindwrt(RelFileNode rnode,
elog(DEBUG, "mdblindwrt: write() failed: %m");
status = SM_FAIL;
}
#ifndef XLOG
else if (dofsync &&
pg_fsync(fd) < 0)
{
elog(DEBUG, "mdblindwrt: fsync() failed: %m");
status = SM_FAIL;
}
#endif
if (close(fd) < 0)
{
......@@ -840,7 +832,6 @@ mdabort()
return SM_SUCCESS;
}
#ifdef XLOG
/*
* mdsync() -- Sync storage.
*
......@@ -854,7 +845,6 @@ mdsync()
sync();
return SM_SUCCESS;
}
#endif
/*
* _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
......
......@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.45 2000/11/21 21:16:01 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.46 2000/11/30 08:46:24 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -47,9 +47,7 @@ typedef struct f_smgr
int (*smgr_truncate) (Relation reln, int nblocks);
int (*smgr_commit) (void); /* may be NULL */
int (*smgr_abort) (void); /* may be NULL */
#ifdef XLOG
int (*smgr_sync) (void);
#endif
} f_smgr;
/*
......@@ -62,11 +60,7 @@ static f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty,
#ifdef XLOG
mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
#else
mdnblocks, mdtruncate, mdcommit, mdabort
#endif
},
#ifdef STABLE_MEMORY_STORAGE
......@@ -545,7 +539,6 @@ smgrabort()
return SM_SUCCESS;
}
#ifdef XLOG
int
smgrsync()
{
......@@ -564,7 +557,6 @@ smgrsync()
return SM_SUCCESS;
}
#endif
#ifdef NOT_USED
bool
......@@ -578,8 +570,6 @@ smgriswo(int16 smgrno)
#endif
#ifdef XLOG
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
......@@ -594,4 +584,3 @@ void
smgr_desc(char *buf, uint8 xl_info, char* rec)
{
}
#endif
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.116 2000/11/10 00:33:10 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.117 2000/11/30 08:46:24 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -2027,8 +2027,7 @@ RelationCacheInitializePhase2(void)
}
}
#ifdef XLOG /* used by XLogInitCache */
/* used by XLogInitCache */
void CreateDummyCaches(void);
void DestroyDummyCaches(void);
......@@ -2082,8 +2081,6 @@ DestroyDummyCaches(void)
MemoryContextSwitchTo(oldcxt);
}
#endif /* XLOG */
static void
AttrDefaultFetch(Relation relation)
{
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.73 2000/11/28 23:27:57 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.74 2000/11/30 08:46:25 vadim Exp $
*
*
*-------------------------------------------------------------------------
......@@ -177,11 +177,6 @@ InitPostgres(const char *dbname, const char *username)
{
bool bootstrap = IsBootstrapProcessingMode();
#ifndef XLOG
if (!TransactionFlushEnabled())
on_shmem_exit(FlushBufferPool, 0);
#endif
SetDatabaseName(dbname);
/* ----------------
* initialize the database id used for system caches and lock tables
......@@ -190,11 +185,7 @@ InitPostgres(const char *dbname, const char *username)
if (bootstrap)
{
MyDatabaseId = TemplateDbOid;
#ifdef OLD_FILE_NAMING
SetDatabasePath(ExpandDatabasePath(dbname));
#else
SetDatabasePath(GetDatabasePath(MyDatabaseId));
#endif
LockDisable(true);
}
else
......@@ -228,13 +219,7 @@ InitPostgres(const char *dbname, const char *username)
"Database \"%s\" does not exist in the system catalog.",
dbname);
#ifdef OLD_FILE_NAMING
fullpath = ExpandDatabasePath(datpath);
if (!fullpath)
elog(FATAL, "Database path could not be resolved.");
#else
fullpath = GetDatabasePath(MyDatabaseId);
#endif
/* Verify the database path */
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: htup.h,v 1.40 2000/11/30 01:47:32 vadim Exp $
* $Id: htup.h,v 1.41 2000/11/30 08:46:25 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -63,11 +63,6 @@ typedef struct HeapTupleHeaderData
typedef HeapTupleHeaderData *HeapTupleHeader;
#ifdef XLOG
/* XLOG stuff */
/*
* XLOG allows some information to be stored in the high 4 bits of a
* log record's xl_info field
......@@ -127,11 +122,6 @@ typedef struct xl_heap_update
#define SizeOfHeapUpdate (offsetof(xl_heap_update, mask) + sizeof(uint8))
/* end of XLOG stuff */
#endif /* XLOG */
/*
* MaxTupleSize is the maximum allowed size of a tuple, including header and
* MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff
......@@ -147,7 +137,6 @@ typedef struct xl_heap_update
#define MaxTupleSize \
(BLCKSZ - MAXALIGN(sizeof(PageHeaderData) + MaxSpecialSpace))
/*
* MaxAttrSize is a somewhat arbitrary upper limit on the declared size of
* data fields of char(n) and similar types. It need not have anything
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.47 2000/11/21 21:16:05 petere Exp $
* $Id: nbtree.h,v 1.48 2000/11/30 08:46:25 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -42,10 +42,7 @@ typedef struct BTPageOpaqueData
#define BTP_ROOT (1 << 1) /* It's the root page (has no parent) */
#define BTP_FREE (1 << 2) /* not currently used... */
#define BTP_META (1 << 3) /* Set in the meta-page only */
#ifdef XLOG
#define BTP_REORDER (1 << 4) /* items must be re-ordered */
#endif
} BTPageOpaqueData;
typedef BTPageOpaqueData *BTPageOpaque;
......@@ -209,11 +206,6 @@ typedef BTStackData *BTStack;
#define P_FIRSTKEY ((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
#ifdef XLOG
/* XLOG stuff */
/*
* XLOG allows some information to be stored in the high 4 bits of a
* log record's xl_info field
......@@ -257,7 +249,6 @@ typedef struct xl_btree_insert
#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)
/*
* This is what we need to know about insert with split -
* 22 + {4 + 8 | left hi-key} + [btitem] + right sibling btitems. Note that
......@@ -292,10 +283,6 @@ typedef struct xl_btree_newroot
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, rootblk) + sizeof(BlockIdData))
/* end of XLOG stuff */
#endif /* XLOG */
/*
* Operator strategy numbers -- ordering of these is <, <=, =, >=, >
*/
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: transam.h,v 1.27 2000/11/21 21:16:05 petere Exp $
* $Id: transam.h,v 1.28 2000/11/30 08:46:25 vadim Exp $
*
* NOTES
* Transaction System Version 101 now supports proper OID
......@@ -67,11 +67,7 @@ typedef unsigned char XidStatus;/* (2 bits) */
* transaction page definitions
* ----------------
*/
#ifdef XLOG
#define TP_DataSize (BLCKSZ - sizeof(XLogRecPtr))
#else
#define TP_DataSize BLCKSZ
#endif
#define TP_NumXidStatusPerBlock (TP_DataSize * 4)
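Each XidStatus occupies 2 bits (as the typedef above notes), so every usable byte holds four of them. A standalone check of the arithmetic, with BLCKSZ and sizeof(XLogRecPtr) assumed for illustration:

#include <stdio.h>

int main(void)
{
    const int blcksz   = 8192;      /* assumed typical BLCKSZ */
    const int lsn_size = 8;         /* assumed sizeof(XLogRecPtr) */
    int tp_datasize = blcksz - lsn_size;
    /* 2 bits per XidStatus => 4 statuses per usable byte */
    printf("TP_DataSize = %d, TP_NumXidStatusPerBlock = %d\n",
           tp_datasize, tp_datasize * 4);   /* 8184 and 32736 */
    return 0;
}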
/* ----------------
......@@ -88,10 +84,8 @@ typedef unsigned char XidStatus;/* (2 bits) */
*/
typedef struct LogRelationContentsData
{
#ifdef XLOG
XLogRecPtr LSN; /* temp hack: LSN is member of any block */
/* so should be described in bufmgr */
#endif
int TransSystemVersion;
} LogRelationContentsData;
......@@ -115,9 +109,7 @@ typedef LogRelationContentsData *LogRelationContents;
*/
typedef struct VariableRelationContentsData
{
#ifdef XLOG
XLogRecPtr LSN;
#endif
int TransSystemVersion;
TransactionId nextXidData;
TransactionId lastXidData; /* unused */
......@@ -127,21 +119,14 @@ typedef struct VariableRelationContentsData
typedef VariableRelationContentsData *VariableRelationContents;
/*
* VariableCache is placed in shmem and used by backends to
* get the next available XID & OID without access to the
* variable relation. Actually, I would like to have two
* different on-disk storages for next XID and OID...
* But hoping that someday we will use a per-database OID
* generator, I left this as is. - vadim 07/21/98
* VariableCache is placed in shmem and used by
* backends to get next available XID & OID.
*/
typedef struct VariableCacheData
{
#ifndef XLOG
uint32 xid_count;
#endif
TransactionId nextXid;
Oid nextOid;
uint32 oidCount;
TransactionId nextXid;
Oid nextOid;
uint32 oidCount;
} VariableCacheData;
typedef VariableCacheData *VariableCache;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: xact.h,v 1.30 2000/11/21 21:16:05 petere Exp $
* $Id: xact.h,v 1.31 2000/11/30 08:46:25 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -78,9 +78,6 @@ typedef TransactionStateData *TransactionState;
#define StoreInvalidTransactionId(dest) \
(*((TransactionId*) (dest)) = NullTransactionId)
#ifdef XLOG
/*
* XLOG allows some information to be stored in the high 4 bits of a
* log record's xl_info field
......@@ -106,8 +103,6 @@ typedef struct xl_xact_abort
#define SizeOfXactAbort ((offsetof(xl_xact_abort, xtime) + sizeof(time_t)))
#endif
/* ----------------
* extern definitions
* ----------------
......
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
#include "access/rmgr.h"
#include "utils/rel.h"
......
......@@ -8,7 +8,7 @@
* or in config.h afterwards. Of course, if you edit config.h, then your
* changes will be overwritten the next time you run configure.
*
* $Id: config.h.in,v 1.150 2000/11/29 20:59:54 tgl Exp $
* $Id: config.h.in,v 1.151 2000/11/30 08:46:25 vadim Exp $
*/
#ifndef CONFIG_H
......@@ -234,9 +234,6 @@
# define HAVE_UNIX_SOCKETS 1
#endif
/* Enable WAL. Don't disable this; it was only used during development. */
#define XLOG 1
/*
*------------------------------------------------------------------------
* These hand-configurable symbols are for enabling debugging code,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: buf_internals.h,v 1.45 2000/11/30 01:39:08 tgl Exp $
* $Id: buf_internals.h,v 1.46 2000/11/30 08:46:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,7 +18,6 @@
#include "storage/lmgr.h"
#include "storage/s_lock.h"
/* Buf Mgr constants */
/* in bufmgr.c */
extern int Data_Descriptors;
......@@ -28,7 +27,6 @@ extern int Num_Descriptors;
extern int ShowPinTrace;
/*
* Flags for buffer descriptors
*/
......@@ -105,9 +103,7 @@ typedef struct sbufdesc
bool ri_lock; /* read-intent lock */
bool w_lock; /* context exclusively locked */
#ifdef XLOG
bool cntxDirty; /* new way to mark block as dirty */
#endif
BufferBlindId blind; /* was used to support blind write */
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: bufmgr.h,v 1.45 2000/11/30 01:39:08 tgl Exp $
* $Id: bufmgr.h,v 1.46 2000/11/30 08:46:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -192,10 +192,8 @@ extern void AbortBufferIO(void);
extern bool BufferIsUpdatable(Buffer buffer);
extern void MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer));
#ifdef XLOG
extern void BufmgrCommit(void);
extern void BufferSync(void);
#endif
extern void InitLocalBuffer(void);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: bufpage.h,v 1.36 2000/11/20 21:12:26 vadim Exp $
* $Id: bufpage.h,v 1.37 2000/11/30 08:46:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -118,13 +118,13 @@ typedef OpaqueData *Opaque;
*/
typedef struct PageHeaderData
{
#ifdef XLOG /* XXX LSN is member of *any* block, not */
/* XXX LSN is a member of *any* block, not */
/* only page-organized ones - will change later */
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */
/* record for last change of this page */
StartUpID pd_sui; /* SUI of last changes (currently it's */
/* used by heap AM only) */
#endif
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
......@@ -298,8 +298,6 @@ typedef enum
(sizeof(PageHeaderData) - sizeof(ItemIdData)))) \
/ ((int) sizeof(ItemIdData)))
#ifdef XLOG
#define PageGetLSN(page) \
(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
......@@ -310,8 +308,6 @@ typedef enum
#define PageSetSUI(page, sui) \
(((PageHeader) (page))->pd_sui = (StartUpID) (sui))
#endif
/* ----------------------------------------------------------------
* extern declarations
* ----------------------------------------------------------------
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: fd.h,v 1.23 2000/11/10 03:53:45 vadim Exp $
* $Id: fd.h,v 1.24 2000/11/30 08:46:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -71,10 +71,6 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
extern void closeAllVfds(void);
extern void AtEOXact_Files(void);
#ifdef XLOG
#define pg_fsync(fd) fsync(fd)
#else
extern int pg_fsync(int fd);
#endif
#endif /* FD_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: smgr.h,v 1.25 2000/11/21 21:16:05 petere Exp $
* $Id: smgr.h,v 1.26 2000/11/30 08:46:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -47,14 +47,11 @@ extern int smgrtruncate(int16 which, Relation reln, int nblocks);
extern int smgrDoPendingDeletes(bool isCommit);
extern int smgrcommit(void);
extern int smgrabort(void);
#ifdef XLOG
extern int smgrsync(void);
extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record);
extern void smgr_desc(char *buf, uint8 xl_info, char* rec);
#endif
/* internals: move me elsewhere -- ay 7/94 */
......@@ -77,10 +74,7 @@ extern int mdnblocks(Relation reln);
extern int mdtruncate(Relation reln, int nblocks);
extern int mdcommit(void);
extern int mdabort(void);
#ifdef XLOG
extern int mdsync(void);
#endif
/* mm.c */
extern SPINLOCK MMCacheLock;
......