Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
81c8c244
Commit
81c8c244
authored
Nov 30, 2000
by
Vadim B. Mikheev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
No more #ifdef XLOG.
parent
b16516b8
Changes
36
Hide whitespace changes
Inline
Side-by-side
Showing
36 changed files
with
308 additions
and
3969 deletions
+308
-3969
src/backend/access/gist/gist.c
src/backend/access/gist/gist.c
+1
-5
src/backend/access/hash/hash.c
src/backend/access/hash/hash.c
+1
-5
src/backend/access/heap/heapam.c
src/backend/access/heap/heapam.c
+2
-20
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtinsert.c
+2
-27
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtpage.c
+1
-5
src/backend/access/nbtree/nbtree.c
src/backend/access/nbtree/nbtree.c
+1
-8
src/backend/access/rtree/rtree.c
src/backend/access/rtree/rtree.c
+1
-5
src/backend/access/transam/rmgr.c
src/backend/access/transam/rmgr.c
+0
-1
src/backend/access/transam/transam.c
src/backend/access/transam/transam.c
+1
-12
src/backend/access/transam/transsup.c
src/backend/access/transam/transsup.c
+1
-5
src/backend/access/transam/varsup.c
src/backend/access/transam/varsup.c
+62
-443
src/backend/access/transam/xact.c
src/backend/access/transam/xact.c
+5
-60
src/backend/access/transam/xlog.c
src/backend/access/transam/xlog.c
+1
-33
src/backend/access/transam/xlog_varsup.c
src/backend/access/transam/xlog_varsup.c
+0
-142
src/backend/access/transam/xlogutils.c
src/backend/access/transam/xlogutils.c
+0
-5
src/backend/commands/dbcommands.c
src/backend/commands/dbcommands.c
+1
-5
src/backend/commands/vacuum.c
src/backend/commands/vacuum.c
+5
-18
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/bufmgr.c
+199
-532
src/backend/storage/buffer/xlog_bufmgr.c
src/backend/storage/buffer/xlog_bufmgr.c
+0
-2202
src/backend/storage/buffer/xlog_localbuf.c
src/backend/storage/buffer/xlog_localbuf.c
+0
-284
src/backend/storage/file/fd.c
src/backend/storage/file/fd.c
+1
-23
src/backend/storage/smgr/md.c
src/backend/storage/smgr/md.c
+1
-11
src/backend/storage/smgr/smgr.c
src/backend/storage/smgr/smgr.c
+1
-12
src/backend/utils/cache/relcache.c
src/backend/utils/cache/relcache.c
+2
-5
src/backend/utils/init/postinit.c
src/backend/utils/init/postinit.c
+1
-16
src/include/access/htup.h
src/include/access/htup.h
+1
-12
src/include/access/nbtree.h
src/include/access/nbtree.h
+1
-14
src/include/access/transam.h
src/include/access/transam.h
+6
-21
src/include/access/xact.h
src/include/access/xact.h
+1
-6
src/include/access/xlogutils.h
src/include/access/xlogutils.h
+1
-1
src/include/config.h.in
src/include/config.h.in
+1
-4
src/include/storage/buf_internals.h
src/include/storage/buf_internals.h
+1
-5
src/include/storage/bufmgr.h
src/include/storage/bufmgr.h
+1
-3
src/include/storage/bufpage.h
src/include/storage/bufpage.h
+3
-7
src/include/storage/fd.h
src/include/storage/fd.h
+1
-5
src/include/storage/smgr.h
src/include/storage/smgr.h
+1
-7
No files found.
src/backend/access/gist/gist.c
View file @
81c8c244
...
...
@@ -6,7 +6,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.6
6 2000/11/21 21:15:53 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.6
7 2000/11/30 08:46:20 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -23,9 +23,7 @@
#include "miscadmin.h"
#include "utils/syscache.h"
#ifdef XLOG
#include "access/xlogutils.h"
#endif
/* non-export function prototypes */
static
InsertIndexResult
gistdoinsert
(
Relation
r
,
IndexTuple
itup
,
...
...
@@ -1348,7 +1346,6 @@ int_range_out(INTRANGE *r)
#endif
/* defined GISTDEBUG */
#ifdef XLOG
void
gist_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -1365,4 +1362,3 @@ void
gist_desc
(
char
*
buf
,
uint8
xl_info
,
char
*
rec
)
{
}
#endif
src/backend/access/hash/hash.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.4
5 2000/11/21 21:15:54 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.4
6 2000/11/30 08:46:20 vadim
Exp $
*
* NOTES
* This file contains only the public interface routines.
...
...
@@ -27,9 +27,7 @@
bool
BuildingHash
=
false
;
#ifdef XLOG
#include "access/xlogutils.h"
#endif
/*
...
...
@@ -482,7 +480,6 @@ hashdelete(PG_FUNCTION_ARGS)
PG_RETURN_VOID
();
}
#ifdef XLOG
void
hash_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -499,4 +496,3 @@ void
hash_desc
(
char
*
buf
,
uint8
xl_info
,
char
*
rec
)
{
}
#endif
src/backend/access/heap/heapam.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.9
6 2000/11/21 21:15:54 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.9
7 2000/11/30 08:46:20 vadim
Exp $
*
*
* INTERFACE ROUTINES
...
...
@@ -86,7 +86,6 @@
#include "utils/inval.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "access/xlogutils.h"
XLogRecPtr
log_heap_move
(
Relation
reln
,
ItemPointerData
from
,
HeapTuple
newtup
);
...
...
@@ -99,8 +98,6 @@ static XLogRecPtr log_heap_update(Relation reln, ItemPointerData from,
static
void
HeapPageCleanup
(
Buffer
buffer
);
#endif
/* ----------------------------------------------------------------
* heap support routines
...
...
@@ -1370,7 +1367,6 @@ heap_insert(Relation relation, HeapTuple tup)
/* NO ELOG(ERROR) from here till changes are logged */
RelationPutHeapTuple
(
relation
,
buffer
,
tup
);
#ifdef XLOG
/* XLOG stuff */
{
xl_heap_insert
xlrec
;
...
...
@@ -1392,7 +1388,6 @@ heap_insert(Relation relation, HeapTuple tup)
PageSetLSN
(
BufferGetPage
(
buffer
),
recptr
);
PageSetSUI
(
BufferGetPage
(
buffer
),
ThisStartUpID
);
}
#endif
LockBuffer
(
buffer
,
BUFFER_LOCK_UNLOCK
);
WriteBuffer
(
buffer
);
...
...
@@ -1485,7 +1480,6 @@ l1:
return
result
;
}
#ifdef XLOG
/* XLOG stuff */
{
xl_heap_delete
xlrec
;
...
...
@@ -1500,7 +1494,6 @@ l1:
PageSetLSN
(
dp
,
recptr
);
PageSetSUI
(
dp
,
ThisStartUpID
);
}
#endif
/* store transaction information of xact deleting the tuple */
TransactionIdStore
(
GetCurrentTransactionId
(),
&
(
tp
.
t_data
->
t_xmax
));
...
...
@@ -1638,7 +1631,6 @@ l2:
newbuf
=
buffer
;
else
{
#ifdef XLOG
/*
* We have to unlock old tuple buffer before extending table
* file but have to keep lock on the old tuple. To avoid second
...
...
@@ -1650,7 +1642,7 @@ l2:
_locked_tuple_
.
node
=
relation
->
rd_node
;
_locked_tuple_
.
tid
=
*
otid
;
XactPushRollback
(
_heap_unlock_tuple
,
(
void
*
)
&
_locked_tuple_
);
#endif
TransactionIdStore
(
GetCurrentTransactionId
(),
&
(
oldtup
.
t_data
->
t_xmax
));
oldtup
.
t_data
->
t_cmax
=
GetCurrentCommandId
();
oldtup
.
t_data
->
t_infomask
&=
~
(
HEAP_XMAX_COMMITTED
|
...
...
@@ -1677,15 +1669,12 @@ l2:
else
{
oldtup
.
t_data
->
t_infomask
&=
~
HEAP_XMAX_UNLOGGED
;
#ifdef XLOG
XactPopRollback
();
#endif
}
/* record address of new tuple in t_ctid of old one */
oldtup
.
t_data
->
t_ctid
=
newtup
->
t_self
;
#ifdef XLOG
/* XLOG stuff */
{
XLogRecPtr
recptr
=
log_heap_update
(
relation
,
...
...
@@ -1699,7 +1688,6 @@ l2:
PageSetLSN
(
BufferGetPage
(
buffer
),
recptr
);
PageSetSUI
(
BufferGetPage
(
buffer
),
ThisStartUpID
);
}
#endif
if
(
newbuf
!=
buffer
)
{
...
...
@@ -1791,13 +1779,11 @@ l3:
return
result
;
}
#ifdef XLOG
/*
* XLOG stuff: no logging is required as long as we have no
* savepoints. For savepoints private log could be used...
*/
((
PageHeader
)
BufferGetPage
(
*
buffer
))
->
pd_sui
=
ThisStartUpID
;
#endif
/* store transaction information of xact marking the tuple */
TransactionIdStore
(
GetCurrentTransactionId
(),
&
(
tuple
->
t_data
->
t_xmax
));
...
...
@@ -1984,8 +1970,6 @@ heap_restrpos(HeapScanDesc scan)
}
}
#ifdef XLOG
static
XLogRecPtr
log_heap_update
(
Relation
reln
,
ItemPointerData
from
,
HeapTuple
newtup
,
bool
move
)
...
...
@@ -2634,5 +2618,3 @@ heap_desc(char *buf, uint8 xl_info, char* rec)
else
strcat
(
buf
,
"UNKNOWN"
);
}
#endif
/* XLOG */
src/backend/access/nbtree/nbtinsert.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.6
8 2000/11/16 05:50:58 momjian
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.6
9 2000/11/30 08:46:21 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -61,9 +61,7 @@ static void _bt_pgaddtup(Relation rel, Page page,
static
bool
_bt_isequal
(
TupleDesc
itupdesc
,
Page
page
,
OffsetNumber
offnum
,
int
keysz
,
ScanKey
scankey
);
#ifdef XLOG
static
Relation
_xlheapRel
;
/* temporary hack */
#endif
/*
* _bt_doinsert() -- Handle insertion of a single btitem in the tree.
...
...
@@ -123,9 +121,7 @@ top:
}
}
#ifdef XLOG
_xlheapRel
=
heapRel
;
/* temporary hack */
#endif
/* do the insertion */
res
=
_bt_insertonpg
(
rel
,
buf
,
stack
,
natts
,
itup_scankey
,
btitem
,
0
);
...
...
@@ -522,7 +518,6 @@ _bt_insertonpg(Relation rel,
}
else
{
#ifdef XLOG
/* XLOG stuff */
{
char
xlbuf
[
sizeof
(
xl_btree_insert
)
+
...
...
@@ -562,7 +557,7 @@ _bt_insertonpg(Relation rel,
PageSetLSN
(
page
,
recptr
);
PageSetSUI
(
page
,
ThisStartUpID
);
}
#endif
_bt_pgaddtup
(
rel
,
page
,
itemsz
,
btitem
,
newitemoff
,
"page"
);
itup_off
=
newitemoff
;
itup_blkno
=
BufferGetBlockNumber
(
buf
);
...
...
@@ -612,10 +607,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
rightoff
;
OffsetNumber
maxoff
;
OffsetNumber
i
;
#ifdef XLOG
BTItem
lhikey
;
#endif
rbuf
=
_bt_getbuf
(
rel
,
P_NEW
,
BT_WRITE
);
origpage
=
BufferGetPage
(
buf
);
...
...
@@ -685,9 +677,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
itemsz
=
ItemIdGetLength
(
itemid
);
item
=
(
BTItem
)
PageGetItem
(
origpage
,
itemid
);
}
#ifdef XLOG
lhikey
=
item
;
#endif
if
(
PageAddItem
(
leftpage
,
(
Item
)
item
,
itemsz
,
leftoff
,
LP_USED
)
==
InvalidOffsetNumber
)
elog
(
STOP
,
"btree: failed to add hikey to the left sibling"
);
...
...
@@ -775,7 +765,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
spage
=
BufferGetPage
(
sbuf
);
}
#ifdef XLOG
/*
* Right sibling is locked, new siblings are prepared, but original
* page is not updated yet. Log changes before continuing.
...
...
@@ -860,7 +849,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
PageSetSUI
(
spage
,
ThisStartUpID
);
}
}
#endif
/*
* By here, the original data page has been split into two new halves,
...
...
@@ -1165,19 +1153,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
BTItem
item
;
Size
itemsz
;
BTItem
new_item
;
#ifdef XLOG
Buffer
metabuf
;
#endif
/* get a new root page */
rootbuf
=
_bt_getbuf
(
rel
,
P_NEW
,
BT_WRITE
);
rootpage
=
BufferGetPage
(
rootbuf
);
rootblknum
=
BufferGetBlockNumber
(
rootbuf
);
#ifdef XLOG
metabuf
=
_bt_getbuf
(
rel
,
BTREE_METAPAGE
,
BT_WRITE
);
#endif
/* NO ELOG(ERROR) from here till newroot op is logged */
...
...
@@ -1237,7 +1219,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
elog
(
STOP
,
"btree: failed to add rightkey to new root page"
);
pfree
(
new_item
);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_newroot
xlrec
;
...
...
@@ -1267,16 +1248,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
_bt_wrtbuf
(
rel
,
metabuf
);
}
#endif
/* write and let go of the new root buffer */
_bt_wrtbuf
(
rel
,
rootbuf
);
#ifndef XLOG
/* update metadata page with new root block number */
_bt_metaproot
(
rel
,
rootblknum
,
0
);
#endif
/* update and release new sibling, and finally the old root */
_bt_wrtbuf
(
rel
,
rbuf
);
_bt_wrtbuf
(
rel
,
lbuf
);
...
...
src/backend/access/nbtree/nbtpage.c
View file @
81c8c244
...
...
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.4
1 2000/11/30 01:39:06 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.4
2 2000/11/30 08:46:21 vadim
Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
...
...
@@ -170,7 +170,6 @@ _bt_getroot(Relation rel, int access)
rootopaque
=
(
BTPageOpaque
)
PageGetSpecialPointer
(
rootpage
);
rootopaque
->
btpo_flags
|=
(
BTP_LEAF
|
BTP_ROOT
);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_newroot
xlrec
;
...
...
@@ -187,7 +186,6 @@ _bt_getroot(Relation rel, int access)
PageSetLSN
(
metapg
,
recptr
);
PageSetSUI
(
metapg
,
ThisStartUpID
);
}
#endif
metad
->
btm_root
=
rootblkno
;
metad
->
btm_level
=
1
;
...
...
@@ -403,7 +401,6 @@ _bt_pagedel(Relation rel, ItemPointer tid)
buf
=
_bt_getbuf
(
rel
,
blkno
,
BT_WRITE
);
page
=
BufferGetPage
(
buf
);
#ifdef XLOG
/* XLOG stuff */
{
xl_btree_delete
xlrec
;
...
...
@@ -417,7 +414,6 @@ _bt_pagedel(Relation rel, ItemPointer tid)
PageSetLSN
(
page
,
recptr
);
PageSetSUI
(
page
,
ThisStartUpID
);
}
#endif
PageIndexTupleDelete
(
page
,
offno
);
...
...
src/backend/access/nbtree/nbtree.c
View file @
81c8c244
...
...
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.7
1 2000/11/21 21:15:55 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.7
2 2000/11/30 08:46:21 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -32,11 +32,8 @@ bool BuildingBtree = false; /* see comment in btbuild() */
bool
FastBuild
=
true
;
/* use sort/build instead of insertion
* build */
#ifdef XLOG
#include "access/xlogutils.h"
#endif
static
void
_bt_restscan
(
IndexScanDesc
scan
);
/*
...
...
@@ -733,8 +730,6 @@ _bt_restscan(IndexScanDesc scan)
}
}
#ifdef XLOG
static
bool
_bt_cleanup_page
(
Page
page
,
RelFileNode
hnode
)
{
...
...
@@ -1529,5 +1524,3 @@ btree_desc(char *buf, uint8 xl_info, char* rec)
else
strcat
(
buf
,
"UNKNOWN"
);
}
#endif
src/backend/access/rtree/rtree.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.5
6 2000/11/21 21:15:55 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.5
7 2000/11/30 08:46:21 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -22,9 +22,7 @@
#include "executor/executor.h"
#include "miscadmin.h"
#ifdef XLOG
#include "access/xlogutils.h"
#endif
typedef
struct
SPLITVEC
{
...
...
@@ -1069,7 +1067,6 @@ _rtdump(Relation r)
#endif
/* defined RTDEBUG */
#ifdef XLOG
void
rtree_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -1086,4 +1083,3 @@ void
rtree_desc
(
char
*
buf
,
uint8
xl_info
,
char
*
rec
)
{
}
#endif
src/backend/access/transam/rmgr.c
View file @
81c8c244
...
...
@@ -27,4 +27,3 @@ RmgrData RmgrTable[] = {
{
"Gist"
,
gist_redo
,
gist_undo
,
gist_desc
},
{
"Sequence"
,
seq_redo
,
seq_undo
,
seq_desc
}
};
src/backend/access/transam/transam.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.3
7 2000/11/21 21:15:57 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.3
8 2000/11/30 08:46:22 vadim
Exp $
*
* NOTES
* This file contains the high level access-method interface to the
...
...
@@ -424,23 +424,12 @@ InitializeTransactionLog(void)
SpinAcquire
(
OidGenLockId
);
if
(
!
TransactionIdDidCommit
(
AmiTransactionId
))
{
/* ----------------
* SOMEDAY initialize the information stored in
* the headers of the log/variable relations.
* ----------------
*/
TransactionLogUpdate
(
AmiTransactionId
,
XID_COMMIT
);
TransactionIdStore
(
AmiTransactionId
,
&
cachedTestXid
);
cachedTestXidStatus
=
XID_COMMIT
;
#ifdef XLOG
Assert
(
!
IsUnderPostmaster
&&
ShmemVariableCache
->
nextXid
<=
FirstTransactionId
);
ShmemVariableCache
->
nextXid
=
FirstTransactionId
;
#else
VariableRelationPutNextXid
(
FirstTransactionId
);
#endif
}
else
if
(
RecoveryCheckingEnabled
())
{
...
...
src/backend/access/transam/transsup.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.2
6 2000/10/28 16:20:53
vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.2
7 2000/11/30 08:46:22
vadim Exp $
*
* NOTES
* This file contains support functions for the high
...
...
@@ -186,9 +186,7 @@ TransBlockGetXidStatus(Block tblock,
bits8
bit2
;
BitIndex
offset
;
#ifdef XLOG
tblock
=
(
Block
)
((
char
*
)
tblock
+
sizeof
(
XLogRecPtr
));
#endif
/* ----------------
* calculate the index into the transaction data where
...
...
@@ -231,9 +229,7 @@ TransBlockSetXidStatus(Block tblock,
Index
index
;
BitIndex
offset
;
#ifdef XLOG
tblock
=
(
Block
)
((
char
*
)
tblock
+
sizeof
(
XLogRecPtr
));
#endif
/* ----------------
* calculate the index into the transaction data where
...
...
src/backend/access/transam/varsup.c
View file @
81c8c244
/*-------------------------------------------------------------------------
*
* varsup.c
* postgres variable relation support routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
* postgres OID & XID variables support routines
*
* Copyright (c) 2000, PostgreSQL, Inc
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.3
3 2000/11/20 16:47:30 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.3
4 2000/11/30 08:46:22 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef XLOG
#include "xlog_varsup.c"
#else
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "access/transam.h"
#include "storage/proc.h"
static
void
GetNewObjectIdBlock
(
Oid
*
oid_return
,
int
oid_block_size
);
static
void
VariableRelationGetNextOid
(
Oid
*
oid_return
);
static
void
VariableRelationGetNextXid
(
TransactionId
*
xidP
);
static
void
VariableRelationPutNextOid
(
Oid
oid
);
SPINLOCK
OidGenLockId
;
/* ---------------------
* spin lock for oid generation
* ---------------------
*/
int
OidGenLockId
;
extern
SPINLOCK
XidGenLockId
;
extern
void
XLogPutNextOid
(
Oid
nextOid
);
/* ---------------------
* pointer to "variable cache" in shared memory (set up by shmem.c)
* ---------------------
*/
/* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache
ShmemVariableCache
=
NULL
;
/* ----------------------------------------------------------------
* variable relation query/update routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* VariableRelationGetNextXid
* --------------------------------
*/
static
void
VariableRelationGetNextXid
(
TransactionId
*
xidP
)
{
Buffer
buf
;
VariableRelationContents
var
;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if
(
!
RelationIsValid
(
VariableRelation
))
return
;
/* ----------------
* read the variable page, get the the nextXid field and
* release the buffer
* ----------------
*/
buf
=
ReadBuffer
(
VariableRelation
,
0
);
if
(
!
BufferIsValid
(
buf
))
{
SpinRelease
(
OidGenLockId
);
elog
(
ERROR
,
"VariableRelationGetNextXid: ReadBuffer failed"
);
}
var
=
(
VariableRelationContents
)
BufferGetBlock
(
buf
);
TransactionIdStore
(
var
->
nextXidData
,
xidP
);
ReleaseBuffer
(
buf
);
}
/* --------------------------------
* VariableRelationPutNextXid
* --------------------------------
*/
void
VariableRelationPutNextXid
(
TransactionId
xid
)
{
Buffer
buf
;
VariableRelationContents
var
;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if
(
!
RelationIsValid
(
VariableRelation
))
return
;
/* ----------------
* read the variable page, update the nextXid field and
* write the page back out to disk (with immediate write).
* ----------------
*/
buf
=
ReadBuffer
(
VariableRelation
,
0
);
if
(
!
BufferIsValid
(
buf
))
{
SpinRelease
(
OidGenLockId
);
elog
(
ERROR
,
"VariableRelationPutNextXid: ReadBuffer failed"
);
}
var
=
(
VariableRelationContents
)
BufferGetBlock
(
buf
);
TransactionIdStore
(
xid
,
&
(
var
->
nextXidData
));
FlushBuffer
(
buf
,
true
,
true
);
}
/* --------------------------------
* VariableRelationGetNextOid
* --------------------------------
*/
static
void
VariableRelationGetNextOid
(
Oid
*
oid_return
)
{
Buffer
buf
;
VariableRelationContents
var
;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* if the variable relation is not initialized, then we
* assume we are running at bootstrap time and so we return
* an invalid object id (this path should never be taken, probably).
* ----------------
*/
if
(
!
RelationIsValid
(
VariableRelation
))
{
(
*
oid_return
)
=
InvalidOid
;
return
;
}
/* ----------------
* read the variable page, get the the nextOid field and
* release the buffer
* ----------------
*/
buf
=
ReadBuffer
(
VariableRelation
,
0
);
if
(
!
BufferIsValid
(
buf
))
{
SpinRelease
(
OidGenLockId
);
elog
(
ERROR
,
"VariableRelationGetNextOid: ReadBuffer failed"
);
}
var
=
(
VariableRelationContents
)
BufferGetBlock
(
buf
);
(
*
oid_return
)
=
var
->
nextOid
;
ReleaseBuffer
(
buf
);
}
/* --------------------------------
* VariableRelationPutNextOid
* --------------------------------
*/
static
void
VariableRelationPutNextOid
(
Oid
oid
)
{
Buffer
buf
;
VariableRelationContents
var
;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if
(
!
RelationIsValid
(
VariableRelation
))
return
;
/* ----------------
* read the variable page, update the nextXid field and
* write the page back out to disk.
* ----------------
*/
buf
=
ReadBuffer
(
VariableRelation
,
0
);
if
(
!
BufferIsValid
(
buf
))
{
SpinRelease
(
OidGenLockId
);
elog
(
ERROR
,
"VariableRelationPutNextOid: ReadBuffer failed"
);
}
var
=
(
VariableRelationContents
)
BufferGetBlock
(
buf
);
var
->
nextOid
=
oid
;
WriteBuffer
(
buf
);
}
/* ----------------------------------------------------------------
* transaction id generation support
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewTransactionId
*
* Transaction IDs are allocated via a cache in shared memory.
* Each time we need more IDs, we advance the "next XID" value
* in pg_variable by VAR_XID_PREFETCH and set the cache to
* show that many XIDs as available. Then, allocating those XIDs
* requires just a spinlock and not a buffer read/write cycle.
*
* Since the cache is shared across all backends, cached but unused
* XIDs are not lost when a backend exits, only when the postmaster
* quits or forces shared memory reinit. So we can afford to have
* a pretty big value of VAR_XID_PREFETCH.
*
* This code does not worry about initializing the transaction counter
* (see transam.c's InitializeTransactionLog() for that). We also
* ignore the possibility that the counter could someday wrap around.
* ----------------
*/
#define VAR_XID_PREFETCH 1024
void
GetNewTransactionId
(
TransactionId
*
xid
)
{
/* ----------------
* during bootstrap initialization, we return the special
* bootstrap transaction id.
* ----------------
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if
(
AMI_OVERRIDE
)
{
TransactionIdStore
(
AmiTransactionId
,
xid
)
;
*
xid
=
AmiTransactionId
;
return
;
}
SpinAcquire
(
OidGenLockId
);
/* not good for concurrency... */
if
(
ShmemVariableCache
->
xid_count
==
0
)
{
TransactionId
nextid
;
VariableRelationGetNextXid
(
&
nextid
);
TransactionIdStore
(
nextid
,
&
(
ShmemVariableCache
->
nextXid
));
ShmemVariableCache
->
xid_count
=
VAR_XID_PREFETCH
;
TransactionIdAdd
(
&
nextid
,
VAR_XID_PREFETCH
);
VariableRelationPutNextXid
(
nextid
);
}
TransactionIdStore
(
ShmemVariableCache
->
nextXid
,
xid
);
TransactionIdAdd
(
&
(
ShmemVariableCache
->
nextXid
),
1
);
(
ShmemVariableCache
->
xid_count
)
--
;
SpinAcquire
(
XidGenLockId
);
*
xid
=
ShmemVariableCache
->
nextXid
;
(
ShmemVariableCache
->
nextXid
)
++
;
if
(
MyProc
!=
(
PROC
*
)
NULL
)
MyProc
->
xid
=
*
xid
;
SpinRelease
(
OidGenLockId
);
SpinRelease
(
XidGenLockId
);
}
/*
...
...
@@ -294,30 +55,20 @@ void
ReadNewTransactionId
(
TransactionId
*
xid
)
{
/* ----------------
* during bootstrap initialization, we return the special
* bootstrap transaction id.
* ----------------
/*
* During bootstrap initialization, we return the special
* bootstrap transaction id.
*/
if
(
AMI_OVERRIDE
)
{
TransactionIdStore
(
AmiTransactionId
,
xid
)
;
*
xid
=
AmiTransactionId
;
return
;
}
SpinAcquire
(
OidGenLockId
);
/* not good for concurrency... */
SpinAcquire
(
XidGenLockId
);
*
xid
=
ShmemVariableCache
->
nextXid
;
SpinRelease
(
XidGenLockId
);
/*
* Note that we don't check is ShmemVariableCache->xid_count equal to
* 0 or not. This will work as long as we don't call
* ReadNewTransactionId() before GetNewTransactionId().
*/
if
(
ShmemVariableCache
->
nextXid
==
0
)
elog
(
ERROR
,
"ReadNewTransactionId: ShmemVariableCache->nextXid is not initialized"
);
TransactionIdStore
(
ShmemVariableCache
->
nextXid
,
xid
);
SpinRelease
(
OidGenLockId
);
}
/* ----------------------------------------------------------------
...
...
@@ -325,199 +76,67 @@ ReadNewTransactionId(TransactionId *xid)
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewObjectIdBlock
*
* This support function is used to allocate a block of object ids
* of the given size.
* ----------------
*/
static
void
GetNewObjectIdBlock
(
Oid
*
oid_return
,
/* place to return the first new
* object id */
int
oid_block_size
)
/* number of oids desired */
{
Oid
firstfreeoid
;
Oid
nextoid
;
/* ----------------
* Obtain exclusive access to the variable relation page
* ----------------
*/
SpinAcquire
(
OidGenLockId
);
/* ----------------
* get the "next" oid from the variable relation
* ----------------
*/
VariableRelationGetNextOid
(
&
firstfreeoid
);
/* ----------------
* Allocate the range of OIDs to be returned to the caller.
*
* There are two things going on here.
*
* One: in a virgin database pg_variable will initially contain zeroes,
* so we will read out firstfreeoid = InvalidOid. We want to start
* allocating OIDs at BootstrapObjectIdData instead (OIDs below that
* are reserved for static assignment in the initial catalog data).
*
* Two: if a database is run long enough, the OID counter will wrap
* around. We must not generate an invalid OID when that happens,
* and it seems wise not to generate anything in the reserved range.
* Therefore we advance to BootstrapObjectIdData in this case too.
*
* The comparison here assumes that Oid is an unsigned type.
*/
nextoid
=
firstfreeoid
+
oid_block_size
;
if
(
!
OidIsValid
(
firstfreeoid
)
||
nextoid
<
firstfreeoid
)
{
/* Initialization or wraparound time, force it up to safe range */
firstfreeoid
=
BootstrapObjectIdData
;
nextoid
=
firstfreeoid
+
oid_block_size
;
}
(
*
oid_return
)
=
firstfreeoid
;
/* ----------------
* Update the variable relation to show the block range as used.
* ----------------
*/
VariableRelationPutNextOid
(
nextoid
);
/* ----------------
* Relinquish our lock on the variable relation page
* ----------------
*/
SpinRelease
(
OidGenLockId
);
}
/* ----------------
* GetNewObjectId
*
* This function allocates and parses out object ids. Like
* GetNewTransactionId(), it "prefetches" 32 object ids by
* incrementing the nextOid stored in the var relation by 32 and then
* returning these id's one at a time until they are exhausted.
* This means we reduce the number of accesses to the variable
* relation by 32 for each backend.
*
* Note: 32 has no special significance. We don't want the
* number to be too large because when the backend
* terminates, we lose the oids we cached.
*
* Question: couldn't we use a shared-memory cache just like XIDs?
* That would allow a larger interval between pg_variable updates
* without cache losses. Note, however, that we can assign an OID
* without even a spinlock from the backend-local OID cache.
* Maybe two levels of caching would be good.
* ----------------
*/
#define VAR_OID_PREFETCH 32
static
int
prefetched_oid_count
=
0
;
static
Oid
next_prefetched_oid
;
#define VAR_OID_PREFETCH 8192
static
Oid
lastSeenOid
=
InvalidOid
;
void
GetNewObjectId
(
Oid
*
oid_return
)
/* place to return the new object id */
GetNewObjectId
(
Oid
*
oid_return
)
{
/* ----------------
* if we run out of prefetched oids, then we get some
* more before handing them out to the caller.
* ----------------
*/
SpinAcquire
(
OidGenLockId
);
if
(
prefetched_oid_count
==
0
)
/* If we run out of logged for use oids then we log more */
if
(
ShmemVariableCache
->
oidCount
==
0
)
{
int
oid_block_size
=
VAR_OID_PREFETCH
;
/* ----------------
* Make sure pg_variable is open.
* ----------------
*/
if
(
!
RelationIsValid
(
VariableRelation
))
VariableRelation
=
heap_openr
(
VariableRelationName
,
NoLock
);
/* ----------------
* get a new block of prefetched object ids.
* ----------------
*/
GetNewObjectIdBlock
(
&
next_prefetched_oid
,
oid_block_size
);
/* ----------------
* now reset the prefetched_oid_count.
* ----------------
*/
prefetched_oid_count
=
oid_block_size
;
XLogPutNextOid
(
ShmemVariableCache
->
nextOid
+
VAR_OID_PREFETCH
);
ShmemVariableCache
->
oidCount
=
VAR_OID_PREFETCH
;
}
/* ----------------
* return the next prefetched oid in the pointer passed by
* the user and decrement the prefetch count.
* ----------------
*/
if
(
PointerIsValid
(
oid_return
))
(
*
oid_return
)
=
next_prefetched_o
id
;
lastSeenOid
=
(
*
oid_return
)
=
ShmemVariableCache
->
nextO
id
;
next_prefetched_oid
++
;
prefetched_oid_count
--
;
(
ShmemVariableCache
->
nextOid
)
++
;
(
ShmemVariableCache
->
oidCount
)
--
;
SpinRelease
(
OidGenLockId
);
}
void
CheckMaxObjectId
(
Oid
assigned_oid
)
{
Oid
temp_oid
;
if
(
prefetched_oid_count
==
0
)
/* make sure next/max is set, or
* reload */
GetNewObjectId
(
&
temp_oid
);
/* ----------------
* If we are below prefetched limits, do nothing
* ----------------
*/
if
(
assigned_oid
<
next_prefetched_oid
)
if
(
lastSeenOid
!=
InvalidOid
&&
assigned_oid
<
lastSeenOid
)
return
;
/* ----------------
* If we are here, we are coming from a 'copy from' with oid's
*
* If we are in the prefetched oid range, just bump it up
* ----------------
*/
SpinAcquire
(
OidGenLockId
);
if
(
assigned_oid
<
ShmemVariableCache
->
nextOid
)
{
lastSeenOid
=
ShmemVariableCache
->
nextOid
-
1
;
SpinRelease
(
OidGenLockId
);
return
;
}
if
(
assigned_oid
<=
next_prefetched_oid
+
prefetched_oid_count
-
1
)
/* If we are in the logged oid range, just bump nextOid up */
if
(
assigned_oid
<=
ShmemVariableCache
->
nextOid
+
ShmemVariableCache
->
oidCount
-
1
)
{
prefetched_oid_count
-=
assigned_oid
-
next_prefetched_oid
+
1
;
next_prefetched_oid
=
assigned_oid
+
1
;
ShmemVariableCache
->
oidCount
-=
assigned_oid
-
ShmemVariableCache
->
nextOid
+
1
;
ShmemVariableCache
->
nextOid
=
assigned_oid
+
1
;
SpinRelease
(
OidGenLockId
);
return
;
}
/* ----------------
* We have exceeded the prefetch oid range
*
* We should lock the database and kill all other backends
* but we are loading oid's that we can not guarantee are unique
* anyway, so we must rely on the user
*
* We now:
* set the variable relation with the new max oid
* force the backend to reload its oid cache
*
* By reloading the oid cache, we don't have to update the variable
* relation every time when sequential OIDs are being loaded by COPY.
* ----------------
/*
* We have exceeded the logged oid range.
* We should lock the database and kill all other backends
* but we are loading oid's that we can not guarantee are unique
* anyway, so we must rely on the user.
*/
SpinAcquire
(
OidGenLockId
);
VariableRelationPutNextOid
(
assigned_oid
);
XLogPutNextOid
(
assigned_oid
+
VAR_OID_PREFETCH
);
ShmemVariableCache
->
oidCount
=
VAR_OID_PREFETCH
-
1
;
ShmemVariableCache
->
nextOid
=
assigned_oid
+
1
;
SpinRelease
(
OidGenLockId
);
prefetched_oid_count
=
0
;
/* force reload */
GetNewObjectId
(
&
temp_oid
);
/* cause target OID to be allocated */
}
#endif
/* !XLOG */
src/backend/access/transam/xact.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.8
5 2000/11/30 01:47:31
vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.8
6 2000/11/30 08:46:22
vadim Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
...
...
@@ -219,7 +219,6 @@ TransactionState CurrentTransactionState = &CurrentTransactionStateData;
int
DefaultXactIsoLevel
=
XACT_READ_COMMITTED
;
int
XactIsoLevel
;
#ifdef XLOG
#include "access/xlogutils.h"
int
CommitDelay
=
5
;
/* 1/200000 sec */
...
...
@@ -227,8 +226,6 @@ int CommitDelay = 5; /* 1/200000 sec */
static
void
(
*
_RollbackFunc
)(
void
*
)
=
NULL
;
static
void
*
_RollbackData
=
NULL
;
#endif
/* ----------------
* info returned when the system is disabled
*
...
...
@@ -662,19 +659,10 @@ RecordTransactionCommit()
TransactionId
xid
;
int
leak
;
/* ----------------
* get the current transaction id
* ----------------
*/
xid
=
GetCurrentTransactionId
();
/*
* flush the buffer manager pages. Note: if we have stable main
* memory, dirty shared buffers are not flushed plai 8/7/90
*/
leak
=
BufferPoolCheckLeak
();
#ifdef XLOG
if
(
MyLastRecPtr
.
xrecoff
!=
0
)
{
xl_xact_commit
xlrec
;
...
...
@@ -685,7 +673,7 @@ RecordTransactionCommit()
xlrec
.
xtime
=
time
(
NULL
);
/*
*
MUST
SAVE ARRAY OF RELFILENODE-s TO DROP
*
SHOULD
SAVE ARRAY OF RELFILENODE-s TO DROP
*/
recptr
=
XLogInsert
(
RM_XACT_ID
,
XLOG_XACT_COMMIT
,
(
char
*
)
&
xlrec
,
SizeOfXactCommit
,
NULL
,
0
);
...
...
@@ -704,30 +692,6 @@ RecordTransactionCommit()
MyProc
->
logRec
.
xrecoff
=
0
;
}
#else
/*
* If no one shared buffer was changed by this transaction then we
* don't flush shared buffers and don't record commit status.
*/
if
(
SharedBufferChanged
)
{
FlushBufferPool
();
if
(
leak
)
ResetBufferPool
(
true
);
/*
* have the transaction access methods record the status of this
* transaction id in the pg_log relation.
*/
TransactionIdCommit
(
xid
);
/*
* Now write the log info to the disk too.
*/
leak
=
BufferPoolCheckLeak
();
FlushBufferPool
();
}
#endif
if
(
leak
)
ResetBufferPool
(
true
);
...
...
@@ -815,23 +779,8 @@ AtCommit_Memory(void)
static
void
RecordTransactionAbort
(
void
)
{
TransactionId
xid
;
TransactionId
xid
=
GetCurrentTransactionId
()
;
/* ----------------
* get the current transaction id
* ----------------
*/
xid
=
GetCurrentTransactionId
();
/*
* Have the transaction access methods record the status of this
* transaction id in the pg_log relation. We skip it if no one shared
* buffer was changed by this transaction.
*/
if
(
SharedBufferChanged
&&
!
TransactionIdDidCommit
(
xid
))
TransactionIdAbort
(
xid
);
#ifdef XLOG
if
(
MyLastRecPtr
.
xrecoff
!=
0
)
{
xl_xact_abort
xlrec
;
...
...
@@ -841,9 +790,9 @@ RecordTransactionAbort(void)
recptr
=
XLogInsert
(
RM_XACT_ID
,
XLOG_XACT_ABORT
,
(
char
*
)
&
xlrec
,
SizeOfXactAbort
,
NULL
,
0
);
TransactionIdAbort
(
xid
);
MyProc
->
logRec
.
xrecoff
=
0
;
}
#endif
/*
* Tell bufmgr and smgr to release resources.
...
...
@@ -1748,8 +1697,6 @@ IsTransactionBlock(void)
return
false
;
}
#ifdef XLOG
void
xact_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -1760,7 +1707,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
xl_xact_commit
*
xlrec
=
(
xl_xact_commit
*
)
XLogRecGetData
(
record
);
TransactionIdCommit
(
record
->
xl_xid
);
/*
MUST
REMOVE FILES OF ALL DROPPED RELATIONS */
/*
SHOULD
REMOVE FILES OF ALL DROPPED RELATIONS */
}
else
if
(
info
==
XLOG_XACT_ABORT
)
{
...
...
@@ -1825,5 +1772,3 @@ XactPopRollback(void)
{
_RollbackFunc
=
NULL
;
}
#endif
src/backend/access/transam/xlog.c
View file @
81c8c244
...
...
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.3
7 2000/11/30 01:47:31
vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.3
8 2000/11/30 08:46:22
vadim Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -1443,12 +1443,10 @@ void
BootStrapXLOG
()
{
CheckPoint
checkPoint
;
#ifdef XLOG
char
buffer
[
BLCKSZ
];
bool
usexistent
=
false
;
XLogPageHeader
page
=
(
XLogPageHeader
)
buffer
;
XLogRecord
*
record
;
#endif
checkPoint
.
redo
.
xlogid
=
0
;
checkPoint
.
redo
.
xrecoff
=
SizeOfXLogPHD
;
...
...
@@ -1462,8 +1460,6 @@ BootStrapXLOG()
ShmemVariableCache
->
nextOid
=
checkPoint
.
nextOid
;
ShmemVariableCache
->
oidCount
=
0
;
#ifdef XLOG
memset
(
buffer
,
0
,
BLCKSZ
);
page
->
xlp_magic
=
XLOG_PAGE_MAGIC
;
page
->
xlp_info
=
0
;
...
...
@@ -1488,8 +1484,6 @@ BootStrapXLOG()
close
(
logFile
);
logFile
=
-
1
;
#endif
memset
(
ControlFile
,
0
,
sizeof
(
ControlFileData
));
ControlFile
->
logId
=
0
;
ControlFile
->
logSeg
=
1
;
...
...
@@ -1513,14 +1507,12 @@ str_time(time_t tnow)
return
buf
;
}
/*
* This func must be called ONCE on system startup
*/
void
StartupXLOG
()
{
#ifdef XLOG
XLogCtlInsert
*
Insert
;
CheckPoint
checkPoint
;
XLogRecPtr
RecPtr
,
...
...
@@ -1529,8 +1521,6 @@ StartupXLOG()
char
buffer
[
MAXLOGRECSZ
+
SizeOfXLogRecord
];
bool
sie_saved
=
false
;
#endif
elog
(
LOG
,
"starting up"
);
XLogCtl
->
xlblocks
=
(
XLogRecPtr
*
)
(((
char
*
)
XLogCtl
)
+
sizeof
(
XLogCtlData
));
...
...
@@ -1580,8 +1570,6 @@ StartupXLOG()
elog
(
LOG
,
"database system was interrupted at %s"
,
str_time
(
ControlFile
->
time
));
#ifdef XLOG
LastRec
=
RecPtr
=
ControlFile
->
checkPoint
;
if
(
!
XRecOffIsValid
(
RecPtr
.
xrecoff
))
elog
(
STOP
,
"Invalid checkPoint in control file"
);
...
...
@@ -1602,12 +1590,7 @@ StartupXLOG()
checkPoint
.
nextXid
,
checkPoint
.
nextOid
);
if
(
checkPoint
.
nextXid
<
FirstTransactionId
||
checkPoint
.
nextOid
<
BootstrapObjectIdData
)
#ifdef XLOG_2
elog
(
STOP
,
"Invalid NextTransactionId/NextOid"
);
#else
elog
(
LOG
,
"Invalid NextTransactionId/NextOid"
);
#endif
ShmemVariableCache
->
nextXid
=
checkPoint
.
nextXid
;
ShmemVariableCache
->
nextOid
=
checkPoint
.
nextOid
;
...
...
@@ -1751,8 +1734,6 @@ StartupXLOG()
}
InRecovery
=
false
;
#endif
/* XLOG */
ControlFile
->
state
=
DB_IN_PRODUCTION
;
ControlFile
->
time
=
time
(
NULL
);
UpdateControlFile
();
...
...
@@ -1783,9 +1764,7 @@ ShutdownXLOG()
{
elog
(
LOG
,
"shutting down"
);
#ifdef XLOG
CreateDummyCaches
();
#endif
CreateCheckPoint
(
true
);
elog
(
LOG
,
"database system is shut down"
);
...
...
@@ -1796,7 +1775,6 @@ extern XLogRecPtr GetUndoRecPtr(void);
void
CreateCheckPoint
(
bool
shutdown
)
{
#ifdef XLOG
CheckPoint
checkPoint
;
XLogRecPtr
recptr
;
XLogCtlInsert
*
Insert
=
&
XLogCtl
->
Insert
;
...
...
@@ -1880,12 +1858,9 @@ CreateCheckPoint(bool shutdown)
XLogFlush
(
recptr
);
#endif
/* XLOG */
SpinAcquire
(
ControlFileLockId
);
if
(
shutdown
)
ControlFile
->
state
=
DB_SHUTDOWNED
;
#ifdef XLOG
else
/* create new log file */
{
if
(
recptr
.
xrecoff
%
XLogSegSize
>=
...
...
@@ -1914,16 +1889,10 @@ CreateCheckPoint(bool shutdown)
_logSeg
=
ControlFile
->
logSeg
-
1
;
strcpy
(
archdir
,
ControlFile
->
archdir
);
#else
ControlFile
->
checkPoint
.
xlogid
=
0
;
ControlFile
->
checkPoint
.
xrecoff
=
SizeOfXLogPHD
;
#endif
ControlFile
->
time
=
time
(
NULL
);
UpdateControlFile
();
SpinRelease
(
ControlFileLockId
);
#ifdef XLOG
/*
* Delete offline log files. Get oldest online
* log file from undo rec if it's valid.
...
...
@@ -1948,7 +1917,6 @@ CreateCheckPoint(bool shutdown)
S_UNLOCK
(
&
(
XLogCtl
->
chkp_lck
));
MyLastRecPtr
.
xrecoff
=
0
;
/* to avoid commit record */
#endif
return
;
}
...
...
src/backend/access/transam/xlog_varsup.c
deleted
100644 → 0
View file @
b16516b8
/*-------------------------------------------------------------------------
*
* varsup.c
* postgres OID & XID variables support routines
*
* Copyright (c) 2000, PostgreSQL, Inc
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/xlog_varsup.c,v 1.1 2000/11/03 11:39:35 vadim Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/transam.h"
#include "storage/proc.h"
SPINLOCK
OidGenLockId
;
extern
SPINLOCK
XidGenLockId
;
extern
void
XLogPutNextOid
(
Oid
nextOid
);
/* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache
ShmemVariableCache
=
NULL
;
/*
 * GetNewTransactionId -- allocate and return the next transaction id.
 *
 * The assigned xid is also advertised in this backend's shared PROC
 * entry (when one exists) so other backends can see which transaction
 * we are running.
 *
 * NOTE(review): nextXid is bumped unconditionally with no wraparound
 * handling visible here -- presumably handled (or ignored) elsewhere;
 * confirm against the caller/era of this code.
 */
void
GetNewTransactionId(TransactionId *xid)
{
	/*
	 * During bootstrap initialization, we return the special
	 * bootstrap transaction id.
	 */
	if (AMI_OVERRIDE)
	{
		*xid = AmiTransactionId;
		return;
	}

	/* Serialize access to the shared xid counter. */
	SpinAcquire(XidGenLockId);
	*xid = ShmemVariableCache->nextXid;
	(ShmemVariableCache->nextXid)++;
	/* Publish our xid in shared memory, if we have a PROC entry. */
	if (MyProc != (PROC *) NULL)
		MyProc->xid = *xid;
	SpinRelease(XidGenLockId);
}
/*
 * ReadNewTransactionId -- peek at the next transaction id.
 *
 * Reads ShmemVariableCache->nextXid under the xid-generation lock but,
 * unlike GetNewTransactionId, does not consume (increment) it.
 */
void
ReadNewTransactionId(TransactionId *xid)
{
	/* Bootstrap mode always reports the special bootstrap xid. */
	if (AMI_OVERRIDE)
		*xid = AmiTransactionId;
	else
	{
		SpinAcquire(XidGenLockId);
		*xid = ShmemVariableCache->nextXid;
		SpinRelease(XidGenLockId);
	}
}
/* ----------------------------------------------------------------
* object id generation support
* ----------------------------------------------------------------
*/
#define VAR_OID_PREFETCH 8192
static
Oid
lastSeenOid
=
InvalidOid
;
/*
 * GetNewObjectId -- allocate and return the next OID.
 *
 * OIDs are pre-logged to XLOG in batches of VAR_OID_PREFETCH: whenever
 * the logged batch is exhausted, the next batch boundary is written to
 * the log before any further OID is handed out, so crash recovery can
 * restart the counter past every OID that might have been assigned.
 * The value handed out is remembered in lastSeenOid for use by
 * CheckMaxObjectId.
 */
void
GetNewObjectId(Oid *oid_return)
{
	SpinAcquire(OidGenLockId);
	/* If we run out of logged for use oids then we log more */
	if (ShmemVariableCache->oidCount == 0)
	{
		XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
		ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
	}
	/* Hand out the current OID (caller may pass NULL to skip it). */
	if (PointerIsValid(oid_return))
		lastSeenOid = (*oid_return) = ShmemVariableCache->nextOid;
	(ShmemVariableCache->nextOid)++;
	(ShmemVariableCache->oidCount)--;
	SpinRelease(OidGenLockId);
}
/*
 * CheckMaxObjectId -- ensure the OID generator will not re-issue an
 * OID that was assigned explicitly (e.g. by COPY ... WITH OIDS).
 *
 * If assigned_oid is at or beyond the shared nextOid counter, the
 * counter (and, if necessary, the XLOG-logged OID range) is advanced
 * past it.
 */
void
CheckMaxObjectId(Oid assigned_oid)
{
	/* Fast path: strictly below the highest OID we have handed out. */
	if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid)
		return;

	SpinAcquire(OidGenLockId);
	if (assigned_oid < ShmemVariableCache->nextOid)
	{
		/* Already behind the shared counter; refresh our local cache. */
		lastSeenOid = ShmemVariableCache->nextOid - 1;
		SpinRelease(OidGenLockId);
		return;
	}

	/* If we are in the logged oid range, just bump nextOid up */
	if (assigned_oid <= ShmemVariableCache->nextOid +
		ShmemVariableCache->oidCount - 1)
	{
		ShmemVariableCache->oidCount -=
			assigned_oid - ShmemVariableCache->nextOid + 1;
		ShmemVariableCache->nextOid = assigned_oid + 1;
		SpinRelease(OidGenLockId);
		return;
	}

	/*
	 * We have exceeded the logged oid range.
	 * We should lock the database and kill all other backends
	 * but we are loading oid's that we can not guarantee are unique
	 * anyway, so we must rely on the user.
	 */
	XLogPutNextOid(assigned_oid + VAR_OID_PREFETCH);
	ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1;
	ShmemVariableCache->nextOid = assigned_oid + 1;
	SpinRelease(OidGenLockId);
}
src/backend/access/transam/xlogutils.c
View file @
81c8c244
...
...
@@ -10,9 +10,6 @@
*/
#include "postgres.h"
#ifdef XLOG
#include "access/xlog.h"
#include "access/transam.h"
#include "access/xact.h"
...
...
@@ -397,5 +394,3 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
return
(
&
(
res
->
reldata
));
}
#endif
src/backend/commands/dbcommands.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.
69 2000/11/18 03:36:48 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.
70 2000/11/30 08:46:22 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -153,13 +153,11 @@ createdb(const char *dbname, const char *dbpath,
elog
(
ERROR
,
"database path may not contain single quotes"
);
/* ... otherwise we'd be open to shell exploits below */
#ifdef XLOG
/* Force dirty buffers out to disk, to ensure source database is
* up-to-date for the copy. (We really only need to flush buffers
* for the source database...)
*/
BufferSync
();
#endif
/*
* Close virtual file descriptors so the kernel has more available for
...
...
@@ -255,13 +253,11 @@ createdb(const char *dbname, const char *dbpath,
/* Close pg_database, but keep lock till commit */
heap_close
(
pg_database_rel
,
NoLock
);
#ifdef XLOG
/* Force dirty buffers out to disk, so that newly-connecting backends
* will see the new database in pg_database right away. (They'll see
* an uncommitted tuple, but they don't care; see GetRawDatabaseInfo.)
*/
BufferSync
();
#endif
}
...
...
src/backend/commands/vacuum.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.17
3 2000/11/16 22:30:19 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.17
4 2000/11/30 08:46:22 vadim
Exp $
*
*-------------------------------------------------------------------------
...
...
@@ -47,11 +47,9 @@
#include <sys/resource.h>
#endif
#ifdef XLOG
#include "access/xlog.h"
XLogRecPtr
log_heap_move
(
Relation
reln
,
ItemPointerData
from
,
HeapTuple
newtup
);
#endif
extern
XLogRecPtr
log_heap_move
(
Relation
reln
,
ItemPointerData
from
,
HeapTuple
newtup
);
static
MemoryContext
vac_context
=
NULL
;
...
...
@@ -1492,7 +1490,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
newtup
.
t_data
=
(
HeapTupleHeader
)
PageGetItem
(
ToPage
,
newitemid
);
ItemPointerSet
(
&
(
newtup
.
t_self
),
destvacpage
->
blkno
,
newoff
);
#ifdef XLOG
{
XLogRecPtr
recptr
=
log_heap_move
(
onerel
,
tuple
.
t_self
,
&
newtup
);
...
...
@@ -1505,7 +1502,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
PageSetLSN
(
ToPage
,
recptr
);
PageSetSUI
(
ToPage
,
ThisStartUpID
);
}
#endif
if
(((
int
)
destvacpage
->
blkno
)
>
last_move_dest_block
)
last_move_dest_block
=
destvacpage
->
blkno
;
...
...
@@ -1655,7 +1651,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
~
(
HEAP_XMIN_COMMITTED
|
HEAP_XMIN_INVALID
|
HEAP_MOVED_IN
);
tuple
.
t_data
->
t_infomask
|=
HEAP_MOVED_OFF
;
#ifdef XLOG
{
XLogRecPtr
recptr
=
log_heap_move
(
onerel
,
tuple
.
t_self
,
&
newtup
);
...
...
@@ -1665,7 +1660,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
PageSetLSN
(
ToPage
,
recptr
);
PageSetSUI
(
ToPage
,
ThisStartUpID
);
}
#endif
cur_page
->
offsets_used
++
;
num_moved
++
;
...
...
@@ -1786,19 +1780,12 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)"
if
(
num_moved
>
0
)
{
#ifdef XLOG
RecordTransactionCommit
();
#else
/*
* We have to commit our tuple' movings before we'll truncate
* relation, but we shouldn't lose our locks. And so - quick hack:
* flush buffers and record status of current transaction as
* committed, and continue. - vadim 11/13/96
* record status of current transaction as committed, and continue.
*/
FlushBufferPool
();
TransactionIdCommit
(
myXID
);
FlushBufferPool
();
#endif
RecordTransactionCommit
();
}
/*
...
...
src/backend/storage/buffer/bufmgr.c
View file @
81c8c244
/*-------------------------------------------------------------------------
*
* bufmgr.c
*
xlog_
bufmgr.c
* buffer manager interface routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.9
6 2000/11/30 01:39:07 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.9
7 2000/11/30 08:46:23 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -31,9 +31,6 @@
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
* FlushBuffer() -- Write buffer immediately. Can unpin, or not,
* depending on parameter.
*
* BufferSync() -- flush all dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
...
...
@@ -42,13 +39,8 @@
* freelist.c -- chooses victim for buffer replacement
* buf_table.c -- manages the buffer lookup table
*/
#include "postgres.h"
#ifdef XLOG
#include "xlog_bufmgr.c"
#else
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
...
...
@@ -61,10 +53,11 @@
#include "storage/s_lock.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "catalog/pg_database.h"
#endif
#define BufferGetLSN(bufHdr) \
(*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
extern
SPINLOCK
BufMgrLock
;
extern
long
int
ReadBufferCount
;
...
...
@@ -99,9 +92,6 @@ static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
bool
bufferLockHeld
);
static
BufferDesc
*
BufferAlloc
(
Relation
reln
,
BlockNumber
blockNum
,
bool
*
foundPtr
,
bool
bufferLockHeld
);
static
void
SetBufferDirtiedByMe
(
Buffer
buffer
,
BufferDesc
*
bufHdr
);
static
void
ClearBufferDirtiedByMe
(
Buffer
buffer
,
BufferDesc
*
bufHdr
);
static
void
BufferSync
(
void
);
static
int
BufferReplace
(
BufferDesc
*
bufHdr
);
void
PrintBufferDescs
(
void
);
...
...
@@ -169,48 +159,6 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
return
ReadBufferWithBufferLock
(
reln
,
blockNum
,
false
);
}
/*
* is_userbuffer
*
* XXX caller must have already acquired BufMgrLock
*/
#ifdef NOT_USED
static
bool
is_userbuffer
(
Buffer
buffer
)
{
BufferDesc
*
buf
=
&
BufferDescriptors
[
buffer
-
1
];
if
(
IsSystemRelationName
(
buf
->
blind
.
relname
))
return
false
;
return
true
;
}
#endif
#ifdef NOT_USED
Buffer
ReadBuffer_Debug
(
char
*
file
,
int
line
,
Relation
reln
,
BlockNumber
blockNum
)
{
Buffer
buffer
;
buffer
=
ReadBufferWithBufferLock
(
reln
,
blockNum
,
false
);
if
(
ShowPinTrace
&&
!
BufferIsLocal
(
buffer
)
&&
is_userbuffer
(
buffer
))
{
BufferDesc
*
buf
=
&
BufferDescriptors
[
buffer
-
1
];
fprintf
(
stderr
,
"PIN(RD) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d
\n
"
,
buffer
,
buf
->
blind
.
relname
,
buf
->
tag
.
blockNum
,
PrivateRefCount
[
buffer
-
1
],
file
,
line
);
}
return
buffer
;
}
#endif
/*
* ReadBufferWithBufferLock -- does the work of
* ReadBuffer() but with the possibility that
...
...
@@ -447,7 +395,7 @@ BufferAlloc(Relation reln,
buf
->
refcount
=
1
;
PrivateRefCount
[
BufferDescriptorGetBuffer
(
buf
)
-
1
]
=
1
;
if
(
buf
->
flags
&
BM_DIRTY
)
if
(
buf
->
flags
&
BM_DIRTY
||
buf
->
cntxDirty
)
{
bool
smok
;
...
...
@@ -505,18 +453,18 @@ BufferAlloc(Relation reln,
}
else
{
/*
* BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
* be setted by anyone. - vadim 01/17/97
*/
if
(
buf
->
flags
&
BM_JUST_DIRTIED
)
{
elog
(
FATAL
,
"BufferAlloc: content of block %u (%s) changed while flushing"
,
elog
(
STOP
,
"BufferAlloc: content of block %u (%s) changed while flushing"
,
buf
->
tag
.
blockNum
,
buf
->
blind
.
relname
);
}
else
buf
->
flags
&=
~
BM_DIRTY
;
buf
->
cntxDirty
=
false
;
}
/*
...
...
@@ -676,131 +624,15 @@ WriteBuffer(Buffer buffer)
SpinAcquire
(
BufMgrLock
);
Assert
(
bufHdr
->
refcount
>
0
);
bufHdr
->
flags
|=
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
SetBufferDirtiedByMe
(
buffer
,
bufHdr
);
UnpinBuffer
(
bufHdr
);
SpinRelease
(
BufMgrLock
);
return
TRUE
;
}
#ifdef NOT_USED
void
WriteBuffer_Debug
(
char
*
file
,
int
line
,
Buffer
buffer
)
{
WriteBuffer
(
buffer
);
if
(
ShowPinTrace
&&
BufferIsLocal
(
buffer
)
&&
is_userbuffer
(
buffer
))
{
BufferDesc
*
buf
;
buf
=
&
BufferDescriptors
[
buffer
-
1
];
fprintf
(
stderr
,
"UNPIN(WR) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d
\n
"
,
buffer
,
buf
->
blind
.
relname
,
buf
->
tag
.
blockNum
,
PrivateRefCount
[
buffer
-
1
],
file
,
line
);
}
}
#endif
/*
* FlushBuffer -- like WriteBuffer, but write the page immediately,
* rather than just marking it dirty. On success return, the buffer will
* no longer be dirty.
*
* 'buffer' is known to be dirty/pinned, so there should not be a
* problem reading the BufferDesc members without the BufMgrLock
* (nobody should be able to change tags out from under us).
*
* If 'sync' is true, a synchronous write is wanted (wait for buffer to hit
* the disk). Otherwise it's sufficient to issue the kernel write call.
*
* Unpin buffer if 'release' is true.
*/
int
FlushBuffer
(
Buffer
buffer
,
bool
sync
,
bool
release
)
{
BufferDesc
*
bufHdr
;
Relation
bufrel
;
int
status
;
if
(
BufferIsLocal
(
buffer
))
return
FlushLocalBuffer
(
buffer
,
sync
,
release
)
?
STATUS_OK
:
STATUS_ERROR
;
if
(
BAD_BUFFER_ID
(
buffer
))
return
STATUS_ERROR
;
Assert
(
PrivateRefCount
[
buffer
-
1
]
>
0
);
/* else caller didn't pin */
bufHdr
=
&
BufferDescriptors
[
buffer
-
1
];
bufrel
=
RelationNodeCacheGetRelation
(
bufHdr
->
tag
.
rnode
);
Assert
(
bufrel
!=
(
Relation
)
NULL
);
SharedBufferChanged
=
true
;
/* To check if block content changed while flushing. - vadim 01/17/97 */
SpinAcquire
(
BufMgrLock
);
WaitIO
(
bufHdr
,
BufMgrLock
);
/* confirm end of IO */
bufHdr
->
flags
&=
~
BM_JUST_DIRTIED
;
StartBufferIO
(
bufHdr
,
false
);
/* output IO start */
SpinRelease
(
BufMgrLock
);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* see comments in BufferSync().
*/
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_SHARE
);
if
(
sync
)
status
=
smgrflush
(
DEFAULT_SMGR
,
bufrel
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
else
status
=
smgrwrite
(
DEFAULT_SMGR
,
bufrel
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_UNLOCK
);
/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
RelationDecrementReferenceCount
(
bufrel
);
if
(
status
==
SM_FAIL
)
{
elog
(
ERROR
,
"FlushBuffer: cannot flush block %u of the relation %s"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
return
STATUS_ERROR
;
}
BufferFlushCount
++
;
SpinAcquire
(
BufMgrLock
);
bufHdr
->
flags
&=
~
BM_IO_IN_PROGRESS
;
/* mark IO finished */
TerminateBufferIO
(
bufHdr
);
/* output IO finished */
/*
* If this buffer was marked by someone as DIRTY while we were
* flushing it out we must not clear shared DIRTY flag - vadim
* 01/17/97
*
* ... but we can clear BufferDirtiedByMe anyway - tgl 3/31/00
*/
if
(
bufHdr
->
flags
&
BM_JUST_DIRTIED
)
{
elog
(
NOTICE
,
"FlushBuffer: content of block %u (%s) changed while flushing"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
}
else
bufHdr
->
flags
&=
~
BM_DIRTY
;
ClearBufferDirtiedByMe
(
buffer
,
bufHdr
);
if
(
release
)
UnpinBuffer
(
bufHdr
);
SpinRelease
(
BufMgrLock
);
return
STATUS_OK
;
}
/*
* WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
* when the operation is complete.
...
...
@@ -822,8 +654,9 @@ WriteNoReleaseBuffer(Buffer buffer)
SpinAcquire
(
BufMgrLock
);
Assert
(
bufHdr
->
refcount
>
0
);
bufHdr
->
flags
|=
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
SetBufferDirtiedByMe
(
buffer
,
bufHdr
);
SpinRelease
(
BufMgrLock
);
return
STATUS_OK
;
...
...
@@ -876,307 +709,138 @@ ReleaseAndReadBuffer(Buffer buffer,
}
/*
* SetBufferDirtiedByMe -- mark a shared buffer as being dirtied by this xact
*
* This flag essentially remembers that we need to write and fsync this buffer
* before we can commit the transaction. The write might end up getting done
* by another backend, but we must do the fsync ourselves (else we could
* commit before the data actually reaches disk). We do not issue fsync
* instantly upon write; the storage manager keeps track of which files need
* to be fsync'd before commit can occur. A key aspect of this data structure
* is that we will be able to notify the storage manager that an fsync is
* needed even after another backend has done the physical write and replaced
* the buffer contents with something else!
* BufferSync -- Write all dirty buffers in the pool.
*
* NB: we must be holding the bufmgr lock at entry, and the buffer must be
* pinned so that no other backend can take it away from us.
* This is called at checkpoint time and write out all dirty buffers.
*/
static
void
SetBufferDirtiedByMe
(
Buffer
buffer
,
BufferDesc
*
bufHdr
)
{
BufferTag
*
tagLastDirtied
=
&
BufferTagLastDirtied
[
buffer
-
1
];
Relation
reln
;
int
status
;
/*
* If the flag is already set, check to see whether the buffertag is
* the same. If not, some other backend already wrote the buffer data
* that we dirtied. We must tell the storage manager to make an fsync
* pending on that file before we can overwrite the old tag value.
*/
if
(
BufferDirtiedByMe
[
buffer
-
1
])
{
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
tagLastDirtied
->
rnode
)
&&
bufHdr
->
tag
.
blockNum
==
tagLastDirtied
->
blockNum
)
return
;
/* Same tag already dirtied, so no work */
#ifndef OPTIMIZE_SINGLE
SpinRelease
(
BufMgrLock
);
#endif
/* OPTIMIZE_SINGLE */
reln
=
RelationNodeCacheGetRelation
(
tagLastDirtied
->
rnode
);
if
(
reln
==
(
Relation
)
NULL
)
{
status
=
smgrblindmarkdirty
(
DEFAULT_SMGR
,
tagLastDirtied
->
rnode
,
tagLastDirtied
->
blockNum
);
}
else
{
Assert
(
RelFileNodeEquals
(
tagLastDirtied
->
rnode
,
reln
->
rd_node
));
status
=
smgrmarkdirty
(
DEFAULT_SMGR
,
reln
,
tagLastDirtied
->
blockNum
);
/*
* drop relcache refcnt incremented by
* RelationNodeCacheGetRelation
*/
RelationDecrementReferenceCount
(
reln
);
}
if
(
status
==
SM_FAIL
)
{
elog
(
ERROR
,
"SetBufferDirtiedByMe: cannot mark %u for %s"
,
tagLastDirtied
->
blockNum
,
BufferBlindLastDirtied
[
buffer
-
1
].
relname
);
}
#ifndef OPTIMIZE_SINGLE
SpinAcquire
(
BufMgrLock
);
#endif
/* OPTIMIZE_SINGLE */
}
*
tagLastDirtied
=
bufHdr
->
tag
;
BufferBlindLastDirtied
[
buffer
-
1
]
=
bufHdr
->
blind
;
BufferDirtiedByMe
[
buffer
-
1
]
=
true
;
}
/*
* ClearBufferDirtiedByMe -- mark a shared buffer as no longer needing fsync
*
* If we write out a buffer ourselves, then the storage manager will set its
* needs-fsync flag for that file automatically, and so we can clear our own
* flag that says it needs to be done later.
*
* NB: we must be holding the bufmgr lock at entry.
*/
static
void
ClearBufferDirtiedByMe
(
Buffer
buffer
,
BufferDesc
*
bufHdr
)
{
BufferTag
*
tagLastDirtied
=
&
BufferTagLastDirtied
[
buffer
-
1
];
/*
* Do *not* clear the flag if it refers to some other buffertag than
* the data we just wrote. This is unlikely, but possible if some
* other backend replaced the buffer contents since we set our flag.
*/
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
tagLastDirtied
->
rnode
)
&&
bufHdr
->
tag
.
blockNum
==
tagLastDirtied
->
blockNum
)
BufferDirtiedByMe
[
buffer
-
1
]
=
false
;
}
/*
* BufferSync -- Flush all dirty buffers in the pool.
*
* This is called at transaction commit time. We find all buffers
* that have been dirtied by the current xact and flush them to disk.
* We do *not* flush dirty buffers that have been dirtied by other xacts.
* (This is a substantial change from pre-7.0 behavior.)
*/
static
void
void
BufferSync
()
{
int
i
;
BufferDesc
*
bufHdr
;
Buffer
buffer
;
int
status
;
Relation
reln
;
bool
didwrite
;
RelFileNode
rnode
;
XLogRecPtr
recptr
;
Relation
reln
=
NULL
;
for
(
i
=
0
,
bufHdr
=
BufferDescriptors
;
i
<
NBuffers
;
i
++
,
bufHdr
++
)
{
/* Ignore buffers that were not dirtied by me */
if
(
!
BufferDirtiedByMe
[
i
])
continue
;
SpinAcquire
(
BufMgrLock
);
/*
* We only need to write if the buffer is still dirty and still
* contains the same disk page that it contained when we dirtied
* it. Otherwise, someone else has already written our changes for
* us, and we need only fsync.
*
* (NOTE: it's still possible to do an unnecessary write, if other
* xacts have written and then re-dirtied the page since our last
* change to it. But that should be pretty uncommon, and there's
* no easy way to detect it anyway.)
*/
reln
=
NULL
;
didwrite
=
false
;
if
((
bufHdr
->
flags
&
BM_VALID
)
&&
(
bufHdr
->
flags
&
BM_DIRTY
))
if
(
!
(
bufHdr
->
flags
&
BM_VALID
))
{
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
BufferTagLastDirtied
[
i
].
rnode
)
&&
bufHdr
->
tag
.
blockNum
==
BufferTagLastDirtied
[
i
].
blockNum
)
{
/*
* Try to find relation for buf. This could fail, if the
* rel has been flushed from the relcache since we dirtied
* the page. That should be uncommon, so paying the extra
* cost of a blind write when it happens seems OK.
*/
if
(
!
InRecovery
)
reln
=
RelationNodeCacheGetRelation
(
bufHdr
->
tag
.
rnode
);
/*
* We have to pin buffer to keep anyone from stealing it
* from the buffer pool while we are flushing it or
* waiting in WaitIO. It's bad for GetFreeBuffer in
* BufferAlloc, but there is no other way to prevent
* writing into disk block data from some other buffer,
* getting smgr status of some other block and clearing
* BM_DIRTY of ... - VAdim 09/16/96
*/
PinBuffer
(
bufHdr
);
if
(
bufHdr
->
flags
&
BM_IO_IN_PROGRESS
)
{
WaitIO
(
bufHdr
,
BufMgrLock
);
UnpinBuffer
(
bufHdr
);
if
(
bufHdr
->
flags
&
BM_IO_ERROR
)
{
elog
(
ERROR
,
"BufferSync: write error %u for %s"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
}
}
else
{
/*
* To check if block content changed while flushing
* (see below). - vadim 01/17/97
*/
WaitIO
(
bufHdr
,
BufMgrLock
);
/* confirm end of IO */
bufHdr
->
flags
&=
~
BM_JUST_DIRTIED
;
StartBufferIO
(
bufHdr
,
false
);
/* output IO start */
SpinRelease
(
BufMgrLock
);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* otherwise we could write a non-self-consistent page
* image to disk, which'd be bad news if the other
* transaction aborts before writing its changes.
*
* Note that we still need the BM_JUST_DIRTIED mechanism
* in case someone dirties the buffer just before we
* grab this lock or just after we release it.
*/
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_SHARE
);
SpinRelease
(
BufMgrLock
);
continue
;
}
/*
* If we didn't have the reldesc in our local cache,
* write this page out using the 'blind write' storage
* manager routine. If we did find it, use the
* standard interface.
*/
if
(
reln
==
(
Relation
)
NULL
)
{
status
=
smgrblindwrt
(
DEFAULT_SMGR
,
bufHdr
->
tag
.
rnode
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
),
true
);
/* must fsync */
}
else
{
status
=
smgrwrite
(
DEFAULT_SMGR
,
reln
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
}
/*
* Pin buffer and ensure that no one reads it from disk
*/
PinBuffer
(
bufHdr
);
/* Synchronize with BufferAlloc */
if
(
bufHdr
->
flags
&
BM_IO_IN_PROGRESS
)
WaitIO
(
bufHdr
,
BufMgrLock
);
/*
* Release the per-buffer readlock, reacquire BufMgrLock.
*/
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_UNLOCK
);
buffer
=
BufferDescriptorGetBuffer
(
bufHdr
);
rnode
=
bufHdr
->
tag
.
rnode
;
SpinAcquir
e
(
BufMgrLock
);
SpinReleas
e
(
BufMgrLock
);
UnpinBuffer
(
bufHdr
);
if
(
status
==
SM_FAIL
)
{
bufHdr
->
flags
|=
BM_IO_ERROR
;
elog
(
ERROR
,
"BufferSync: cannot write %u for %s"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
}
bufHdr
->
flags
&=
~
BM_IO_IN_PROGRESS
;
/* mark IO finished */
TerminateBufferIO
(
bufHdr
);
/* Sync IO finished */
BufferFlushCount
++
;
didwrite
=
true
;
/*
* Try to find relation for buffer
*/
reln
=
RelationNodeCacheGetRelation
(
rnode
);
/*
* If this buffer was marked by someone as DIRTY while
* we were flushing it out we must not clear DIRTY
* flag - vadim 01/17/97
*
* but it is OK to clear BufferDirtiedByMe - tgl 3/31/00
*/
if
(
!
(
bufHdr
->
flags
&
BM_JUST_DIRTIED
))
bufHdr
->
flags
&=
~
BM_DIRTY
;
}
/*
* Protect buffer content against concurrent update
*/
LockBuffer
(
buffer
,
BUFFER_LOCK_SHARE
);
/* drop refcnt obtained by RelationNodeCacheGetRelation */
if
(
reln
!=
(
Relation
)
NULL
)
RelationDecrementReferenceCount
(
reln
);
}
}
/*
* Force XLOG flush for buffer' LSN
*/
recptr
=
BufferGetLSN
(
bufHdr
);
XLogFlush
(
recptr
);
/*
* If we did not write the buffer (because someone else did), we
* must still fsync the file containing it, to ensure that the
* write is down to disk before we commit.
* Now it's safe to write buffer to disk
* (if needed at all -:))
*/
if
(
!
didwrite
)
SpinAcquire
(
BufMgrLock
);
if
(
bufHdr
->
flags
&
BM_IO_IN_PROGRESS
)
WaitIO
(
bufHdr
,
BufMgrLock
);
if
(
bufHdr
->
flags
&
BM_DIRTY
||
bufHdr
->
cntxDirty
)
{
#ifndef OPTIMIZE_SINGLE
bufHdr
->
flags
&=
~
BM_JUST_DIRTIED
;
StartBufferIO
(
bufHdr
,
false
);
/* output IO start */
SpinRelease
(
BufMgrLock
);
#endif
/* OPTIMIZE_SINGLE */
reln
=
RelationNodeCacheGetRelation
(
BufferTagLastDirtied
[
i
].
rnode
);
if
(
reln
==
(
Relation
)
NULL
)
{
status
=
smgrblindmarkdirty
(
DEFAULT_SMGR
,
BufferTagLastDirtied
[
i
].
rnode
,
BufferTagLastDirtied
[
i
].
blockNum
);
status
=
smgrblindwrt
(
DEFAULT_SMGR
,
bufHdr
->
tag
.
rnode
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
),
true
);
/* must fsync */
}
else
{
status
=
smgrmarkdirty
(
DEFAULT_SMGR
,
reln
,
BufferTagLastDirtied
[
i
].
blockNum
);
status
=
smgrwrite
(
DEFAULT_SMGR
,
reln
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
}
/*
* drop relcache refcnt incremented by
* RelationNodeCacheGetRelation
*/
RelationDecrementReferenceCount
(
reln
);
if
(
status
==
SM_FAIL
)
/* disk failure ?! */
elog
(
STOP
,
"BufferSync: cannot write %u for %s"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
/*
* Note that it's safe to change cntxDirty here because of
* we protect it from upper writers by share lock and from
* other bufmgr routines by BM_IO_IN_PROGRESS
*/
bufHdr
->
cntxDirty
=
false
;
/*
* Release the per-buffer readlock, reacquire BufMgrLock.
*/
LockBuffer
(
buffer
,
BUFFER_LOCK_UNLOCK
);
BufferFlushCount
++
;
}
#ifndef OPTIMIZE_SINGLE
SpinAcquire
(
BufMgrLock
);
#endif
/* OPTIMIZE_SINGLE */
bufHdr
->
flags
&=
~
BM_IO_IN_PROGRESS
;
/* mark IO finished */
TerminateBufferIO
(
bufHdr
);
/* Sync IO finished */
/*
* If this buffer was marked by someone as DIRTY while
* we were flushing it out we must not clear DIRTY
* flag - vadim 01/17/97
*/
if
(
!
(
bufHdr
->
flags
&
BM_JUST_DIRTIED
))
bufHdr
->
flags
&=
~
BM_DIRTY
;
}
else
LockBuffer
(
buffer
,
BUFFER_LOCK_UNLOCK
);
BufferDirtiedByMe
[
i
]
=
false
;
UnpinBuffer
(
bufHdr
)
;
SpinRelease
(
BufMgrLock
);
/* drop refcnt obtained by RelationNodeCacheGetRelation */
if
(
reln
!=
(
Relation
)
NULL
)
{
RelationDecrementReferenceCount
(
reln
);
reln
=
NULL
;
}
}
#ifndef XLOG
LocalBufferSync
();
#endif
}
}
/*
* WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
...
...
@@ -1278,9 +942,6 @@ ResetBufferPool(bool isCommit)
SpinRelease
(
BufMgrLock
);
}
PrivateRefCount
[
i
]
=
0
;
if
(
!
isCommit
)
BufferDirtiedByMe
[
i
]
=
false
;
}
ResetLocalBufferPool
();
...
...
@@ -1321,16 +982,29 @@ relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
}
/* ------------------------------------------------
* FlushBufferPool
*
* flush all dirty blocks in buffer pool to disk
* FlushBufferPool
*
* Flush all dirty blocks in buffer pool to disk
* at the checkpoint time
* ------------------------------------------------
*/
void
FlushBufferPool
(
void
)
{
BufferSync
();
smgrsync
();
}
/*
* At the commit time we have to flush local buffer pool only
*/
void
BufmgrCommit
(
void
)
{
LocalBufferSync
();
/*
* All files created in current transaction will be fsync-ed
*/
smgrcommit
();
}
...
...
@@ -1358,35 +1032,28 @@ BufferGetBlockNumber(Buffer buffer)
*
* Write out the buffer corresponding to 'bufHdr'
*
* This routine used to flush the data to disk (ie, force immediate fsync)
* but that's no longer necessary because BufferSync is smarter than before.
*
* BufMgrLock must be held at entry, and the buffer must be pinned.
*/
static
int
BufferReplace
(
BufferDesc
*
bufHdr
)
{
Relation
reln
;
XLogRecPtr
recptr
;
int
status
;
/*
* first try to find the reldesc in the cache, if no luck, don't
* bother to build the reldesc from scratch, just do a blind write.
*/
reln
=
RelationNodeCacheGetRelation
(
bufHdr
->
tag
.
rnode
);
/* To check if block content changed while flushing. - vadim 01/17/97 */
bufHdr
->
flags
&=
~
BM_JUST_DIRTIED
;
SpinRelease
(
BufMgrLock
);
/*
* Grab a read lock on the buffer to ensure that no
* other backend changes its contents while we write it;
* see comments in BufferSync().
* No need to lock buffer context - no one should be able to
* end ReadBuffer
*/
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_SHARE
);
recptr
=
BufferGetLSN
(
bufHdr
);
XLogFlush
(
recptr
);
reln
=
RelationNodeCacheGetRelation
(
bufHdr
->
tag
.
rnode
);
if
(
reln
!=
(
Relation
)
NULL
)
{
...
...
@@ -1401,25 +1068,15 @@ BufferReplace(BufferDesc *bufHdr)
false
);
/* no fsync */
}
LockBuffer
(
BufferDescriptorGetBuffer
(
bufHdr
),
BUFFER_LOCK_UNLOCK
);
SpinAcquire
(
BufMgrLock
);
/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
if
(
reln
!=
(
Relation
)
NULL
)
RelationDecrementReferenceCount
(
reln
);
SpinAcquire
(
BufMgrLock
);
if
(
status
==
SM_FAIL
)
return
FALSE
;
/*
* If we had marked this buffer as needing to be fsync'd, we can
* forget about that, because it's now the storage manager's
* responsibility (but only if we called smgrwrite, not smgrblindwrt).
*/
if
(
reln
!=
(
Relation
)
NULL
)
ClearBufferDirtiedByMe
(
BufferDescriptorGetBuffer
(
bufHdr
),
bufHdr
);
BufferFlushCount
++
;
return
TRUE
;
...
...
@@ -1438,7 +1095,8 @@ BlockNumber
RelationGetNumberOfBlocks
(
Relation
relation
)
{
return
((
relation
->
rd_myxactonly
)
?
relation
->
rd_nblocks
:
smgrnblocks
(
DEFAULT_SMGR
,
relation
));
((
relation
->
rd_rel
->
relkind
==
RELKIND_VIEW
)
?
0
:
smgrnblocks
(
DEFAULT_SMGR
,
relation
)));
}
/* ---------------------------------------------------------------------
...
...
@@ -1471,6 +1129,7 @@ DropRelationBuffers(Relation rel)
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
rel
->
rd_node
))
{
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
LocalRefCount
[
i
]
=
0
;
bufHdr
->
tag
.
rnode
.
relNode
=
InvalidOid
;
}
...
...
@@ -1503,6 +1162,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
/*
* Release any refcount we may have.
...
...
@@ -1526,20 +1186,6 @@ recheck:
*/
BufTableDelete
(
bufHdr
);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target relation, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target relation!
*
* NOTE: we have no way to clear BufferDirtiedByMe info in other
* backends, but hopefully there are none with that bit set for
* this rel, since we hold exclusive lock on this rel.
*/
if
(
RelFileNodeEquals
(
rel
->
rd_node
,
BufferTagLastDirtied
[
i
-
1
].
rnode
))
BufferDirtiedByMe
[
i
-
1
]
=
false
;
}
SpinRelease
(
BufMgrLock
);
...
...
@@ -1570,6 +1216,7 @@ DropRelFileNodeBuffers(RelFileNode rnode)
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
rnode
))
{
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
LocalRefCount
[
i
]
=
0
;
bufHdr
->
tag
.
rnode
.
relNode
=
InvalidOid
;
}
...
...
@@ -1600,6 +1247,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
/*
* Release any refcount we may have.
...
...
@@ -1623,20 +1271,6 @@ recheck:
*/
BufTableDelete
(
bufHdr
);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target relation, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target relation!
*
* NOTE: we have no way to clear BufferDirtiedByMe info in other
* backends, but hopefully there are none with that bit set for
* this rel, since we hold exclusive lock on this rel.
*/
if
(
RelFileNodeEquals
(
rnode
,
BufferTagLastDirtied
[
i
-
1
].
rnode
))
BufferDirtiedByMe
[
i
-
1
]
=
false
;
}
SpinRelease
(
BufMgrLock
);
...
...
@@ -1689,6 +1323,7 @@ recheck:
}
/* Now we can do what we came for */
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
/*
* The thing should be free, if caller has checked that no
...
...
@@ -1700,17 +1335,6 @@ recheck:
*/
BufTableDelete
(
bufHdr
);
}
/*
* Also check to see if BufferDirtiedByMe info for this buffer
* refers to the target database, and clear it if so. This is
* independent of whether the current contents of the buffer
* belong to the target database!
*
* (Actually, this is probably unnecessary, since I shouldn't have
* ever dirtied pages of the target database, but...)
*/
if
(
BufferTagLastDirtied
[
i
-
1
].
rnode
.
tblNode
==
dbid
)
BufferDirtiedByMe
[
i
-
1
]
=
false
;
}
SpinRelease
(
BufMgrLock
);
}
...
...
@@ -1847,6 +1471,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{
int
i
;
BufferDesc
*
bufHdr
;
XLogRecPtr
recptr
;
int
status
;
if
(
rel
->
rd_myxactonly
)
{
...
...
@@ -1855,22 +1481,27 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
bufHdr
=
&
LocalBufferDescriptors
[
i
];
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
rel
->
rd_node
))
{
if
(
bufHdr
->
flags
&
BM_DIRTY
)
if
(
bufHdr
->
flags
&
BM_DIRTY
||
bufHdr
->
cntxDirty
)
{
if
(
FlushBuffer
(
-
i
-
1
,
false
,
false
)
!=
STATUS_OK
)
status
=
smgrwrite
(
DEFAULT_SMGR
,
rel
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
if
(
status
==
SM_FAIL
)
{
elog
(
NOTICE
,
"FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it"
,
RelationGetRelationName
(
rel
),
firstDelBlock
,
bufHdr
->
tag
.
blockNum
);
return
-
1
;
return
(
-
1
)
;
}
bufHdr
->
flags
&=
~
(
BM_DIRTY
|
BM_JUST_DIRTIED
);
bufHdr
->
cntxDirty
=
false
;
}
if
(
LocalRefCount
[
i
]
>
0
)
{
elog
(
NOTICE
,
"FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)"
,
RelationGetRelationName
(
rel
),
firstDelBlock
,
bufHdr
->
tag
.
blockNum
,
LocalRefCount
[
i
]);
return
-
2
;
return
(
-
2
)
;
}
if
(
bufHdr
->
tag
.
blockNum
>=
firstDelBlock
)
{
...
...
@@ -1887,22 +1518,57 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
bufHdr
=
&
BufferDescriptors
[
i
];
if
(
RelFileNodeEquals
(
bufHdr
->
tag
.
rnode
,
rel
->
rd_node
))
{
if
(
bufHdr
->
flags
&
BM_DIRTY
)
if
(
bufHdr
->
flags
&
BM_DIRTY
||
bufHdr
->
cntxDirty
)
{
PinBuffer
(
bufHdr
);
if
(
bufHdr
->
flags
&
BM_IO_IN_PROGRESS
)
WaitIO
(
bufHdr
,
BufMgrLock
);
SpinRelease
(
BufMgrLock
);
if
(
FlushBuffer
(
i
+
1
,
false
,
false
)
!=
STATUS_OK
)
/*
* Force XLOG flush for buffer' LSN
*/
recptr
=
BufferGetLSN
(
bufHdr
);
XLogFlush
(
recptr
);
/*
* Now it's safe to write buffer to disk
*/
SpinAcquire
(
BufMgrLock
);
if
(
bufHdr
->
flags
&
BM_IO_IN_PROGRESS
)
WaitIO
(
bufHdr
,
BufMgrLock
);
if
(
bufHdr
->
flags
&
BM_DIRTY
||
bufHdr
->
cntxDirty
)
{
SpinAcquire
(
BufMgrLock
);
UnpinBuffer
(
bufHdr
);
bufHdr
->
flags
&=
~
BM_JUST_DIRTIED
;
StartBufferIO
(
bufHdr
,
false
);
/* output IO start */
SpinRelease
(
BufMgrLock
);
elog
(
NOTICE
,
"FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it"
,
RelationGetRelationName
(
rel
),
firstDelBlock
,
bufHdr
->
tag
.
blockNum
,
PrivateRefCount
[
i
],
bufHdr
->
refcount
);
return
-
1
;
status
=
smgrwrite
(
DEFAULT_SMGR
,
rel
,
bufHdr
->
tag
.
blockNum
,
(
char
*
)
MAKE_PTR
(
bufHdr
->
data
));
if
(
status
==
SM_FAIL
)
/* disk failure ?! */
elog
(
STOP
,
"FlushRelationBuffers: cannot write %u for %s"
,
bufHdr
->
tag
.
blockNum
,
bufHdr
->
blind
.
relname
);
BufferFlushCount
++
;
SpinAcquire
(
BufMgrLock
);
bufHdr
->
flags
&=
~
BM_IO_IN_PROGRESS
;
TerminateBufferIO
(
bufHdr
);
Assert
(
!
(
bufHdr
->
flags
&
BM_JUST_DIRTIED
));
bufHdr
->
flags
&=
~
BM_DIRTY
;
/*
* Note that it's safe to change cntxDirty here because
* of we protect it from upper writers by
* AccessExclusiveLock and from other bufmgr routines
* by BM_IO_IN_PROGRESS
*/
bufHdr
->
cntxDirty
=
false
;
}
SpinAcquire
(
BufMgrLock
);
UnpinBuffer
(
bufHdr
);
}
if
(
!
(
bufHdr
->
flags
&
BM_FREE
))
...
...
@@ -2341,6 +2007,9 @@ LockBuffer(Buffer buffer, int mode)
}
buf
->
w_lock
=
true
;
*
buflock
|=
BL_W_LOCK
;
buf
->
cntxDirty
=
true
;
if
(
*
buflock
&
BL_RI_LOCK
)
{
...
...
@@ -2458,11 +2127,11 @@ AbortBufferIO(void)
Assert
(
buf
->
flags
&
BM_IO_IN_PROGRESS
);
SpinAcquire
(
BufMgrLock
);
if
(
IsForInput
)
Assert
(
!
(
buf
->
flags
&
BM_DIRTY
));
Assert
(
!
(
buf
->
flags
&
BM_DIRTY
)
&&
!
(
buf
->
cntxDirty
)
);
else
{
Assert
(
(
buf
->
flags
&
BM_DIRTY
)
!=
0
);
if
(
(
buf
->
flags
&
BM_IO_ERROR
)
!=
0
)
Assert
(
buf
->
flags
&
BM_DIRTY
||
buf
->
cntxDirty
);
if
(
buf
->
flags
&
BM_IO_ERROR
)
{
elog
(
NOTICE
,
"write error may be permanent: cannot write block %u for %s/%s"
,
buf
->
tag
.
blockNum
,
buf
->
blind
.
dbname
,
buf
->
blind
.
relname
);
...
...
@@ -2528,5 +2197,3 @@ MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer))
SpinRelease
(
BufMgrLock
);
return
;
}
#endif
/* ! XLOG */
src/backend/storage/buffer/xlog_bufmgr.c
deleted
100644 → 0
View file @
b16516b8
/*-------------------------------------------------------------------------
*
* xlog_bufmgr.c
* buffer manager interface routines
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.6 2000/11/30 01:39:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
*
* BufferAlloc() -- lookup a buffer in the buffer table. If
* it isn't there add it, but do not read data into memory.
* This is used when we are about to reinitialize the
* buffer so don't care what the current disk contents are.
* BufferAlloc() also pins the new buffer in memory.
*
* ReadBuffer() -- like BufferAlloc() but reads the data
* on a buffer cache miss.
*
* ReleaseBuffer() -- unpin the buffer
*
* WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
* but don't unpin. The disk IO is delayed until buffer
* replacement.
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
* BufferSync() -- flush all dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
*
* See other files:
* freelist.c -- chooses victim for buffer replacement
* buf_table.c -- manages the buffer lookup table
*/
#include "postgres.h"
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
#include <signal.h>
#include "executor/execdebug.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/s_lock.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#ifdef XLOG
#include "catalog/pg_database.h"
#endif
#define BufferGetLSN(bufHdr) \
(*((XLogRecPtr*)MAKE_PTR((bufHdr)->data)))
extern
SPINLOCK
BufMgrLock
;
extern
long
int
ReadBufferCount
;
extern
long
int
ReadLocalBufferCount
;
extern
long
int
BufferHitCount
;
extern
long
int
LocalBufferHitCount
;
extern
long
int
BufferFlushCount
;
extern
long
int
LocalBufferFlushCount
;
/*
* It's used to avoid disk writes for read-only transactions
* (i.e. when no one shared buffer was changed by transaction).
* We set it to true in WriteBuffer/WriteNoReleaseBuffer when
* marking shared buffer as dirty. We set it to false in xact.c
* after transaction is committed/aborted.
*/
bool
SharedBufferChanged
=
false
;
static
void
WaitIO
(
BufferDesc
*
buf
,
SPINLOCK
spinlock
);
static
void
StartBufferIO
(
BufferDesc
*
buf
,
bool
forInput
);
static
void
TerminateBufferIO
(
BufferDesc
*
buf
);
static
void
ContinueBufferIO
(
BufferDesc
*
buf
,
bool
forInput
);
extern
void
AbortBufferIO
(
void
);
/*
* Macro : BUFFER_IS_BROKEN
* Note that write error doesn't mean the buffer broken
*/
#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
static
Buffer
ReadBufferWithBufferLock
(
Relation
relation
,
BlockNumber
blockNum
,
bool
bufferLockHeld
);
static
BufferDesc
*
BufferAlloc
(
Relation
reln
,
BlockNumber
blockNum
,
bool
*
foundPtr
,
bool
bufferLockHeld
);
static
int
BufferReplace
(
BufferDesc
*
bufHdr
);
void
PrintBufferDescs
(
void
);
/* ---------------------------------------------------
 * RelationGetBufferWithBuffer
 *
 *      Fast path: if 'buffer' already holds block 'blockNumber' of
 *      'relation', return it directly instead of asking the buffer
 *      manager again; otherwise fall back to a normal ReadBuffer.
 * ---------------------------------------------------
 */
Buffer
RelationGetBufferWithBuffer(Relation relation,
                            BlockNumber blockNumber,
                            Buffer buffer)
{
    BufferDesc *hdr;

    if (!BufferIsValid(buffer))
        return ReadBuffer(relation, blockNumber);

    if (BufferIsLocal(buffer))
    {
        /* Backend-local buffer: tag can be inspected without locking. */
        hdr = &LocalBufferDescriptors[-buffer - 1];
        if (hdr->tag.blockNum == blockNumber &&
            RelFileNodeEquals(hdr->tag.rnode, relation->rd_node))
            return buffer;
        return ReadBuffer(relation, blockNumber);
    }

    /* Shared buffer: must hold BufMgrLock while examining the tag. */
    hdr = &BufferDescriptors[buffer - 1];
    SpinAcquire(BufMgrLock);
    if (hdr->tag.blockNum == blockNumber &&
        RelFileNodeEquals(hdr->tag.rnode, relation->rd_node))
    {
        SpinRelease(BufMgrLock);
        return buffer;
    }
    /* Wrong block: hand off to the worker with the lock still held. */
    return ReadBufferWithBufferLock(relation, blockNumber, true);
}
/*
* ReadBuffer -- returns a buffer containing the requested
* block of the requested relation. If the blknum
* requested is P_NEW, extend the relation file and
* allocate a new block.
*
* Returns: the buffer number for the buffer containing
* the block read or NULL on an error.
*
* Assume when this function is called, that reln has been
* opened already.
*/
#undef ReadBuffer
/* conflicts with macro when BUFMGR_DEBUG
* defined */
/*
 * ReadBuffer
 *
 *      Public entry point for reading a block of 'reln' into a buffer.
 *      Simply invokes the common worker, noting that BufMgrLock is not
 *      already held by the caller.
 */
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
    Buffer      result;

    result = ReadBufferWithBufferLock(reln, blockNum, false);
    return result;
}
/*
 * ReadBufferWithBufferLock -- does the work of
 *		ReadBuffer() but with the possibility that
 *		the buffer lock has already been held.  this
 *		is yet another effort to reduce the number of
 *		semops in the system.
 *
 * 'bufferLockHeld' is true when the caller already holds BufMgrLock
 * (see ReleaseAndReadBuffer / RelationGetBufferWithBuffer).
 *
 * Returns the buffer holding the page, or InvalidBuffer on I/O failure.
 * blockNum == P_NEW means extend the relation by one zero-filled block.
 */
static Buffer
ReadBufferWithBufferLock(Relation reln,
                         BlockNumber blockNum,
                         bool bufferLockHeld)
{
    BufferDesc *bufHdr;
    int         extend;         /* extending the file by one block */
    int         status;
    bool        found;
    bool        isLocalBuf;

    extend = (blockNum == P_NEW);
    isLocalBuf = reln->rd_myxactonly;

    if (isLocalBuf)
    {
        ReadLocalBufferCount++;
        bufHdr = LocalBufferAlloc(reln, blockNum, &found);
        if (found)
            LocalBufferHitCount++;
    }
    else
    {
        ReadBufferCount++;

        /*
         * lookup the buffer.  IO_IN_PROGRESS is set if the requested
         * block is not currently in memory.
         */
        bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld);
        if (found)
            BufferHitCount++;
    }

    if (!bufHdr)
        return InvalidBuffer;

    /* if it's already in the buffer pool, we're done */
    if (found)
    {
        /*
         * This happens when a bogus buffer was returned previously and is
         * floating around in the buffer pool.  A routine calling this
         * would want this extended.
         */
        if (extend)
        {
            /* new buffers are zero-filled */
            MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
            smgrextend(DEFAULT_SMGR, reln,
                       (char *) MAKE_PTR(bufHdr->data));
        }
        return BufferDescriptorGetBuffer(bufHdr);
    }

    /*
     * if we have gotten to this point, the reln pointer must be ok and
     * the relation file must be open.
     */
    if (extend)
    {
        /* new buffers are zero-filled */
        MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
        status = smgrextend(DEFAULT_SMGR, reln,
                            (char *) MAKE_PTR(bufHdr->data));
    }
    else
    {
        status = smgrread(DEFAULT_SMGR, reln, blockNum,
                          (char *) MAKE_PTR(bufHdr->data));
    }

    /* local buffers skip the shared-buffer I/O bookkeeping below */
    if (isLocalBuf)
        return BufferDescriptorGetBuffer(bufHdr);

    /* lock buffer manager again to update IO IN PROGRESS */
    SpinAcquire(BufMgrLock);

    if (status == SM_FAIL)
    {
        /* IO Failed.  cleanup the data structures and go home */

        if (!BufTableDelete(bufHdr))
        {
            SpinRelease(BufMgrLock);
            elog(FATAL, "BufRead: buffer table broken after IO error\n");
        }
        /* remember that BufferAlloc() pinned the buffer */
        UnpinBuffer(bufHdr);

        /*
         * Have to reset the flag so that anyone waiting for the buffer
         * can tell that the contents are invalid.
         */
        bufHdr->flags |= BM_IO_ERROR;
        bufHdr->flags &= ~BM_IO_IN_PROGRESS;
    }
    else
    {
        /* IO Succeeded.  clear the flags, finish buffer update */
        bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
    }

    /* If anyone was waiting for IO to complete, wake them up now */
    TerminateBufferIO(bufHdr);

    SpinRelease(BufMgrLock);

    if (status == SM_FAIL)
        return InvalidBuffer;

    return BufferDescriptorGetBuffer(bufHdr);
}
/*
 * BufferAlloc -- Get a buffer from the buffer pool but dont
 *		read it.
 *
 * Returns: descriptor for buffer
 *
 * When this routine returns, the BufMgrLock is guaranteed NOT to be held.
 * On entry, BufMgrLock is acquired here unless 'bufferLockHeld' says the
 * caller already holds it.  *foundPtr is set TRUE if the block was already
 * resident (and valid), FALSE if the caller must read it in.
 */
static BufferDesc *
BufferAlloc(Relation reln,
            BlockNumber blockNum,
            bool *foundPtr,
            bool bufferLockHeld)
{
    BufferDesc *buf,
               *buf2;
    BufferTag   newTag;         /* identity of requested block */
    bool        inProgress;     /* buffer undergoing IO */
    bool        newblock = FALSE;   /* recorded but not otherwise used here */

    /* create a new tag so we can lookup the buffer */
    /* assume that the relation is already open */
    if (blockNum == P_NEW)
    {
        newblock = TRUE;
        blockNum = smgrnblocks(DEFAULT_SMGR, reln);
    }

    INIT_BUFFERTAG(&newTag, reln, blockNum);

    if (!bufferLockHeld)
        SpinAcquire(BufMgrLock);

    /* see if the block is in the buffer pool already */
    buf = BufTableLookup(&newTag);
    if (buf != NULL)
    {
        /*
         * Found it.  Now, (a) pin the buffer so no one steals it from the
         * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
         * the buffer into the buffer pool.
         */
        PinBuffer(buf);
        inProgress = (buf->flags & BM_IO_IN_PROGRESS);

        *foundPtr = TRUE;
        if (inProgress)         /* confirm end of IO */
        {
            WaitIO(buf, BufMgrLock);
            inProgress = (buf->flags & BM_IO_IN_PROGRESS);
        }
        if (BUFFER_IS_BROKEN(buf))
        {
            /*
             * I couldn't understand the following old comment.  If there's
             * no IO for the buffer and the buffer is BROKEN, it should be
             * read again.  So start a new buffer IO here.
             *
             * weird race condition:
             *
             * We were waiting for someone else to read the buffer.  While we
             * were waiting, the reader boof'd in some way, so the
             * contents of the buffer are still invalid.  By saying that
             * we didn't find it, we can make the caller reinitialize the
             * buffer.  If two processes are waiting for this block, both
             * will read the block.  The second one to finish may
             * overwrite any updates made by the first.  (Assume higher
             * level synchronization prevents this from happening).
             *
             * This is never going to happen, don't worry about it.
             */
            *foundPtr = FALSE;
        }
#ifdef BMTRACE
        _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId),
                  RelationGetRelid(reln), blockNum,
                  BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);
#endif   /* BMTRACE */

        if (!(*foundPtr))
            StartBufferIO(buf, true);
        SpinRelease(BufMgrLock);

        return buf;
    }

    *foundPtr = FALSE;

    /*
     * Didn't find it in the buffer pool.  We'll have to initialize a new
     * buffer.  First, grab one from the free list.  If it's dirty, flush
     * it to disk.  Remember to unlock BufMgr spinlock while doing the IOs.
     */
    inProgress = FALSE;
    for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
    {
        buf = GetFreeBuffer();

        /* GetFreeBuffer will abort if it can't find a free buffer */
        Assert(buf);

        /*
         * There should be exactly one pin on the buffer after it is
         * allocated -- ours.  If it had a pin it wouldn't have been on
         * the free list.  No one else could have pinned it between
         * GetFreeBuffer and here because we have the BufMgrLock.
         */
        Assert(buf->refcount == 0);
        buf->refcount = 1;
        PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;

        if (buf->flags & BM_DIRTY || buf->cntxDirty)
        {
            bool        smok;

            /*
             * skip write error buffers
             */
            if ((buf->flags & BM_IO_ERROR) != 0)
            {
                PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
                buf->refcount--;
                buf = (BufferDesc *) NULL;
                continue;
            }

            /*
             * Set BM_IO_IN_PROGRESS to keep anyone from doing anything
             * with the contents of the buffer while we write it out. We
             * don't really care if they try to read it, but if they can
             * complete a BufferAlloc on it they can then scribble into
             * it, and we'd really like to avoid that while we are
             * flushing the buffer.  Setting this flag should block them
             * in WaitIO until we're done.
             */
            inProgress = TRUE;

            /*
             * All code paths that acquire this lock pin the buffer first;
             * since no one had it pinned (it just came off the free
             * list), no one else can have this lock.
             */
            StartBufferIO(buf, false);

            /*
             * Write the buffer out, being careful to release BufMgrLock
             * before starting the I/O.
             */
            smok = BufferReplace(buf);
            if (smok == FALSE)
            {
                /* Write failed: mark the error and put the buffer back. */
                elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s",
                     buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
                inProgress = FALSE;
                buf->flags |= BM_IO_ERROR;
                buf->flags &= ~BM_IO_IN_PROGRESS;
                TerminateBufferIO(buf);
                PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
                Assert(buf->refcount > 0);
                buf->refcount--;
                if (buf->refcount == 0)
                {
                    AddBufferToFreelist(buf);
                    buf->flags |= BM_FREE;
                }
                buf = (BufferDesc *) NULL;
            }
            else
            {
                /*
                 * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
                 * be set by anyone.		- vadim 01/17/97
                 */
                if (buf->flags & BM_JUST_DIRTIED)
                {
                    elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing",
                         buf->tag.blockNum, buf->blind.relname);
                }
                else
                    buf->flags &= ~BM_DIRTY;

                buf->cntxDirty = false;
            }

            /*
             * Somebody could have pinned the buffer while we were doing
             * the I/O and had given up the BufMgrLock (though they would
             * be waiting for us to clear the BM_IO_IN_PROGRESS flag).
             * That's why this is a loop -- if so, we need to clear the
             * I/O flags, remove our pin and start all over again.
             *
             * People may be making buffers free at any time, so there's no
             * reason to think that we have an immediate disaster on our
             * hands.
             */
            if (buf && buf->refcount > 1)
            {
                inProgress = FALSE;
                buf->flags &= ~BM_IO_IN_PROGRESS;
                TerminateBufferIO(buf);
                PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
                buf->refcount--;
                buf = (BufferDesc *) NULL;
            }

            /*
             * Somebody could have allocated another buffer for the same
             * block we are about to read in. (While we flush out the
             * dirty buffer, we don't hold the lock and someone could have
             * allocated another buffer for the same block. The problem is
             * we haven't gotten around to insert the new tag into the
             * buffer table. So we need to check here.		-ay 3/95
             */
            buf2 = BufTableLookup(&newTag);
            if (buf2 != NULL)
            {
                /*
                 * Found it. Someone has already done what we're about to
                 * do. We'll just handle this as if it were found in the
                 * buffer pool in the first place.
                 */
                if (buf != NULL)
                {
                    buf->flags &= ~BM_IO_IN_PROGRESS;
                    TerminateBufferIO(buf);

                    /* give up the buffer since we don't need it any more */
                    PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
                    Assert(buf->refcount > 0);
                    buf->refcount--;
                    if (buf->refcount == 0)
                    {
                        AddBufferToFreelist(buf);
                        buf->flags |= BM_FREE;
                    }
                }

                PinBuffer(buf2);
                inProgress = (buf2->flags & BM_IO_IN_PROGRESS);

                *foundPtr = TRUE;
                if (inProgress)
                {
                    WaitIO(buf2, BufMgrLock);
                    inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
                }

                if (BUFFER_IS_BROKEN(buf2))
                    *foundPtr = FALSE;

                if (!(*foundPtr))
                    StartBufferIO(buf2, true);

                SpinRelease(BufMgrLock);

                return buf2;
            }
        }
    }

    /*
     * At this point we should have the sole pin on a non-dirty buffer and
     * we may or may not already have the BM_IO_IN_PROGRESS flag set.
     */

    /*
     * Change the name of the buffer in the lookup table:
     *
     * Need to update the lookup table before the read starts. If someone
     * comes along looking for the buffer while we are reading it in, we
     * don't want them to allocate a new buffer.  For the same reason, we
     * didn't want to erase the buf table entry for the buffer we were
     * writing back until now, either.
     */
    if (!BufTableDelete(buf))
    {
        SpinRelease(BufMgrLock);
        elog(FATAL, "buffer wasn't in the buffer table\n");
    }

    /* record the database name and relation name for this buffer */
    strcpy(buf->blind.dbname, (DatabaseName) ? DatabaseName : "Recovery");
    strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln));

    INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
    if (!BufTableInsert(buf))
    {
        SpinRelease(BufMgrLock);
        elog(FATAL, "Buffer in lookup table twice\n");
    }

    /*
     * Buffer contents are currently invalid.  Have to mark IO IN PROGRESS
     * so no one fiddles with them until the read completes.  If this
     * routine has been called simply to allocate a buffer, no io will be
     * attempted, so the flag isnt set.
     */
    if (!inProgress)
        StartBufferIO(buf, true);
    else
        ContinueBufferIO(buf, true);

#ifdef BMTRACE
    _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId),
              RelationGetRelid(reln), blockNum,
              BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
#endif   /* BMTRACE */

    SpinRelease(BufMgrLock);

    return buf;
}
/*
 * WriteBuffer
 *
 *      Marks buffer contents as dirty (actual write happens later)
 *      and drops the caller's pin.
 *
 *      Assumes the buffer is pinned and its relation is valid.
 *
 * Side Effects:
 *      Pin count is decremented.
 */
#undef WriteBuffer

int
WriteBuffer(Buffer buffer)
{
    BufferDesc *desc;

    /* Backend-local buffers are dirtied through their own routine. */
    if (BufferIsLocal(buffer))
        return WriteLocalBuffer(buffer, TRUE);

    if (BAD_BUFFER_ID(buffer))
        return FALSE;

    desc = &BufferDescriptors[buffer - 1];

    /* Record that this transaction changed shared storage. */
    SharedBufferChanged = true;

    SpinAcquire(BufMgrLock);
    Assert(desc->refcount > 0);
    desc->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
    UnpinBuffer(desc);
    SpinRelease(BufMgrLock);

    return TRUE;
}
/*
 * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
 *		when the operation is complete.
 */
int
WriteNoReleaseBuffer(Buffer buffer)
{
    BufferDesc *desc;

    /* Backend-local buffers are dirtied through their own routine. */
    if (BufferIsLocal(buffer))
        return WriteLocalBuffer(buffer, FALSE);

    if (BAD_BUFFER_ID(buffer))
        return STATUS_ERROR;

    desc = &BufferDescriptors[buffer - 1];

    /* Record that this transaction changed shared storage. */
    SharedBufferChanged = true;

    SpinAcquire(BufMgrLock);
    Assert(desc->refcount > 0);
    desc->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
    SpinRelease(BufMgrLock);

    return STATUS_OK;
}
#undef ReleaseAndReadBuffer
/*
 * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
 *		so that only one semop needs to be called.
 *
 * Drops our pin on 'buffer' (if it is a valid shared or local buffer)
 * and then reads 'blockNum' of 'relation'.  The interesting case is
 * when our private refcount drops to zero: we then take BufMgrLock to
 * drop the shared refcount, and keep holding the lock straight into
 * ReadBufferWithBufferLock(), saving one lock release/reacquire cycle.
 */
Buffer
ReleaseAndReadBuffer(Buffer buffer,
					 Relation relation,
					 BlockNumber blockNum)
{
	BufferDesc *bufHdr;
	Buffer		retbuf;

	if (BufferIsLocal(buffer))
	{
		/* local buffers: just drop our local pin, then fall through */
		Assert(LocalRefCount[-buffer - 1] > 0);
		LocalRefCount[-buffer - 1]--;
	}
	else
	{
		if (BufferIsValid(buffer))
		{
			bufHdr = &BufferDescriptors[buffer - 1];
			Assert(PrivateRefCount[buffer - 1] > 0);
			PrivateRefCount[buffer - 1]--;
			if (PrivateRefCount[buffer - 1] == 0)
			{
				/* last private pin: drop shared refcount under BufMgrLock */
				SpinAcquire(BufMgrLock);
				Assert(bufHdr->refcount > 0);
				bufHdr->refcount--;
				if (bufHdr->refcount == 0)
				{
					AddBufferToFreelist(bufHdr);
					bufHdr->flags |= BM_FREE;
				}
				/*
				 * Re-read while still holding BufMgrLock;
				 * ReadBufferWithBufferLock releases it for us.
				 */
				retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
				return retbuf;
			}
		}
	}

	/* buffer was local, invalid, or still pinned: plain ReadBuffer */
	return ReadBuffer(relation, blockNum);
}
/*
 * BufferSync -- Write all dirty buffers in the pool.
 *
 * This is called at checkpoint time and write out all dirty buffers.
 */
void
BufferSync()
{
	int			i;
	BufferDesc *bufHdr;
	Buffer		buffer;
	int			status;
	RelFileNode rnode;
	XLogRecPtr	recptr;
	Relation	reln = NULL;

	for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
	{
		SpinAcquire(BufMgrLock);

		/* skip buffers with no valid page contents */
		if (!(bufHdr->flags & BM_VALID))
		{
			SpinRelease(BufMgrLock);
			continue;
		}

		/*
		 * Pin buffer and ensure that no one reads it from disk
		 */
		PinBuffer(bufHdr);
		/* Synchronize with BufferAlloc */
		if (bufHdr->flags & BM_IO_IN_PROGRESS)
			WaitIO(bufHdr, BufMgrLock);

		buffer = BufferDescriptorGetBuffer(bufHdr);
		rnode = bufHdr->tag.rnode;

		SpinRelease(BufMgrLock);

		/*
		 * Try to find relation for buffer
		 */
		reln = RelationNodeCacheGetRelation(rnode);

		/*
		 * Protect buffer content against concurrent update
		 */
		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		/*
		 * Force XLOG flush for buffer' LSN: WAL must reach disk before
		 * the data page does (write-ahead rule).
		 */
		recptr = BufferGetLSN(bufHdr);
		XLogFlush(recptr);

		/*
		 * Now it's safe to write buffer to disk
		 * (if needed at all -:))
		 */
		SpinAcquire(BufMgrLock);
		if (bufHdr->flags & BM_IO_IN_PROGRESS)
			WaitIO(bufHdr, BufMgrLock);

		if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
		{
			bufHdr->flags &= ~BM_JUST_DIRTIED;
			StartBufferIO(bufHdr, false);	/* output IO start */

			SpinRelease(BufMgrLock);

			if (reln == (Relation) NULL)
			{
				/* relcache miss: blind write by RelFileNode, must fsync */
				status = smgrblindwrt(DEFAULT_SMGR,
									  bufHdr->tag.rnode,
									  bufHdr->tag.blockNum,
									  (char *) MAKE_PTR(bufHdr->data),
									  true);	/* must fsync */
			}
			else
			{
				status = smgrwrite(DEFAULT_SMGR, reln,
								   bufHdr->tag.blockNum,
								   (char *) MAKE_PTR(bufHdr->data));
			}

			if (status == SM_FAIL)	/* disk failure ?! */
				elog(STOP, "BufferSync: cannot write %u for %s",
					 bufHdr->tag.blockNum, bufHdr->blind.relname);

			/*
			 * Note that it's safe to change cntxDirty here because of
			 * we protect it from upper writers by share lock and from
			 * other bufmgr routines by BM_IO_IN_PROGRESS
			 */
			bufHdr->cntxDirty = false;

			/*
			 * Release the per-buffer readlock, reacquire BufMgrLock.
			 */
			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
			BufferFlushCount++;

			SpinAcquire(BufMgrLock);
			bufHdr->flags &= ~BM_IO_IN_PROGRESS;	/* mark IO finished */
			TerminateBufferIO(bufHdr);	/* Sync IO finished */

			/*
			 * If this buffer was marked by someone as DIRTY while
			 * we were flushing it out we must not clear DIRTY
			 * flag - vadim 01/17/97
			 */
			if (!(bufHdr->flags & BM_JUST_DIRTIED))
				bufHdr->flags &= ~BM_DIRTY;
		}
		else
			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

		UnpinBuffer(bufHdr);

		SpinRelease(BufMgrLock);

		/* drop refcnt obtained by RelationNodeCacheGetRelation */
		if (reln != (Relation) NULL)
		{
			RelationDecrementReferenceCount(reln);
			reln = NULL;
		}
	}
}
/*
 * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared.
 *
 * Must be entered with the buffer manager spinlock held; the lock is
 * released while we sleep on the buffer's io_in_progress_lock and is
 * re-acquired before each re-test of the flag.
 */
static void
WaitIO(BufferDesc *bufHdr, SPINLOCK spinlock)
{
	/*
	 * Changed to wait until there's no IO - Inoue 01/13/2000
	 */
	while ((bufHdr->flags & BM_IO_IN_PROGRESS) != 0)
	{
		SpinRelease(spinlock);
		/* grab-and-drop the per-buffer IO lock just to wait it out */
		S_LOCK(&(bufHdr->io_in_progress_lock));
		S_UNLOCK(&(bufHdr->io_in_progress_lock));
		SpinAcquire(spinlock);
	}
}
long
NDirectFileRead
;
/* some I/O's are direct file access.
* bypass bufmgr */
long
NDirectFileWrite
;
/* e.g., I/O in psort and hashjoin. */
void
PrintBufferUsage
(
FILE
*
statfp
)
{
float
hitrate
;
float
localhitrate
;
if
(
ReadBufferCount
==
0
)
hitrate
=
0
.
0
;
else
hitrate
=
(
float
)
BufferHitCount
*
100
.
0
/
ReadBufferCount
;
if
(
ReadLocalBufferCount
==
0
)
localhitrate
=
0
.
0
;
else
localhitrate
=
(
float
)
LocalBufferHitCount
*
100
.
0
/
ReadLocalBufferCount
;
fprintf
(
statfp
,
"!
\t
Shared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%
\n
"
,
ReadBufferCount
-
BufferHitCount
,
BufferFlushCount
,
hitrate
);
fprintf
(
statfp
,
"!
\t
Local blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%
\n
"
,
ReadLocalBufferCount
-
LocalBufferHitCount
,
LocalBufferFlushCount
,
localhitrate
);
fprintf
(
statfp
,
"!
\t
Direct blocks: %10ld read, %10ld written
\n
"
,
NDirectFileRead
,
NDirectFileWrite
);
}
/*
 * ResetBufferUsage -- zero out all buffer-usage statistics counters.
 */
void
ResetBufferUsage()
{
	/* shared-pool counters */
	BufferHitCount = 0;
	ReadBufferCount = 0;
	BufferFlushCount = 0;
	/* local-pool counters */
	LocalBufferHitCount = 0;
	ReadLocalBufferCount = 0;
	LocalBufferFlushCount = 0;
	/* direct-I/O counters */
	NDirectFileRead = 0;
	NDirectFileWrite = 0;
}
/* ----------------------------------------------
 *		ResetBufferPool
 *
 *		Release every buffer pin held by this backend.  Called on
 *		transaction abort, and also during commit when
 *		BufferPoolCheckLeak found leaked pins (isCommit true: only pin
 *		counts are cleaned up).
 *
 *		On abort we additionally forget pending fsync requests via
 *		smgrabort(); dirtied buffers still get written eventually, but
 *		without an fsync for them.
 * ----------------------------------------------
 */
void
ResetBufferPool(bool isCommit)
{
	int			bufno;

	for (bufno = 0; bufno < NBuffers; bufno++)
	{
		if (PrivateRefCount[bufno] != 0)
		{
			BufferDesc *desc = &BufferDescriptors[bufno];

			/* drop our share of the global refcount */
			SpinAcquire(BufMgrLock);
			Assert(desc->refcount > 0);
			desc->refcount--;
			if (desc->refcount == 0)
			{
				AddBufferToFreelist(desc);
				desc->flags |= BM_FREE;
			}
			SpinRelease(BufMgrLock);
		}
		PrivateRefCount[bufno] = 0;
	}

	ResetLocalBufferPool();

	if (!isCommit)
		smgrabort();
}
/* -----------------------------------------------
 *		BufferPoolCheckLeak
 *
 *		Check whether this backend still holds any buffer pins;
 *		reports each leaked buffer via NOTICE and returns 1 if any
 *		leak was found, 0 otherwise.
 * -----------------------------------------------
 */
int
BufferPoolCheckLeak()
{
	int			result = 0;
	int			i;

	for (i = 1; i <= NBuffers; i++)
	{
		if (PrivateRefCount[i - 1] == 0)
			continue;

		{
			BufferDesc *buf = &(BufferDescriptors[i - 1]);

			elog(NOTICE,
				 "Buffer Leak: [%03d] (freeNext=%ld, freePrev=%ld, relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
				 i - 1, buf->freeNext, buf->freePrev,
				 buf->blind.relname, buf->tag.blockNum,
				 buf->flags, buf->refcount, PrivateRefCount[i - 1]);
			result = 1;
		}
	}
	return result;
}
/* ------------------------------------------------
 *		FlushBufferPool
 *
 *		Flush all dirty blocks in buffer pool to disk
 *		at the checkpoint time
 * ------------------------------------------------
 */
void
FlushBufferPool(void)
{
	BufferSync();	/* write out every dirty shared buffer */
	smgrsync();		/* then force the storage manager to fsync */
}
/*
 * BufmgrCommit -- at commit time only the local buffer pool has to be
 * flushed; shared buffers are handled by checkpoints / WAL.
 */
void
BufmgrCommit(void)
{
	LocalBufferSync();

	/*
	 * All files created in current transaction will be fsync-ed
	 */
	smgrcommit();
}
/*
 * BufferGetBlockNumber
 *		Returns the block number associated with a buffer.
 *
 * Note:
 *		Assumes that the buffer is valid.  Negative buffer ids denote
 *		backend-local buffers and index LocalBufferDescriptors.
 */
BlockNumber
BufferGetBlockNumber(Buffer buffer)
{
	Assert(BufferIsValid(buffer));

	/* XXX should be a critical section */
	if (BufferIsLocal(buffer))
		return LocalBufferDescriptors[-buffer - 1].tag.blockNum;
	return BufferDescriptors[buffer - 1].tag.blockNum;
}
/*
 * BufferReplace
 *
 * Write out the buffer corresponding to 'bufHdr'
 *
 * BufMgrLock must be held at entry, and the buffer must be pinned.
 * The lock is dropped while doing the WAL flush and the write, and is
 * re-acquired before returning.  Returns TRUE on success, FALSE if the
 * storage manager reported a failure.
 */
static int
BufferReplace(BufferDesc *bufHdr)
{
	Relation	reln;
	XLogRecPtr	recptr;
	int			status;

	/* To check if block content changed while flushing. - vadim 01/17/97 */
	bufHdr->flags &= ~BM_JUST_DIRTIED;

	SpinRelease(BufMgrLock);

	/*
	 * No need to lock buffer context - no one should be able to
	 * end ReadBuffer
	 */
	recptr = BufferGetLSN(bufHdr);
	/* WAL-before-data: flush the log up to this page's LSN first */
	XLogFlush(recptr);

	reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);

	if (reln != (Relation) NULL)
	{
		status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
						   (char *) MAKE_PTR(bufHdr->data));
	}
	else
	{
		/* relcache miss: blind write directly by RelFileNode */
		status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode,
							  bufHdr->tag.blockNum,
							  (char *) MAKE_PTR(bufHdr->data),
							  false);	/* no fsync */
	}

	/* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
	if (reln != (Relation) NULL)
		RelationDecrementReferenceCount(reln);

	SpinAcquire(BufMgrLock);

	if (status == SM_FAIL)
		return FALSE;

	BufferFlushCount++;

	return TRUE;
}
/*
 * RelationGetNumberOfBlocks
 *		Returns the number of blocks in a relation.
 *
 * Note:
 *		XXX may fail for huge relations.
 *		XXX should be elsewhere.
 *		XXX maybe should be hidden
 */
BlockNumber
RelationGetNumberOfBlocks(Relation relation)
{
	/* transaction-local relations track their own block count */
	if (relation->rd_myxactonly)
		return relation->rd_nblocks;

	/* views have no storage at all */
	if (relation->rd_rel->relkind == RELKIND_VIEW)
		return 0;

	/* otherwise ask the storage manager */
	return smgrnblocks(DEFAULT_SMGR, relation);
}
/* ---------------------------------------------------------------------
 *		DropRelationBuffers
 *
 *		This function removes all the buffered pages for a relation
 *		from the buffer pool.  Dirty pages are simply dropped, without
 *		bothering to write them out first.	This is NOT rollback-able,
 *		and so should be used only with extreme caution!
 *
 *		We assume that the caller holds an exclusive lock on the relation,
 *		which should assure that no new buffers will be acquired for the rel
 *		meanwhile.
 *
 *		XXX currently it sequentially searches the buffer pool, should be
 *		changed to more clever ways of searching.
 * --------------------------------------------------------------------
 */
void
DropRelationBuffers(Relation rel)
{
	int			i;
	BufferDesc *bufHdr;

	if (rel->rd_myxactonly)
	{
		/* transaction-local rel: only local buffers can hold its pages */
		for (i = 0; i < NLocBuffer; i++)
		{
			bufHdr = &LocalBufferDescriptors[i];
			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
			{
				bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
				bufHdr->cntxDirty = false;
				LocalRefCount[i] = 0;
				/* invalidate the tag so the slot won't match again */
				bufHdr->tag.rnode.relNode = InvalidOid;
			}
		}
		return;
	}

	SpinAcquire(BufMgrLock);
	for (i = 1; i <= NBuffers; i++)
	{
		bufHdr = &BufferDescriptors[i - 1];
recheck:
		if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
		{

			/*
			 * If there is I/O in progress, better wait till it's done;
			 * don't want to delete the relation out from under someone
			 * who's just trying to flush the buffer!
			 */
			if (bufHdr->flags & BM_IO_IN_PROGRESS)
			{
				WaitIO(bufHdr, BufMgrLock);

				/*
				 * By now, the buffer very possibly belongs to some other
				 * rel, so check again before proceeding.
				 */
				goto recheck;
			}
			/* Now we can do what we came for */
			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
			bufHdr->cntxDirty = false;

			/*
			 * Release any refcount we may have.
			 *
			 * This is very probably dead code, and if it isn't then it's
			 * probably wrong.	I added the Assert to find out --- tgl
			 * 11/99.
			 */
			if (!(bufHdr->flags & BM_FREE))
			{
				/* Assert checks that buffer will actually get freed! */
				Assert(PrivateRefCount[i - 1] == 1 &&
					   bufHdr->refcount == 1);
				/* ReleaseBuffer expects we do not hold the lock at entry */
				SpinRelease(BufMgrLock);
				ReleaseBuffer(i);
				SpinAcquire(BufMgrLock);
			}

			/*
			 * And mark the buffer as no longer occupied by this rel.
			 */
			BufTableDelete(bufHdr);
		}
	}

	SpinRelease(BufMgrLock);
}
/* ---------------------------------------------------------------------
 *		DropRelFileNodeBuffers
 *
 *		This is the same as DropRelationBuffers, except that the target
 *		relation is specified by RelFileNode.
 *
 *		This is NOT rollback-able.	One legitimate use is to clear the
 *		buffer cache of buffers for a relation that is being deleted
 *		during transaction abort.
 * --------------------------------------------------------------------
 */
void
DropRelFileNodeBuffers(RelFileNode rnode)
{
	int			i;
	BufferDesc *bufHdr;

	/* We have to search both local and shared buffers... */

	for (i = 0; i < NLocBuffer; i++)
	{
		bufHdr = &LocalBufferDescriptors[i];
		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
		{
			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
			bufHdr->cntxDirty = false;
			LocalRefCount[i] = 0;
			/* invalidate the tag so the slot won't match again */
			bufHdr->tag.rnode.relNode = InvalidOid;
		}
	}

	SpinAcquire(BufMgrLock);
	for (i = 1; i <= NBuffers; i++)
	{
		bufHdr = &BufferDescriptors[i - 1];
recheck:
		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
		{

			/*
			 * If there is I/O in progress, better wait till it's done;
			 * don't want to delete the relation out from under someone
			 * who's just trying to flush the buffer!
			 */
			if (bufHdr->flags & BM_IO_IN_PROGRESS)
			{
				WaitIO(bufHdr, BufMgrLock);

				/*
				 * By now, the buffer very possibly belongs to some other
				 * rel, so check again before proceeding.
				 */
				goto recheck;
			}
			/* Now we can do what we came for */
			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
			bufHdr->cntxDirty = false;

			/*
			 * Release any refcount we may have.
			 *
			 * This is very probably dead code, and if it isn't then it's
			 * probably wrong.	I added the Assert to find out --- tgl
			 * 11/99.
			 */
			if (!(bufHdr->flags & BM_FREE))
			{
				/* Assert checks that buffer will actually get freed! */
				Assert(PrivateRefCount[i - 1] == 1 &&
					   bufHdr->refcount == 1);
				/* ReleaseBuffer expects we do not hold the lock at entry */
				SpinRelease(BufMgrLock);
				ReleaseBuffer(i);
				SpinAcquire(BufMgrLock);
			}

			/*
			 * And mark the buffer as no longer occupied by this rel.
			 */
			BufTableDelete(bufHdr);
		}
	}

	SpinRelease(BufMgrLock);
}
/* ---------------------------------------------------------------------
 *		DropBuffers
 *
 *		This function removes all the buffers in the buffer cache for a
 *		particular database.  Dirty pages are simply dropped, without
 *		bothering to write them out first.	This is used when we destroy a
 *		database, to avoid trying to flush data to disk when the directory
 *		tree no longer exists.	Implementation is pretty similar to
 *		DropRelationBuffers() which is for destroying just one relation.
 * --------------------------------------------------------------------
 */
void
DropBuffers(Oid dbid)
{
	int			i;
	BufferDesc *bufHdr;

	SpinAcquire(BufMgrLock);
	for (i = 1; i <= NBuffers; i++)
	{
		bufHdr = &BufferDescriptors[i - 1];
recheck:

		/*
		 * We know that currently database OID is tblNode but
		 * this probably will be changed in future and this
		 * func will be used to drop tablespace buffers.
		 */
		if (bufHdr->tag.rnode.tblNode == dbid)
		{

			/*
			 * If there is I/O in progress, better wait till it's done;
			 * don't want to delete the database out from under someone
			 * who's just trying to flush the buffer!
			 */
			if (bufHdr->flags & BM_IO_IN_PROGRESS)
			{
				WaitIO(bufHdr, BufMgrLock);

				/*
				 * By now, the buffer very possibly belongs to some other
				 * DB, so check again before proceeding.
				 */
				goto recheck;
			}
			/* Now we can do what we came for */
			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
			bufHdr->cntxDirty = false;

			/*
			 * The thing should be free, if caller has checked that no
			 * backends are running in that database.
			 */
			Assert(bufHdr->flags & BM_FREE);

			/*
			 * And mark the buffer as no longer occupied by this page.
			 */
			BufTableDelete(bufHdr);
		}
	}

	SpinRelease(BufMgrLock);
}
/* -----------------------------------------------------------------
 *		PrintBufferDescs
 *
 *		Dump every buffer descriptor, for debugging use only.
 *		Under the postmaster we log via elog while holding BufMgrLock;
 *		an interactive backend just prints to stdout.
 * -----------------------------------------------------------------
 */
void
PrintBufferDescs()
{
	BufferDesc *desc = BufferDescriptors;
	int			i;

	if (IsUnderPostmaster)
	{
		SpinAcquire(BufMgrLock);
		for (i = 0; i < NBuffers; i++, desc++)
		{
			elog(DEBUG,
				 "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
				 i, desc->freeNext, desc->freePrev,
				 desc->blind.relname, desc->tag.blockNum,
				 desc->flags, desc->refcount, PrivateRefCount[i]);
		}
		SpinRelease(BufMgrLock);
	}
	else
	{
		/* interactive backend */
		for (i = 0; i < NBuffers; i++, desc++)
		{
			printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n",
				   i, desc->blind.relname, desc->tag.blockNum,
				   desc->flags, desc->refcount, PrivateRefCount[i]);
		}
	}
}
/*
 * PrintPinnedBufs -- debugging aid: NOTICE every shared buffer this
 * backend currently holds a pin on.
 */
void
PrintPinnedBufs()
{
	BufferDesc *desc = BufferDescriptors;
	int			i;

	SpinAcquire(BufMgrLock);
	for (i = 0; i < NBuffers; i++, desc++)
	{
		if (PrivateRefCount[i] <= 0)
			continue;

		elog(NOTICE,
			 "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)\n",
			 i, desc->freeNext, desc->freePrev,
			 desc->blind.relname, desc->tag.blockNum,
			 desc->flags, desc->refcount, PrivateRefCount[i]);
	}
	SpinRelease(BufMgrLock);
}
/*
 * BufferPoolBlowaway
 *
 *		this routine is solely for the purpose of experiments -- sometimes
 *		you may want to blowaway whatever is left from the past in buffer
 *		pool and start measuring some performance with a clean empty buffer
 *		pool.
 */
#ifdef NOT_USED
void
BufferPoolBlowaway()
{
	int			i;

	/* Push all dirty pages out first so nothing is lost. */
	BufferSync();
	for (i = 1; i <= NBuffers; i++)
	{
		/*
		 * Drop any remaining pins on this buffer.  (The old code wrapped
		 * this loop in a redundant "if (BufferIsValid(i))" with the very
		 * same condition; the while-guard alone is equivalent.)
		 */
		while (BufferIsValid(i))
			ReleaseBuffer(i);
		/* Evict the now-unpinned buffer from the lookup table. */
		BufTableDelete(&BufferDescriptors[i - 1]);
	}
}
#endif
/* ---------------------------------------------------------------------
 *		FlushRelationBuffers
 *
 *		This function writes all dirty pages of a relation out to disk.
 *		Furthermore, pages that have blocknumber >= firstDelBlock are
 *		actually removed from the buffer pool.	An error code is returned
 *		if we fail to dump a dirty buffer or if we find one of
 *		the target pages is pinned into the cache.
 *
 *		This is called by DROP TABLE to clear buffers for the relation
 *		from the buffer pool.  Note that we must write dirty buffers,
 *		rather than just dropping the changes, because our transaction
 *		might abort later on; we want to roll back safely in that case.
 *
 *		This is also called by VACUUM before truncating the relation to the
 *		given number of blocks.  It might seem unnecessary for VACUUM to
 *		write dirty pages before firstDelBlock, since VACUUM should already
 *		have committed its changes.  However, it is possible for there still
 *		to be dirty pages: if some page had unwritten on-row tuple status
 *		updates from a prior transaction, and VACUUM had no additional
 *		changes to make to that page, then VACUUM won't have written it.
 *		This is harmless in most cases but will break pg_upgrade, which
 *		relies on VACUUM to ensure that *all* tuples have correct on-row
 *		status.  So, we check and flush all dirty pages of the rel
 *		regardless of block number.
 *
 *		In all cases, the caller should be holding AccessExclusiveLock on
 *		the target relation to ensure that no other backend is busy reading
 *		more blocks of the relation (or might do so before we commit).
 *
 *		Formerly, we considered it an error condition if we found dirty
 *		buffers here.	However, since BufferSync no longer forces out all
 *		dirty buffers at every xact commit, it's possible for dirty buffers
 *		to still be present in the cache due to failure of an earlier
 *		transaction.  So, must flush dirty buffers without complaint.
 *
 *		Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
 *
 *		XXX currently it sequentially searches the buffer pool, should be
 *		changed to more clever ways of searching.
 * --------------------------------------------------------------------
 */
int
FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{
	int			i;
	BufferDesc *bufHdr;
	XLogRecPtr	recptr;
	int			status;

	if (rel->rd_myxactonly)
	{
		/* transaction-local rel: only local buffers to consider */
		for (i = 0; i < NLocBuffer; i++)
		{
			bufHdr = &LocalBufferDescriptors[i];
			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
			{
				if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
				{
					status = smgrwrite(DEFAULT_SMGR, rel,
									   bufHdr->tag.blockNum,
									   (char *) MAKE_PTR(bufHdr->data));
					if (status == SM_FAIL)
					{
						elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
							 RelationGetRelationName(rel), firstDelBlock,
							 bufHdr->tag.blockNum);
						return (-1);
					}
					bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
					bufHdr->cntxDirty = false;
				}
				if (LocalRefCount[i] > 0)
				{
					/* still pinned: caller's truncate/drop is unsafe */
					elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
						 RelationGetRelationName(rel), firstDelBlock,
						 bufHdr->tag.blockNum, LocalRefCount[i]);
					return (-2);
				}
				if (bufHdr->tag.blockNum >= firstDelBlock)
				{
					/* invalidate the slot so the page is gone */
					bufHdr->tag.rnode.relNode = InvalidOid;
				}
			}
		}
		return 0;
	}

	SpinAcquire(BufMgrLock);
	for (i = 0; i < NBuffers; i++)
	{
		bufHdr = &BufferDescriptors[i];
		if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
		{
			if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
			{
				PinBuffer(bufHdr);
				if (bufHdr->flags & BM_IO_IN_PROGRESS)
					WaitIO(bufHdr, BufMgrLock);
				SpinRelease(BufMgrLock);

				/*
				 * Force XLOG flush for buffer' LSN
				 */
				recptr = BufferGetLSN(bufHdr);
				XLogFlush(recptr);

				/*
				 * Now it's safe to write buffer to disk
				 */

				SpinAcquire(BufMgrLock);
				if (bufHdr->flags & BM_IO_IN_PROGRESS)
					WaitIO(bufHdr, BufMgrLock);

				/* re-test: the flush may have been done by someone else */
				if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
				{
					bufHdr->flags &= ~BM_JUST_DIRTIED;
					StartBufferIO(bufHdr, false);		/* output IO start */

					SpinRelease(BufMgrLock);

					status = smgrwrite(DEFAULT_SMGR, rel,
									   bufHdr->tag.blockNum,
									   (char *) MAKE_PTR(bufHdr->data));

					if (status == SM_FAIL)		/* disk failure ?! */
						elog(STOP, "FlushRelationBuffers: cannot write %u for %s",
							 bufHdr->tag.blockNum, bufHdr->blind.relname);

					BufferFlushCount++;

					SpinAcquire(BufMgrLock);
					bufHdr->flags &= ~BM_IO_IN_PROGRESS;
					TerminateBufferIO(bufHdr);
					Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
					bufHdr->flags &= ~BM_DIRTY;

					/*
					 * Note that it's safe to change cntxDirty here because
					 * of we protect it from upper writers by
					 * AccessExclusiveLock and from other bufmgr routines
					 * by BM_IO_IN_PROGRESS
					 */
					bufHdr->cntxDirty = false;
				}
				UnpinBuffer(bufHdr);
			}
			if (!(bufHdr->flags & BM_FREE))
			{
				/* somebody else still holds a pin on this page */
				SpinRelease(BufMgrLock);
				elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
					 RelationGetRelationName(rel), firstDelBlock,
					 bufHdr->tag.blockNum,
					 PrivateRefCount[i], bufHdr->refcount);
				return -2;
			}
			if (bufHdr->tag.blockNum >= firstDelBlock)
			{
				/* drop the to-be-truncated page from the pool */
				BufTableDelete(bufHdr);
			}
		}
	}
	SpinRelease(BufMgrLock);
	return 0;
}
#undef ReleaseBuffer

/*
 * ReleaseBuffer -- remove the pin on a buffer without
 *		marking it dirty.
 */
int
ReleaseBuffer(Buffer buffer)
{
	BufferDesc *hdr;
	int			id;

	if (BufferIsLocal(buffer))
	{
		/* local buffers: just decrement the local pin count */
		Assert(LocalRefCount[-buffer - 1] > 0);
		LocalRefCount[-buffer - 1]--;
		return STATUS_OK;
	}

	if (BAD_BUFFER_ID(buffer))
		return STATUS_ERROR;

	id = buffer - 1;
	hdr = &BufferDescriptors[id];

	Assert(PrivateRefCount[id] > 0);
	PrivateRefCount[id]--;
	if (PrivateRefCount[id] == 0)
	{
		/* last private pin: drop our share of the global refcount */
		SpinAcquire(BufMgrLock);
		Assert(hdr->refcount > 0);
		hdr->refcount--;
		if (hdr->refcount == 0)
		{
			AddBufferToFreelist(hdr);
			hdr->flags |= BM_FREE;
		}
		SpinRelease(BufMgrLock);
	}

	return STATUS_OK;
}
#ifdef NOT_USED
/*
 * IncrBufferRefCount_Debug -- debugging wrapper around IncrBufferRefCount
 * that writes a pin-trace line to stderr when ShowPinTrace is enabled.
 */
void
IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
{
	IncrBufferRefCount(buffer);
	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
	{
		BufferDesc *desc = &BufferDescriptors[buffer - 1];

		fprintf(stderr,
				"PIN(Incr) %ld relname = %s, blockNum = %d, refcount = %ld, file: %s, line: %d\n",
				buffer, desc->blind.relname, desc->tag.blockNum,
				PrivateRefCount[buffer - 1], file, line);
	}
}
#endif
#ifdef NOT_USED
/*
 * ReleaseBuffer_Debug -- debugging wrapper around ReleaseBuffer that
 * writes an unpin-trace line to stderr when ShowPinTrace is enabled.
 */
void
ReleaseBuffer_Debug(char *file, int line, Buffer buffer)
{
	ReleaseBuffer(buffer);
	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
	{
		BufferDesc *desc = &BufferDescriptors[buffer - 1];

		fprintf(stderr,
				"UNPIN(Rel) %ld relname = %s, blockNum = %d, refcount = %ld, file: %s, line: %d\n",
				buffer, desc->blind.relname, desc->tag.blockNum,
				PrivateRefCount[buffer - 1], file, line);
	}
}
#endif
#ifdef NOT_USED
int
ReleaseAndReadBuffer_Debug
(
char
*
file
,
int
line
,
Buffer
buffer
,
Relation
relation
,
BlockNumber
blockNum
)
{
bool
bufferValid
;
Buffer
b
;
bufferValid
=
BufferIsValid
(
buffer
);
b
=
ReleaseAndReadBuffer
(
buffer
,
relation
,
blockNum
);
if
(
ShowPinTrace
&&
bufferValid
&&
BufferIsLocal
(
buffer
)
&&
is_userbuffer
(
buffer
))
{
BufferDesc
*
buf
=
&
BufferDescriptors
[
buffer
-
1
];
fprintf
(
stderr
,
"UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d
\n
"
,
buffer
,
buf
->
blind
.
relname
,
buf
->
tag
.
blockNum
,
PrivateRefCount
[
buffer
-
1
],
file
,
line
);
}
if
(
ShowPinTrace
&&
BufferIsLocal
(
buffer
)
&&
is_userbuffer
(
buffer
))
{
BufferDesc
*
buf
=
&
BufferDescriptors
[
b
-
1
];
fprintf
(
stderr
,
"PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
refcount = %ld, file: %s, line: %d
\n
"
,
b
,
buf
->
blind
.
relname
,
buf
->
tag
.
blockNum
,
PrivateRefCount
[
b
-
1
],
file
,
line
);
}
return
b
;
}
#endif
#ifdef BMTRACE

/*
 *	trace allocations and deallocations in a circular buffer in
 *	shared memory.	check the buffer before doing the allocation,
 *	and die if there's anything fishy.
 */

_bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType)
{
	long		start,
				cur;
	bmtrace    *tb;

	/* scan backwards from the slot before the current one */
	start = *CurTraceBuf;

	if (start > 0)
		cur = start - 1;
	else
		cur = BMT_LIMIT - 1;

	for (;;)
	{
		tb = &TraceBuf[cur];
		if (tb->bmt_op != BMT_NOTUSED)
		{
			if (tb->bmt_buf == bufNo)
			{
				/*
				 * A previous entry for this buffer is consistent if it
				 * was a dealloc, or it was for the very same page.
				 */
				if ((tb->bmt_op == BMT_DEALLOC) ||
					(tb->bmt_dbid == dbId && tb->bmt_relid == relId &&
					 tb->bmt_blkno == blkNo))
					goto okay;

				/* die holding the buffer lock */
				_bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
			}
		}

		/* wrapped all the way around without trouble: done */
		if (cur == start)
			goto okay;

		if (cur == 0)
			cur = BMT_LIMIT - 1;
		else
			cur--;
	}

okay:
	/* record the new event in the current slot and advance the cursor */
	tb = &TraceBuf[start];
	tb->bmt_pid = MyProcPid;
	tb->bmt_buf = bufNo;
	tb->bmt_dbid = dbId;
	tb->bmt_relid = relId;
	tb->bmt_blkno = blkNo;
	tb->bmt_op = allocType;

	*CurTraceBuf = (start + 1) % BMT_LIMIT;
}
/*
 * _bm_die -- report an inconsistent buffer alloc/dealloc trace and abort.
 *
 * Dumps the whole trace ring plus the offending operation to
 * /tmp/death_notice, then kills this process with SIGILL so that the
 * problem is caught while the buffer lock is still held.
 */
_bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
		int allocType, long start, long cur)
{
	FILE	   *fp;
	bmtrace    *tb;
	int			i;

	tb = &TraceBuf[cur];

	if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL)
		elog(FATAL, "buffer alloc trace error and can't open log file");

	fprintf(fp, "buffer alloc trace detected the following error:\n\n");
	fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n",
			bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
			(tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));

	fprintf(fp, "the trace buffer contains:\n");

	/* walk the ring forward from 'start', flagging the bad entry */
	i = start;
	for (;;)
	{
		tb = &TraceBuf[i];
		if (tb->bmt_op != BMT_NOTUSED)
		{
			fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%u,%d> ",
					i, (i == cur ? " ---> " : "\t"),
					tb->bmt_pid, tb->bmt_buf,
					tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);

			switch (tb->bmt_op)
			{
				case BMT_ALLOCFND:
					fprintf(fp, "allocate (found)\n");
					break;

				case BMT_ALLOCNOTFND:
					fprintf(fp, "allocate (not found)\n");
					break;

				case BMT_DEALLOC:
					fprintf(fp, "deallocate\n");
					break;

				default:
					fprintf(fp, "unknown op type %d\n", tb->bmt_op);
					break;
			}
		}

		i = (i + 1) % BMT_LIMIT;
		if (i == start)
			break;
	}

	fprintf(fp, "\noperation causing error:\n");
	fprintf(fp, "\tpid %d buf %d for <%d,%u,%d> ",
			getpid(), bufNo, dbId, relId, blkNo);

	switch (allocType)
	{
		case BMT_ALLOCFND:
			fprintf(fp, "allocate (found)\n");
			break;
		case BMT_ALLOCNOTFND:
			fprintf(fp, "allocate (not found)\n");
			break;
		case BMT_DEALLOC:
			fprintf(fp, "deallocate\n");
			break;
		default:
			fprintf(fp, "unknown op type %d\n", allocType);
			break;
	}

	FreeFile(fp);

	/* die noisily, while still holding the buffer lock */
	kill(getpid(), SIGILL);
}

#endif	 /* BMTRACE */
/*
 * SetBufferCommitInfoNeedsSave
 *
 *	Mark a buffer dirty when we have updated tuple commit-status bits in it.
 *
 *	This is similar to WriteNoReleaseBuffer, except that we do not set
 *	SharedBufferChanged or BufferDirtiedByMe, because we have not made a
 *	critical change that has to be flushed to disk before xact commit --- the
 *	status-bit update could be redone by someone else just as easily.  The
 *	buffer will be marked dirty, but it will not be written to disk until
 *	there is another reason to write it.
 *
 *	This routine might get called many times on the same page, if we are making
 *	the first scan after commit of an xact that added/deleted many tuples.
 *	So, be as quick as we can if the buffer is already dirty.
 */
void
SetBufferCommitInfoNeedsSave(Buffer buffer)
{
	BufferDesc *hdr;

	if (BufferIsLocal(buffer) || BAD_BUFFER_ID(buffer))
		return;

	hdr = &BufferDescriptors[buffer - 1];

	/* fast path: nothing to do if both dirty bits are already set */
	if ((hdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
		(BM_DIRTY | BM_JUST_DIRTIED))
	{
		SpinAcquire(BufMgrLock);
		Assert(hdr->refcount > 0);
		hdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
		SpinRelease(BufMgrLock);
	}
}
/*
 * UnlockBuffers -- release every per-buffer context lock recorded for
 * this backend in BufferLocks[].  Used during error cleanup so that an
 * elog() abort cannot leave r/w/ri locks dangling.
 */
void
UnlockBuffers()
{
	BufferDesc *buf;
	int			i;

	for (i = 0; i < NBuffers; i++)
	{
		/* skip buffers we hold no context locks on */
		if (BufferLocks[i] == 0)
			continue;

		Assert(BufferIsValid(i + 1));
		buf = &(BufferDescriptors[i]);

		S_LOCK(&(buf->cntx_lock));

		if (BufferLocks[i] & BL_R_LOCK)
		{
			Assert(buf->r_locks > 0);
			(buf->r_locks)--;
		}
		if (BufferLocks[i] & BL_RI_LOCK)
		{

			/*
			 * Someone else could remove our RI lock when acquiring W
			 * lock. This is possible if we came here from elog(ERROR)
			 * from IpcSemaphore{Lock|Unlock}(WaitCLSemId). And so we
			 * don't do Assert(buf->ri_lock) here.
			 */
			buf->ri_lock = false;
		}
		if (BufferLocks[i] & BL_W_LOCK)
		{
			Assert(buf->w_lock);
			buf->w_lock = false;
		}
		S_UNLOCK(&(buf->cntx_lock));
		/* forget our record of the lock(s) */
		BufferLocks[i] = 0;
	}
}
/*
 * LockBuffer -- acquire or release a per-buffer context lock.
 *
 * mode is BUFFER_LOCK_UNLOCK, BUFFER_LOCK_SHARE, or
 * BUFFER_LOCK_EXCLUSIVE.  Local buffers need no locking and return
 * immediately.  Share lockers wait while a writer or an
 * intent-to-write (ri_lock) flag is set; exclusive lockers wait for
 * all readers and writers, setting ri_lock to stop new readers when
 * contended.  This backend's lock state is mirrored in BufferLocks[]
 * so UnlockBuffers() can clean up after an error.
 */
void
LockBuffer(Buffer buffer, int mode)
{
	BufferDesc *buf;
	bits8	   *buflock;

	Assert(BufferIsValid(buffer));
	if (BufferIsLocal(buffer))
		return;

	buf = &(BufferDescriptors[buffer - 1]);
	buflock = &(BufferLocks[buffer - 1]);

	S_LOCK(&(buf->cntx_lock));

	if (mode == BUFFER_LOCK_UNLOCK)
	{
		if (*buflock & BL_R_LOCK)
		{
			Assert(buf->r_locks > 0);
			Assert(!(buf->w_lock));
			Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK)));
			(buf->r_locks)--;
			*buflock &= ~BL_R_LOCK;
		}
		else if (*buflock & BL_W_LOCK)
		{
			Assert(buf->w_lock);
			Assert(buf->r_locks == 0);
			Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK)));
			buf->w_lock = false;
			*buflock &= ~BL_W_LOCK;
		}
		else
			/*
			 * NOTE(review): %lu looks mismatched with Buffer here (the
			 * debug routines print Buffer with %ld) — verify the Buffer
			 * typedef before touching the format.
			 */
			elog(ERROR, "UNLockBuffer: buffer %lu is not locked", buffer);
	}
	else if (mode == BUFFER_LOCK_SHARE)
	{
		unsigned	i = 0;

		Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
		/* wait out writers and intent-to-write flags */
		while (buf->ri_lock || buf->w_lock)
		{
			S_UNLOCK(&(buf->cntx_lock));
			s_lock_sleep(i++);
			S_LOCK(&(buf->cntx_lock));
		}
		(buf->r_locks)++;
		*buflock |= BL_R_LOCK;
	}
	else if (mode == BUFFER_LOCK_EXCLUSIVE)
	{
		unsigned	i = 0;

		Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK)));
		while (buf->r_locks > 0 || buf->w_lock)
		{
			if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK))
			{

				/*
				 * Our RI lock might be removed by concurrent W lock
				 * acquiring (see what we do with RI locks below when our
				 * own W acquiring succeeded) and so we set RI lock again
				 * if we already did this.
				 */
				*buflock |= BL_RI_LOCK;
				buf->ri_lock = true;
			}
			S_UNLOCK(&(buf->cntx_lock));
			s_lock_sleep(i++);
			S_LOCK(&(buf->cntx_lock));
		}
		buf->w_lock = true;
		*buflock |= BL_W_LOCK;

		/* exclusive lock implies the page may be modified */
		buf->cntxDirty = true;

		if (*buflock & BL_RI_LOCK)
		{

			/*
			 * It's possible to remove RI locks acquired by another W
			 * lockers here, but they'll take care about it.
			 */
			buf->ri_lock = false;
			*buflock &= ~BL_RI_LOCK;
		}
	}
	else
		elog(ERROR, "LockBuffer: unknown lock mode %d", mode);

	S_UNLOCK(&(buf->cntx_lock));
}
/*
 *	Functions for IO error handling
 *
 *	Note : We assume that nested buffer IO never occur.
 *	i.e at most one io_in_progress spinlock is held
 *	per proc.
 */
/* the single buffer (if any) this backend currently has IO running on */
static BufferDesc *InProgressBuf = (BufferDesc *) NULL;
/* direction of that IO: set by StartBufferIO/ContinueBufferIO */
static bool IsForInput;
/*
 * Function:StartBufferIO
 *	(Assumptions)
 *	My process is executing no IO
 *	BufMgrLock is held
 *	BM_IO_IN_PROGRESS mask is not set for the buffer
 *	The buffer is Pinned
 *
 * Marks the buffer as having IO in progress, takes its
 * io_in_progress_lock, and records it in InProgressBuf so that
 * AbortBufferIO can clean up after an error.
 */
static void
StartBufferIO(BufferDesc *buf, bool forInput)
{
	Assert(!InProgressBuf);
	Assert(!(buf->flags & BM_IO_IN_PROGRESS));
	buf->flags |= BM_IO_IN_PROGRESS;

	/*
	 * There used to be
	 *
	 * Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
	 *
	 * here, but that's wrong because of the way WaitIO works: someone else
	 * waiting for the I/O to complete will succeed in grabbing the lock
	 * for a few instructions, and if we context-swap back to here the
	 * Assert could fail.  Tiny window for failure, but I've seen it
	 * happen -- tgl
	 */
	S_LOCK(&(buf->io_in_progress_lock));

	InProgressBuf = buf;
	IsForInput = forInput;
}
/*
 * Function:TerminateBufferIO
 *	(Assumptions)
 *	My process is executing IO for the buffer
 *	BufMgrLock is held
 *	The buffer is Pinned
 *
 * Releases the buffer's io_in_progress_lock and clears the
 * in-progress bookkeeping for this backend.
 */
static void
TerminateBufferIO(BufferDesc *buf)
{
	Assert(buf == InProgressBuf);
	S_UNLOCK(&(buf->io_in_progress_lock));
	InProgressBuf = (BufferDesc *) NULL;
}
/*
 * Function:ContinueBufferIO
 *	(Assumptions)
 *	My process is executing IO for the buffer
 *	BufMgrLock is held
 *	The buffer is Pinned
 *
 * Keeps the IO in progress but records a (possibly new) direction.
 */
static void
ContinueBufferIO(BufferDesc *buf, bool forInput)
{
	Assert(buf->flags & BM_IO_IN_PROGRESS);
	Assert(buf == InProgressBuf);
	IsForInput = forInput;
}
#ifdef NOT_USED
/*
 * InitBufferIO
 *	Reset the per-backend I/O-in-progress state. (Currently unused.)
 */
void
InitBufferIO(void)
{
	InProgressBuf = NULL;
}
#endif
/*
 * This function is called from ProcReleaseSpins().
 * BufMgrLock isn't held when this function is called.
 * BM_IO_ERROR is always set. If BM_IO_ERROR was already
 * set in case of output,this routine would kill all
 * backends and reset postmaster.
 *
 * Cleans up after an error aborted an I/O this backend had in progress:
 * for a failed read the buffer must be clean; for a failed write the
 * buffer is re-marked dirty so a later writer retries it.  In both cases
 * BM_IO_ERROR is set, BM_IO_IN_PROGRESS is cleared, and the I/O spinlock
 * is released via TerminateBufferIO.
 */
void
AbortBufferIO(void)
{
	BufferDesc *buf = InProgressBuf;

	if (buf)
	{
		Assert(buf->flags & BM_IO_IN_PROGRESS);
		SpinAcquire(BufMgrLock);
		if (IsForInput)
			/* a failed read cannot have dirtied the buffer */
			Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
		else
		{
			Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
			/* BM_IO_ERROR already set => this write has failed before */
			if (buf->flags & BM_IO_ERROR)
			{
				elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s",
					 buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
			}
			/* keep the buffer dirty so the write will be retried */
			buf->flags |= BM_DIRTY;
		}
		buf->flags |= BM_IO_ERROR;
		buf->flags &= ~BM_IO_IN_PROGRESS;
		TerminateBufferIO(buf);
		SpinRelease(BufMgrLock);
	}
}
/*
 * Cleanup buffer or mark it for cleanup. Buffer may be cleaned
 * up if it's pinned only once.
 *
 * NOTE: buffer must be excl locked.
 *
 * If other pins exist (ours or other backends'), the CleanupFunc is
 * stashed in the buffer header to be run later; otherwise it is invoked
 * immediately.  Either way this releases our lock and one of our pins,
 * and marks the buffer dirty.
 */
void
MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer))
{
	BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];

	Assert(PrivateRefCount[buffer - 1] > 0);

	if (PrivateRefCount[buffer - 1] > 1)
	{
		/* we hold more than one pin: defer cleanup, just record it */
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		PrivateRefCount[buffer - 1]--;
		SpinAcquire(BufMgrLock);
		Assert(bufHdr->refcount > 0);
		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
		bufHdr->CleanupFunc = CleanupFunc;
		SpinRelease(BufMgrLock);
		return;
	}

	SpinAcquire(BufMgrLock);
	Assert(bufHdr->refcount > 0);
	if (bufHdr->refcount == 1)
	{
		/*
		 * We are the only pinner: run the cleanup now, while we still
		 * hold the exclusive context lock.  NULL the local pointer so
		 * no pending cleanup is stored in the header below.
		 */
		SpinRelease(BufMgrLock);
		CleanupFunc(buffer);
		CleanupFunc = NULL;
	}
	else
		SpinRelease(BufMgrLock);

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	PrivateRefCount[buffer - 1]--;

	SpinAcquire(BufMgrLock);
	Assert(bufHdr->refcount > 0);
	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
	/* either the deferred cleanup, or NULL if we already ran it */
	bufHdr->CleanupFunc = CleanupFunc;
	bufHdr->refcount--;
	if (bufHdr->refcount == 0)
	{
		/* no pins remain anywhere: buffer can go back on the freelist */
		AddBufferToFreelist(bufHdr);
		bufHdr->flags |= BM_FREE;
	}
	SpinRelease(BufMgrLock);
	return;
}
src/backend/storage/buffer/xlog_localbuf.c
deleted
100644 → 0
View file @
b16516b8
/*-------------------------------------------------------------------------
*
* xlog_localbuf.c
* local buffer manager. Fast buffer manager for temporary tables
* or special cases when the operation is not visible to other backends.
*
* When a relation is being created, the descriptor will have rd_islocal
* set to indicate that the local buffer manager should be used. During
* the same transaction the relation is being created, any inserts or
* selects from the newly created relation will use the local buffer
* pool. rd_islocal is reset at the end of a transaction (commit/abort).
* This is useful for queries like SELECT INTO TABLE and create index.
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994-5, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_localbuf.c,v 1.2 2000/11/30 01:39:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/types.h>
#include <sys/file.h>
#include <math.h>
#include <signal.h>
#include "executor/execdebug.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
/* write-count statistic, shared with the main buffer manager */
extern long int LocalBufferFlushCount;

int			NLocBuffer = 64;	/* number of local buffer slots */

/* array of NLocBuffer descriptors (allocated in InitLocalBuffer) */
BufferDesc *LocalBufferDescriptors = NULL;

/* per-slot data block pointer, for the BufferGetBlock() macro */
Block	   *LocalBufferBlockPointers = NULL;

/* per-slot pin counts; local buffers are private, so this backend only */
long	   *LocalRefCount = NULL;

/* next slot to consider in round-robin allocation */
static int	nextFreeLocalBuf = 0;

/*#define LBDEBUG*/
/*
 * LocalBufferAlloc -
 *	  allocate a local buffer. We do round robin allocation for now.
 *
 * Returns the descriptor for (reln, blockNum), pinned.  *foundPtr is set
 * TRUE if the block was already cached, FALSE if a slot was (re)claimed
 * for it.  blockNum == P_NEW extends the relation by one block.
 * elog(ERROR)s if every slot is pinned.
 */
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
{
	int			i;
	BufferDesc *bufHdr = (BufferDesc *) NULL;

	if (blockNum == P_NEW)
	{
		/* allocate the next block of the relation */
		blockNum = reln->rd_nblocks;
		reln->rd_nblocks++;
	}

	/* a low tech search for now -- not optimized for scans */
	for (i = 0; i < NLocBuffer; i++)
	{
		if (LocalBufferDescriptors[i].tag.rnode.relNode == reln->rd_node.relNode &&
			LocalBufferDescriptors[i].tag.blockNum == blockNum)
		{
#ifdef LBDEBUG
			fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
					RelationGetRelid(reln), blockNum, -i - 1);
#endif
			/* cache hit: just add a pin */
			LocalRefCount[i]++;
			*foundPtr = TRUE;
			return &LocalBufferDescriptors[i];
		}
	}

#ifdef LBDEBUG
	fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
			RelationGetRelid(reln), blockNum, -nextFreeLocalBuf - 1);
#endif

	/* need to get a new buffer (round robin for now) */
	for (i = 0; i < NLocBuffer; i++)
	{
		/* start from nextFreeLocalBuf, wrapping around the array */
		int			b = (nextFreeLocalBuf + i) % NLocBuffer;

		if (LocalRefCount[b] == 0)
		{
			bufHdr = &LocalBufferDescriptors[b];
			LocalRefCount[b]++;
			nextFreeLocalBuf = (b + 1) % NLocBuffer;
			break;
		}
	}
	if (bufHdr == NULL)
		elog(ERROR, "no empty local buffer.");

	/*
	 * this buffer is not referenced but it might still be dirty (the last
	 * transaction to touch it doesn't need its contents but has not
	 * flushed it). if that's the case, write it out before reusing it!
	 */
	if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
	{
		Relation	bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);

		Assert(bufrel != NULL);

		/* flush this page */
		smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
				  (char *) MAKE_PTR(bufHdr->data));
		LocalBufferFlushCount++;

		/*
		 * drop relcache refcount incremented by
		 * RelationIdCacheGetRelation
		 */
		RelationDecrementReferenceCount(bufrel);
	}

	/*
	 * it's all ours now.
	 *
	 * We need not in tblNode currently but will in future I think,
	 * when we'll give up rel->rd_fd to fmgr cache.
	 */
	bufHdr->tag.rnode = reln->rd_node;
	bufHdr->tag.blockNum = blockNum;
	bufHdr->flags &= ~BM_DIRTY;
	bufHdr->cntxDirty = false;

	/*
	 * lazy memory allocation: allocate space on first use of a buffer.
	 */
	if (bufHdr->data == (SHMEM_OFFSET) 0)
	{
		char	   *data = (char *) malloc(BLCKSZ);

		if (data == NULL)
			elog(FATAL, "Out of memory in LocalBufferAlloc");

		/*
		 * This is a bit of a hack: bufHdr->data needs to be a shmem offset
		 * for consistency with the shared-buffer case, so make it one
		 * even though it's not really a valid shmem offset.
		 */
		bufHdr->data = MAKE_OFFSET(data);

		/*
		 * Set pointer for use by BufferGetBlock() macro.
		 * (local buf_ids run -2, -3, ... so this maps to index 0, 1, ...)
		 */
		LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data;
	}

	*foundPtr = FALSE;
	return bufHdr;
}
/*
 * WriteLocalBuffer -
 *	  writes out a local buffer
 *
 * Marks the buffer dirty; the actual write happens later (e.g. in
 * LocalBufferSync or when the slot is reused).  If 'release', one of
 * this backend's pins on the buffer is dropped as well.
 */
int
WriteLocalBuffer(Buffer buffer, bool release)
{
	int			b;

	Assert(BufferIsLocal(buffer));

#ifdef LBDEBUG
	fprintf(stderr, "LB WRITE %d\n", buffer);
#endif

	/* local buffer ids are negative; map to descriptor array index */
	b = -(buffer + 1);

	LocalBufferDescriptors[b].flags |= BM_DIRTY;

	if (release)
	{
		Assert(LocalRefCount[b] > 0);
		LocalRefCount[b] -= 1;
	}

	return true;
}
/*
 * InitLocalBuffer -
 *	  init the local buffer cache. Since most queries (esp. multi-user ones)
 *	  don't involve local buffers, we delay allocating actual memory for the
 *	  buffer until we need it.
 */
void
InitLocalBuffer(void)
{
	int			i;

	/*
	 * these aren't going away. I'm not gonna use palloc.
	 */
	LocalBufferDescriptors =
		(BufferDesc *) calloc(NLocBuffer, sizeof(BufferDesc));
	LocalBufferBlockPointers =
		(Block *) calloc(NLocBuffer, sizeof(Block));
	LocalRefCount =
		(long *) calloc(NLocBuffer, sizeof(long));

	nextFreeLocalBuf = 0;

	/*
	 * Assign buffer ids: negative to indicate local buffer. This is
	 * tricky: shared buffers start with 0, so we have to start with -2.
	 * (BufferDescriptorGetBuffer adds 1 to buf_id, making our first
	 * buffer id -1.)
	 */
	for (i = 0; i < NLocBuffer; i++)
		LocalBufferDescriptors[i].buf_id = -i - 2;
}
/*
 * LocalBufferSync
 *
 * Flush all dirty buffers in the local buffer cache at commit time.
 * Since the buffer cache is only used for keeping relations visible
 * during a transaction, we will not need these buffers again.
 *
 * Note that we have to *flush* local buffers because of them are not
 * visible to checkpoint makers. But we can skip XLOG flush check.
 */
void
LocalBufferSync(void)
{
	int			i;

	for (i = 0; i < NLocBuffer; i++)
	{
		BufferDesc *buf = &LocalBufferDescriptors[i];
		Relation	bufrel;

		if (buf->flags & BM_DIRTY || buf->cntxDirty)
		{
#ifdef LBDEBUG
			fprintf(stderr, "LB SYNC %d\n", -i - 1);
#endif
			bufrel = RelationNodeCacheGetRelation(buf->tag.rnode);

			Assert(bufrel != NULL);

			/* write the page and tell smgr it must reach disk */
			smgrwrite(DEFAULT_SMGR, bufrel, buf->tag.blockNum,
					  (char *) MAKE_PTR(buf->data));
			smgrmarkdirty(DEFAULT_SMGR, bufrel, buf->tag.blockNum);
			LocalBufferFlushCount++;

			/* drop relcache refcount from RelationIdCacheGetRelation */
			RelationDecrementReferenceCount(bufrel);

			buf->flags &= ~BM_DIRTY;
			buf->cntxDirty = false;
		}
	}

	/* transaction is over: forget all pins and restart round-robin */
	MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
	nextFreeLocalBuf = 0;
}
/*
 * ResetLocalBufferPool
 *	Invalidate the entire local buffer cache: clear every slot's tag and
 *	dirty state, drop all pins, and restart round-robin allocation.
 *	Buffer memory itself is kept for reuse.
 */
void
ResetLocalBufferPool(void)
{
	int			i;

	for (i = 0; i < NLocBuffer; i++)
	{
		BufferDesc *hdr = &LocalBufferDescriptors[i];

		hdr->tag.rnode.relNode = InvalidOid;
		hdr->flags &= ~BM_DIRTY;
		hdr->cntxDirty = false;
	}

	MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer);
	nextFreeLocalBuf = 0;
}
src/backend/storage/file/fd.c
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.6
7 2000/11/23 01:08:57
vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.6
8 2000/11/30 08:46:23
vadim Exp $
*
* NOTES:
*
...
...
@@ -192,20 +192,6 @@ static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode);
static
char
*
filepath
(
char
*
filename
);
static
long
pg_nofile
(
void
);
#ifndef XLOG
/*
* pg_fsync --- same as fsync except does nothing if -F switch was given
*/
int
pg_fsync
(
int
fd
)
{
if
(
enableFsync
)
return
fsync
(
fd
);
else
return
0
;
}
#endif
/*
* BasicOpenFile --- same as open(2) except can free other FDs if needed
*
...
...
@@ -665,7 +651,6 @@ fileNameOpenFile(FileName fileName,
vfdP
->
fileFlags
=
fileFlags
&
~
(
O_TRUNC
|
O_EXCL
);
vfdP
->
fileMode
=
fileMode
;
vfdP
->
seekPos
=
0
;
#ifdef XLOG
/*
* Have to fsync file on commit. Alternative way - log
* file creation and fsync log before actual file creation.
...
...
@@ -673,7 +658,6 @@ fileNameOpenFile(FileName fileName,
if
(
fileFlags
&
O_CREAT
)
vfdP
->
fdstate
=
FD_DIRTY
;
else
#endif
vfdP
->
fdstate
=
0x0
;
return
file
;
...
...
@@ -832,13 +816,7 @@ FileWrite(File file, char *buffer, int amount)
FileAccess
(
file
);
returnCode
=
write
(
VfdCache
[
file
].
fd
,
buffer
,
amount
);
if
(
returnCode
>
0
)
{
VfdCache
[
file
].
seekPos
+=
returnCode
;
#ifndef XLOG
/* mark the file as needing fsync */
VfdCache
[
file
].
fdstate
|=
FD_DIRTY
;
#endif
}
else
VfdCache
[
file
].
seekPos
=
FileUnknownPos
;
...
...
src/backend/storage/smgr/md.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.
79 2000/11/10 03:53:45
vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.
80 2000/11/30 08:46:24
vadim Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -569,14 +569,6 @@ mdblindwrt(RelFileNode rnode,
elog
(
DEBUG
,
"mdblindwrt: write() failed: %m"
);
status
=
SM_FAIL
;
}
#ifndef XLOG
else
if
(
dofsync
&&
pg_fsync
(
fd
)
<
0
)
{
elog
(
DEBUG
,
"mdblindwrt: fsync() failed: %m"
);
status
=
SM_FAIL
;
}
#endif
if
(
close
(
fd
)
<
0
)
{
...
...
@@ -840,7 +832,6 @@ mdabort()
return
SM_SUCCESS
;
}
#ifdef XLOG
/*
* mdsync() -- Sync storage.
*
...
...
@@ -854,7 +845,6 @@ mdsync()
sync
();
return
SM_SUCCESS
;
}
#endif
/*
* _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
...
...
src/backend/storage/smgr/smgr.c
View file @
81c8c244
...
...
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.4
5 2000/11/21 21:16:01 petere
Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.4
6 2000/11/30 08:46:24 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -47,9 +47,7 @@ typedef struct f_smgr
int
(
*
smgr_truncate
)
(
Relation
reln
,
int
nblocks
);
int
(
*
smgr_commit
)
(
void
);
/* may be NULL */
int
(
*
smgr_abort
)
(
void
);
/* may be NULL */
#ifdef XLOG
int
(
*
smgr_sync
)
(
void
);
#endif
}
f_smgr
;
/*
...
...
@@ -62,11 +60,7 @@ static f_smgr smgrsw[] = {
/* magnetic disk */
{
mdinit
,
NULL
,
mdcreate
,
mdunlink
,
mdextend
,
mdopen
,
mdclose
,
mdread
,
mdwrite
,
mdflush
,
mdblindwrt
,
mdmarkdirty
,
mdblindmarkdirty
,
#ifdef XLOG
mdnblocks
,
mdtruncate
,
mdcommit
,
mdabort
,
mdsync
#else
mdnblocks
,
mdtruncate
,
mdcommit
,
mdabort
#endif
},
#ifdef STABLE_MEMORY_STORAGE
...
...
@@ -545,7 +539,6 @@ smgrabort()
return
SM_SUCCESS
;
}
#ifdef XLOG
int
smgrsync
()
{
...
...
@@ -564,7 +557,6 @@ smgrsync()
return
SM_SUCCESS
;
}
#endif
#ifdef NOT_USED
bool
...
...
@@ -578,8 +570,6 @@ smgriswo(int16 smgrno)
#endif
#ifdef XLOG
void
smgr_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
)
{
...
...
@@ -594,4 +584,3 @@ void
smgr_desc
(
char
*
buf
,
uint8
xl_info
,
char
*
rec
)
{
}
#endif
src/backend/utils/cache/relcache.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.11
6 2000/11/10 00:33:10 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.11
7 2000/11/30 08:46:24 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -2027,8 +2027,7 @@ RelationCacheInitializePhase2(void)
}
}
#ifdef XLOG
/* used by XLogInitCache */
/* used by XLogInitCache */
void
CreateDummyCaches
(
void
);
void
DestroyDummyCaches
(
void
);
...
...
@@ -2082,8 +2081,6 @@ DestroyDummyCaches(void)
MemoryContextSwitchTo
(
oldcxt
);
}
#endif
/* XLOG */
static
void
AttrDefaultFetch
(
Relation
relation
)
{
...
...
src/backend/utils/init/postinit.c
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.7
3 2000/11/28 23:27:57 tgl
Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.7
4 2000/11/30 08:46:25 vadim
Exp $
*
*
*-------------------------------------------------------------------------
...
...
@@ -177,11 +177,6 @@ InitPostgres(const char *dbname, const char *username)
{
bool
bootstrap
=
IsBootstrapProcessingMode
();
#ifndef XLOG
if
(
!
TransactionFlushEnabled
())
on_shmem_exit
(
FlushBufferPool
,
0
);
#endif
SetDatabaseName
(
dbname
);
/* ----------------
* initialize the database id used for system caches and lock tables
...
...
@@ -190,11 +185,7 @@ InitPostgres(const char *dbname, const char *username)
if
(
bootstrap
)
{
MyDatabaseId
=
TemplateDbOid
;
#ifdef OLD_FILE_NAMING
SetDatabasePath
(
ExpandDatabasePath
(
dbname
));
#else
SetDatabasePath
(
GetDatabasePath
(
MyDatabaseId
));
#endif
LockDisable
(
true
);
}
else
...
...
@@ -228,13 +219,7 @@ InitPostgres(const char *dbname, const char *username)
"Database
\"
%s
\"
does not exist in the system catalog."
,
dbname
);
#ifdef OLD_FILE_NAMING
fullpath
=
ExpandDatabasePath
(
datpath
);
if
(
!
fullpath
)
elog
(
FATAL
,
"Database path could not be resolved."
);
#else
fullpath
=
GetDatabasePath
(
MyDatabaseId
);
#endif
/* Verify the database path */
...
...
src/include/access/htup.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: htup.h,v 1.4
0 2000/11/30 01:47:32
vadim Exp $
* $Id: htup.h,v 1.4
1 2000/11/30 08:46:25
vadim Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -63,11 +63,6 @@ typedef struct HeapTupleHeaderData
typedef
HeapTupleHeaderData
*
HeapTupleHeader
;
#ifdef XLOG
/* XLOG stuff */
/*
* XLOG allows to store some information in high 4 bits of log
* record xl_info field
...
...
@@ -127,11 +122,6 @@ typedef struct xl_heap_update
#define SizeOfHeapUpdate (offsetof(xl_heap_update, mask) + sizeof(uint8))
/* end of XLOG stuff */
#endif
/* XLOG */
/*
* MaxTupleSize is the maximum allowed size of a tuple, including header and
* MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff
...
...
@@ -147,7 +137,6 @@ typedef struct xl_heap_update
#define MaxTupleSize \
(BLCKSZ - MAXALIGN(sizeof(PageHeaderData) + MaxSpecialSpace))
/*
* MaxAttrSize is a somewhat arbitrary upper limit on the declared size of
* data fields of char(n) and similar types. It need not have anything
...
...
src/include/access/nbtree.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.4
7 2000/11/21 21:16:05 petere
Exp $
* $Id: nbtree.h,v 1.4
8 2000/11/30 08:46:25 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -42,10 +42,7 @@ typedef struct BTPageOpaqueData
#define BTP_ROOT (1 << 1)
/* It's the root page (has no parent) */
#define BTP_FREE (1 << 2)
/* not currently used... */
#define BTP_META (1 << 3)
/* Set in the meta-page only */
#ifdef XLOG
#define BTP_REORDER (1 << 4)
/* items must be re-ordered */
#endif
}
BTPageOpaqueData
;
typedef
BTPageOpaqueData
*
BTPageOpaque
;
...
...
@@ -209,11 +206,6 @@ typedef BTStackData *BTStack;
#define P_FIRSTKEY ((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
#ifdef XLOG
/* XLOG stuff */
/*
* XLOG allows to store some information in high 4 bits of log
* record xl_info field
...
...
@@ -257,7 +249,6 @@ typedef struct xl_btree_insert
#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)
/*
* This is what we need to know about insert with split -
* 22 + {4 + 8 | left hi-key} + [btitem] + right sibling btitems. Note that
...
...
@@ -292,10 +283,6 @@ typedef struct xl_btree_newroot
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, rootblk) + sizeof(BlockIdData))
/* end of XLOG stuff */
#endif
/* XLOG */
/*
* Operator strategy numbers -- ordering of these is <, <=, =, >=, >
*/
...
...
src/include/access/transam.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: transam.h,v 1.2
7 2000/11/21 21:16:05 petere
Exp $
* $Id: transam.h,v 1.2
8 2000/11/30 08:46:25 vadim
Exp $
*
* NOTES
* Transaction System Version 101 now support proper oid
...
...
@@ -67,11 +67,7 @@ typedef unsigned char XidStatus;/* (2 bits) */
* transaction page definitions
* ----------------
*/
#ifdef XLOG
#define TP_DataSize (BLCKSZ - sizeof(XLogRecPtr))
#else
#define TP_DataSize BLCKSZ
#endif
#define TP_NumXidStatusPerBlock (TP_DataSize * 4)
/* ----------------
...
...
@@ -88,10 +84,8 @@ typedef unsigned char XidStatus;/* (2 bits) */
*/
typedef
struct
LogRelationContentsData
{
#ifdef XLOG
XLogRecPtr
LSN
;
/* temp hack: LSN is member of any block */
/* so should be described in bufmgr */
#endif
int
TransSystemVersion
;
}
LogRelationContentsData
;
...
...
@@ -115,9 +109,7 @@ typedef LogRelationContentsData *LogRelationContents;
*/
typedef
struct
VariableRelationContentsData
{
#ifdef XLOG
XLogRecPtr
LSN
;
#endif
int
TransSystemVersion
;
TransactionId
nextXidData
;
TransactionId
lastXidData
;
/* unused */
...
...
@@ -127,21 +119,14 @@ typedef struct VariableRelationContentsData
typedef
VariableRelationContentsData
*
VariableRelationContents
;
/*
* VariableCache is placed in shmem and used by backends to
* get next available XID & OID without access to
* variable relation. Actually, I would like to have two
* different on-disk storages for next XID and OID...
* But hoping that someday we will use per database OID
* generator I leaved this as is. - vadim 07/21/98
* VariableCache is placed in shmem and used by
* backends to get next available XID & OID.
*/
typedef
struct
VariableCacheData
{
#ifndef XLOG
uint32
xid_count
;
#endif
TransactionId
nextXid
;
Oid
nextOid
;
uint32
oidCount
;
TransactionId
nextXid
;
Oid
nextOid
;
uint32
oidCount
;
}
VariableCacheData
;
typedef
VariableCacheData
*
VariableCache
;
...
...
src/include/access/xact.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: xact.h,v 1.3
0 2000/11/21 21:16:05 petere
Exp $
* $Id: xact.h,v 1.3
1 2000/11/30 08:46:25 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -78,9 +78,6 @@ typedef TransactionStateData *TransactionState;
#define StoreInvalidTransactionId(dest) \
(*((TransactionId*) (dest)) = NullTransactionId)
#ifdef XLOG
/*
* XLOG allows to store some information in high 4 bits of log
* record xl_info field
...
...
@@ -106,8 +103,6 @@ typedef struct xl_xact_abort
#define SizeOfXactAbort ((offsetof(xl_xact_abort, xtime) + sizeof(time_t)))
#endif
/* ----------------
* extern definitions
* ----------------
...
...
src/include/access/xlogutils.h
View file @
81c8c244
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
#include "access/rmgr.h"
#include "utils/rel.h"
...
...
src/include/config.h.in
View file @
81c8c244
...
...
@@ -8,7 +8,7 @@
* or in config.h afterwards. Of course, if you edit config.h, then your
* changes will be overwritten the next time you run configure.
*
* $Id: config.h.in,v 1.15
0 2000/11/29 20:59:54 tgl
Exp $
* $Id: config.h.in,v 1.15
1 2000/11/30 08:46:25 vadim
Exp $
*/
#ifndef CONFIG_H
...
...
@@ -234,9 +234,6 @@
# define HAVE_UNIX_SOCKETS 1
#endif
/* Enable WAL. Don't disable this, it was only used during development. */
#define XLOG 1
/*
*------------------------------------------------------------------------
* These hand-configurable symbols are for enabling debugging code,
...
...
src/include/storage/buf_internals.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: buf_internals.h,v 1.4
5 2000/11/30 01:39:08 tgl
Exp $
* $Id: buf_internals.h,v 1.4
6 2000/11/30 08:46:26 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -18,7 +18,6 @@
#include "storage/lmgr.h"
#include "storage/s_lock.h"
/* Buf Mgr constants */
/* in bufmgr.c */
extern
int
Data_Descriptors
;
...
...
@@ -28,7 +27,6 @@ extern int Num_Descriptors;
extern
int
ShowPinTrace
;
/*
* Flags for buffer descriptors
*/
...
...
@@ -105,9 +103,7 @@ typedef struct sbufdesc
bool
ri_lock
;
/* read-intent lock */
bool
w_lock
;
/* context exclusively locked */
#ifdef XLOG
bool
cntxDirty
;
/* new way to mark block as dirty */
#endif
BufferBlindId
blind
;
/* was used to support blind write */
...
...
src/include/storage/bufmgr.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: bufmgr.h,v 1.4
5 2000/11/30 01:39:08 tgl
Exp $
* $Id: bufmgr.h,v 1.4
6 2000/11/30 08:46:26 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -192,10 +192,8 @@ extern void AbortBufferIO(void);
extern
bool
BufferIsUpdatable
(
Buffer
buffer
);
extern
void
MarkBufferForCleanup
(
Buffer
buffer
,
void
(
*
CleanupFunc
)(
Buffer
));
#ifdef XLOG
extern
void
BufmgrCommit
(
void
);
extern
void
BufferSync
(
void
);
#endif
extern
void
InitLocalBuffer
(
void
);
...
...
src/include/storage/bufpage.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: bufpage.h,v 1.3
6 2000/11/20 21:12
:26 vadim Exp $
* $Id: bufpage.h,v 1.3
7 2000/11/30 08:46
:26 vadim Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -118,13 +118,13 @@ typedef OpaqueData *Opaque;
*/
typedef
struct
PageHeaderData
{
#ifdef XLOG
/* XXX LSN is member of *any* block, not */
/* XXX LSN is member of *any* block, not */
/* only page-organized - 'll change later */
XLogRecPtr
pd_lsn
;
/* LSN: next byte after last byte of xlog */
/* record for last change of this page */
StartUpID
pd_sui
;
/* SUI of last changes (currently it's */
/* used by heap AM only) */
#endif
LocationIndex
pd_lower
;
/* offset to start of free space */
LocationIndex
pd_upper
;
/* offset to end of free space */
LocationIndex
pd_special
;
/* offset to start of special space */
...
...
@@ -298,8 +298,6 @@ typedef enum
(sizeof(PageHeaderData) - sizeof(ItemIdData)))) \
/ ((int) sizeof(ItemIdData)))
#ifdef XLOG
#define PageGetLSN(page) \
(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
...
...
@@ -310,8 +308,6 @@ typedef enum
#define PageSetSUI(page, sui) \
(((PageHeader) (page))->pd_sui = (StartUpID) (sui))
#endif
/* ----------------------------------------------------------------
* extern declarations
* ----------------------------------------------------------------
...
...
src/include/storage/fd.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: fd.h,v 1.2
3 2000/11/10 03:53:45
vadim Exp $
* $Id: fd.h,v 1.2
4 2000/11/30 08:46:26
vadim Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -71,10 +71,6 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
extern
void
closeAllVfds
(
void
);
extern
void
AtEOXact_Files
(
void
);
#ifdef XLOG
#define pg_fsync(fd) fsync(fd)
#else
extern
int
pg_fsync
(
int
fd
);
#endif
#endif
/* FD_H */
src/include/storage/smgr.h
View file @
81c8c244
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: smgr.h,v 1.2
5 2000/11/21 21:16:05 petere
Exp $
* $Id: smgr.h,v 1.2
6 2000/11/30 08:46:26 vadim
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -47,14 +47,11 @@ extern int smgrtruncate(int16 which, Relation reln, int nblocks);
extern
int
smgrDoPendingDeletes
(
bool
isCommit
);
extern
int
smgrcommit
(
void
);
extern
int
smgrabort
(
void
);
#ifdef XLOG
extern
int
smgrsync
(
void
);
extern
void
smgr_redo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
);
extern
void
smgr_undo
(
XLogRecPtr
lsn
,
XLogRecord
*
record
);
extern
void
smgr_desc
(
char
*
buf
,
uint8
xl_info
,
char
*
rec
);
#endif
/* internals: move me elsewhere -- ay 7/94 */
...
...
@@ -77,10 +74,7 @@ extern int mdnblocks(Relation reln);
extern
int
mdtruncate
(
Relation
reln
,
int
nblocks
);
extern
int
mdcommit
(
void
);
extern
int
mdabort
(
void
);
#ifdef XLOG
extern
int
mdsync
(
void
);
#endif
/* mm.c */
extern
SPINLOCK
MMCacheLock
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment