Commit d526575f authored by Tom Lane

Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per
ring-ful.  The exact ring size, and the threshold for seqscans to switch into
the ring usage pattern, remain under debate; but the infrastructure seems
done.  The key bit of infrastructure is a new optional BufferAccessStrategy
object that can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.
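
For illustration, a minimal sketch of the API change, using only names that
appear in this patch (locking and error handling omitted):

    /* Before: a global hint, toggled around the whole operation */
    StrategyHintVacuum(true);
    buf = ReadBuffer(rel, blkno);
    /* ... */
    StrategyHintVacuum(false);

    /* After: an explicit strategy object, passed to each read */
    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);

    buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
    /* ... */
    FreeAccessStrategy(bstrategy);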

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.
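
In sketch form (simplified; the local-buffer version of this appears in the
localbuf.c hunk below, and BM_MAX_USAGE_COUNT is the existing cap):

    /* advance usage_count when the pin count first goes 0 -> 1,
     * rather than when the last pin is released ...              */
    if (refcount == 0 && buf->usage_count < BM_MAX_USAGE_COUNT)
        buf->usage_count++;
    refcount++;

    /* ... and have the clock sweep leave pinned buffers' counts
     * alone, so a buffer still starts aging only once released.  */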

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
parent 0a6f2ee8
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
......@@ -547,8 +547,9 @@ loop_top:
vacuum_delay_point();
buf = _hash_getbuf(rel, blkno, HASH_WRITE,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
info->strategy);
page = BufferGetPage(buf);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == cur_bucket);
......@@ -596,7 +597,8 @@ loop_top:
/* If we deleted anything, try to compact free space */
if (bucket_dirty)
_hash_squeezebucket(rel, cur_bucket, bucket_blkno);
_hash_squeezebucket(rel, cur_bucket, bucket_blkno,
info->strategy);
/* Release bucket lock */
_hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
......@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
* Remove this overflow page from its bucket's chain, and mark the page as
* free. On entry, ovflbuf is write-locked; it is released before exiting.
*
* Since this function is invoked in VACUUM, we provide an access strategy
* parameter that controls fetches of the bucket pages.
*
* Returns the block number of the page that followed the given page
* in the bucket, or InvalidBlockNumber if no following page.
*
......@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
* on the bucket, too.
*/
BlockNumber
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
_hash_freeovflpage(Relation rel, Buffer ovflbuf,
BufferAccessStrategy bstrategy)
{
HashMetaPage metap;
Buffer metabuf;
......@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
*/
if (BlockNumberIsValid(prevblkno))
{
Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
Buffer prevbuf = _hash_getbuf_with_strategy(rel,
prevblkno,
HASH_WRITE,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
bstrategy);
Page prevpage = BufferGetPage(prevbuf);
HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
......@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
}
if (BlockNumberIsValid(nextblkno))
{
Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
LH_OVERFLOW_PAGE);
Buffer nextbuf = _hash_getbuf_with_strategy(rel,
nextblkno,
HASH_WRITE,
LH_OVERFLOW_PAGE,
bstrategy);
Page nextpage = BufferGetPage(nextbuf);
HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
......@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
_hash_wrtbuf(rel, nextbuf);
}
/* Note: bstrategy is intentionally not used for metapage and bitmap */
/* Read the metapage so we can determine which bitmap page to use */
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
metap = (HashMetaPage) BufferGetPage(metabuf);
......@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
*
* Caller must hold exclusive lock on the target bucket. This allows
* us to safely lock multiple pages in the bucket.
*
* Since this function is invoked in VACUUM, we provide an access strategy
* parameter that controls fetches of the bucket pages.
*/
void
_hash_squeezebucket(Relation rel,
Bucket bucket,
BlockNumber bucket_blkno)
BlockNumber bucket_blkno,
BufferAccessStrategy bstrategy)
{
Buffer wbuf;
Buffer rbuf = 0;
......@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
* start squeezing into the base bucket page.
*/
wblkno = bucket_blkno;
wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
wbuf = _hash_getbuf_with_strategy(rel,
wblkno,
HASH_WRITE,
LH_BUCKET_PAGE,
bstrategy);
wpage = BufferGetPage(wbuf);
wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
......@@ -595,8 +615,10 @@ _hash_squeezebucket(Relation rel,
}
/*
* find the last page in the bucket chain by starting at the base bucket
* page and working forward.
* Find the last page in the bucket chain by starting at the base bucket
* page and working forward. Note: we assume that a hash bucket chain is
* usually smaller than the buffer ring being used by VACUUM, else using
* the access strategy here would be counterproductive.
*/
ropaque = wopaque;
do
......@@ -604,7 +626,11 @@ _hash_squeezebucket(Relation rel,
rblkno = ropaque->hasho_nextblkno;
if (ropaque != wopaque)
_hash_relbuf(rel, rbuf);
rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
rbuf = _hash_getbuf_with_strategy(rel,
rblkno,
HASH_WRITE,
LH_OVERFLOW_PAGE,
bstrategy);
rpage = BufferGetPage(rbuf);
ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
Assert(ropaque->hasho_bucket == bucket);
......@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
return;
}
wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
wbuf = _hash_getbuf_with_strategy(rel,
wblkno,
HASH_WRITE,
LH_OVERFLOW_PAGE,
bstrategy);
wpage = BufferGetPage(wbuf);
wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
Assert(wopaque->hasho_bucket == bucket);
......@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
/* yes, so release wbuf lock first */
_hash_wrtbuf(rel, wbuf);
/* free this overflow page (releases rbuf) */
_hash_freeovflpage(rel, rbuf);
_hash_freeovflpage(rel, rbuf, bstrategy);
/* done */
return;
}
/* free this overflow page, then get the previous one */
_hash_freeovflpage(rel, rbuf);
_hash_freeovflpage(rel, rbuf, bstrategy);
rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
rbuf = _hash_getbuf_with_strategy(rel,
rblkno,
HASH_WRITE,
LH_OVERFLOW_PAGE,
bstrategy);
rpage = BufferGetPage(rbuf);
ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
Assert(ropaque->hasho_bucket == bucket);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
......@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
return buf;
}
/*
* _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
*
* This is identical to _hash_getbuf() but also allows a buffer access
* strategy to be specified. We use this for VACUUM operations.
*/
Buffer
_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
int access, int flags,
BufferAccessStrategy bstrategy)
{
Buffer buf;
if (blkno == P_NEW)
elog(ERROR, "hash AM does not use P_NEW");
buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
if (access != HASH_NOLOCK)
LockBuffer(buf, access);
/* ref count and lock type are correct */
_hash_checkpage(rel, buf, flags);
return buf;
}
/*
* _hash_relbuf() -- release a locked buffer.
*
......@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
_hash_wrtbuf(rel, obuf);
_hash_wrtbuf(rel, nbuf);
_hash_squeezebucket(rel, obucket, start_oblkno);
_hash_squeezebucket(rel, obucket, start_oblkno, NULL);
}
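(Passing NULL here selects the default access strategy: _hash_squeezebucket is
being called from a bucket split in a foreground backend, not from VACUUM, so
no buffer ring is wanted.)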
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
*
*
* INTERFACE ROUTINES
......@@ -83,6 +83,24 @@ initscan(HeapScanDesc scan, ScanKey key)
*/
scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
/*
* If the table is large relative to NBuffers, use a bulk-read access
* strategy, else use the default random-access strategy. During a
* rescan, don't make a new strategy object if we don't have to.
*/
if (scan->rs_nblocks > NBuffers / 4 &&
!scan->rs_rd->rd_istemp)
{
if (scan->rs_strategy == NULL)
scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
}
else
{
if (scan->rs_strategy != NULL)
FreeAccessStrategy(scan->rs_strategy);
scan->rs_strategy = NULL;
}
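(As a concrete example of the threshold, assuming the default 8KB block size:
with shared_buffers = 1GB, NBuffers is 131072, so only tables larger than
131072/4 = 32768 blocks, i.e. 256MB, adopt the bulk-read strategy.)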
scan->rs_inited = false;
scan->rs_ctup.t_data = NULL;
ItemPointerSetInvalid(&scan->rs_ctup.t_self);
......@@ -123,9 +141,17 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
Assert(page < scan->rs_nblocks);
scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
scan->rs_rd,
page);
/* release previous scan buffer, if any */
if (BufferIsValid(scan->rs_cbuf))
{
ReleaseBuffer(scan->rs_cbuf);
scan->rs_cbuf = InvalidBuffer;
}
/* read page using selected strategy */
scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
page,
scan->rs_strategy);
scan->rs_cblock = page;
if (!scan->rs_pageatatime)
......@@ -938,6 +964,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
scan->rs_rd = relation;
scan->rs_snapshot = snapshot;
scan->rs_nkeys = nkeys;
scan->rs_strategy = NULL; /* set in initscan */
/*
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
......@@ -1007,6 +1034,9 @@ heap_endscan(HeapScanDesc scan)
if (scan->rs_key)
pfree(scan->rs_key);
if (scan->rs_strategy != NULL)
FreeAccessStrategy(scan->rs_strategy);
pfree(scan);
}
......
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.154 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.155 2007/05/30 20:11:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -786,9 +786,10 @@ restart:
/*
* We can't use _bt_getbuf() here because it always applies
* _bt_checkpage(), which will barf on an all-zero page. We want to
* recycle all-zero pages, not fail.
* recycle all-zero pages, not fail. Also, we want to use a nondefault
* buffer access strategy.
*/
buf = ReadBuffer(rel, blkno);
buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
LockBuffer(buf, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.270 2007/05/30 20:11:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1799,6 +1799,36 @@ XLogFlush(XLogRecPtr record)
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
}
/*
* Test whether XLOG data has been flushed up to (at least) the given position.
*
* Returns true if a flush is still needed. (It may be that someone else
* is already in process of flushing that far, however.)
*/
bool
XLogNeedsFlush(XLogRecPtr record)
{
/* Quick exit if already known flushed */
if (XLByteLE(record, LogwrtResult.Flush))
return false;
/* read LogwrtResult and update local state */
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
LogwrtResult = xlogctl->LogwrtResult;
SpinLockRelease(&xlogctl->info_lck);
}
/* check again */
if (XLByteLE(record, LogwrtResult.Flush))
return false;
return true;
}
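XLogNeedsFlush is what lets the buffer-ring code decide cheaply whether
reusing a dirty buffer would force a WAL flush.  A simplified sketch of that
decision at buffer-replacement time (the real caller lives in the collapsed
bufmgr.c/freelist.c hunks; StrategyRejectBuffer is declared in buf_internals.h
below, and BufferGetLSN stands for reading the page's LSN):

    /* Dirty buffer whose reuse would force a WAL flush: a BAS_BULKREAD
     * ring rejects it and falls back to the normal clock sweep, while
     * BAS_VACUUM keeps it and pays for the flush. */
    if (strategy != NULL &&
        XLogNeedsFlush(BufferGetLSN(buf)) &&
        StrategyRejectBuffer(strategy, buf))
        continue;               /* buffer dropped from ring; pick another */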
/*
* Create a new XLOG file segment, or open a pre-existing one.
*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.283 2007/05/16 17:28:20 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.284 2007/05/30 20:11:55 tgl Exp $
*
*
* INTERFACE ROUTINES
......@@ -1658,6 +1658,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
ivinfo.vacuum_full = false;
ivinfo.message_level = DEBUG2;
ivinfo.num_heap_tuples = -1;
ivinfo.strategy = NULL;
state.tuplesort = tuplesort_begin_datum(TIDOID,
TIDLessOperator, false,
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -63,10 +63,13 @@ typedef struct AnlIndexData
/* Default statistics target (GUC parameter) */
int default_statistics_target = 10;
/* A few variables that don't seem worth passing around as parameters */
static int elevel = -1;
static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
int samplesize);
......@@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
* analyze_rel() -- analyze one relation
*/
void
analyze_rel(Oid relid, VacuumStmt *vacstmt)
analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy)
{
Relation onerel;
int attr_cnt,
......@@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
else
elevel = DEBUG2;
vac_strategy = bstrategy;
/*
* Use the current context for storing analysis info. vacuum.c ensures
* that this context will be cleared when I return, thus releasing the
......@@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
* looking at it. We don't maintain a lock on the page, so tuples
* could get added to it, but we ignore such tuples.
*/
targbuffer = ReadBuffer(onerel, targblock);
targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
targpage = BufferGetPage(targbuffer);
maxoffset = PageGetMaxOffsetNumber(targpage);
......
......@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
*----------------------------------------------------------------------
*/
/* A few variables that don't seem worth passing around as parameters */
static MemoryContext vac_context = NULL;
static int elevel = -1;
......@@ -198,6 +199,8 @@ static int elevel = -1;
static TransactionId OldestXmin;
static TransactionId FreezeLimit;
static BufferAccessStrategy vac_strategy;
/* non-export function prototypes */
static List *get_rel_oids(List *relids, const RangeVar *vacrel,
......@@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
* relation OIDs to be processed, and vacstmt->relation is ignored.
* (The non-NIL case is currently only used by autovacuum.)
*
* bstrategy is normally given as NULL, but in autovacuum it can be passed
* in to use the same buffer strategy object across multiple vacuum() calls.
*
* isTopLevel should be passed down from ProcessUtility.
*
* It is the caller's responsibility that both vacstmt and relids
* It is the caller's responsibility that vacstmt, relids, and bstrategy
* (if given) be allocated in a memory context that won't disappear
* at transaction commit.
*/
void
vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
vacuum(VacuumStmt *vacstmt, List *relids,
BufferAccessStrategy bstrategy, bool isTopLevel)
{
const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
volatile MemoryContext anl_context = NULL;
......@@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/*
* If caller didn't give us a buffer strategy object, make one in the
* cross-transaction memory context.
*/
if (bstrategy == NULL)
{
MemoryContext old_context = MemoryContextSwitchTo(vac_context);
bstrategy = GetAccessStrategy(BAS_VACUUM);
MemoryContextSwitchTo(old_context);
}
vac_strategy = bstrategy;
/* Remember whether we are processing everything in the DB */
all_rels = (relids == NIL && vacstmt->relation == NULL);
......@@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
else
old_context = MemoryContextSwitchTo(anl_context);
/*
* Tell the buffer replacement strategy that vacuum is causing
* the IO
*/
StrategyHintVacuum(true);
analyze_rel(relid, vacstmt);
StrategyHintVacuum(false);
analyze_rel(relid, vacstmt, vac_strategy);
if (use_own_xacts)
CommitTransactionCommand();
......@@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
{
/* Make sure cost accounting is turned off after error */
VacuumCostActive = false;
/* And reset buffer replacement strategy, too */
StrategyHintVacuum(false);
PG_RE_THROW();
}
PG_END_TRY();
......@@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
*/
toast_relid = onerel->rd_rel->reltoastrelid;
/*
* Tell the cache replacement strategy that vacuum is causing all
* following IO
*/
StrategyHintVacuum(true);
/*
* Do the actual work --- either FULL or "lazy" vacuum
*/
if (vacstmt->full)
full_vacuum_rel(onerel, vacstmt);
else
lazy_vacuum_rel(onerel, vacstmt);
StrategyHintVacuum(false);
lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
/* all done with this class, but hold lock until commit */
relation_close(onerel, NoLock);
......@@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
vacuum_delay_point();
buf = ReadBuffer(onerel, blkno);
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
page = BufferGetPage(buf);
/*
......@@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
/*
* Process this page of relation.
*/
buf = ReadBuffer(onerel, blkno);
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
page = BufferGetPage(buf);
vacpage->offsets_free = 0;
......@@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
nextTid = tp.t_data->t_ctid;
priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
/* assume block# is OK (see heap_fetch comments) */
nextBuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&nextTid));
nextBuf = ReadBufferWithStrategy(onerel,
ItemPointerGetBlockNumber(&nextTid),
vac_strategy);
nextPage = BufferGetPage(nextBuf);
/* If bogus or unused slot, assume tp is end of chain */
nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
......@@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
break; /* out of check-all-items loop */
}
tp.t_self = vtlp->this_tid;
Pbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&(tp.t_self)));
Pbuf = ReadBufferWithStrategy(onerel,
ItemPointerGetBlockNumber(&(tp.t_self)),
vac_strategy);
Ppage = BufferGetPage(Pbuf);
Pitemid = PageGetItemId(Ppage,
ItemPointerGetOffsetNumber(&(tp.t_self)));
......@@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
/* Get page to move from */
tuple.t_self = vtmove[ti].tid;
Cbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&(tuple.t_self)));
Cbuf = ReadBufferWithStrategy(onerel,
ItemPointerGetBlockNumber(&(tuple.t_self)),
vac_strategy);
/* Get page to move to */
dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
dst_buffer = ReadBufferWithStrategy(onerel,
destvacpage->blkno,
vac_strategy);
LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
if (dst_buffer != Cbuf)
......@@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (i == num_fraged_pages)
break; /* can't move item anywhere */
dst_vacpage = fraged_pages->pagedesc[i];
dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
dst_buffer = ReadBufferWithStrategy(onerel,
dst_vacpage->blkno,
vac_strategy);
LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
dst_page = BufferGetPage(dst_buffer);
/* if this page was not used before - clean it */
......@@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
Page page;
/* this page was not used as a move target, so must clean it */
buf = ReadBuffer(onerel, (*curpage)->blkno);
buf = ReadBufferWithStrategy(onerel,
(*curpage)->blkno,
vac_strategy);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(buf);
if (!PageIsEmpty(page))
......@@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
int uncnt;
int num_tuples = 0;
buf = ReadBuffer(onerel, vacpage->blkno);
buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(buf);
maxoff = PageGetMaxOffsetNumber(page);
......@@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
break; /* no need to scan any further */
if ((*curpage)->offsets_used == 0)
continue; /* this page was never used as a move dest */
buf = ReadBuffer(rel, (*curpage)->blkno);
buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(buf);
max_offset = PageGetMaxOffsetNumber(page);
......@@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
if ((*vacpage)->offsets_free > 0)
{
buf = ReadBuffer(onerel, (*vacpage)->blkno);
buf = ReadBufferWithStrategy(onerel,
(*vacpage)->blkno,
vac_strategy);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
vacuum_page(onerel, buf, *vacpage);
UnlockReleaseBuffer(buf);
......@@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
ivinfo.vacuum_full = true;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples;
ivinfo.strategy = vac_strategy;
stats = index_vacuum_cleanup(&ivinfo, NULL);
......@@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
ivinfo.vacuum_full = true;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
ivinfo.strategy = vac_strategy;
/* Do bulk deletion */
stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
......
......@@ -36,7 +36,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -96,11 +96,14 @@ typedef struct LVRelStats
} LVRelStats;
/* A few variables that don't seem worth passing around as parameters */
static int elevel = -1;
static TransactionId OldestXmin;
static TransactionId FreezeLimit;
static BufferAccessStrategy vac_strategy;
/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
......@@ -138,7 +141,8 @@ static int vac_cmp_page_spaces(const void *left, const void *right);
* and locked the relation.
*/
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy)
{
LVRelStats *vacrelstats;
Relation *Irel;
......@@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
else
elevel = DEBUG2;
vac_strategy = bstrategy;
vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit);
......@@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacrelstats->num_index_scans++;
}
buf = ReadBuffer(onerel, blkno);
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
/* Initially, we only need shared access to the buffer */
LockBuffer(buf, BUFFER_LOCK_SHARE);
......@@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
vacuum_delay_point();
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
buf = ReadBuffer(onerel, tblk);
buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
LockBufferForCleanup(buf);
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
/* Now that we've compacted the page, record its available space */
......@@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
ivinfo.message_level = elevel;
/* We don't yet know rel_tuples, so pass -1 */
ivinfo.num_heap_tuples = -1;
ivinfo.strategy = vac_strategy;
/* Do bulk deletion */
*stats = index_bulk_delete(&ivinfo, *stats,
......@@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
ivinfo.vacuum_full = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
ivinfo.strategy = vac_strategy;
stats = index_vacuum_cleanup(&ivinfo, stats);
......@@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
blkno--;
buf = ReadBuffer(onerel, blkno);
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
/* In this phase we only need shared access to the buffer */
LockBuffer(buf, BUFFER_LOCK_SHARE);
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.46 2007/05/07 20:41:24 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.47 2007/05/30 20:11:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -218,7 +218,8 @@ static void relation_needs_vacanalyze(Oid relid, Form_pg_autovacuum avForm,
bool *doanalyze);
static void autovacuum_do_vac_analyze(Oid relid, bool dovacuum,
bool doanalyze, int freeze_min_age);
bool doanalyze, int freeze_min_age,
BufferAccessStrategy bstrategy);
static HeapTuple get_pg_autovacuum_tuple_relid(Relation avRel, Oid relid);
static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
PgStat_StatDBEntry *shared,
......@@ -1673,6 +1674,7 @@ do_autovacuum(void)
ListCell *cell;
PgStat_StatDBEntry *shared;
PgStat_StatDBEntry *dbentry;
BufferAccessStrategy bstrategy;
/*
* may be NULL if we couldn't find an entry (only happens if we
......@@ -1812,6 +1814,13 @@ do_autovacuum(void)
list_free(toast_oids);
toast_oids = NIL;
/*
* Create a buffer access strategy object for VACUUM to use. We want
* to use the same one across all the vacuum operations we perform,
* since the point is for VACUUM not to blow out the shared cache.
*/
bstrategy = GetAccessStrategy(BAS_VACUUM);
/*
* Perform operations on collected tables.
*/
......@@ -1910,7 +1919,8 @@ next_worker:
autovacuum_do_vac_analyze(tab->at_relid,
tab->at_dovacuum,
tab->at_doanalyze,
tab->at_freeze_min_age);
tab->at_freeze_min_age,
bstrategy);
/* be tidy */
pfree(tab);
}
......@@ -2328,7 +2338,8 @@ relation_needs_vacanalyze(Oid relid,
*/
static void
autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
int freeze_min_age)
int freeze_min_age,
BufferAccessStrategy bstrategy)
{
VacuumStmt vacstmt;
MemoryContext old_cxt;
......@@ -2354,7 +2365,7 @@ autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
/* Let pgstat know what we're doing */
autovac_report_activity(&vacstmt, relid);
vacuum(&vacstmt, list_make1_oid(relid), true);
vacuum(&vacstmt, list_make1_oid(relid), bstrategy, true);
MemoryContextSwitchTo(old_cxt);
}
......
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.12 2007/05/30 20:11:58 tgl Exp $
Notes about shared buffer access rules
--------------------------------------
......@@ -152,20 +152,21 @@
we could use per-backend LWLocks instead (a buffer header would then contain
a field to show which backend is doing its I/O).
Buffer replacement strategy
---------------------------
Normal buffer replacement strategy
----------------------------------
There is a "free list" of buffers that are prime candidates for replacement.
In particular, buffers that are completely free (contain no valid page) are
always in this list. We may also throw buffers into this list if we
consider their pages unlikely to be needed soon. The list is singly-linked
using fields in the buffer headers; we maintain head and tail pointers in
global variables. (Note: although the list links are in the buffer headers,
they are considered to be protected by the BufFreelistLock, not the
buffer-header spinlocks.) To choose a victim buffer to recycle when there
are no free buffers available, we use a simple clock-sweep algorithm, which
avoids the need to take system-wide locks during common operations. It
works like this:
always in this list. We could also throw buffers into this list if we
consider their pages unlikely to be needed soon; however, the current
algorithm never does that. The list is singly-linked using fields in the
buffer headers; we maintain head and tail pointers in global variables.
(Note: although the list links are in the buffer headers, they are
considered to be protected by the BufFreelistLock, not the buffer-header
spinlocks.) To choose a victim buffer to recycle when there are no free
buffers available, we use a simple clock-sweep algorithm, which avoids the
need to take system-wide locks during common operations. It works like
this:
Each buffer header contains a usage counter, which is incremented (up to a
small limit value) whenever the buffer is unpinned. (This requires only the
......@@ -199,22 +200,40 @@
before we can recycle it; if someone else pins the buffer meanwhile we will
have to give up and try another buffer. This however is not a concern
of the basic select-a-victim-buffer algorithm.)
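A compact pseudocode sketch of the sweep (the real implementation is
StrategyGetBuffer in freelist.c, collapsed in this view; "victim" here is
illustrative for its sweep pointer):

    for (;;)
    {
        buf = &BufferDescriptors[victim];
        victim = (victim + 1) % NBuffers;

        if (buf->refcount == 0)
        {
            if (buf->usage_count > 0)
                buf->usage_count--;     /* recently used: spare it this pass */
            else
                return buf;             /* cold and unpinned: evict */
        }
    }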
A special provision is that while running VACUUM, a backend does not
increment the usage count on buffers it accesses. In fact, if ReleaseBuffer
sees that it is dropping the pin count to zero and the usage count is zero,
then it appends the buffer to the tail of the free list. (This implies that
VACUUM, but only VACUUM, must take the BufFreelistLock during ReleaseBuffer;
this shouldn't create much of a contention problem.) This provision
encourages VACUUM to work in a relatively small number of buffers rather
than blowing out the entire buffer cache. It is reasonable since a page
that has been touched only by VACUUM is unlikely to be needed again soon.
Since VACUUM usually requests many pages very fast, the effect of this is that
it will get back the very buffers it filled and possibly modified on the next
call and will therefore do its work in a few shared memory buffers, while
being able to use whatever it finds in the cache already. This also implies
that most of the write traffic caused by a VACUUM will be done by the VACUUM
itself and not pushed off onto other processes.
Buffer ring replacement strategy
---------------------------------
When running a query that needs to access a large number of pages just once,
such as VACUUM or a large sequential scan, a different strategy is used.
A page that has been touched only by such a scan is unlikely to be needed
again soon, so instead of running the normal clock sweep algorithm and
blowing out the entire buffer cache, a small ring of buffers is allocated
using the normal clock sweep algorithm and those buffers are reused for the
whole scan. This also implies that much of the write traffic caused by such
a statement will be done by the backend itself and not pushed off onto other
processes.
For sequential scans, a 256KB ring is used. That's small enough to fit in L2
cache, which makes transferring pages from OS cache to shared buffer cache
efficient. Even less would often be enough, but the ring must be big enough
to accommodate all pages in the scan that are pinned concurrently. 256KB
should also be enough to leave a small cache trail for other backends to
join in a synchronized seq scan. If a ring buffer is dirtied and its LSN
updated, we would normally have to write and flush WAL before we could
re-use the buffer; in this case we instead discard the buffer from the ring
and (later) choose a replacement using the normal clock-sweep algorithm.
Hence this strategy works best for scans that are read-only (or at worst
update hint bits). In a scan that modifies every page in the scan, like a
bulk UPDATE or DELETE, the buffers in the ring will always be dirtied and
the ring strategy effectively degrades to the normal strategy.
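(With the default 8KB block size, a 256KB ring is 32 buffers.)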
VACUUM uses a 256KB ring like sequential scans, but dirty pages are not
removed from the ring. Instead, WAL is flushed if needed to allow reuse of
the buffers. Before introducing the buffer ring strategy in 8.3, VACUUM's
buffers were sent to the freelist, which was effectively a buffer ring of 1
buffer, resulting in excessive WAL flushing. Allowing VACUUM to update
256KB between WAL flushes should be more efficient.
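One plausible shape of the ring object itself, sketched from the behavior
described above (the real BufferAccessStrategyData is private to freelist.c,
whose diff is collapsed in this view, so the field names here are
illustrative):

    typedef struct BufferAccessStrategyData
    {
        BufferAccessStrategyType btype; /* BAS_BULKREAD or BAS_VACUUM */
        int         ring_size;          /* e.g. 32 slots for a 256KB ring */
        int         current;            /* most recently used slot */
        Buffer      buffers[1];         /* variable-size array of buffer ids */
    } BufferAccessStrategyData;

On each request the next slot's buffer is reused if it is unpinned and not
recently touched by anyone else; otherwise the normal clock sweep supplies a
buffer, which is then remembered in that slot.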
Background writer's processing
......
(Two diffs are collapsed in this view: the bufmgr.c and freelist.c changes
that implement ReadBufferWithStrategy and the strategy/ring machinery.)
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.76 2007/01/05 22:19:37 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.77 2007/05/30 20:11:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -57,7 +57,8 @@ static Block GetLocalBufferStorage(void);
*
* API is similar to bufmgr.c's BufferAlloc, except that we do not need
* to do any locking since this is all local. Also, IO_IN_PROGRESS
* does not get set.
* does not get set. Lastly, we support only default access strategy
* (hence, usage_count is always advanced).
*/
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
......@@ -88,7 +89,12 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
RelationGetRelid(reln), blockNum, -b - 1);
#endif
/* this part is equivalent to PinBuffer for a shared buffer */
if (LocalRefCount[b] == 0)
{
if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
bufHdr->usage_count++;
}
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
......@@ -121,18 +127,21 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
bufHdr = &LocalBufferDescriptors[b];
if (LocalRefCount[b] == 0 && bufHdr->usage_count == 0)
{
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
break;
}
if (bufHdr->usage_count > 0)
if (LocalRefCount[b] == 0)
{
bufHdr->usage_count--;
trycounter = NLocBuffer;
if (bufHdr->usage_count > 0)
{
bufHdr->usage_count--;
trycounter = NLocBuffer;
}
else
{
/* Found a usable buffer */
LocalRefCount[b]++;
ResourceOwnerRememberBuffer(CurrentResourceOwner,
BufferDescriptorGetBuffer(bufHdr));
break;
}
}
else if (--trycounter == 0)
ereport(ERROR,
......@@ -199,7 +208,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
bufHdr->tag = newTag;
bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
bufHdr->flags |= BM_TAG_VALID;
bufHdr->usage_count = 0;
bufHdr->usage_count = 1;
*foundPtr = FALSE;
return bufHdr;
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.279 2007/04/27 22:05:49 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.280 2007/05/30 20:12:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -931,7 +931,7 @@ ProcessUtility(Node *parsetree,
break;
case T_VacuumStmt:
vacuum((VacuumStmt *) parsetree, NIL, isTopLevel);
vacuum((VacuumStmt *) parsetree, NIL, NULL, isTopLevel);
break;
case T_ExplainStmt:
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.66 2007/01/05 22:19:50 momjian Exp $
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.67 2007/05/30 20:12:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -40,6 +40,7 @@ typedef struct IndexVacuumInfo
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
int message_level; /* ereport level for progress messages */
double num_heap_tuples; /* tuples remaining in heap */
BufferAccessStrategy strategy; /* access strategy for reads */
} IndexVacuumInfo;
/*
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/hash.h,v 1.80 2007/05/03 16:45:58 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
......@@ -273,11 +273,13 @@ extern void _hash_doinsert(Relation rel, IndexTuple itup);
/* hashovfl.c */
extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf);
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
BufferAccessStrategy bstrategy);
extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
BlockNumber blkno);
extern void _hash_squeezebucket(Relation rel,
Bucket bucket, BlockNumber bucket_blkno);
Bucket bucket, BlockNumber bucket_blkno,
BufferAccessStrategy bstrategy);
/* hashpage.c */
extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
......@@ -287,6 +289,9 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
int access, int flags);
extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno);
extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
int access, int flags,
BufferAccessStrategy bstrategy);
extern void _hash_relbuf(Relation rel, Buffer buf);
extern void _hash_dropbuf(Relation rel, Buffer buf);
extern void _hash_wrtbuf(Relation rel, Buffer buf);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -27,6 +27,7 @@ typedef struct HeapScanDescData
int rs_nkeys; /* number of scan keys */
ScanKey rs_key; /* array of scan key descriptors */
BlockNumber rs_nblocks; /* number of blocks to scan */
BufferAccessStrategy rs_strategy; /* access strategy for reads */
bool rs_pageatatime; /* verify visibility page-at-a-time? */
/* scan current state */
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.77 2007/05/20 21:08:19 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.78 2007/05/30 20:12:02 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
......@@ -159,6 +159,7 @@ extern bool XLOG_DEBUG;
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.71 2007/05/17 15:28:29 alvherre Exp $
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.72 2007/05/30 20:12:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,9 +18,11 @@
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "nodes/parsenodes.h"
#include "storage/buf.h"
#include "storage/lock.h"
#include "utils/rel.h"
/*----------
* ANALYZE builds one of these structs for each attribute (column) that is
* to be analyzed. The struct and subsidiary data are in anl_context,
......@@ -110,7 +112,8 @@ extern int vacuum_freeze_min_age;
/* in commands/vacuum.c */
extern void vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel);
extern void vacuum(VacuumStmt *vacstmt, List *relids,
BufferAccessStrategy bstrategy, bool isTopLevel);
extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
int *nindexes, Relation **Irel);
extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
......@@ -127,9 +130,11 @@ extern bool vac_is_partial_index(Relation indrel);
extern void vacuum_delay_point(void);
/* in commands/vacuumlazy.c */
extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy);
/* in commands/analyze.c */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy);
#endif /* VACUUM_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.21 2007/01/05 22:19:57 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.22 2007/05/30 20:12:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -36,4 +36,11 @@ typedef int Buffer;
*/
#define BufferIsLocal(buffer) ((buffer) < 0)
/*
* Buffer access strategy objects.
*
* BufferAccessStrategyData is private to freelist.c
*/
typedef struct BufferAccessStrategyData *BufferAccessStrategy;
#endif /* BUF_H */
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.89 2007/01/05 22:19:57 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.90 2007/05/30 20:12:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -167,9 +167,6 @@ extern DLLIMPORT BufferDesc *BufferDescriptors;
/* in localbuf.c */
extern BufferDesc *LocalBufferDescriptors;
/* in freelist.c */
extern bool strategy_hint_vacuum;
/* event counters in buf_init.c */
extern long int ReadBufferCount;
extern long int ReadLocalBufferCount;
......@@ -184,8 +181,12 @@ extern long int LocalBufferFlushCount;
*/
/* freelist.c */
extern volatile BufferDesc *StrategyGetBuffer(void);
extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
bool *lock_held);
extern void StrategyFreeBuffer(volatile BufferDesc *buf);
extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
extern int StrategySyncStart(void);
extern Size StrategyShmemSize(void);
extern void StrategyInitialize(bool init);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.104 2007/05/30 20:12:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -19,6 +19,14 @@
typedef void *Block;
/* Possible arguments for GetAccessStrategy() */
typedef enum BufferAccessStrategyType
{
BAS_NORMAL, /* Normal random access */
BAS_BULKREAD, /* Large read-only scan (hint bit updates are ok) */
BAS_VACUUM /* VACUUM */
} BufferAccessStrategyType;
/* in globals.c ... this duplicates miscadmin.h */
extern DLLIMPORT int NBuffers;
......@@ -111,6 +119,8 @@ extern DLLIMPORT int32 *LocalRefCount;
* prototypes for functions in bufmgr.c
*/
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
BufferAccessStrategy strategy);
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
......@@ -157,6 +167,7 @@ extern void BgBufferSync(void);
extern void AtProcExit_LocalBuffers(void);
/* in freelist.c */
extern void StrategyHintVacuum(bool vacuum_active);
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
#endif