Commit d526575f authored by Tom Lane

Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per
ring-ful.  The exact ring size, and the threshold for seqscans to switch into
the ring usage pattern, remain under debate; but the infrastructure seems
done.  The key bit of infrastructure is a new optional BufferAccessStrategy
object that can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.
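
To make the new interface concrete, here is a minimal sketch of a caller
(hypothetical code, not part of this commit, assuming rel and nblocks are in
scope; all functions named do appear in the patch below):

    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        /* reads cycle through the small ring instead of the whole arena */
        Buffer  buf = ReadBufferWithStrategy(rel, blkno, bstrategy);

        /* ... inspect or modify the page ... */
        ReleaseBuffer(buf);
    }

    FreeAccessStrategy(bstrategy);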

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.
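
Schematically, the pinning side now looks like the local-buffer version of
this logic in the localbuf.c hunk below (the shared-buffer code in bufmgr.c,
collapsed in this view, does the equivalent under the buffer-header
spinlock):

    /* at pin time: advance usage_count on the first pin only */
    if (refcount == 0 && usage_count < BM_MAX_USAGE_COUNT)
        usage_count++;
    refcount++;

    /* in the clock sweep: only unpinned buffers lose usage_count */
    if (refcount == 0 && usage_count > 0)
        usage_count--;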

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
parent 0a6f2ee8
src/backend/access/hash/hash.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -547,8 +547,9 @@ loop_top:
 		vacuum_delay_point();

-		buf = _hash_getbuf(rel, blkno, HASH_WRITE,
-						   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+		buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+										 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+										 info->strategy);
 		page = BufferGetPage(buf);
 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 		Assert(opaque->hasho_bucket == cur_bucket);
@@ -596,7 +597,8 @@ loop_top:
 		/* If we deleted anything, try to compact free space */
 		if (bucket_dirty)
-			_hash_squeezebucket(rel, cur_bucket, bucket_blkno);
+			_hash_squeezebucket(rel, cur_bucket, bucket_blkno,
+								info->strategy);

 		/* Release bucket lock */
 		_hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);

src/backend/access/hash/hashovfl.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
  *	Remove this overflow page from its bucket's chain, and mark the page as
  *	free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ *	Since this function is invoked in VACUUM, we provide an access strategy
+ *	parameter that controls fetches of the bucket pages.
+ *
  *	Returns the block number of the page that followed the given page
  *	in the bucket, or InvalidBlockNumber if no following page.
  *
@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
  *	on the bucket, too.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+_hash_freeovflpage(Relation rel, Buffer ovflbuf,
+				   BufferAccessStrategy bstrategy)
 {
 	HashMetaPage metap;
 	Buffer		metabuf;
@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	 */
 	if (BlockNumberIsValid(prevblkno))
 	{
-		Buffer		prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
-										   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+		Buffer		prevbuf = _hash_getbuf_with_strategy(rel,
+														 prevblkno,
+														 HASH_WRITE,
+										   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+														 bstrategy);
 		Page		prevpage = BufferGetPage(prevbuf);
 		HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	}
 	if (BlockNumberIsValid(nextblkno))
 	{
-		Buffer		nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
-										   LH_OVERFLOW_PAGE);
+		Buffer		nextbuf = _hash_getbuf_with_strategy(rel,
+														 nextblkno,
+														 HASH_WRITE,
+														 LH_OVERFLOW_PAGE,
+														 bstrategy);
 		Page		nextpage = BufferGetPage(nextbuf);
 		HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 		_hash_wrtbuf(rel, nextbuf);
 	}

+	/* Note: bstrategy is intentionally not used for metapage and bitmap */
+
 	/* Read the metapage so we can determine which bitmap page to use */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *
  *	Caller must hold exclusive lock on the target bucket.  This allows
  *	us to safely lock multiple pages in the bucket.
+ *
+ *	Since this function is invoked in VACUUM, we provide an access strategy
+ *	parameter that controls fetches of the bucket pages.
  */
 void
 _hash_squeezebucket(Relation rel,
 					Bucket bucket,
-					BlockNumber bucket_blkno)
+					BlockNumber bucket_blkno,
+					BufferAccessStrategy bstrategy)
 {
 	Buffer		wbuf;
 	Buffer		rbuf = 0;
@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
 	 * start squeezing into the base bucket page.
 	 */
 	wblkno = bucket_blkno;
-	wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
+	wbuf = _hash_getbuf_with_strategy(rel,
+									  wblkno,
+									  HASH_WRITE,
+									  LH_BUCKET_PAGE,
+									  bstrategy);
 	wpage = BufferGetPage(wbuf);
 	wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
@@ -595,8 +615,10 @@ _hash_squeezebucket(Relation rel,
 	}

 	/*
-	 * find the last page in the bucket chain by starting at the base bucket
-	 * page and working forward.
+	 * Find the last page in the bucket chain by starting at the base bucket
+	 * page and working forward.  Note: we assume that a hash bucket chain is
+	 * usually smaller than the buffer ring being used by VACUUM, else using
+	 * the access strategy here would be counterproductive.
 	 */
 	ropaque = wopaque;
 	do
@@ -604,7 +626,11 @@ _hash_squeezebucket(Relation rel,
 		rblkno = ropaque->hasho_nextblkno;
 		if (ropaque != wopaque)
 			_hash_relbuf(rel, rbuf);
-		rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+		rbuf = _hash_getbuf_with_strategy(rel,
+										  rblkno,
+										  HASH_WRITE,
+										  LH_OVERFLOW_PAGE,
+										  bstrategy);
 		rpage = BufferGetPage(rbuf);
 		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 		Assert(ropaque->hasho_bucket == bucket);
@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
 				return;
 			}

-			wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+			wbuf = _hash_getbuf_with_strategy(rel,
+											  wblkno,
+											  HASH_WRITE,
+											  LH_OVERFLOW_PAGE,
+											  bstrategy);
 			wpage = BufferGetPage(wbuf);
 			wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 			Assert(wopaque->hasho_bucket == bucket);
@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
 				/* yes, so release wbuf lock first */
 				_hash_wrtbuf(rel, wbuf);
 				/* free this overflow page (releases rbuf) */
-				_hash_freeovflpage(rel, rbuf);
+				_hash_freeovflpage(rel, rbuf, bstrategy);
 				/* done */
 				return;
 			}

 			/* free this overflow page, then get the previous one */
-			_hash_freeovflpage(rel, rbuf);
+			_hash_freeovflpage(rel, rbuf, bstrategy);

-			rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+			rbuf = _hash_getbuf_with_strategy(rel,
+											  rblkno,
+											  HASH_WRITE,
+											  LH_OVERFLOW_PAGE,
+											  bstrategy);
 			rpage = BufferGetPage(rbuf);
 			ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 			Assert(ropaque->hasho_bucket == bucket);

src/backend/access/hash/hashpage.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  Postgres hash pages look like ordinary relation pages.  The opaque
@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
 	return buf;
 }

+/*
+ *	_hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
+ *
+ *		This is identical to _hash_getbuf() but also allows a buffer access
+ *		strategy to be specified.  We use this for VACUUM operations.
+ */
+Buffer
+_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+						   int access, int flags,
+						   BufferAccessStrategy bstrategy)
+{
+	Buffer		buf;
+
+	if (blkno == P_NEW)
+		elog(ERROR, "hash AM does not use P_NEW");
+
+	buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
+
+	if (access != HASH_NOLOCK)
+		LockBuffer(buf, access);
+
+	/* ref count and lock type are correct */
+
+	_hash_checkpage(rel, buf, flags);
+
+	return buf;
+}
+
 /*
  *	_hash_relbuf() -- release a locked buffer.
  *
@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
 	_hash_wrtbuf(rel, obuf);
 	_hash_wrtbuf(rel, nbuf);

-	_hash_squeezebucket(rel, obucket, start_oblkno);
+	_hash_squeezebucket(rel, obucket, start_oblkno, NULL);
 }

src/backend/access/heap/heapam.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -83,6 +83,24 @@ initscan(HeapScanDesc scan, ScanKey key)
 	 */
 	scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);

+	/*
+	 * If the table is large relative to NBuffers, use a bulk-read access
+	 * strategy, else use the default random-access strategy.  During a
+	 * rescan, don't make a new strategy object if we don't have to.
+	 */
+	if (scan->rs_nblocks > NBuffers / 4 &&
+		!scan->rs_rd->rd_istemp)
+	{
+		if (scan->rs_strategy == NULL)
+			scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+	}
+	else
+	{
+		if (scan->rs_strategy != NULL)
+			FreeAccessStrategy(scan->rs_strategy);
+		scan->rs_strategy = NULL;
+	}
+
 	scan->rs_inited = false;
 	scan->rs_ctup.t_data = NULL;
 	ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -123,9 +141,17 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
 	Assert(page < scan->rs_nblocks);

-	scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-										 scan->rs_rd,
-										 page);
+	/* release previous scan buffer, if any */
+	if (BufferIsValid(scan->rs_cbuf))
+	{
+		ReleaseBuffer(scan->rs_cbuf);
+		scan->rs_cbuf = InvalidBuffer;
+	}
+
+	/* read page using selected strategy */
+	scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
+										   page,
+										   scan->rs_strategy);
 	scan->rs_cblock = page;

 	if (!scan->rs_pageatatime)
@@ -938,6 +964,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_rd = relation;
 	scan->rs_snapshot = snapshot;
 	scan->rs_nkeys = nkeys;
+	scan->rs_strategy = NULL;	/* set in initscan */

 	/*
 	 * we can use page-at-a-time mode if it's an MVCC-safe snapshot
@@ -1007,6 +1034,9 @@ heap_endscan(HeapScanDesc scan)
 	if (scan->rs_key)
 		pfree(scan->rs_key);

+	if (scan->rs_strategy != NULL)
+		FreeAccessStrategy(scan->rs_strategy);
+
 	pfree(scan);
 }

src/backend/access/nbtree/nbtree.c

@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.154 2007/01/05 22:19:23 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.155 2007/05/30 20:11:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -786,9 +786,10 @@ restart:
 		/*
 		 * We can't use _bt_getbuf() here because it always applies
 		 * _bt_checkpage(), which will barf on an all-zero page.  We want to
-		 * recycle all-zero pages, not fail.
+		 * recycle all-zero pages, not fail.  Also, we want to use a
+		 * nondefault buffer access strategy.
 		 */
-		buf = ReadBuffer(rel, blkno);
+		buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
 		LockBuffer(buf, BT_READ);
 		page = BufferGetPage(buf);
 		opaque = (BTPageOpaque) PageGetSpecialPointer(page);

src/backend/access/transam/xlog.c

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.270 2007/05/30 20:11:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1799,6 +1799,36 @@ XLogFlush(XLogRecPtr record)
 			 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
 }

+/*
+ * Test whether XLOG data has been flushed up to (at least) the given position.
+ *
+ * Returns true if a flush is still needed.  (It may be that someone else
+ * is already in process of flushing that far, however.)
+ */
+bool
+XLogNeedsFlush(XLogRecPtr record)
+{
+	/* Quick exit if already known flushed */
+	if (XLByteLE(record, LogwrtResult.Flush))
+		return false;
+
+	/* read LogwrtResult and update local state */
+	{
+		/* use volatile pointer to prevent code rearrangement */
+		volatile XLogCtlData *xlogctl = XLogCtl;
+
+		SpinLockAcquire(&xlogctl->info_lck);
+		LogwrtResult = xlogctl->LogwrtResult;
+		SpinLockRelease(&xlogctl->info_lck);
+	}
+
+	/* check again */
+	if (XLByteLE(record, LogwrtResult.Flush))
+		return false;
+
+	return true;
+}
+
 /*
  * Create a new XLOG file segment, or open a pre-existing one.
  *

src/backend/catalog/index.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.283 2007/05/16 17:28:20 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.284 2007/05/30 20:11:55 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1658,6 +1658,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
 	ivinfo.vacuum_full = false;
 	ivinfo.message_level = DEBUG2;
 	ivinfo.num_heap_tuples = -1;
+	ivinfo.strategy = NULL;

 	state.tuplesort = tuplesort_begin_datum(TIDOID,
 											TIDLessOperator, false,

src/backend/commands/analyze.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,10 +63,13 @@ typedef struct AnlIndexData
 /* Default statistics target (GUC parameter) */
 int			default_statistics_target = 10;

+/* A few variables that don't seem worth passing around as parameters */
 static int	elevel = -1;

 static MemoryContext anl_context = NULL;

+static BufferAccessStrategy vac_strategy;
+
 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
 				  int samplesize);
@@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
  *	analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, VacuumStmt *vacstmt)
+analyze_rel(Oid relid, VacuumStmt *vacstmt,
+			BufferAccessStrategy bstrategy)
 {
 	Relation	onerel;
 	int			attr_cnt,
@@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
 	else
 		elevel = DEBUG2;

+	vac_strategy = bstrategy;
+
 	/*
 	 * Use the current context for storing analysis info.  vacuum.c ensures
 	 * that this context will be cleared when I return, thus releasing the
@@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
 		 * looking at it.  We don't maintain a lock on the page, so tuples
 		 * could get added to it, but we ignore such tuples.
 		 */
-		targbuffer = ReadBuffer(onerel, targblock);
+		targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
 		LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
 		targpage = BufferGetPage(targbuffer);
 		maxoffset = PageGetMaxOffsetNumber(targpage);

src/backend/commands/vacuum.c

@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
  *----------------------------------------------------------------------
  */

+/* A few variables that don't seem worth passing around as parameters */
 static MemoryContext vac_context = NULL;
 static int	elevel = -1;
@@ -198,6 +199,8 @@ static int elevel = -1;
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;

+static BufferAccessStrategy vac_strategy;
+
 /* non-export function prototypes */
 static List *get_rel_oids(List *relids, const RangeVar *vacrel,
@@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page)
  * relation OIDs to be processed, and vacstmt->relation is ignored.
  * (The non-NIL case is currently only used by autovacuum.)
  *
+ * bstrategy is normally given as NULL, but in autovacuum it can be passed
+ * in to use the same buffer strategy object across multiple vacuum() calls.
+ *
  * isTopLevel should be passed down from ProcessUtility.
  *
- * It is the caller's responsibility that both vacstmt and relids
+ * It is the caller's responsibility that vacstmt, relids, and bstrategy
  * (if given) be allocated in a memory context that won't disappear
  * at transaction commit.
  */
 void
-vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
+vacuum(VacuumStmt *vacstmt, List *relids,
+	   BufferAccessStrategy bstrategy, bool isTopLevel)
 {
 	const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
 	volatile MemoryContext anl_context = NULL;
@@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
 										  ALLOCSET_DEFAULT_INITSIZE,
 										  ALLOCSET_DEFAULT_MAXSIZE);

+	/*
+	 * If caller didn't give us a buffer strategy object, make one in the
+	 * cross-transaction memory context.
+	 */
+	if (bstrategy == NULL)
+	{
+		MemoryContext old_context = MemoryContextSwitchTo(vac_context);
+
+		bstrategy = GetAccessStrategy(BAS_VACUUM);
+		MemoryContextSwitchTo(old_context);
+	}
+	vac_strategy = bstrategy;
+
 	/* Remember whether we are processing everything in the DB */
 	all_rels = (relids == NIL && vacstmt->relation == NULL);
@@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
 				else
 					old_context = MemoryContextSwitchTo(anl_context);

-				/*
-				 * Tell the buffer replacement strategy that vacuum is causing
-				 * the IO
-				 */
-				StrategyHintVacuum(true);
-
-				analyze_rel(relid, vacstmt);
-
-				StrategyHintVacuum(false);
+				analyze_rel(relid, vacstmt, vac_strategy);

 				if (use_own_xacts)
 					CommitTransactionCommand();
@@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
 	{
 		/* Make sure cost accounting is turned off after error */
 		VacuumCostActive = false;
-		/* And reset buffer replacement strategy, too */
-		StrategyHintVacuum(false);
 		PG_RE_THROW();
 	}
 	PG_END_TRY();
@@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
 	 */
 	toast_relid = onerel->rd_rel->reltoastrelid;

-	/*
-	 * Tell the cache replacement strategy that vacuum is causing all
-	 * following IO
-	 */
-	StrategyHintVacuum(true);
-
 	/*
 	 * Do the actual work --- either FULL or "lazy" vacuum
 	 */
 	if (vacstmt->full)
 		full_vacuum_rel(onerel, vacstmt);
 	else
-		lazy_vacuum_rel(onerel, vacstmt);
-
-	StrategyHintVacuum(false);
+		lazy_vacuum_rel(onerel, vacstmt, vac_strategy);

 	/* all done with this class, but hold lock until commit */
 	relation_close(onerel, NoLock);
@@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 		vacuum_delay_point();

-		buf = ReadBuffer(onerel, blkno);
+		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 		page = BufferGetPage(buf);

 		/*
@@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 		/*
 		 * Process this page of relation.
 		 */
-		buf = ReadBuffer(onerel, blkno);
+		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 		page = BufferGetPage(buf);
 		vacpage->offsets_free = 0;
@@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 					nextTid = tp.t_data->t_ctid;
 					priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
 					/* assume block# is OK (see heap_fetch comments) */
-					nextBuf = ReadBuffer(onerel,
-										 ItemPointerGetBlockNumber(&nextTid));
+					nextBuf = ReadBufferWithStrategy(onerel,
+										 ItemPointerGetBlockNumber(&nextTid),
+													 vac_strategy);
 					nextPage = BufferGetPage(nextBuf);
 					/* If bogus or unused slot, assume tp is end of chain */
 					nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
@@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 							break;		/* out of check-all-items loop */
 						}
 						tp.t_self = vtlp->this_tid;
-						Pbuf = ReadBuffer(onerel,
-										  ItemPointerGetBlockNumber(&(tp.t_self)));
+						Pbuf = ReadBufferWithStrategy(onerel,
+									 ItemPointerGetBlockNumber(&(tp.t_self)),
+													  vac_strategy);
 						Ppage = BufferGetPage(Pbuf);
 						Pitemid = PageGetItemId(Ppage,
 									   ItemPointerGetOffsetNumber(&(tp.t_self)));
@@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 					/* Get page to move from */
 					tuple.t_self = vtmove[ti].tid;
-					Cbuf = ReadBuffer(onerel,
-									  ItemPointerGetBlockNumber(&(tuple.t_self)));
+					Cbuf = ReadBufferWithStrategy(onerel,
+								  ItemPointerGetBlockNumber(&(tuple.t_self)),
+												  vac_strategy);

 					/* Get page to move to */
-					dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
+					dst_buffer = ReadBufferWithStrategy(onerel,
+														destvacpage->blkno,
+														vac_strategy);

 					LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
 					if (dst_buffer != Cbuf)
@@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 				if (i == num_fraged_pages)
 					break;		/* can't move item anywhere */
 				dst_vacpage = fraged_pages->pagedesc[i];
-				dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
+				dst_buffer = ReadBufferWithStrategy(onerel,
+													dst_vacpage->blkno,
+													vac_strategy);
 				LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
 				dst_page = BufferGetPage(dst_buffer);
 				/* if this page was not used before - clean it */
@@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 			Page		page;

 			/* this page was not used as a move target, so must clean it */
-			buf = ReadBuffer(onerel, (*curpage)->blkno);
+			buf = ReadBufferWithStrategy(onerel,
+										 (*curpage)->blkno,
+										 vac_strategy);
 			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 			page = BufferGetPage(buf);
 			if (!PageIsEmpty(page))
@@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 		int			uncnt;
 		int			num_tuples = 0;

-		buf = ReadBuffer(onerel, vacpage->blkno);
+		buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 		page = BufferGetPage(buf);
 		maxoff = PageGetMaxOffsetNumber(page);
@@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
 			break;				/* no need to scan any further */
 		if ((*curpage)->offsets_used == 0)
 			continue;			/* this page was never used as a move dest */
-		buf = ReadBuffer(rel, (*curpage)->blkno);
+		buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 		page = BufferGetPage(buf);
 		max_offset = PageGetMaxOffsetNumber(page);
@@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 		if ((*vacpage)->offsets_free > 0)
 		{
-			buf = ReadBuffer(onerel, (*vacpage)->blkno);
+			buf = ReadBufferWithStrategy(onerel,
+										 (*vacpage)->blkno,
+										 vac_strategy);
 			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 			vacuum_page(onerel, buf, *vacpage);
 			UnlockReleaseBuffer(buf);
@@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
 	ivinfo.vacuum_full = true;
 	ivinfo.message_level = elevel;
 	ivinfo.num_heap_tuples = num_tuples;
+	ivinfo.strategy = vac_strategy;

 	stats = index_vacuum_cleanup(&ivinfo, NULL);
@@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
 	ivinfo.vacuum_full = true;
 	ivinfo.message_level = elevel;
 	ivinfo.num_heap_tuples = num_tuples + keep_tuples;
+	ivinfo.strategy = vac_strategy;

 	/* Do bulk deletion */
 	stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);

src/backend/commands/vacuumlazy.c

@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -96,11 +96,14 @@ typedef struct LVRelStats
 } LVRelStats;

+/* A few variables that don't seem worth passing around as parameters */
 static int	elevel = -1;

 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;

+static BufferAccessStrategy vac_strategy;
+
 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
@@ -138,7 +141,8 @@ static int vac_cmp_page_spaces(const void *left, const void *right);
  *		and locked the relation.
  */
 void
-lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
+lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+				BufferAccessStrategy bstrategy)
 {
 	LVRelStats *vacrelstats;
 	Relation   *Irel;
@@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
 	else
 		elevel = DEBUG2;

+	vac_strategy = bstrategy;
+
 	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
 						  &OldestXmin, &FreezeLimit);
@@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			vacrelstats->num_index_scans++;
 		}

-		buf = ReadBuffer(onerel, blkno);
+		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

 		/* Initially, we only need shared access to the buffer */
 		LockBuffer(buf, BUFFER_LOCK_SHARE);
@@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 		vacuum_delay_point();

 		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
-		buf = ReadBuffer(onerel, tblk);
+		buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
 		LockBufferForCleanup(buf);
 		tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);

 		/* Now that we've compacted the page, record its available space */
@@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
 	ivinfo.message_level = elevel;
 	/* We don't yet know rel_tuples, so pass -1 */
 	ivinfo.num_heap_tuples = -1;
+	ivinfo.strategy = vac_strategy;

 	/* Do bulk deletion */
 	*stats = index_bulk_delete(&ivinfo, *stats,
@@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
 	ivinfo.vacuum_full = false;
 	ivinfo.message_level = elevel;
 	ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
+	ivinfo.strategy = vac_strategy;

 	stats = index_vacuum_cleanup(&ivinfo, stats);
@@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 		blkno--;

-		buf = ReadBuffer(onerel, blkno);
+		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

 		/* In this phase we only need shared access to the buffer */
 		LockBuffer(buf, BUFFER_LOCK_SHARE);

src/backend/postmaster/autovacuum.c

@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.46 2007/05/07 20:41:24 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.47 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -218,7 +218,8 @@ static void relation_needs_vacanalyze(Oid relid, Form_pg_autovacuum avForm,
 						  bool *doanalyze);
 static void autovacuum_do_vac_analyze(Oid relid, bool dovacuum,
-						  bool doanalyze, int freeze_min_age);
+						  bool doanalyze, int freeze_min_age,
+						  BufferAccessStrategy bstrategy);
 static HeapTuple get_pg_autovacuum_tuple_relid(Relation avRel, Oid relid);
 static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
 						  PgStat_StatDBEntry *shared,
@@ -1673,6 +1674,7 @@ do_autovacuum(void)
 	ListCell   *cell;
 	PgStat_StatDBEntry *shared;
 	PgStat_StatDBEntry *dbentry;
+	BufferAccessStrategy bstrategy;

 	/*
 	 * may be NULL if we couldn't find an entry (only happens if we
@@ -1812,6 +1814,13 @@ do_autovacuum(void)
 	list_free(toast_oids);
 	toast_oids = NIL;

+	/*
+	 * Create a buffer access strategy object for VACUUM to use.  We want to
+	 * use the same one across all the vacuum operations we perform, since
+	 * the point is for VACUUM not to blow out the shared cache.
+	 */
+	bstrategy = GetAccessStrategy(BAS_VACUUM);
+
 	/*
 	 * Perform operations on collected tables.
 	 */
@@ -1910,7 +1919,8 @@ next_worker:
 		autovacuum_do_vac_analyze(tab->at_relid,
 								  tab->at_dovacuum,
 								  tab->at_doanalyze,
-								  tab->at_freeze_min_age);
+								  tab->at_freeze_min_age,
+								  bstrategy);

 		/* be tidy */
 		pfree(tab);
 	}
@@ -2328,7 +2338,8 @@ relation_needs_vacanalyze(Oid relid,
  */
 static void
 autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
-						  int freeze_min_age)
+						  int freeze_min_age,
+						  BufferAccessStrategy bstrategy)
 {
 	VacuumStmt	vacstmt;
 	MemoryContext old_cxt;
@@ -2354,7 +2365,7 @@ autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
 	/* Let pgstat know what we're doing */
 	autovac_report_activity(&vacstmt, relid);

-	vacuum(&vacstmt, list_make1_oid(relid), true);
+	vacuum(&vacstmt, list_make1_oid(relid), bstrategy, true);
 	MemoryContextSwitchTo(old_cxt);
 }

src/backend/storage/buffer/README

-$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.12 2007/05/30 20:11:58 tgl Exp $

 Notes about shared buffer access rules
 --------------------------------------
@@ -152,20 +152,21 @@
 we could use per-backend LWLocks instead (a buffer header would then contain
 a field to show which backend is doing its I/O).

-Buffer replacement strategy
----------------------------
+Normal buffer replacement strategy
+----------------------------------

 There is a "free list" of buffers that are prime candidates for replacement.
 In particular, buffers that are completely free (contain no valid page) are
-always in this list.  We may also throw buffers into this list if we
-consider their pages unlikely to be needed soon.  The list is singly-linked
-using fields in the buffer headers; we maintain head and tail pointers in
-global variables.  (Note: although the list links are in the buffer headers,
-they are considered to be protected by the BufFreelistLock, not the
-buffer-header spinlocks.)  To choose a victim buffer to recycle when there
-are no free buffers available, we use a simple clock-sweep algorithm, which
-avoids the need to take system-wide locks during common operations.  It
-works like this:
+always in this list.  We could also throw buffers into this list if we
+consider their pages unlikely to be needed soon; however, the current
+algorithm never does that.  The list is singly-linked using fields in the
+buffer headers; we maintain head and tail pointers in global variables.
+(Note: although the list links are in the buffer headers, they are
+considered to be protected by the BufFreelistLock, not the buffer-header
+spinlocks.)  To choose a victim buffer to recycle when there are no free
+buffers available, we use a simple clock-sweep algorithm, which avoids the
+need to take system-wide locks during common operations.  It works like
+this:

 Each buffer header contains a usage counter, which is incremented (up to a
 small limit value) whenever the buffer is unpinned.  (This requires only the
@@ -199,22 +200,40 @@
 before we can recycle it; if someone else pins the buffer meanwhile we will
 have to give up and try another buffer.  This however is not a concern
 of the basic select-a-victim-buffer algorithm.)

-A special provision is that while running VACUUM, a backend does not
-increment the usage count on buffers it accesses.  In fact, if ReleaseBuffer
-sees that it is dropping the pin count to zero and the usage count is zero,
-then it appends the buffer to the tail of the free list.  (This implies that
-VACUUM, but only VACUUM, must take the BufFreelistLock during ReleaseBuffer;
-this shouldn't create much of a contention problem.)  This provision
-encourages VACUUM to work in a relatively small number of buffers rather
-than blowing out the entire buffer cache.  It is reasonable since a page
-that has been touched only by VACUUM is unlikely to be needed again soon.
-
-Since VACUUM usually requests many pages very fast, the effect of this is that
-it will get back the very buffers it filled and possibly modified on the next
-call and will therefore do its work in a few shared memory buffers, while
-being able to use whatever it finds in the cache already.  This also implies
-that most of the write traffic caused by a VACUUM will be done by the VACUUM
-itself and not pushed off onto other processes.
+
+Buffer ring replacement strategy
+---------------------------------
+
+When running a query that needs to access a large number of pages just once,
+such as VACUUM or a large sequential scan, a different strategy is used.
+A page that has been touched only by such a scan is unlikely to be needed
+again soon, so instead of running the normal clock sweep algorithm and
+blowing out the entire buffer cache, a small ring of buffers is allocated
+using the normal clock sweep algorithm and those buffers are reused for the
+whole scan.  This also implies that much of the write traffic caused by such
+a statement will be done by the backend itself and not pushed off onto other
+processes.
+
+For sequential scans, a 256KB ring is used.  That's small enough to fit in L2
+cache, which makes transferring pages from OS cache to shared buffer cache
+efficient.  Even less would often be enough, but the ring must be big enough
+to accommodate all pages in the scan that are pinned concurrently.  256KB
+should also be enough to leave a small cache trail for other backends to
+join in a synchronized seq scan.  If a ring buffer is dirtied and its LSN
+updated, we would normally have to write and flush WAL before we could
+re-use the buffer; in this case we instead discard the buffer from the ring
+and (later) choose a replacement using the normal clock-sweep algorithm.
+Hence this strategy works best for scans that are read-only (or at worst
+update hint bits).  In a scan that modifies every page in the scan, like a
+bulk UPDATE or DELETE, the buffers in the ring will always be dirtied and
+the ring strategy effectively degrades to the normal strategy.
+
+VACUUM uses a 256KB ring like sequential scans, but dirty pages are not
+removed from the ring.  Instead, WAL is flushed if needed to allow reuse of
+the buffers.  Before introducing the buffer ring strategy in 8.3, VACUUM's
+buffers were sent to the freelist, which was effectively a buffer ring of 1
+buffer, resulting in excessive WAL flushing.  Allowing VACUUM to update
+256KB between WAL flushes should be more efficient.

 Background writer's processing
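
To illustrate the ring mechanics described in the README text above, here is
a self-contained sketch of victim selection under a ring strategy.  It is
illustrative only: the real implementation lives in freelist.c (whose diff is
collapsed in this view), and none of the names below are taken from it; the
callbacks stand in for bufmgr internals.

    #include <stdbool.h>

    typedef struct
    {
        int    *buffers;    /* buffer ids; -1 means slot not filled yet */
        int     size;       /* e.g. 256KB / BLCKSZ = 32 slots */
        int     current;    /* ring position, advanced on each request */
    } BufferRing;

    /*
     * Pick a victim buffer for a ring-strategy scan.  "reusable" should
     * test that the buffer is unpinned and its usage count is still low,
     * i.e. no other backend started using it since we last did;
     * "clock_sweep" runs the normal replacement algorithm.
     */
    static int
    ring_next_victim(BufferRing *ring,
                     bool (*reusable) (int bufid),
                     int (*clock_sweep) (void))
    {
        int     bufid;

        ring->current = (ring->current + 1) % ring->size;
        bufid = ring->buffers[ring->current];

        /* reuse our own previous buffer whenever possible */
        if (bufid >= 0 && reusable(bufid))
            return bufid;

        /* else take one from the main pool and adopt it into the ring */
        bufid = clock_sweep();
        ring->buffers[ring->current] = bufid;
        return bufid;
    }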
This diff is collapsed.
This diff is collapsed.
src/backend/storage/buffer/localbuf.c

@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.76 2007/01/05 22:19:37 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.77 2007/05/30 20:11:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,7 +57,8 @@ static Block GetLocalBufferStorage(void);
  *
  * API is similar to bufmgr.c's BufferAlloc, except that we do not need
  * to do any locking since this is all local.  Also, IO_IN_PROGRESS
- * does not get set.
+ * does not get set.  Lastly, we support only default access strategy
+ * (hence, usage_count is always advanced).
  */
 BufferDesc *
 LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
@@ -88,7 +89,12 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 		fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
 				RelationGetRelid(reln), blockNum, -b - 1);
 #endif
+		/* this part is equivalent to PinBuffer for a shared buffer */
+		if (LocalRefCount[b] == 0)
+		{
+			if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
+				bufHdr->usage_count++;
+		}
 		LocalRefCount[b]++;
 		ResourceOwnerRememberBuffer(CurrentResourceOwner,
 									BufferDescriptorGetBuffer(bufHdr));
@@ -121,18 +127,21 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 		bufHdr = &LocalBufferDescriptors[b];

-		if (LocalRefCount[b] == 0 && bufHdr->usage_count == 0)
+		if (LocalRefCount[b] == 0)
 		{
-			LocalRefCount[b]++;
-			ResourceOwnerRememberBuffer(CurrentResourceOwner,
-										BufferDescriptorGetBuffer(bufHdr));
-			break;
-		}
-
-		if (bufHdr->usage_count > 0)
-		{
-			bufHdr->usage_count--;
-			trycounter = NLocBuffer;
+			if (bufHdr->usage_count > 0)
+			{
+				bufHdr->usage_count--;
+				trycounter = NLocBuffer;
+			}
+			else
+			{
+				/* Found a usable buffer */
+				LocalRefCount[b]++;
+				ResourceOwnerRememberBuffer(CurrentResourceOwner,
+										BufferDescriptorGetBuffer(bufHdr));
+				break;
+			}
 		}
 		else if (--trycounter == 0)
 			ereport(ERROR,
@@ -199,7 +208,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 	bufHdr->tag = newTag;
 	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
 	bufHdr->flags |= BM_TAG_VALID;
-	bufHdr->usage_count = 0;
+	bufHdr->usage_count = 1;

 	*foundPtr = FALSE;
 	return bufHdr;

src/backend/tcop/utility.c

@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.279 2007/04/27 22:05:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.280 2007/05/30 20:12:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -931,7 +931,7 @@ ProcessUtility(Node *parsetree,
 			break;

 		case T_VacuumStmt:
-			vacuum((VacuumStmt *) parsetree, NIL, isTopLevel);
+			vacuum((VacuumStmt *) parsetree, NIL, NULL, isTopLevel);
 			break;

 		case T_ExplainStmt:

src/include/access/genam.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.66 2007/01/05 22:19:50 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.67 2007/05/30 20:12:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,6 +40,7 @@ typedef struct IndexVacuumInfo
 	bool		vacuum_full;	/* VACUUM FULL (we have exclusive lock) */
 	int			message_level;	/* ereport level for progress messages */
 	double		num_heap_tuples;	/* tuples remaining in heap */
+	BufferAccessStrategy strategy;		/* access strategy for reads */
 } IndexVacuumInfo;

 /*

@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.80 2007/05/03 16:45:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $
 *
 * NOTES
 *	modeled after Margo Seltzer's hash implementation for unix.
@@ -273,11 +273,13 @@ extern void _hash_doinsert(Relation rel, IndexTuple itup);

 /* hashovfl.c */
 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
-extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf);
+extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
+					  BufferAccessStrategy bstrategy);
 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
 			 BlockNumber blkno);
 extern void _hash_squeezebucket(Relation rel,
-				Bucket bucket, BlockNumber bucket_blkno);
+				Bucket bucket, BlockNumber bucket_blkno,
+				BufferAccessStrategy bstrategy);

 /* hashpage.c */
 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
@@ -287,6 +289,9 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
 			int access, int flags);
 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno);
+extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+						   int access, int flags,
+						   BufferAccessStrategy bstrategy);
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_wrtbuf(Relation rel, Buffer buf);
......
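_hash_getbuf_with_strategy is the strategy-aware twin of _hash_getbuf, used by the vacuum path in hash.c. A sketch of what it plausibly looks like, assuming it differs from _hash_getbuf only in how the page is read (not a verbatim copy of hashpage.c):

Buffer
_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
                           int access, int flags,
                           BufferAccessStrategy bstrategy)
{
    Buffer      buf;

    if (blkno == P_NEW)
        elog(ERROR, "hash AM does not use P_NEW");

    /* the one difference from _hash_getbuf: read through the ring */
    buf = ReadBufferWithStrategy(rel, blkno, bstrategy);

    if (access != HASH_NOLOCK)
        LockBuffer(buf, access);

    /* same page sanity checks as _hash_getbuf */
    _hash_checkpage(rel, buf, flags);

    return buf;
}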
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -27,6 +27,7 @@ typedef struct HeapScanDescData
 	int			rs_nkeys;		/* number of scan keys */
 	ScanKey		rs_key;			/* array of scan key descriptors */
 	BlockNumber rs_nblocks;		/* number of blocks to scan */
+	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
 	bool		rs_pageatatime; /* verify visibility page-at-a-time? */

 	/* scan current state */
......
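rs_strategy gives each heap scan a ring handle of its own, so the decision to use BAS_BULKREAD can be made once at scan start. A sketch of the per-page fetch, assuming heapgetpage() ends up doing the equivalent of this hypothetical helper (a NULL rs_strategy, e.g. for a relation too small to spoil the cache, means a normal read):

/* Hypothetical helper: read a scan page through the scan's own ring */
static Buffer
example_scan_read(HeapScanDesc scan, BlockNumber blkno)
{
    return ReadBufferWithStrategy(scan->rs_rd, blkno, scan->rs_strategy);
}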
@@ -6,7 +6,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.77 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.78 2007/05/30 20:12:02 tgl Exp $
 */
 #ifndef XLOG_H
 #define XLOG_H
@@ -159,6 +159,7 @@ extern bool XLOG_DEBUG;
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
+extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
......
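XLogNeedsFlush lets the buffer manager ask, without side effects, whether evicting a given dirty page would force a WAL flush. Per the commit message, bulk-read rings want to refuse such victims rather than stall on a flush; a sketch of that decision, assuming it combines the two new calls as below (the helper name is invented; the real check lives in bufmgr.c):

/* Hypothetical helper: should we skip this dirty victim and pick another? */
static bool
example_reject_dirty_victim(BufferAccessStrategy strategy,
                            volatile BufferDesc *buf, XLogRecPtr page_lsn)
{
    return strategy != NULL &&
        XLogNeedsFlush(page_lsn) &&          /* eviction would flush WAL */
        StrategyRejectBuffer(strategy, buf); /* ring agrees to drop the slot */
}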
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.71 2007/05/17 15:28:29 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.72 2007/05/30 20:12:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -18,9 +18,11 @@
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
 #include "nodes/parsenodes.h"
+#include "storage/buf.h"
 #include "storage/lock.h"
 #include "utils/rel.h"

 /*----------
 * ANALYZE builds one of these structs for each attribute (column) that is
 * to be analyzed.  The struct and subsidiary data are in anl_context,
@@ -110,7 +112,8 @@ extern int vacuum_freeze_min_age;

 /* in commands/vacuum.c */
-extern void vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel);
+extern void vacuum(VacuumStmt *vacstmt, List *relids,
+	   BufferAccessStrategy bstrategy, bool isTopLevel);
 extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
 			 int *nindexes, Relation **Irel);
 extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
@@ -127,9 +130,11 @@ extern bool vac_is_partial_index(Relation indrel);
 extern void vacuum_delay_point(void);

 /* in commands/vacuumlazy.c */
-extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
+extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+				BufferAccessStrategy bstrategy);

 /* in commands/analyze.c */
-extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
+			BufferAccessStrategy bstrategy);

 #endif   /* VACUUM_H */
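vacuum(), lazy_vacuum_rel(), and analyze_rel() now thread one BufferAccessStrategy from the top-level command down to every page read, replacing the old backend-global hint. A sketch of the call pattern, assuming a caller may build one long-lived ring and reuse it across relations (vacstmt stands in for the parsed statement; presumably a NULL bstrategy makes vacuum() build its own):

/* one ring for the whole run, instead of one WAL flush per modified page */
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);

vacuum(vacstmt, NIL, bstrategy, true);  /* every relation shares the ring */

FreeAccessStrategy(bstrategy);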
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.21 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.22 2007/05/30 20:12:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -36,4 +36,11 @@ typedef int Buffer;
 */
 #define BufferIsLocal(buffer)	((buffer) < 0)

+/*
+ * Buffer access strategy objects.
+ *
+ * BufferAccessStrategyData is private to freelist.c
+ */
+typedef struct BufferAccessStrategyData *BufferAccessStrategy;
+
 #endif   /* BUF_H */
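buf.h deliberately exposes only the opaque handle, so callers can create, pass, and free strategies without seeing their layout. A guess at the private freelist.c structure, inferred from the ring behavior the commit message describes; the field names here are conjecture, not the actual declaration:

/* Conjectural layout of the object that freelist.c keeps private */
typedef struct BufferAccessStrategyData
{
    BufferAccessStrategyType btype;   /* strategy kind, from bufmgr.h */
    int         ring_size;            /* number of slots in the ring */
    int         current;              /* slot most recently handed out */
    bool        current_was_in_ring;  /* did the last request hit the ring? */
    Buffer      buffers[1];           /* VARIABLE SIZE: the ring itself */
} BufferAccessStrategyData;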
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.89 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.90 2007/05/30 20:12:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -167,9 +167,6 @@ extern DLLIMPORT BufferDesc *BufferDescriptors;

 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;

-/* in freelist.c */
-extern bool strategy_hint_vacuum;
-
 /* event counters in buf_init.c */
 extern long int ReadBufferCount;
 extern long int ReadLocalBufferCount;
@@ -184,8 +181,12 @@ extern long int LocalBufferFlushCount;
 */

 /* freelist.c */
-extern volatile BufferDesc *StrategyGetBuffer(void);
-extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
+extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
+				  bool *lock_held);
+extern void StrategyFreeBuffer(volatile BufferDesc *buf);
+extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
+					 volatile BufferDesc *buf);
 extern int	StrategySyncStart(void);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
......
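StrategyGetBuffer now receives the strategy and reports through *lock_held whether it already acquired the freelist lock, while StrategyRejectBuffer lets bufmgr bounce an unsuitable victim back to the strategy. Building on the conjectural struct above, a simplified sketch of how the ring lookup inside StrategyGetBuffer presumably works (hypothetical helper; locking follows bufmgr's usual header-spinlock pattern):

static volatile BufferDesc *
example_get_buffer_from_ring(BufferAccessStrategy strategy)
{
    volatile BufferDesc *buf;
    Buffer      bufnum;

    /* advance cyclically to the next ring slot */
    if (++strategy->current >= strategy->ring_size)
        strategy->current = 0;

    bufnum = strategy->buffers[strategy->current];
    if (bufnum == InvalidBuffer)
    {
        strategy->current_was_in_ring = false;
        return NULL;            /* empty slot: fall back to the clock sweep */
    }

    /* reuse the remembered buffer only if no one else values it */
    buf = &BufferDescriptors[bufnum - 1];
    LockBufHdr(buf);
    if (buf->refcount == 0 && buf->usage_count <= 1)
    {
        strategy->current_was_in_ring = true;
        return buf;             /* returned with header spinlock held */
    }
    UnlockBufHdr(buf);

    strategy->current_was_in_ring = false;
    return NULL;                /* popular page: leave it to the shared cache */
}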
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.104 2007/05/30 20:12:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -19,6 +19,14 @@
 typedef void *Block;

+/* Possible arguments for GetAccessStrategy() */
+typedef enum BufferAccessStrategyType
+{
+	BAS_NORMAL,					/* Normal random access */
+	BAS_BULKREAD,				/* Large read-only scan (hint bit updates are ok) */
+	BAS_VACUUM					/* VACUUM */
+} BufferAccessStrategyType;
+
 /* in globals.c ... this duplicates miscadmin.h */
 extern DLLIMPORT int NBuffers;
@@ -111,6 +119,8 @@ extern DLLIMPORT int32 *LocalRefCount;
 * prototypes for functions in bufmgr.c
 */
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
+						 BufferAccessStrategy strategy);
 extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
@@ -157,6 +167,7 @@ extern void BgBufferSync(void);
 extern void AtProcExit_LocalBuffers(void);

 /* in freelist.c */
-extern void StrategyHintVacuum(bool vacuum_active);
+extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
+extern void FreeAccessStrategy(BufferAccessStrategy strategy);

 #endif
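Taken together, the public API replaces the old global StrategyHintVacuum(true)/(false) toggle with an explicit object. A usage sketch for a large read-only scan, where rel and nblocks stand in for the caller's relation and its length:

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
BlockNumber blkno;

for (blkno = 0; blkno < nblocks; blkno++)
{
    Buffer      buf = ReadBufferWithStrategy(rel, blkno, strategy);

    /* inspect the page; repeated reads recycle the same small ring */
    ReleaseBuffer(buf);
}

FreeAccessStrategy(strategy);

Because the strategy is a parameter rather than backend-global state, nested or interleaved operations can no longer clobber each other's buffer-usage hints.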