Commit 8c3cc86e authored by Tom Lane

During WAL recovery, when reading a page that we intend to overwrite completely
from the WAL data, don't bother to physically read it; just have bufmgr.c
return a zeroed-out buffer instead.  This speeds recovery significantly,
and also avoids unnecessary failures when a page-to-be-overwritten has corrupt
page headers on disk.  This replaces a former kluge that accomplished the
latter by pretending zero_damaged_pages was always ON during WAL recovery,
which was OK when the kluge was put in, but is unsafe when restoring from WAL
that was written with full_page_writes off.

Heikki Linnakangas
parent 8ec94385
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.49 2007/01/05 22:19:24 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.50 2007/05/02 23:18:03 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -206,7 +206,9 @@ XLogCheckInvalidPages(void) ...@@ -206,7 +206,9 @@ XLogCheckInvalidPages(void)
* If "init" is true then the caller intends to rewrite the page fully * If "init" is true then the caller intends to rewrite the page fully
* using the info in the XLOG record. In this case we will extend the * using the info in the XLOG record. In this case we will extend the
* relation if needed to make the page exist, and we will not complain about * relation if needed to make the page exist, and we will not complain about
* the page being "new" (all zeroes). * the page being "new" (all zeroes); in fact, we usually will supply a
* zeroed buffer without reading the page at all, so as to avoid unnecessary
* failure if the page is present on disk but has corrupt headers.
* *
* If "init" is false then the caller needs the page to be valid already. * If "init" is false then the caller needs the page to be valid already.
* If the page doesn't exist or contains zeroes, we return InvalidBuffer. * If the page doesn't exist or contains zeroes, we return InvalidBuffer.
...@@ -226,6 +228,9 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init) ...@@ -226,6 +228,9 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
if (blkno < lastblock) if (blkno < lastblock)
{ {
/* page exists in file */ /* page exists in file */
if (init)
buffer = ReadOrZeroBuffer(reln, blkno);
else
buffer = ReadBuffer(reln, blkno); buffer = ReadBuffer(reln, blkno);
} }
else else
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,6 +17,12 @@ ...@@ -17,6 +17,12 @@
* and pin it so that no one can destroy it while this process * and pin it so that no one can destroy it while this process
* is using it. * is using it.
* *
* ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
* cache we don't read it, but just return a zeroed-out buffer. Useful
* when the caller intends to fill the page from scratch, since this
* saves I/O and avoids unnecessary failure if the page-on-disk has
* corrupt page headers.
*
* ReleaseBuffer() -- unpin a buffer * ReleaseBuffer() -- unpin a buffer
* *
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty". * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
...@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL; ...@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
extern PgStat_MsgBgWriter BgWriterStats; extern PgStat_MsgBgWriter BgWriterStats;
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
bool zeroPage);
static bool PinBuffer(volatile BufferDesc *buf); static bool PinBuffer(volatile BufferDesc *buf);
static void PinBuffer_Locked(volatile BufferDesc *buf); static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, static void UnpinBuffer(volatile BufferDesc *buf,
...@@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg); ...@@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
*/ */
Buffer Buffer
ReadBuffer(Relation reln, BlockNumber blockNum) ReadBuffer(Relation reln, BlockNumber blockNum)
{
return ReadBuffer_common(reln, blockNum, false);
}
/*
 * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer
 *		cache already, it's filled with zeros instead of reading it from
 *		disk.  The caller is expected to overwrite the whole buffer,
 *		so that the current page contents are not interesting.
 *
 * reln is the relation and blockNum the block within it, exactly as for
 * ReadBuffer; the result is the Buffer returned by ReadBuffer_common.
 *
 * Skipping the physical read saves I/O and avoids an unnecessary failure
 * when the on-disk copy of the page has corrupt page headers.  WAL replay
 * uses this via XLogReadBuffer() when its "init" argument is true.
 */
Buffer
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
{
/* zeroPage = true: on a cache miss, zero the buffer rather than smgrread it */
return ReadBuffer_common(reln, blockNum, true);
}
/*
* ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
*/
static Buffer
ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
{ {
volatile BufferDesc *bufHdr; volatile BufferDesc *bufHdr;
Block bufBlock; Block bufBlock;
...@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum) ...@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
} }
else else
{ {
/*
* Read in the page, unless the caller intends to overwrite it
* and just wants us to allocate a buffer.
*/
if (zeroPage)
MemSet((char *) bufBlock, 0, BLCKSZ);
else
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock); smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
/* check for garbage data */ /* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock)) if (!PageHeaderIsValid((PageHeader) bufBlock))
{ {
/* if (zero_damaged_pages)
* During WAL recovery, the first access to any data page should
* overwrite the whole page from the WAL; so a clobbered page
* header is not reason to fail. Hence, when InRecovery we may
* always act as though zero_damaged_pages is ON.
*/
if (zero_damaged_pages || InRecovery)
{ {
ereport(WARNING, ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.102 2007/01/05 22:19:57 momjian Exp $ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -111,6 +111,7 @@ extern DLLIMPORT int32 *LocalRefCount; ...@@ -111,6 +111,7 @@ extern DLLIMPORT int32 *LocalRefCount;
* prototypes for functions in bufmgr.c * prototypes for functions in bufmgr.c
*/ */
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
extern void ReleaseBuffer(Buffer buffer); extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer); extern void MarkBufferDirty(Buffer buffer);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment