Commit fafa374f authored by Simon Riggs's avatar Simon Riggs

Introduce WAL records to log reuse of btree pages, allowing conflict

resolution during Hot Standby. Page reuse interlock requested by Tom.
Analysis and patch by me.
parent 4688869f
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf) ...@@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf)
errhint("Please REINDEX it."))); errhint("Please REINDEX it.")));
} }
/*
 * _bt_log_reuse_page() -- Log the reuse of a page from the FSM.
 *
 * Emits an XLOG_BTREE_REUSE_PAGE record containing the index's RelFileNode,
 * the block number being reused, and latestRemovedXid.  The record is not
 * associated with any buffer (rdata[0].buffer is InvalidBuffer), so it
 * describes the reuse event only and carries no page contents.
 *
 * rel				index whose page is being reused
 * blkno			block number of the page being reused
 * latestRemovedXid	xid stored in the record for the reader of the WAL
 *
 * Nothing is logged when rel->rd_istemp is set.
 */
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
	xl_btree_reuse_page xlrec_reuse;
	XLogRecData rdata[1];

	if (rel->rd_istemp)
		return;

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * We don't do MarkBufferDirty here because we're about to initialise
	 * the page, and nobody else can see it yet.
	 */

	/* XLOG stuff */
	xlrec_reuse.node = rel->rd_node;
	xlrec_reuse.block = blkno;
	xlrec_reuse.latestRemovedXid = latestRemovedXid;

	rdata[0].data = (char *) &xlrec_reuse;
	rdata[0].len = SizeOfBtreeReusePage;
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = NULL;

	/*
	 * The returned record pointer is deliberately ignored: we don't do
	 * PageSetLSN or PageSetTLI here because we're about to initialise the
	 * page, so there is nothing to stamp it onto.
	 */
	(void) XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);

	END_CRIT_SECTION();
}
/* /*
* _bt_getbuf() -- Get a buffer by block number for read or write. * _bt_getbuf() -- Get a buffer by block number for read or write.
* *
...@@ -511,6 +553,18 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) ...@@ -511,6 +553,18 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
page = BufferGetPage(buf); page = BufferGetPage(buf);
if (_bt_page_recyclable(page)) if (_bt_page_recyclable(page))
{ {
/*
* If we are generating WAL for Hot Standby then create
* a WAL record that will allow us to conflict with
* queries running on standby.
*/
if (XLogStandbyInfoActive())
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
_bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
}
/* Okay to use page. Re-initialize and return it */ /* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf)); _bt_pageinit(page, BufferGetPageSize(buf));
return buf; return buf;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -814,6 +814,11 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -814,6 +814,11 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = record->xl_info & ~XLR_INFO_MASK;
if (InHotStandby)
{
switch (info)
{
case XLOG_BTREE_DELETE:
/* /*
* Btree delete records can conflict with standby queries. You might * Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled * think that vacuum records would conflict as well, but we've handled
...@@ -822,7 +827,6 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -822,7 +827,6 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
* just once when that arrives. After that we know that no conflicts * just once when that arrives. After that we know that no conflicts
* exist from individual btree vacuum records on that index. * exist from individual btree vacuum records on that index.
*/ */
if (InHotStandby && info == XLOG_BTREE_DELETE)
{ {
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record); xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
...@@ -835,6 +839,24 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -835,6 +839,24 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
*/ */
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
} }
break;
case XLOG_BTREE_REUSE_PAGE:
/*
* Btree reuse page records exist to provide a conflict point when we
* reuse pages in the index via the FSM. That's all they do, though.
*/
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
return;
default:
break;
}
}
/* /*
* Vacuum needs to pin and take cleanup lock on every leaf page, * Vacuum needs to pin and take cleanup lock on every leaf page,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -221,6 +221,7 @@ typedef struct BTMetaPageData ...@@ -221,6 +221,7 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes #define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */ * parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */ #define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/* /*
* All that we need to find changed index tuple * All that we need to find changed index tuple
...@@ -321,6 +322,18 @@ typedef struct xl_btree_delete ...@@ -321,6 +322,18 @@ typedef struct xl_btree_delete
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId)) #define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
/*
 * This is what we need to know about page reuse within btree.
 *
 * This struct is the entire payload of an XLOG_BTREE_REUSE_PAGE record:
 * _bt_log_reuse_page() fills it in and passes it as the record's only data
 * chunk, and btree_redo() reads it back when running in Hot Standby.
 */
typedef struct xl_btree_reuse_page
{
/* relation the reused page belongs to (copied from rel->rd_node) */
RelFileNode node;
/* block number of the page being reused */
BlockNumber block;
/*
 * xid handed to ResolveRecoveryConflictWithSnapshot() during redo; the
 * caller in _bt_getbuf() supplies the page's btpo.xact value here
 */
TransactionId latestRemovedXid;
} xl_btree_reuse_page;
/* Length of the record data; the whole struct is logged as-is. */
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
/* /*
* This is what we need to know about vacuum of individual leaf index tuples. * This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a * The WAL record can represent deletion of any number of index tuples on a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment