Commit fafa374f authored by Simon Riggs

Introduce WAL records to log reuse of btree pages, allowing conflict

resolution during Hot Standby. Page reuse interlock requested by Tom.
Analysis and patch by me.
parent 4688869f
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf) ...@@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf)
errhint("Please REINDEX it."))); errhint("Please REINDEX it.")));
} }
/*
 * _bt_log_reuse_page() -- Log the reuse of a page from the FSM.
 *
 * Emits an XLOG_BTREE_REUSE_PAGE record carrying the relation's file node,
 * the block number being reused, and latestRemovedXid.  The record's only
 * payload is that xl_btree_reuse_page struct; no buffer is attached to it.
 *
 * rel              - index relation that owns the page
 * blkno            - block number of the page about to be reused
 * latestRemovedXid - xid stored in the record for conflict resolution
 *
 * Nothing is logged when rel->rd_istemp is set.
 */
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
	XLogRecData rdata[1];
	xl_btree_reuse_page xlrec_reuse;

	if (rel->rd_istemp)
		return;

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * We don't do MarkBufferDirty here because we're about to initialise
	 * the page, and nobody else can see it yet.
	 */

	/* XLOG stuff: a single-element rdata chain holding just the record */
	xlrec_reuse.node = rel->rd_node;
	xlrec_reuse.block = blkno;
	xlrec_reuse.latestRemovedXid = latestRemovedXid;

	rdata[0].data = (char *) &xlrec_reuse;
	rdata[0].len = SizeOfBtreeReusePage;
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = NULL;

	/*
	 * The record pointer returned by XLogInsert is deliberately ignored:
	 * we don't do PageSetLSN or PageSetTLI here because we're about to
	 * initialise the page, so there is nothing to stamp it on.
	 */
	(void) XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);

	END_CRIT_SECTION();
}
/* /*
* _bt_getbuf() -- Get a buffer by block number for read or write. * _bt_getbuf() -- Get a buffer by block number for read or write.
* *
...@@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) ...@@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
{ {
page = BufferGetPage(buf); page = BufferGetPage(buf);
if (_bt_page_recyclable(page)) if (_bt_page_recyclable(page))
{ {
/*
* If we are generating WAL for Hot Standby then create
* a WAL record that will allow us to conflict with
* queries running on standby.
*/
if (XLogStandbyInfoActive())
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
_bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
}
/* Okay to use page. Re-initialize and return it */ /* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf)); _bt_pageinit(page, BufferGetPageSize(buf));
return buf; return buf;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* if (InHotStandby)
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
* just once when that arrives. After that any we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
if (InHotStandby && info == XLOG_BTREE_DELETE)
{ {
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record); switch (info)
{
case XLOG_BTREE_DELETE:
/*
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
* just once when that arrives. After that we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
{
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
/* /*
* XXX Currently we put everybody on death row, because * XXX Currently we put everybody on death row, because
* currently _bt_delitems() supplies InvalidTransactionId. * currently _bt_delitems() supplies InvalidTransactionId.
* This can be fairly painful, so providing a better value * This can be fairly painful, so providing a better value
* here is worth some thought and possibly some effort to * here is worth some thought and possibly some effort to
* improve. * improve.
*/ */
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
break;
case XLOG_BTREE_REUSE_PAGE:
/*
* Btree reuse page records exist to provide a conflict point when we
* reuse pages in the index via the FSM. That's all they do though.
*/
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
return;
default:
break;
}
} }
/* /*
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -221,6 +221,7 @@ typedef struct BTMetaPageData ...@@ -221,6 +221,7 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes #define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */ * parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */ #define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/* /*
* All that we need to find changed index tuple * All that we need to find changed index tuple
...@@ -321,6 +322,18 @@ typedef struct xl_btree_delete ...@@ -321,6 +322,18 @@ typedef struct xl_btree_delete
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId)) #define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
/*
 * This is what we need to know about page reuse within btree.
 *
 * One of these is the entire payload of an XLOG_BTREE_REUSE_PAGE record.
 * NOTE(review): the field order and sizes define the record's layout, so
 * presumably they must not be rearranged without a WAL format change --
 * confirm before touching.
 */
typedef struct xl_btree_reuse_page
{
/* file node of the index relation (filled from rel->rd_node) */
RelFileNode node;
/* block number of the page being reused */
BlockNumber block;
/* xid handed to ResolveRecoveryConflictWithSnapshot() during redo */
TransactionId latestRemovedXid;
} xl_btree_reuse_page;
/* Record length used when building the XLogRecData for this record */
#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
/* /*
* This is what we need to know about vacuum of individual leaf index tuples. * This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a * The WAL record can represent deletion of any number of index tuples on a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment