Commit f8f42279 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Refactor per-page logic common to all redo routines to a new function.

Every redo routine uses the same idiom to determine what to do to a page:
check if there's a backup block for it, and if not, read the buffer if the
block exists, and check its LSN. Refactor that into a common function,
XLogReadBufferForRedo, making all the redo routines shorter and more
readable.

This has no user-visible effect, and makes no changes to the WAL format.

Reviewed by Andres Freund, Alvaro Herrera, Michael Paquier.
parent 26f8b99b
...@@ -20,25 +20,25 @@ ...@@ -20,25 +20,25 @@
static MemoryContext opCtx; /* working memory for operations */ static MemoryContext opCtx; /* working memory for operations */
static void static void
ginRedoClearIncompleteSplit(XLogRecPtr lsn, RelFileNode node, BlockNumber blkno) ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
int block_index,
RelFileNode node, BlockNumber blkno)
{ {
Buffer buffer; Buffer buffer;
Page page; Page page;
buffer = XLogReadBuffer(node, blkno, false); if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
if (!BufferIsValid(buffer)) == BLK_NEEDS_REDO)
return; /* page was deleted, nothing to do */
page = (Page) BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{ {
page = (Page) BufferGetPage(buffer);
GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -332,7 +332,6 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) ...@@ -332,7 +332,6 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
{ {
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record); ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page;
char *payload; char *payload;
BlockNumber leftChildBlkno = InvalidBlockNumber; BlockNumber leftChildBlkno = InvalidBlockNumber;
BlockNumber rightChildBlkno = InvalidBlockNumber; BlockNumber rightChildBlkno = InvalidBlockNumber;
...@@ -351,26 +350,14 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) ...@@ -351,26 +350,14 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload); rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
payload += sizeof(BlockIdData); payload += sizeof(BlockIdData);
if (record->xl_info & XLR_BKP_BLOCK(0)) ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
(void) RestoreBackupBlock(lsn, record, 0, false, false);
else
ginRedoClearIncompleteSplit(lsn, data->node, leftChildBlkno);
} }
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
if (record->xl_info & XLR_BKP_BLOCK(isLeaf ? 0 : 1)) data->blkno, &buffer) == BLK_NEEDS_REDO)
{ {
(void) RestoreBackupBlock(lsn, record, isLeaf ? 0 : 1, false, false); Page page = BufferGetPage(buffer);
return;
}
buffer = XLogReadBuffer(data->node, data->blkno, false);
if (!BufferIsValid(buffer))
return; /* page was deleted, nothing to do */
page = (Page) BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
/* How to insert the payload is tree-type specific */ /* How to insert the payload is tree-type specific */
if (data->flags & GIN_INSERT_ISDATA) if (data->flags & GIN_INSERT_ISDATA)
{ {
...@@ -386,8 +373,8 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) ...@@ -386,8 +373,8 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -476,12 +463,7 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -476,12 +463,7 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
* split * split
*/ */
if (!isLeaf) if (!isLeaf)
{ ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
if (record->xl_info & XLR_BKP_BLOCK(0))
(void) RestoreBackupBlock(lsn, record, 0, false, false);
else
ginRedoClearIncompleteSplit(lsn, data->node, data->leftChildBlkno);
}
flags = 0; flags = 0;
if (isLeaf) if (isLeaf)
...@@ -605,31 +587,21 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record) ...@@ -605,31 +587,21 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
{ {
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record); ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page;
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
if (record->xl_info & XLR_BKP_BLOCK(0)) &buffer) == BLK_NEEDS_REDO)
{ {
(void) RestoreBackupBlock(lsn, record, 0, false, false); Page page = BufferGetPage(buffer);
return;
}
buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
Assert(GinPageIsLeaf(page)); Assert(GinPageIsLeaf(page));
Assert(GinPageIsData(page)); Assert(GinPageIsData(page));
if (lsn > PageGetLSN(page))
{
ginRedoRecompress(page, &xlrec->data); ginRedoRecompress(page, &xlrec->data);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -641,62 +613,42 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) ...@@ -641,62 +613,42 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
Buffer lbuffer; Buffer lbuffer;
Page page; Page page;
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
dbuffer = RestoreBackupBlock(lsn, record, 0, false, true); == BLK_NEEDS_REDO)
else
{ {
dbuffer = XLogReadBuffer(data->node, data->blkno, false); page = BufferGetPage(dbuffer);
if (BufferIsValid(dbuffer))
{ Assert(GinPageIsData(page));
page = BufferGetPage(dbuffer); GinPageGetOpaque(page)->flags = GIN_DELETED;
if (lsn > PageGetLSN(page)) PageSetLSN(page, lsn);
{ MarkBufferDirty(dbuffer);
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED;
PageSetLSN(page, lsn);
MarkBufferDirty(dbuffer);
}
}
} }
if (record->xl_info & XLR_BKP_BLOCK(1)) if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
pbuffer = RestoreBackupBlock(lsn, record, 1, false, true); &pbuffer) == BLK_NEEDS_REDO)
else
{ {
pbuffer = XLogReadBuffer(data->node, data->parentBlkno, false); page = BufferGetPage(pbuffer);
if (BufferIsValid(pbuffer))
{ Assert(GinPageIsData(page));
page = BufferGetPage(pbuffer); Assert(!GinPageIsLeaf(page));
if (lsn > PageGetLSN(page)) GinPageDeletePostingItem(page, data->parentOffset);
{ PageSetLSN(page, lsn);
Assert(GinPageIsData(page)); MarkBufferDirty(pbuffer);
Assert(!GinPageIsLeaf(page));
GinPageDeletePostingItem(page, data->parentOffset);
PageSetLSN(page, lsn);
MarkBufferDirty(pbuffer);
}
}
} }
if (record->xl_info & XLR_BKP_BLOCK(2)) if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
(void) RestoreBackupBlock(lsn, record, 2, false, false); &lbuffer) == BLK_NEEDS_REDO)
else if (data->leftBlkno != InvalidBlockNumber)
{ {
lbuffer = XLogReadBuffer(data->node, data->leftBlkno, false); page = BufferGetPage(lbuffer);
if (BufferIsValid(lbuffer))
{ Assert(GinPageIsData(page));
page = BufferGetPage(lbuffer); GinPageGetOpaque(page)->rightlink = data->rightLink;
if (lsn > PageGetLSN(page)) PageSetLSN(page, lsn);
{ MarkBufferDirty(lbuffer);
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->rightlink = data->rightLink;
PageSetLSN(page, lsn);
MarkBufferDirty(lbuffer);
}
UnlockReleaseBuffer(lbuffer);
}
} }
if (BufferIsValid(lbuffer))
UnlockReleaseBuffer(lbuffer);
if (BufferIsValid(pbuffer)) if (BufferIsValid(pbuffer))
UnlockReleaseBuffer(pbuffer); UnlockReleaseBuffer(pbuffer);
if (BufferIsValid(dbuffer)) if (BufferIsValid(dbuffer))
...@@ -730,74 +682,64 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record) ...@@ -730,74 +682,64 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
/* /*
* insert into tail page * insert into tail page
*/ */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, data->node,
(void) RestoreBackupBlock(lsn, record, 0, false, false); data->metadata.tail, &buffer)
else == BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(data->node, data->metadata.tail, false); Page page = BufferGetPage(buffer);
if (BufferIsValid(buffer)) OffsetNumber off;
{ int i;
Page page = BufferGetPage(buffer); Size tupsize;
IndexTuple tuples;
if (lsn > PageGetLSN(page)) tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
{
OffsetNumber l,
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
int i,
tupsize;
IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
for (i = 0; i < data->ntuples; i++) if (PageIsEmpty(page))
{ off = FirstOffsetNumber;
tupsize = IndexTupleSize(tuples); else
off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); for (i = 0; i < data->ntuples; i++)
{
tupsize = IndexTupleSize(tuples);
if (l == InvalidOffsetNumber) if (PageAddItem(page, (Item) tuples, tupsize, off,
elog(ERROR, "failed to add item to index page"); false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page");
tuples = (IndexTuple) (((char *) tuples) + tupsize); tuples = (IndexTuple) (((char *) tuples) + tupsize);
off++; off++;
} }
/* /*
* Increase counter of heap tuples * Increase counter of heap tuples
*/ */
GinPageGetOpaque(page)->maxoff++; GinPageGetOpaque(page)->maxoff++;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
else if (data->prevTail != InvalidBlockNumber) else if (data->prevTail != InvalidBlockNumber)
{ {
/* /*
* New tail * New tail
*/ */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(data->node, data->prevTail, false); Page page = BufferGetPage(buffer);
if (BufferIsValid(buffer))
{
Page page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page)) GinPageGetOpaque(page)->rightlink = data->newRightlink;
{
GinPageGetOpaque(page)->rightlink = data->newRightlink;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
UnlockReleaseBuffer(metabuffer); UnlockReleaseBuffer(metabuffer);
......
...@@ -48,31 +48,26 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index, ...@@ -48,31 +48,26 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
{ {
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action;
if (record->xl_info & XLR_BKP_BLOCK(block_index))
buffer = RestoreBackupBlock(lsn, record, block_index, false, true);
else
{
buffer = XLogReadBuffer(node, childblkno, false);
if (!BufferIsValid(buffer))
return; /* page was deleted, nothing to do */
}
page = (Page) BufferGetPage(buffer);
/* /*
* Note that we still update the page even if page LSN is equal to the LSN * Note that we still update the page even if it was restored from a full
* of this record, because the updated NSN is not included in the full * page image, because the updated NSN is not included in the image.
* page image.
*/ */
if (lsn >= PageGetLSN(page)) action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
&buffer);
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
{ {
page = BufferGetPage(buffer);
GistPageSetNSN(page, lsn); GistPageSetNSN(page, lsn);
GistClearFollowRight(page); GistClearFollowRight(page);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
UnlockReleaseBuffer(buffer); if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
/* /*
...@@ -87,104 +82,86 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record) ...@@ -87,104 +82,86 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
Page page; Page page;
char *data; char *data;
/* if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
* We need to acquire and hold lock on target page while updating the left &buffer) == BLK_NEEDS_REDO)
* child page. If we have a full-page image of target page, getting the
* lock is a side-effect of restoring that image. Note that even if the
* target page no longer exists, we'll still attempt to replay the change
* on the child page.
*/
if (record->xl_info & XLR_BKP_BLOCK(0))
buffer = RestoreBackupBlock(lsn, record, 0, false, true);
else
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
/* Fix follow-right data on left child page */
if (BlockNumberIsValid(xldata->leftchild))
gistRedoClearFollowRight(lsn, record, 1,
xldata->node, xldata->leftchild);
/* Done if target page no longer exists */
if (!BufferIsValid(buffer))
return;
/* nothing more to do if page was backed up (and no info to do it with) */
if (record->xl_info & XLR_BKP_BLOCK(0))
{ {
UnlockReleaseBuffer(buffer); page = (Page) BufferGetPage(buffer);
return;
}
page = (Page) BufferGetPage(buffer);
/* nothing more to do if change already applied */
if (lsn <= PageGetLSN(page))
{
UnlockReleaseBuffer(buffer);
return;
}
data = begin + sizeof(gistxlogPageUpdate);
/* Delete old tuples */ data = begin + sizeof(gistxlogPageUpdate);
if (xldata->ntodelete > 0)
{
int i;
OffsetNumber *todelete = (OffsetNumber *) data;
data += sizeof(OffsetNumber) * xldata->ntodelete; /* Delete old tuples */
if (xldata->ntodelete > 0)
{
int i;
OffsetNumber *todelete = (OffsetNumber *) data;
for (i = 0; i < xldata->ntodelete; i++) data += sizeof(OffsetNumber) * xldata->ntodelete;
PageIndexTupleDelete(page, todelete[i]);
if (GistPageIsLeaf(page))
GistMarkTuplesDeleted(page);
}
/* add tuples */ for (i = 0; i < xldata->ntodelete; i++)
if (data - begin < record->xl_len) PageIndexTupleDelete(page, todelete[i]);
{ if (GistPageIsLeaf(page))
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : GistMarkTuplesDeleted(page);
OffsetNumberNext(PageGetMaxOffsetNumber(page)); }
while (data - begin < record->xl_len) /* add tuples */
if (data - begin < record->xl_len)
{ {
IndexTuple itup = (IndexTuple) data; OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
Size sz = IndexTupleSize(itup); OffsetNumberNext(PageGetMaxOffsetNumber(page));
OffsetNumber l;
while (data - begin < record->xl_len)
data += sz; {
IndexTuple itup = (IndexTuple) data;
Size sz = IndexTupleSize(itup);
OffsetNumber l;
data += sz;
l = PageAddItem(page, (Item) itup, sz, off, false, false);
if (l == InvalidOffsetNumber)
elog(ERROR, "failed to add item to GiST index page, size %d bytes",
(int) sz);
off++;
}
}
else
{
/*
* special case: leafpage, nothing to insert, nothing to delete,
* then vacuum marks page
*/
if (GistPageIsLeaf(page) && xldata->ntodelete == 0)
GistClearTuplesDeleted(page);
}
l = PageAddItem(page, (Item) itup, sz, off, false, false); if (!GistPageIsLeaf(page) &&
if (l == InvalidOffsetNumber) PageGetMaxOffsetNumber(page) == InvalidOffsetNumber &&
elog(ERROR, "failed to add item to GiST index page, size %d bytes", xldata->blkno == GIST_ROOT_BLKNO)
(int) sz); {
off++; /*
* all links on non-leaf root page was deleted by vacuum full, so
* root page becomes a leaf
*/
GistPageSetLeaf(page);
} }
}
else
{
/*
* special case: leafpage, nothing to insert, nothing to delete, then
* vacuum marks page
*/
if (GistPageIsLeaf(page) && xldata->ntodelete == 0)
GistClearTuplesDeleted(page);
}
if (!GistPageIsLeaf(page) && PageSetLSN(page, lsn);
PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && MarkBufferDirty(buffer);
xldata->blkno == GIST_ROOT_BLKNO)
{
/*
* all links on non-leaf root page was deleted by vacuum full, so root
* page becomes a leaf
*/
GistPageSetLeaf(page);
} }
PageSetLSN(page, lsn); /*
MarkBufferDirty(buffer); * Fix follow-right data on left child page
UnlockReleaseBuffer(buffer); *
* This must be done while still holding the lock on the target page. Note
* that even if the target page no longer exists, we still attempt to
* replay the change on the child page.
*/
if (BlockNumberIsValid(xldata->leftchild))
gistRedoClearFollowRight(lsn, record, 1,
xldata->node, xldata->leftchild);
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
......
...@@ -7134,15 +7134,13 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) ...@@ -7134,15 +7134,13 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
{ {
xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record); xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
Buffer buffer; Buffer buffer;
Page page; Size freespace = 0;
OffsetNumber *end; RelFileNode rnode;
OffsetNumber *redirected; BlockNumber blkno;
OffsetNumber *nowdead; XLogRedoAction action;
OffsetNumber *nowunused;
int nredirected; rnode = xlrec->node;
int ndead; blkno = xlrec->block;
int nunused;
Size freespace;
/* /*
* We're about to remove tuples. In Hot Standby mode, ensure that there's * We're about to remove tuples. In Hot Standby mode, ensure that there's
...@@ -7153,65 +7151,63 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) ...@@ -7153,65 +7151,63 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
* latestRemovedXid is invalid, skip conflict processing. * latestRemovedXid is invalid, skip conflict processing.
*/ */
if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid)) if (InHotStandby && TransactionIdIsValid(xlrec->latestRemovedXid))
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, rnode);
xlrec->node);
/* /*
* If we have a full-page image, restore it (using a cleanup lock) and * If we have a full-page image, restore it (using a cleanup lock) and
* we're done. * we're done.
*/ */
if (record->xl_info & XLR_BKP_BLOCK(0)) action = XLogReadBufferForRedoExtended(lsn, record, 0,
{ rnode, MAIN_FORKNUM, blkno,
(void) RestoreBackupBlock(lsn, record, 0, true, false); RBM_NORMAL, true, &buffer);
return; if (action == BLK_NEEDS_REDO)
} {
Page page = (Page) BufferGetPage(buffer);
OffsetNumber *end;
OffsetNumber *redirected;
OffsetNumber *nowdead;
OffsetNumber *nowunused;
int nredirected;
int ndead;
int nunused;
nredirected = xlrec->nredirected;
ndead = xlrec->ndead;
end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
nowdead = redirected + (nredirected * 2);
nowunused = nowdead + ndead;
nunused = (end - nowunused);
Assert(nunused >= 0);
/* Update all item pointers per the record, and repair fragmentation */
heap_page_prune_execute(buffer,
redirected, nredirected,
nowdead, ndead,
nowunused, nunused);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL); /*
if (!BufferIsValid(buffer)) * Note: we don't worry about updating the page's prunability hints.
return; * At worst this will cause an extra prune cycle to occur soon.
LockBufferForCleanup(buffer); */
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) PageSetLSN(page, lsn);
{ MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
return;
} }
if (BufferIsValid(buffer))
nredirected = xlrec->nredirected; UnlockReleaseBuffer(buffer);
ndead = xlrec->ndead;
end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
nowdead = redirected + (nredirected * 2);
nowunused = nowdead + ndead;
nunused = (end - nowunused);
Assert(nunused >= 0);
/* Update all item pointers per the record, and repair fragmentation */
heap_page_prune_execute(buffer,
redirected, nredirected,
nowdead, ndead,
nowunused, nunused);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
/*
* Note: we don't worry about updating the page's prunability hints. At
* worst this will cause an extra prune cycle to occur soon.
*/
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* /*
* Update the FSM as well. * Update the FSM as well.
* *
* XXX: We don't get here if the page was restored from full page image. * XXX: Don't do this if the page was restored from full page image. We
* We don't bother to update the FSM in that case, it doesn't need to be * don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway. * totally accurate anyway.
*/ */
XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace); if (action == BLK_NEEDS_REDO)
XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
} }
/* /*
...@@ -7226,6 +7222,14 @@ static void ...@@ -7226,6 +7222,14 @@ static void
heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
{ {
xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record); xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
Buffer buffer;
Page page;
RelFileNode rnode;
BlockNumber blkno;
XLogRedoAction action;
rnode = xlrec->node;
blkno = xlrec->block;
/* /*
* If there are any Hot Standby transactions running that have an xmin * If there are any Hot Standby transactions running that have an xmin
...@@ -7237,60 +7241,43 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) ...@@ -7237,60 +7241,43 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* rather than killing the transaction outright. * rather than killing the transaction outright.
*/ */
if (InHotStandby) if (InHotStandby)
ResolveRecoveryConflictWithSnapshot(xlrec->cutoff_xid, xlrec->node); ResolveRecoveryConflictWithSnapshot(xlrec->cutoff_xid, rnode);
/* /*
* If heap block was backed up, restore it. This can only happen with * Read the heap page, if it still exists. If the heap file has dropped or
* checksums enabled. * truncated later in recovery, we don't need to update the page, but we'd
* better still update the visibility map.
*/ */
if (record->xl_info & XLR_BKP_BLOCK(1)) action = XLogReadBufferForRedo(lsn, record, 1, rnode, blkno, &buffer);
if (action == BLK_NEEDS_REDO)
{ {
Assert(DataChecksumsEnabled()); /*
(void) RestoreBackupBlock(lsn, record, 1, false, false); * We don't bump the LSN of the heap page when setting the visibility
* map bit (unless checksums are enabled, in which case we must),
* because that would generate an unworkable volume of full-page
* writes. This exposes us to torn page hazards, but since we're not
* inspecting the existing page contents in any way, we don't care.
*
* However, all operations that clear the visibility map bit *do* bump
* the LSN, and those operations will only be replayed if the XLOG LSN
* follows the page LSN. Thus, if the page LSN has advanced past our
* XLOG record's LSN, we mustn't mark the page all-visible, because
* the subsequent update won't be replayed to clear the flag.
*/
page = BufferGetPage(buffer);
PageSetAllVisible(page);
MarkBufferDirty(buffer);
} }
else else if (action == BLK_RESTORED)
{ {
Buffer buffer;
Page page;
/* /*
* Read the heap page, if it still exists. If the heap file has been * If heap block was backed up, restore it. This can only happen with
* dropped or truncated later in recovery, we don't need to update the * checksums enabled.
* page, but we'd better still update the visibility map.
*/ */
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, Assert(DataChecksumsEnabled());
xlrec->block, RBM_NORMAL);
if (BufferIsValid(buffer))
{
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
/*
* We don't bump the LSN of the heap page when setting the
* visibility map bit (unless checksums are enabled, in which case
* we must), because that would generate an unworkable volume of
* full-page writes. This exposes us to torn page hazards, but
* since we're not inspecting the existing page contents in any
* way, we don't care.
*
* However, all operations that clear the visibility map bit *do*
* bump the LSN, and those operations will only be replayed if the
* XLOG LSN follows the page LSN. Thus, if the page LSN has
* advanced past our XLOG record's LSN, we mustn't mark the page
* all-visible, because the subsequent update won't be replayed to
* clear the flag.
*/
if (lsn > PageGetLSN(page))
{
PageSetAllVisible(page);
MarkBufferDirty(buffer);
}
/* Done with heap page. */
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* /*
* Even if we skipped the heap page update due to the LSN interlock, it's * Even if we skipped the heap page update due to the LSN interlock, it's
...@@ -7305,8 +7292,8 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) ...@@ -7305,8 +7292,8 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
Relation reln; Relation reln;
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
reln = CreateFakeRelcacheEntry(xlrec->node); reln = CreateFakeRelcacheEntry(rnode);
visibilitymap_pin(reln, xlrec->block, &vmbuffer); visibilitymap_pin(reln, blkno, &vmbuffer);
/* /*
* Don't set the bit if replay has already passed this point. * Don't set the bit if replay has already passed this point.
...@@ -7320,7 +7307,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record) ...@@ -7320,7 +7307,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* real harm is done; and the next VACUUM will fix it. * real harm is done; and the next VACUUM will fix it.
*/ */
if (lsn > PageGetLSN(BufferGetPage(vmbuffer))) if (lsn > PageGetLSN(BufferGetPage(vmbuffer)))
visibilitymap_set(reln, xlrec->block, InvalidBuffer, lsn, vmbuffer, visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
xlrec->cutoff_xid); xlrec->cutoff_xid);
ReleaseBuffer(vmbuffer); ReleaseBuffer(vmbuffer);
...@@ -7347,42 +7334,30 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record) ...@@ -7347,42 +7334,30 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
if (InHotStandby) if (InHotStandby)
ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node); ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
if (record->xl_info & XLR_BKP_BLOCK(0)) &buffer) == BLK_NEEDS_REDO)
{ {
(void) RestoreBackupBlock(lsn, record, 0, false, false); page = BufferGetPage(buffer);
return;
}
buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* now execute freeze plan for each frozen tuple */
{ for (ntup = 0; ntup < xlrec->ntuples; ntup++)
UnlockReleaseBuffer(buffer); {
return; xl_heap_freeze_tuple *xlrec_tp;
} ItemId lp;
HeapTupleHeader tuple;
/* now execute freeze plan for each frozen tuple */ xlrec_tp = &xlrec->tuples[ntup];
for (ntup = 0; ntup < xlrec->ntuples; ntup++) lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
{ tuple = (HeapTupleHeader) PageGetItem(page, lp);
xl_heap_freeze_tuple *xlrec_tp;
ItemId lp;
HeapTupleHeader tuple;
xlrec_tp = &xlrec->tuples[ntup]; heap_execute_freeze_tuple(tuple, xlrec_tp);
lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */ }
tuple = (HeapTupleHeader) PageGetItem(page, lp);
heap_execute_freeze_tuple(tuple, xlrec_tp); PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
PageSetLSN(page, lsn); UnlockReleaseBuffer(buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
} }
/* /*
...@@ -7422,8 +7397,10 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record) ...@@ -7422,8 +7397,10 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = NULL; ItemId lp = NULL;
HeapTupleHeader htup; HeapTupleHeader htup;
BlockNumber blkno; BlockNumber blkno;
RelFileNode target_node;
blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid)); blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
target_node = xlrec->target.node;
/* /*
* The visibility map may need to be fixed even if the heap page is * The visibility map may need to be fixed even if the heap page is
...@@ -7431,7 +7408,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record) ...@@ -7431,7 +7408,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
*/ */
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); Relation reln = CreateFakeRelcacheEntry(target_node);
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_pin(reln, blkno, &vmbuffer);
...@@ -7440,52 +7417,41 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record) ...@@ -7440,52 +7417,41 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, target_node, blkno, &buffer)
if (record->xl_info & XLR_BKP_BLOCK(0)) == BLK_NEEDS_REDO)
{ {
(void) RestoreBackupBlock(lsn, record, 0, false, false); page = (Page) BufferGetPage(buffer);
return;
}
buffer = XLogReadBuffer(xlrec->target.node, blkno, false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */ offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
{ if (PageGetMaxOffsetNumber(page) >= offnum)
UnlockReleaseBuffer(buffer); lp = PageGetItemId(page, offnum);
return;
}
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
if (PageGetMaxOffsetNumber(page) >= offnum) elog(PANIC, "heap_delete_redo: invalid lp");
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) htup = (HeapTupleHeader) PageGetItem(page, lp);
elog(PANIC, "heap_delete_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); HeapTupleHeaderClearHotUpdated(htup);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set,
HeapTupleHeaderClearHotUpdated(htup); &htup->t_infomask, &htup->t_infomask2);
fix_infomask_from_infobits(xlrec->infobits_set, HeapTupleHeaderSetXmax(htup, xlrec->xmax);
&htup->t_infomask, &htup->t_infomask2); HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
HeapTupleHeaderSetXmax(htup, xlrec->xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */ /* Mark the page as a candidate for pruning */
PageSetPrunable(page, record->xl_xid); PageSetPrunable(page, record->xl_xid);
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page); PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */ /* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->target.tid; htup->t_ctid = xlrec->target.tid;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -7503,9 +7469,12 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7503,9 +7469,12 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
HeapTupleHeader htup; HeapTupleHeader htup;
xl_heap_header xlhdr; xl_heap_header xlhdr;
uint32 newlen; uint32 newlen;
Size freespace; Size freespace = 0;
RelFileNode target_node;
BlockNumber blkno; BlockNumber blkno;
XLogRedoAction action;
target_node = xlrec->target.node;
blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid)); blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
/* /*
...@@ -7514,7 +7483,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7514,7 +7483,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
*/ */
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); Relation reln = CreateFakeRelcacheEntry(target_node);
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_pin(reln, blkno, &vmbuffer);
...@@ -7523,82 +7492,76 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7523,82 +7492,76 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
/* If we have a full-page image, restore it and we're done */ /*
if (record->xl_info & XLR_BKP_BLOCK(0)) * If we inserted the first and only tuple on the page, re-initialize
{ * the page from scratch.
(void) RestoreBackupBlock(lsn, record, 0, false, false); */
return;
}
if (record->xl_info & XLOG_HEAP_INIT_PAGE) if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{ {
buffer = XLogReadBuffer(xlrec->target.node, blkno, true); XLogReadBufferForRedoExtended(lsn, record, 0,
Assert(BufferIsValid(buffer)); target_node, MAIN_FORKNUM, blkno,
page = (Page) BufferGetPage(buffer); RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0); PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
} }
else else
action = XLogReadBufferForRedo(lsn, record, 0, target_node, blkno,
&buffer);
if (action == BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(xlrec->target.node, blkno, false); page = BufferGetPage(buffer);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */ offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
{ if (PageGetMaxOffsetNumber(page) + 1 < offnum)
UnlockReleaseBuffer(buffer); elog(PANIC, "heap_insert_redo: invalid max offset number");
return;
}
}
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
if (PageGetMaxOffsetNumber(page) + 1 < offnum) Assert(newlen <= MaxHeapTupleSize);
elog(PANIC, "heap_insert_redo: invalid max offset number"); memcpy((char *) &xlhdr,
(char *) xlrec + SizeOfHeapInsert,
newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader; SizeOfHeapHeader);
Assert(newlen <= MaxHeapTupleSize); htup = &tbuf.hdr;
memcpy((char *) &xlhdr, MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
(char *) xlrec + SizeOfHeapInsert, /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
SizeOfHeapHeader); memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
htup = &tbuf.hdr; (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData)); newlen);
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */ newlen += offsetof(HeapTupleHeaderData, t_bits);
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits), htup->t_infomask2 = xlhdr.t_infomask2;
(char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader, htup->t_infomask = xlhdr.t_infomask;
newlen); htup->t_hoff = xlhdr.t_hoff;
newlen += offsetof(HeapTupleHeaderData, t_bits); HeapTupleHeaderSetXmin(htup, record->xl_xid);
htup->t_infomask2 = xlhdr.t_infomask2; HeapTupleHeaderSetCmin(htup, FirstCommandId);
htup->t_infomask = xlhdr.t_infomask; htup->t_ctid = xlrec->target.tid;
htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
htup->t_ctid = xlrec->target.tid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber) if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_insert_redo: failed to add tuple"); elog(PANIC, "heap_insert_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page); PageClearAllVisible(page);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* /*
* If the page is running low on free space, update the FSM as well. * If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much * Arbitrarily, our definition of "low" is less than 20%. We can't do much
* better than that without knowing the fill-factor for the table. * better than that without knowing the fill-factor for the table.
* *
* XXX: We don't get here if the page was restored from full page image. * XXX: Don't do this if the page was restored from full page image. We
* We don't bother to update the FSM in that case, it doesn't need to be * don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway. * totally accurate anyway.
*/ */
if (freespace < BLCKSZ / 5) if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace); XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
} }
...@@ -7610,6 +7573,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7610,6 +7573,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
{ {
char *recdata = XLogRecGetData(record); char *recdata = XLogRecGetData(record);
xl_heap_multi_insert *xlrec; xl_heap_multi_insert *xlrec;
RelFileNode rnode;
BlockNumber blkno;
Buffer buffer; Buffer buffer;
Page page; Page page;
struct struct
...@@ -7619,10 +7584,10 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7619,10 +7584,10 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
} tbuf; } tbuf;
HeapTupleHeader htup; HeapTupleHeader htup;
uint32 newlen; uint32 newlen;
Size freespace; Size freespace = 0;
BlockNumber blkno;
int i; int i;
bool isinit = (record->xl_info & XLOG_HEAP_INIT_PAGE) != 0; bool isinit = (record->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
/* /*
* Insertion doesn't overwrite MVCC data, so no conflict processing is * Insertion doesn't overwrite MVCC data, so no conflict processing is
...@@ -7632,6 +7597,9 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7632,6 +7597,9 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
xlrec = (xl_heap_multi_insert *) recdata; xlrec = (xl_heap_multi_insert *) recdata;
recdata += SizeOfHeapMultiInsert; recdata += SizeOfHeapMultiInsert;
rnode = xlrec->node;
blkno = xlrec->blkno;
/* /*
* If we're reinitializing the page, the tuples are stored in order from * If we're reinitializing the page, the tuples are stored in order from
* FirstOffsetNumber. Otherwise there's an array of offsets in the WAL * FirstOffsetNumber. Otherwise there's an array of offsets in the WAL
...@@ -7640,15 +7608,13 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7640,15 +7608,13 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
if (!isinit) if (!isinit)
recdata += sizeof(OffsetNumber) * xlrec->ntuples; recdata += sizeof(OffsetNumber) * xlrec->ntuples;
blkno = xlrec->blkno;
/* /*
* The visibility map may need to be fixed even if the heap page is * The visibility map may need to be fixed even if the heap page is
* already up-to-date. * already up-to-date.
*/ */
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(xlrec->node); Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_pin(reln, blkno, &vmbuffer);
...@@ -7657,94 +7623,82 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record) ...@@ -7657,94 +7623,82 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
/* If we have a full-page image, restore it and we're done */
if (record->xl_info & XLR_BKP_BLOCK(0))
{
(void) RestoreBackupBlock(lsn, record, 0, false, false);
return;
}
if (isinit) if (isinit)
{ {
buffer = XLogReadBuffer(xlrec->node, blkno, true); XLogReadBufferForRedoExtended(lsn, record, 0,
Assert(BufferIsValid(buffer)); rnode, MAIN_FORKNUM, blkno,
page = (Page) BufferGetPage(buffer); RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0); PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
} }
else else
{ action = XLogReadBufferForRedo(lsn, record, 0, rnode, blkno, &buffer);
buffer = XLogReadBuffer(xlrec->node, blkno, false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */ if (action == BLK_NEEDS_REDO)
{
UnlockReleaseBuffer(buffer);
return;
}
}
for (i = 0; i < xlrec->ntuples; i++)
{ {
OffsetNumber offnum; page = BufferGetPage(buffer);
xl_multi_insert_tuple *xlhdr; for (i = 0; i < xlrec->ntuples; i++)
{
OffsetNumber offnum;
xl_multi_insert_tuple *xlhdr;
if (isinit) if (isinit)
offnum = FirstOffsetNumber + i; offnum = FirstOffsetNumber + i;
else else
offnum = xlrec->offsets[i]; offnum = xlrec->offsets[i];
if (PageGetMaxOffsetNumber(page) + 1 < offnum) if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_multi_insert_redo: invalid max offset number"); elog(PANIC, "heap_multi_insert_redo: invalid max offset number");
xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata);
recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
newlen = xlhdr->datalen;
Assert(newlen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
(char *) recdata,
newlen);
recdata += newlen;
newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_infomask2 = xlhdr->t_infomask2;
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
}
xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
newlen = xlhdr->datalen; PageSetLSN(page, lsn);
Assert(newlen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
(char *) recdata,
newlen);
recdata += newlen;
newlen += offsetof(HeapTupleHeaderData, t_bits); if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
htup->t_infomask2 = xlhdr->t_infomask2; PageClearAllVisible(page);
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); MarkBufferDirty(buffer);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
} }
if (BufferIsValid(buffer))
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ UnlockReleaseBuffer(buffer);
PageSetLSN(page, lsn);
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/* /*
* If the page is running low on free space, update the FSM as well. * If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much * Arbitrarily, our definition of "low" is less than 20%. We can't do much
* better than that without knowing the fill-factor for the table. * better than that without knowing the fill-factor for the table.
* *
* XXX: We don't get here if the page was restored from full page image. * XXX: Don't do this if the page was restored from full page image. We
* We don't bother to update the FSM in that case, it doesn't need to be * don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway. * totally accurate anyway.
*/ */
if (freespace < BLCKSZ / 5) if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace); XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace);
} }
...@@ -7755,8 +7709,9 @@ static void ...@@ -7755,8 +7709,9 @@ static void
heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
{ {
xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
bool samepage = (ItemPointerGetBlockNumber(&(xlrec->newtid)) == RelFileNode rnode;
ItemPointerGetBlockNumber(&(xlrec->target.tid))); BlockNumber oldblk;
BlockNumber newblk;
Buffer obuffer, Buffer obuffer,
nbuffer; nbuffer;
Page page; Page page;
...@@ -7775,24 +7730,29 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) ...@@ -7775,24 +7730,29 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
} tbuf; } tbuf;
xl_heap_header_len xlhdr; xl_heap_header_len xlhdr;
uint32 newlen; uint32 newlen;
Size freespace; Size freespace = 0;
XLogRedoAction oldaction;
XLogRedoAction newaction;
/* initialize to keep the compiler quiet */ /* initialize to keep the compiler quiet */
oldtup.t_data = NULL; oldtup.t_data = NULL;
oldtup.t_len = 0; oldtup.t_len = 0;
rnode = xlrec->target.node;
newblk = ItemPointerGetBlockNumber(&xlrec->newtid);
oldblk = ItemPointerGetBlockNumber(&xlrec->target.tid);
/* /*
* The visibility map may need to be fixed even if the heap page is * The visibility map may need to be fixed even if the heap page is
* already up-to-date. * already up-to-date.
*/ */
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); Relation reln = CreateFakeRelcacheEntry(rnode);
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, block, &vmbuffer); visibilitymap_pin(reln, oldblk, &vmbuffer);
visibilitymap_clear(reln, block, vmbuffer); visibilitymap_clear(reln, oldblk, vmbuffer);
ReleaseBuffer(vmbuffer); ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
...@@ -7807,84 +7767,67 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) ...@@ -7807,84 +7767,67 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
* added the new tuple to the new page. * added the new tuple to the new page.
*/ */
if (record->xl_info & XLR_BKP_BLOCK(0))
{
obuffer = RestoreBackupBlock(lsn, record, 0, false, true);
if (samepage)
{
/* backup block covered both changes, so we're done */
UnlockReleaseBuffer(obuffer);
return;
}
goto newt;
}
/* Deal with old tuple version */ /* Deal with old tuple version */
oldaction = XLogReadBufferForRedo(lsn, record, 0, rnode, oldblk, &obuffer);
obuffer = XLogReadBuffer(xlrec->target.node, if (oldaction == BLK_NEEDS_REDO)
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (!BufferIsValid(obuffer))
goto newt;
page = (Page) BufferGetPage(obuffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */
{ {
if (samepage) page = (Page) BufferGetPage(obuffer);
{
UnlockReleaseBuffer(obuffer);
return;
}
goto newt;
}
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
elog(PANIC, "heap_update_redo: invalid lp"); if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
htup = (HeapTupleHeader) PageGetItem(page, lp); if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(PANIC, "heap_update_redo: invalid lp");
oldtup.t_data = htup; htup = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); oldtup.t_data = htup;
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; oldtup.t_len = ItemIdGetLength(lp);
if (hot_update)
HeapTupleHeaderSetHotUpdated(htup);
else
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
&htup->t_infomask2);
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
/* Mark the page as a candidate for pruning */ htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
PageSetPrunable(page, record->xl_xid); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
if (hot_update)
HeapTupleHeaderSetHotUpdated(htup);
else
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
&htup->t_infomask2);
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, record->xl_xid);
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED) PageSetLSN(page, lsn);
PageClearAllVisible(page); MarkBufferDirty(obuffer);
}
/* /*
* this test is ugly, but necessary to avoid thinking that insert change * Read the page the new tuple goes into, if different from old.
* is already applied
*/ */
if (samepage) if (oldblk == newblk)
{ {
nbuffer = obuffer; nbuffer = obuffer;
goto newsame; newaction = oldaction;
} }
else if (record->xl_info & XLOG_HEAP_INIT_PAGE)
PageSetLSN(page, lsn); {
MarkBufferDirty(obuffer); XLogReadBufferForRedoExtended(lsn, record, 1,
rnode, MAIN_FORKNUM, newblk,
/* Deal with new tuple */ RBM_ZERO, false, &nbuffer);
page = (Page) BufferGetPage(nbuffer);
newt:; PageInit(page, BufferGetPageSize(nbuffer), 0);
newaction = BLK_NEEDS_REDO;
}
else
newaction = XLogReadBufferForRedo(lsn, record, 1, rnode, newblk,
&nbuffer);
/* /*
* The visibility map may need to be fixed even if the heap page is * The visibility map may need to be fixed even if the heap page is
...@@ -7893,144 +7836,110 @@ newt:; ...@@ -7893,144 +7836,110 @@ newt:;
if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
{ {
Relation reln = CreateFakeRelcacheEntry(xlrec->target.node); Relation reln = CreateFakeRelcacheEntry(xlrec->target.node);
BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, block, &vmbuffer); visibilitymap_pin(reln, newblk, &vmbuffer);
visibilitymap_clear(reln, block, vmbuffer); visibilitymap_clear(reln, newblk, vmbuffer);
ReleaseBuffer(vmbuffer); ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
} }
if (record->xl_info & XLR_BKP_BLOCK(1)) /* Deal with new tuple */
{ if (newaction == BLK_NEEDS_REDO)
(void) RestoreBackupBlock(lsn, record, 1, false, false);
if (BufferIsValid(obuffer))
UnlockReleaseBuffer(obuffer);
return;
}
if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{ {
nbuffer = XLogReadBuffer(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->newtid)),
true);
Assert(BufferIsValid(nbuffer));
page = (Page) BufferGetPage(nbuffer); page = (Page) BufferGetPage(nbuffer);
PageInit(page, BufferGetPageSize(nbuffer), 0); offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
} if (PageGetMaxOffsetNumber(page) + 1 < offnum)
else elog(PANIC, "heap_update_redo: invalid max offset number");
{
nbuffer = XLogReadBuffer(xlrec->target.node, recdata = (char *) xlrec + SizeOfHeapUpdate;
ItemPointerGetBlockNumber(&(xlrec->newtid)),
false); if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
if (!BufferIsValid(nbuffer))
{ {
if (BufferIsValid(obuffer)) Assert(newblk == oldblk);
UnlockReleaseBuffer(obuffer); memcpy(&prefixlen, recdata, sizeof(uint16));
return; recdata += sizeof(uint16);
} }
page = (Page) BufferGetPage(nbuffer); if (xlrec->flags & XLOG_HEAP_SUFFIX_FROM_OLD)
if (lsn <= PageGetLSN(page)) /* changes are applied */
{ {
UnlockReleaseBuffer(nbuffer); Assert(newblk == oldblk);
if (BufferIsValid(obuffer)) memcpy(&suffixlen, recdata, sizeof(uint16));
UnlockReleaseBuffer(obuffer); recdata += sizeof(uint16);
return;
} }
}
newsame:; memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen);
recdata += SizeOfHeapHeaderLen;
offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid)); Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize);
if (PageGetMaxOffsetNumber(page) + 1 < offnum) htup = &tbuf.hdr;
elog(PANIC, "heap_update_redo: invalid max offset number"); MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
recdata = (char *) xlrec + SizeOfHeapUpdate;
if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD) /*
{ * Reconstruct the new tuple using the prefix and/or suffix from the
Assert(samepage); * old tuple, and the data stored in the WAL record.
memcpy(&prefixlen, recdata, sizeof(uint16)); */
recdata += sizeof(uint16); newp = (char *) htup + offsetof(HeapTupleHeaderData, t_bits);
} if (prefixlen > 0)
if (xlrec->flags & XLOG_HEAP_SUFFIX_FROM_OLD) {
{ int len;
Assert(samepage);
memcpy(&suffixlen, recdata, sizeof(uint16)); /* copy bitmap [+ padding] [+ oid] from WAL record */
recdata += sizeof(uint16); len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
} memcpy(newp, recdata, len);
recdata += len;
newp += len;
/* copy prefix from old tuple */
memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
newp += prefixlen;
/* copy new tuple data from WAL record */
len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
memcpy(newp, recdata, len);
recdata += len;
newp += len;
}
else
{
/*
* copy bitmap [+ padding] [+ oid] + data from record, all in one
* go
*/
memcpy(newp, recdata, xlhdr.t_len);
recdata += xlhdr.t_len;
newp += xlhdr.t_len;
}
/* copy suffix from old tuple */
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen); newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
recdata += SizeOfHeapHeaderLen; htup->t_infomask2 = xlhdr.header.t_infomask2;
htup->t_infomask = xlhdr.header.t_infomask;
htup->t_hoff = xlhdr.header.t_hoff;
Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize); HeapTupleHeaderSetXmin(htup, record->xl_xid);
htup = &tbuf.hdr; HeapTupleHeaderSetCmin(htup, FirstCommandId);
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData)); HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
/* offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
* Reconstruct the new tuple using the prefix and/or suffix from the old if (offnum == InvalidOffsetNumber)
* tuple, and the data stored in the WAL record. elog(PANIC, "heap_update_redo: failed to add tuple");
*/
newp = (char *) htup + offsetof(HeapTupleHeaderData, t_bits);
if (prefixlen > 0)
{
int len;
/* copy bitmap [+ padding] [+ oid] from WAL record */ if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits); PageClearAllVisible(page);
memcpy(newp, recdata, len);
recdata += len;
newp += len;
/* copy prefix from old tuple */ freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
newp += prefixlen;
/* copy new tuple data from WAL record */ PageSetLSN(page, lsn);
len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits)); MarkBufferDirty(nbuffer);
memcpy(newp, recdata, len);
recdata += len;
newp += len;
} }
else if (BufferIsValid(nbuffer) && nbuffer != obuffer)
{ UnlockReleaseBuffer(nbuffer);
/* copy bitmap [+ padding] [+ oid] + data from record, all in one go */ if (BufferIsValid(obuffer))
memcpy(newp, recdata, xlhdr.t_len);
recdata += xlhdr.t_len;
newp += xlhdr.t_len;
}
/* copy suffix from old tuple */
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
newlen = offsetof(HeapTupleHeaderData, t_bits) +xlhdr.t_len + prefixlen + suffixlen;
htup->t_infomask2 = xlhdr.header.t_infomask2;
htup->t_infomask = xlhdr.header.t_infomask;
htup->t_hoff = xlhdr.header.t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_update_redo: failed to add tuple");
if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
MarkBufferDirty(nbuffer);
UnlockReleaseBuffer(nbuffer);
if (BufferIsValid(obuffer) && obuffer != nbuffer)
UnlockReleaseBuffer(obuffer); UnlockReleaseBuffer(obuffer);
/* /*
...@@ -8044,11 +7953,11 @@ newsame:; ...@@ -8044,11 +7953,11 @@ newsame:;
* as it did before the update, assuming the new tuple is about the same * as it did before the update, assuming the new tuple is about the same
* size as the old one. * size as the old one.
* *
* XXX: We don't get here if the page was restored from full page image. * XXX: Don't do this if the page was restored from full page image. We
* We don't bother to update the FSM in that case, it doesn't need to be * don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway. * totally accurate anyway.
*/ */
if (!hot_update && freespace < BLCKSZ / 5) if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(xlrec->target.node, XLogRecordPageWithFreeSpace(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->newtid)), ItemPointerGetBlockNumber(&(xlrec->newtid)),
freespace); freespace);
...@@ -8064,53 +7973,41 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record) ...@@ -8064,53 +7973,41 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = NULL; ItemId lp = NULL;
HeapTupleHeader htup; HeapTupleHeader htup;
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
if (record->xl_info & XLR_BKP_BLOCK(0)) ItemPointerGetBlockNumber(&xlrec->target.tid),
{ &buffer) == BLK_NEEDS_REDO)
(void) RestoreBackupBlock(lsn, record, 0, false, false);
return;
}
buffer = XLogReadBuffer(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */
{ {
UnlockReleaseBuffer(buffer); page = (Page) BufferGetPage(buffer);
return;
}
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
if (PageGetMaxOffsetNumber(page) >= offnum) if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum); lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(PANIC, "heap_lock_redo: invalid lp"); elog(PANIC, "heap_lock_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp); htup = (HeapTupleHeader) PageGetItem(page, lp);
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
&htup->t_infomask2); &htup->t_infomask2);
/* /*
* Clear relevant update flags, but only if the modified infomask says * Clear relevant update flags, but only if the modified infomask says
* there's no update. * there's no update.
*/ */
if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask)) if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
{ {
HeapTupleHeaderClearHotUpdated(htup); HeapTupleHeaderClearHotUpdated(htup);
/* Make sure there is no forward chain link in t_ctid */ /* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->target.tid; htup->t_ctid = xlrec->target.tid;
}
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); if (BufferIsValid(buffer))
HeapTupleHeaderSetCmax(htup, FirstCommandId, false); UnlockReleaseBuffer(buffer);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -8124,42 +8021,29 @@ heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record) ...@@ -8124,42 +8021,29 @@ heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = NULL; ItemId lp = NULL;
HeapTupleHeader htup; HeapTupleHeader htup;
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
if (record->xl_info & XLR_BKP_BLOCK(0)) ItemPointerGetBlockNumber(&(xlrec->target.tid)),
{ &buffer) == BLK_NEEDS_REDO)
(void) RestoreBackupBlock(lsn, record, 0, false, false);
return;
}
buffer = XLogReadBuffer(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */
{ {
UnlockReleaseBuffer(buffer); page = BufferGetPage(buffer);
return; offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
} if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(PANIC, "heap_xlog_lock_updated: invalid lp"); elog(PANIC, "heap_xlog_lock_updated: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp); htup = (HeapTupleHeader) PageGetItem(page, lp);
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
&htup->t_infomask2); &htup->t_infomask2);
HeapTupleHeaderSetXmax(htup, xlrec->xmax); HeapTupleHeaderSetXmax(htup, xlrec->xmax);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -8174,47 +8058,35 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record) ...@@ -8174,47 +8058,35 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
uint32 oldlen; uint32 oldlen;
uint32 newlen; uint32 newlen;
/* If we have a full-page image, restore it and we're done */ if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
if (record->xl_info & XLR_BKP_BLOCK(0)) ItemPointerGetBlockNumber(&(xlrec->target.tid)),
&buffer) == BLK_NEEDS_REDO)
{ {
(void) RestoreBackupBlock(lsn, record, 0, false, false); page = BufferGetPage(buffer);
return;
}
buffer = XLogReadBuffer(xlrec->target.node, offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
ItemPointerGetBlockNumber(&(xlrec->target.tid)), if (PageGetMaxOffsetNumber(page) >= offnum)
false); lp = PageGetItemId(page, offnum);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) /* changes are applied */ if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
{ elog(PANIC, "heap_inplace_redo: invalid lp");
UnlockReleaseBuffer(buffer);
return;
}
offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); htup = (HeapTupleHeader) PageGetItem(page, lp);
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp)) oldlen = ItemIdGetLength(lp) - htup->t_hoff;
elog(PANIC, "heap_inplace_redo: invalid lp"); newlen = record->xl_len - SizeOfHeapInplace;
if (oldlen != newlen)
elog(PANIC, "heap_inplace_redo: wrong tuple length");
htup = (HeapTupleHeader) PageGetItem(page, lp); memcpy((char *) htup + htup->t_hoff,
(char *) xlrec + SizeOfHeapInplace,
oldlen = ItemIdGetLength(lp) - htup->t_hoff; newlen);
newlen = record->xl_len - SizeOfHeapInplace;
if (oldlen != newlen)
elog(PANIC, "heap_inplace_redo: wrong tuple length");
memcpy((char *) htup + htup->t_hoff,
(char *) xlrec + SizeOfHeapInplace,
newlen);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
void void
......
...@@ -116,27 +116,25 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn, ...@@ -116,27 +116,25 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
*/ */
static void static void
_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record, _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
int block_index,
RelFileNode rnode, BlockNumber cblock) RelFileNode rnode, BlockNumber cblock)
{ {
Buffer buf; Buffer buf;
buf = XLogReadBuffer(rnode, cblock, false); if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
if (BufferIsValid(buf)) == BLK_NEEDS_REDO)
{ {
Page page = (Page) BufferGetPage(buf); Page page = (Page) BufferGetPage(buf);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
if (lsn > PageGetLSN(page)) Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
{ pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0); PageSetLSN(page, lsn);
pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT; MarkBufferDirty(buf);
PageSetLSN(page, lsn);
MarkBufferDirty(buf);
}
UnlockReleaseBuffer(buf);
} }
if (BufferIsValid(buf))
UnlockReleaseBuffer(buf);
} }
static void static void
...@@ -184,39 +182,28 @@ btree_xlog_insert(bool isleaf, bool ismeta, ...@@ -184,39 +182,28 @@ btree_xlog_insert(bool isleaf, bool ismeta,
*/ */
if (!isleaf) if (!isleaf)
{ {
if (record->xl_info & XLR_BKP_BLOCK(0)) _bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
(void) RestoreBackupBlock(lsn, record, 0, false, false);
else
_bt_clear_incomplete_split(lsn, record, xlrec->target.node, cblkno);
main_blk_index = 1; main_blk_index = 1;
} }
else else
main_blk_index = 0; main_blk_index = 0;
if (record->xl_info & XLR_BKP_BLOCK(main_blk_index)) if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
(void) RestoreBackupBlock(lsn, record, main_blk_index, false, false); ItemPointerGetBlockNumber(&(xlrec->target.tid)),
else &buffer) == BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(xlrec->target.node, page = BufferGetPage(buffer);
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (BufferIsValid(buffer))
{
page = (Page) BufferGetPage(buffer);
if (lsn > PageGetLSN(page)) if (PageAddItem(page, (Item) datapos, datalen,
{ ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
if (PageAddItem(page, (Item) datapos, datalen, false, false) == InvalidOffsetNumber)
ItemPointerGetOffsetNumber(&(xlrec->target.tid)), elog(PANIC, "btree_insert_redo: failed to add item");
false, false) == InvalidOffsetNumber)
elog(PANIC, "btree_insert_redo: failed to add item");
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* /*
* Note: in normal operation, we'd update the metapage while still holding * Note: in normal operation, we'd update the metapage while still holding
...@@ -299,12 +286,7 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -299,12 +286,7 @@ btree_xlog_split(bool onleft, bool isroot,
* before locking the other pages) * before locking the other pages)
*/ */
if (!isleaf) if (!isleaf)
{ _bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
if (record->xl_info & XLR_BKP_BLOCK(1))
(void) RestoreBackupBlock(lsn, record, 1, false, false);
else
_bt_clear_incomplete_split(lsn, record, xlrec->node, cblkno);
}
/* Reconstruct right (new) sibling page from scratch */ /* Reconstruct right (new) sibling page from scratch */
rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true); rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
...@@ -340,87 +322,77 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -340,87 +322,77 @@ btree_xlog_split(bool onleft, bool isroot,
/* don't release the buffer yet; we touch right page's first item below */ /* don't release the buffer yet; we touch right page's first item below */
/* Now reconstruct left (original) sibling page */ /* Now reconstruct left (original) sibling page */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
lbuf = RestoreBackupBlock(lsn, record, 0, false, true); &lbuf) == BLK_NEEDS_REDO)
else
{ {
lbuf = XLogReadBuffer(xlrec->node, xlrec->leftsib, false); /*
* To retain the same physical order of the tuples that they had, we
if (BufferIsValid(lbuf)) * initialize a temporary empty page for the left page and add all the
* items to that in item number order. This mirrors how _bt_split()
* works. It's not strictly required to retain the same physical
* order, as long as the items are in the correct item number order,
* but it helps debugging. See also _bt_restore_page(), which does
* the same for the right page.
*/
Page lpage = (Page) BufferGetPage(lbuf);
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
OffsetNumber off;
Page newlpage;
OffsetNumber leftoff;
newlpage = PageGetTempPageCopySpecial(lpage);
/* Set high key */
leftoff = P_HIKEY;
if (PageAddItem(newlpage, left_hikey, left_hikeysz,
P_HIKEY, false, false) == InvalidOffsetNumber)
elog(PANIC, "failed to add high key to left page after split");
leftoff = OffsetNumberNext(leftoff);
for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++)
{ {
/* ItemId itemid;
* To retain the same physical order of the tuples that they had, Size itemsz;
* we initialize a temporary empty page for the left page and add Item item;
* all the items to that in item number order. This mirrors how
* _bt_split() works. It's not strictly required to retain the
* same physical order, as long as the items are in the correct
* item number order, but it helps debugging. See also
* _bt_restore_page(), which does the same for the right page.
*/
Page lpage = (Page) BufferGetPage(lbuf);
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
if (lsn > PageGetLSN(lpage)) /* add the new item if it was inserted on left page */
if (onleft && off == newitemoff)
{ {
OffsetNumber off; if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
Page newlpage; false, false) == InvalidOffsetNumber)
OffsetNumber leftoff; elog(ERROR, "failed to add new item to left page after split");
newlpage = PageGetTempPageCopySpecial(lpage);
/* Set high key */
leftoff = P_HIKEY;
if (PageAddItem(newlpage, left_hikey, left_hikeysz,
P_HIKEY, false, false) == InvalidOffsetNumber)
elog(PANIC, "failed to add high key to left page after split");
leftoff = OffsetNumberNext(leftoff); leftoff = OffsetNumberNext(leftoff);
for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++)
{
ItemId itemid;
Size itemsz;
Item item;
/* add the new item if it was inserted on left page */
if (onleft && off == newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add new item to left page after split");
leftoff = OffsetNumberNext(leftoff);
}
itemid = PageGetItemId(lpage, off);
itemsz = ItemIdGetLength(itemid);
item = PageGetItem(lpage, itemid);
if (PageAddItem(newlpage, item, itemsz, leftoff,
false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add old item to left page after split");
leftoff = OffsetNumberNext(leftoff);
}
/* cope with possibility that newitem goes at the end */
if (onleft && off == newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add new item to left page after split");
leftoff = OffsetNumberNext(leftoff);
}
PageRestoreTempPage(newlpage, lpage);
/* Fix opaque fields */
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
if (isleaf)
lopaque->btpo_flags |= BTP_LEAF;
lopaque->btpo_next = xlrec->rightsib;
lopaque->btpo_cycleid = 0;
PageSetLSN(lpage, lsn);
MarkBufferDirty(lbuf);
} }
itemid = PageGetItemId(lpage, off);
itemsz = ItemIdGetLength(itemid);
item = PageGetItem(lpage, itemid);
if (PageAddItem(newlpage, item, itemsz, leftoff,
false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add old item to left page after split");
leftoff = OffsetNumberNext(leftoff);
} }
/* cope with possibility that newitem goes at the end */
if (onleft && off == newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add new item to left page after split");
leftoff = OffsetNumberNext(leftoff);
}
PageRestoreTempPage(newlpage, lpage);
/* Fix opaque fields */
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
if (isleaf)
lopaque->btpo_flags |= BTP_LEAF;
lopaque->btpo_next = xlrec->rightsib;
lopaque->btpo_cycleid = 0;
PageSetLSN(lpage, lsn);
MarkBufferDirty(lbuf);
} }
/* We no longer need the buffers */ /* We no longer need the buffers */
...@@ -443,31 +415,21 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -443,31 +415,21 @@ btree_xlog_split(bool onleft, bool isroot,
* whether this was a leaf or internal page. * whether this was a leaf or internal page.
*/ */
int rnext_index = isleaf ? 1 : 2; int rnext_index = isleaf ? 1 : 2;
Buffer buffer;
if (record->xl_info & XLR_BKP_BLOCK(rnext_index)) if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
(void) RestoreBackupBlock(lsn, record, rnext_index, false, false); xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
else
{ {
Buffer buffer; Page page = (Page) BufferGetPage(buffer);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
buffer = XLogReadBuffer(xlrec->node, xlrec->rnext, false);
if (BufferIsValid(buffer))
{
Page page = (Page) BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_prev = xlrec->rightsib; pageop->btpo_prev = xlrec->rightsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
} }
...@@ -529,54 +491,41 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) ...@@ -529,54 +491,41 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
} }
} }
/*
* If we have a full-page image, restore it (using a cleanup lock) and
* we're done.
*/
if (record->xl_info & XLR_BKP_BLOCK(0))
{
(void) RestoreBackupBlock(lsn, record, 0, true, false);
return;
}
/* /*
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
* page. See nbtree/README for details. * page. See nbtree/README for details.
*/ */
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL); if (XLogReadBufferForRedoExtended(lsn, record, 0,
if (!BufferIsValid(buffer)) xlrec->node, MAIN_FORKNUM, xlrec->block,
return; RBM_NORMAL, true, &buffer)
LockBufferForCleanup(buffer); == BLK_NEEDS_REDO)
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page))
{ {
UnlockReleaseBuffer(buffer); page = (Page) BufferGetPage(buffer);
return;
}
if (record->xl_len > SizeOfBtreeVacuum) if (record->xl_len > SizeOfBtreeVacuum)
{ {
OffsetNumber *unused; OffsetNumber *unused;
OffsetNumber *unend; OffsetNumber *unend;
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum); unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len); unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
if ((unend - unused) > 0) if ((unend - unused) > 0)
PageIndexMultiDelete(page, unused, unend - unused); PageIndexMultiDelete(page, unused, unend - unused);
} }
/* /*
* Mark the page as not containing any LP_DEAD items --- see comments in * Mark the page as not containing any LP_DEAD items --- see comments
* _bt_delitems_vacuum(). * in _bt_delitems_vacuum().
*/ */
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_flags &= ~BTP_HAS_GARBAGE; opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
/* /*
...@@ -752,47 +701,36 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record) ...@@ -752,47 +701,36 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node); ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
} }
/* If we have a full-page image, restore it and we're done */
if (record->xl_info & XLR_BKP_BLOCK(0))
{
(void) RestoreBackupBlock(lsn, record, 0, false, false);
return;
}
/* /*
* We don't need to take a cleanup lock to apply these changes. See * We don't need to take a cleanup lock to apply these changes. See
* nbtree/README for details. * nbtree/README for details.
*/ */
buffer = XLogReadBuffer(xlrec->node, xlrec->block, false); if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
if (!BufferIsValid(buffer)) &buffer) == BLK_NEEDS_REDO)
return;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page))
{ {
UnlockReleaseBuffer(buffer); page = (Page) BufferGetPage(buffer);
return;
}
if (record->xl_len > SizeOfBtreeDelete) if (record->xl_len > SizeOfBtreeDelete)
{ {
OffsetNumber *unused; OffsetNumber *unused;
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete); unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
PageIndexMultiDelete(page, unused, xlrec->nitems); PageIndexMultiDelete(page, unused, xlrec->nitems);
} }
/* /*
* Mark the page as not containing any LP_DEAD items --- see comments in * Mark the page as not containing any LP_DEAD items --- see comments
* _bt_delitems_delete(). * in _bt_delitems_delete().
*/ */
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_flags &= ~BTP_HAS_GARBAGE; opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer); }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -816,42 +754,36 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record) ...@@ -816,42 +754,36 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/ */
/* parent page */ /* parent page */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xlrec->target.node, parent, false); OffsetNumber poffset;
if (BufferIsValid(buffer)) ItemId itemid;
{ IndexTuple itup;
page = (Page) BufferGetPage(buffer); OffsetNumber nextoffset;
pageop = (BTPageOpaque) PageGetSpecialPointer(page); BlockNumber rightsib;
if (lsn > PageGetLSN(page))
{ page = (Page) BufferGetPage(buffer);
OffsetNumber poffset; pageop = (BTPageOpaque) PageGetSpecialPointer(page);
ItemId itemid;
IndexTuple itup; poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
OffsetNumber nextoffset;
BlockNumber rightsib; nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, nextoffset);
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid)); itup = (IndexTuple) PageGetItem(page, itemid);
rightsib = ItemPointerGetBlockNumber(&itup->t_tid);
nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, nextoffset); itemid = PageGetItemId(page, poffset);
itup = (IndexTuple) PageGetItem(page, itemid); itup = (IndexTuple) PageGetItem(page, itemid);
rightsib = ItemPointerGetBlockNumber(&itup->t_tid); ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, poffset); PageIndexTupleDelete(page, nextoffset);
itup = (IndexTuple) PageGetItem(page, itemid);
ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); PageSetLSN(page, lsn);
nextoffset = OffsetNumberNext(poffset); MarkBufferDirty(buffer);
PageIndexTupleDelete(page, nextoffset);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Rewrite the leaf page as a halfdead page */ /* Rewrite the leaf page as a halfdead page */
buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true); buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
...@@ -911,56 +843,34 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record) ...@@ -911,56 +843,34 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
*/ */
/* Fix left-link of right sibling */ /* Fix left-link of right sibling */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
(void) RestoreBackupBlock(lsn, record, 0, false, false); == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xlrec->node, rightsib, false); page = (Page) BufferGetPage(buffer);
if (BufferIsValid(buffer)) pageop = (BTPageOpaque) PageGetSpecialPointer(page);
{ pageop->btpo_prev = leftsib;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page))
{
UnlockReleaseBuffer(buffer);
}
else
{
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_prev = leftsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Fix right-link of left sibling, if any */ /* Fix right-link of left sibling, if any */
if (record->xl_info & XLR_BKP_BLOCK(1)) if (leftsib != P_NONE)
(void) RestoreBackupBlock(lsn, record, 1, false, false);
else
{ {
if (leftsib != P_NONE) if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
== BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(xlrec->node, leftsib, false); page = (Page) BufferGetPage(buffer);
if (BufferIsValid(buffer)) pageop = (BTPageOpaque) PageGetSpecialPointer(page);
{ pageop->btpo_next = rightsib;
page = (Page) BufferGetPage(buffer);
if (lsn <= PageGetLSN(page)) PageSetLSN(page, lsn);
{ MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
else
{
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_next = rightsib;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
/* Rewrite target page as empty deleted page */ /* Rewrite target page as empty deleted page */
...@@ -1071,10 +981,7 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record) ...@@ -1071,10 +981,7 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY); Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
/* Clear the incomplete-split flag in left child */ /* Clear the incomplete-split flag in left child */
if (record->xl_info & XLR_BKP_BLOCK(0)) _bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
(void) RestoreBackupBlock(lsn, record, 0, false, false);
else
_bt_clear_incomplete_split(lsn, record, xlrec->node, cblkno);
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
......
...@@ -113,6 +113,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record) ...@@ -113,6 +113,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
SpGistLeafTupleData leafTupleHdr; SpGistLeafTupleData leafTupleHdr;
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action;
ptr += sizeof(spgxlogAddLeaf); ptr += sizeof(spgxlogAddLeaf);
leafTuple = ptr; leafTuple = ptr;
...@@ -124,82 +125,78 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record) ...@@ -124,82 +125,78 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
* simultaneously; but in WAL replay it should be safe to update the leaf * simultaneously; but in WAL replay it should be safe to update the leaf
* page before updating the parent. * page before updating the parent.
*/ */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (xldata->newPage)
(void) RestoreBackupBlock(lsn, record, 0, false, false); {
buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else else
action = XLogReadBufferForRedo(lsn, record, 0,
xldata->node, xldata->blknoLeaf,
&buffer);
if (action == BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, page = BufferGetPage(buffer);
xldata->newPage);
if (BufferIsValid(buffer))
{
page = BufferGetPage(buffer);
if (xldata->newPage) /* insert new tuple */
SpGistInitBuffer(buffer, if (xldata->offnumLeaf != xldata->offnumHeadLeaf)
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); {
/* normal cases, tuple was added by SpGistPageAddNewItem */
addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf);
if (lsn > PageGetLSN(page)) /* update head tuple's chain link if needed */
if (xldata->offnumHeadLeaf != InvalidOffsetNumber)
{ {
/* insert new tuple */ SpGistLeafTuple head;
if (xldata->offnumLeaf != xldata->offnumHeadLeaf)
{
/* normal cases, tuple was added by SpGistPageAddNewItem */
addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf);
/* update head tuple's chain link if needed */
if (xldata->offnumHeadLeaf != InvalidOffsetNumber)
{
SpGistLeafTuple head;
head = (SpGistLeafTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumHeadLeaf));
Assert(head->nextOffset == leafTupleHdr.nextOffset);
head->nextOffset = xldata->offnumLeaf;
}
}
else
{
/* replacing a DEAD tuple */
PageIndexTupleDelete(page, xldata->offnumLeaf);
if (PageAddItem(page,
(Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTupleHdr.size);
}
PageSetLSN(page, lsn); head = (SpGistLeafTuple) PageGetItem(page,
MarkBufferDirty(buffer); PageGetItemId(page, xldata->offnumHeadLeaf));
Assert(head->nextOffset == leafTupleHdr.nextOffset);
head->nextOffset = xldata->offnumLeaf;
} }
UnlockReleaseBuffer(buffer);
} }
else
{
/* replacing a DEAD tuple */
PageIndexTupleDelete(page, xldata->offnumLeaf);
if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTupleHdr.size);
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* update parent downlink if necessary */ /* update parent downlink if necessary */
if (record->xl_info & XLR_BKP_BLOCK(1)) if (xldata->blknoParent != InvalidBlockNumber)
(void) RestoreBackupBlock(lsn, record, 1, false, false);
else if (xldata->blknoParent != InvalidBlockNumber)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); if (XLogReadBufferForRedo(lsn, record, 1,
if (BufferIsValid(buffer)) xldata->node, xldata->blknoParent,
&buffer) == BLK_NEEDS_REDO)
{ {
SpGistInnerTuple tuple;
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
SpGistInnerTuple tuple;
tuple = (SpGistInnerTuple) PageGetItem(page, tuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI, spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoLeaf, xldata->offnumLeaf); xldata->blknoLeaf, xldata->offnumLeaf);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
} }
...@@ -214,6 +211,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) ...@@ -214,6 +211,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
int nInsert; int nInsert;
Buffer buffer; Buffer buffer;
Page page; Page page;
XLogRedoAction action;
fillFakeState(&state, xldata->stateSrc); fillFakeState(&state, xldata->stateSrc);
...@@ -234,98 +232,82 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) ...@@ -234,98 +232,82 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
*/ */
/* Insert tuples on the dest page (do first, so redirect is valid) */ /* Insert tuples on the dest page (do first, so redirect is valid) */
if (record->xl_info & XLR_BKP_BLOCK(1)) if (xldata->newPage)
(void) RestoreBackupBlock(lsn, record, 1, false, false);
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
xldata->newPage); SpGistInitBuffer(buffer,
if (BufferIsValid(buffer))
{
page = BufferGetPage(buffer);
if (xldata->newPage)
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
action = XLogReadBufferForRedo(lsn, record, 1,
xldata->node, xldata->blknoDst,
&buffer);
if (action == BLK_NEEDS_REDO)
{
int i;
if (lsn > PageGetLSN(page)) page = BufferGetPage(buffer);
{
int i;
for (i = 0; i < nInsert; i++)
{
char *leafTuple;
SpGistLeafTupleData leafTupleHdr;
/*
* the tuples are not aligned, so must copy to access
* the size field.
*/
leafTuple = ptr;
memcpy(&leafTupleHdr, leafTuple,
sizeof(SpGistLeafTupleData));
addOrReplaceTuple(page, (Item) leafTuple,
leafTupleHdr.size, toInsert[i]);
ptr += leafTupleHdr.size;
}
PageSetLSN(page, lsn); for (i = 0; i < nInsert; i++)
MarkBufferDirty(buffer); {
} char *leafTuple;
UnlockReleaseBuffer(buffer); SpGistLeafTupleData leafTupleHdr;
/*
* the tuples are not aligned, so must copy to access the size
* field.
*/
leafTuple = ptr;
memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
addOrReplaceTuple(page, (Item) leafTuple,
leafTupleHdr.size, toInsert[i]);
ptr += leafTupleHdr.size;
} }
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Delete tuples from the source page, inserting a redirection pointer */ /* Delete tuples from the source page, inserting a redirection pointer */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); page = BufferGetPage(buffer);
if (BufferIsValid(buffer)) spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
{
page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
xldata->blknoDst, xldata->blknoDst,
toInsert[nInsert - 1]); toInsert[nInsert - 1]);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* And update the parent downlink */ /* And update the parent downlink */
if (record->xl_info & XLR_BKP_BLOCK(2)) if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
(void) RestoreBackupBlock(lsn, record, 2, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); SpGistInnerTuple tuple;
if (BufferIsValid(buffer))
{ page = BufferGetPage(buffer);
page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
SpGistInnerTuple tuple;
tuple = (SpGistInnerTuple) PageGetItem(page, tuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI, spgUpdateNodeLink(tuple, xldata->nodeI,
xldata->blknoDst, toInsert[nInsert - 1]); xldata->blknoDst, toInsert[nInsert - 1]);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -339,6 +321,7 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) ...@@ -339,6 +321,7 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer; Buffer buffer;
Page page; Page page;
int bbi; int bbi;
XLogRedoAction action;
ptr += sizeof(spgxlogAddNode); ptr += sizeof(spgxlogAddNode);
innerTuple = ptr; innerTuple = ptr;
...@@ -351,29 +334,21 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) ...@@ -351,29 +334,21 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
{ {
/* update in place */ /* update in place */
Assert(xldata->blknoParent == InvalidBlockNumber); Assert(xldata->blknoParent == InvalidBlockNumber);
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); page = BufferGetPage(buffer);
if (BufferIsValid(buffer)) PageIndexTupleDelete(page, xldata->offnum);
{ if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
page = BufferGetPage(buffer); xldata->offnum, false, false) != xldata->offnum)
if (lsn > PageGetLSN(page)) elog(ERROR, "failed to add item of size %u to SPGiST index page",
{ innerTupleHdr.size);
PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, PageSetLSN(page, lsn);
xldata->offnum, MarkBufferDirty(buffer);
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
innerTupleHdr.size);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
else else
{ {
...@@ -390,90 +365,79 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) ...@@ -390,90 +365,79 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
Assert(xldata->blkno != xldata->blknoNew); Assert(xldata->blkno != xldata->blknoNew);
/* Install new tuple first so redirect is valid */ /* Install new tuple first so redirect is valid */
if (record->xl_info & XLR_BKP_BLOCK(1)) if (xldata->newPage)
(void) RestoreBackupBlock(lsn, record, 1, false, false); {
buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
/* AddNode is not used for nulls pages */
SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO;
}
else else
action = XLogReadBufferForRedo(lsn, record, 1,
xldata->node, xldata->blknoNew,
&buffer);
if (action == BLK_NEEDS_REDO)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, page = BufferGetPage(buffer);
xldata->newPage);
if (BufferIsValid(buffer)) addOrReplaceTuple(page, (Item) innerTuple,
innerTupleHdr.size, xldata->offnumNew);
/*
* If parent is in this same page, don't advance LSN; doing so
* would fool us into not applying the parent downlink update
* below. We'll update the LSN when we fix the parent downlink.
*/
if (xldata->blknoParent != xldata->blknoNew)
{ {
page = BufferGetPage(buffer); PageSetLSN(page, lsn);
/* AddNode is not used for nulls pages */
if (xldata->newPage)
SpGistInitBuffer(buffer, 0);
if (lsn > PageGetLSN(page))
{
addOrReplaceTuple(page, (Item) innerTuple,
innerTupleHdr.size, xldata->offnumNew);
/*
* If parent is in this same page, don't advance LSN;
* doing so would fool us into not applying the parent
* downlink update below. We'll update the LSN when we
* fix the parent downlink.
*/
if (xldata->blknoParent != xldata->blknoNew)
{
PageSetLSN(page, lsn);
}
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
} }
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Delete old tuple, replacing it with redirect or placeholder tuple */ /* Delete old tuple, replacing it with redirect or placeholder tuple */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); SpGistDeadTuple dt;
if (BufferIsValid(buffer))
page = BufferGetPage(buffer);
if (state.isBuild)
dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
else
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
xldata->blknoNew,
xldata->offnumNew);
PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
dt->size);
if (state.isBuild)
SpGistPageGetOpaque(page)->nPlaceholder++;
else
SpGistPageGetOpaque(page)->nRedirection++;
/*
* If parent is in this same page, don't advance LSN; doing so
* would fool us into not applying the parent downlink update
* below. We'll update the LSN when we fix the parent downlink.
*/
if (xldata->blknoParent != xldata->blkno)
{ {
page = BufferGetPage(buffer); PageSetLSN(page, lsn);
if (lsn > PageGetLSN(page))
{
SpGistDeadTuple dt;
if (state.isBuild)
dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
else
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
xldata->blknoNew,
xldata->offnumNew);
PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) dt, dt->size,
xldata->offnum,
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
dt->size);
if (state.isBuild)
SpGistPageGetOpaque(page)->nPlaceholder++;
else
SpGistPageGetOpaque(page)->nRedirection++;
/*
* If parent is in this same page, don't advance LSN;
* doing so would fool us into not applying the parent
* downlink update below. We'll update the LSN when we
* fix the parent downlink.
*/
if (xldata->blknoParent != xldata->blkno)
{
PageSetLSN(page, lsn);
}
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
} }
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* /*
* Update parent downlink. Since parent could be in either of the * Update parent downlink. Since parent could be in either of the
...@@ -491,29 +455,32 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) ...@@ -491,29 +455,32 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
{ {
if (bbi == 2) /* else we already did it */ if (bbi == 2) /* else we already did it */
(void) RestoreBackupBlock(lsn, record, bbi, false, false); (void) RestoreBackupBlock(lsn, record, bbi, false, false);
action = BLK_RESTORED;
buffer = InvalidBuffer;
} }
else else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
if (BufferIsValid(buffer)) xldata->blknoParent, &buffer);
{ Assert(action != BLK_RESTORED);
page = BufferGetPage(buffer); }
if (lsn > PageGetLSN(page)) if (action == BLK_NEEDS_REDO)
{ {
SpGistInnerTuple innerTuple; SpGistInnerTuple innerTuple;
page = BufferGetPage(buffer);
innerTuple = (SpGistInnerTuple) PageGetItem(page, innerTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(innerTuple, xldata->nodeI, spgUpdateNodeLink(innerTuple, xldata->nodeI,
xldata->blknoNew, xldata->offnumNew); xldata->blknoNew, xldata->offnumNew);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
} }
...@@ -545,60 +512,56 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record) ...@@ -545,60 +512,56 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
*/ */
/* insert postfix tuple first to avoid dangling link */ /* insert postfix tuple first to avoid dangling link */
if (record->xl_info & XLR_BKP_BLOCK(1)) if (xldata->blknoPostfix != xldata->blknoPrefix)
(void) RestoreBackupBlock(lsn, record, 1, false, false);
else if (xldata->blknoPostfix != xldata->blknoPrefix)
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, XLogRedoAction action;
xldata->newPage);
if (BufferIsValid(buffer))
{
page = BufferGetPage(buffer);
if (xldata->newPage)
{
buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
/* SplitTuple is not used for nulls pages */ /* SplitTuple is not used for nulls pages */
if (xldata->newPage) SpGistInitBuffer(buffer, 0);
SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO;
}
else
action = XLogReadBufferForRedo(lsn, record, 1,
xldata->node, xldata->blknoPostfix,
&buffer);
if (lsn > PageGetLSN(page)) if (action == BLK_NEEDS_REDO)
{ {
addOrReplaceTuple(page, (Item) postfixTuple, page = BufferGetPage(buffer);
postfixTupleHdr.size, xldata->offnumPostfix);
PageSetLSN(page, lsn); addOrReplaceTuple(page, (Item) postfixTuple,
MarkBufferDirty(buffer); postfixTupleHdr.size, xldata->offnumPostfix);
}
UnlockReleaseBuffer(buffer); PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
/* now handle the original page */ /* now handle the original page */
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blknoPrefix, false); page = BufferGetPage(buffer);
if (BufferIsValid(buffer)) PageIndexTupleDelete(page, xldata->offnumPrefix);
{ if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
PageIndexTupleDelete(page, xldata->offnumPrefix);
if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
xldata->offnumPrefix, false, false) != xldata->offnumPrefix) xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
elog(ERROR, "failed to add item of size %u to SPGiST index page", elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTupleHdr.size); prefixTupleHdr.size);
if (xldata->blknoPostfix == xldata->blknoPrefix) if (xldata->blknoPostfix == xldata->blknoPrefix)
addOrReplaceTuple(page, (Item) postfixTuple, addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
postfixTupleHdr.size, xldata->offnumPostfix);
xldata->offnumPostfix);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -616,9 +579,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -616,9 +579,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
Buffer destBuffer; Buffer destBuffer;
Page srcPage; Page srcPage;
Page destPage; Page destPage;
Buffer innerBuffer;
Page page; Page page;
int bbi; int bbi;
int i; int i;
XLogRedoAction action;
fillFakeState(&state, xldata->stateSrc); fillFakeState(&state, xldata->stateSrc);
...@@ -668,46 +633,37 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -668,46 +633,37 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* inserting leaf tuples and the new inner tuple, else the added * inserting leaf tuples and the new inner tuple, else the added
* redirect tuple will be a dangling link.) * redirect tuple will be a dangling link.)
*/ */
if (record->xl_info & XLR_BKP_BLOCK(bbi)) if (XLogReadBufferForRedo(lsn, record, bbi,
xldata->node, xldata->blknoSrc,
&srcBuffer) == BLK_NEEDS_REDO)
{ {
srcBuffer = RestoreBackupBlock(lsn, record, bbi, false, true); srcPage = BufferGetPage(srcBuffer);
srcPage = NULL; /* don't need to do any page updates */
/*
* We have it a bit easier here than in doPickSplit(), because we
* know the inner tuple's location already, so we can inject the
* correct redirection tuple now.
*/
if (!state.isBuild)
spgPageIndexMultiDelete(&state, srcPage,
toDelete, xldata->nDelete,
SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
xldata->blknoInner,
xldata->offnumInner);
else
spgPageIndexMultiDelete(&state, srcPage,
toDelete, xldata->nDelete,
SPGIST_PLACEHOLDER,
SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
/* don't update LSN etc till we're done with it */
} }
else else
{ {
srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); srcPage = NULL; /* don't do any page updates */
if (BufferIsValid(srcBuffer))
{
srcPage = BufferGetPage(srcBuffer);
if (lsn > PageGetLSN(srcPage))
{
/*
* We have it a bit easier here than in doPickSplit(),
* because we know the inner tuple's location already, so
* we can inject the correct redirection tuple now.
*/
if (!state.isBuild)
spgPageIndexMultiDelete(&state, srcPage,
toDelete, xldata->nDelete,
SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
xldata->blknoInner,
xldata->offnumInner);
else
spgPageIndexMultiDelete(&state, srcPage,
toDelete, xldata->nDelete,
SPGIST_PLACEHOLDER,
SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
/* don't update LSN etc till we're done with it */
}
else
srcPage = NULL; /* don't do any page updates */
}
else
srcPage = NULL;
} }
bbi++; bbi++;
} }
...@@ -735,22 +691,15 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -735,22 +691,15 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
* We could probably release the page lock immediately in the * We could probably release the page lock immediately in the
* full-page-image case, but for safety let's hold it till later. * full-page-image case, but for safety let's hold it till later.
*/ */
if (record->xl_info & XLR_BKP_BLOCK(bbi)) if (XLogReadBufferForRedo(lsn, record, bbi,
xldata->node, xldata->blknoDest,
&destBuffer) == BLK_NEEDS_REDO)
{ {
destBuffer = RestoreBackupBlock(lsn, record, bbi, false, true); destPage = (Page) BufferGetPage(destBuffer);
destPage = NULL; /* don't need to do any page updates */
} }
else else
{ {
destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, false); destPage = NULL; /* don't do any page updates */
if (BufferIsValid(destBuffer))
{
destPage = (Page) BufferGetPage(destBuffer);
if (lsn <= PageGetLSN(destPage))
destPage = NULL; /* don't do any page updates */
}
else
destPage = NULL;
} }
bbi++; bbi++;
} }
...@@ -787,43 +736,40 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -787,43 +736,40 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
} }
/* restore new inner tuple */ /* restore new inner tuple */
if (record->xl_info & XLR_BKP_BLOCK(bbi)) if (xldata->initInner)
(void) RestoreBackupBlock(lsn, record, bbi, false, false);
else
{ {
Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoInner, innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
xldata->initInner); SpGistInitBuffer(innerBuffer,
(xldata->storesNulls ? SPGIST_NULLS : 0));
if (BufferIsValid(buffer)) action = BLK_NEEDS_REDO;
{ }
page = BufferGetPage(buffer); else
action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
xldata->blknoInner, &innerBuffer);
if (xldata->initInner) if (action == BLK_NEEDS_REDO)
SpGistInitBuffer(buffer, {
(xldata->storesNulls ? SPGIST_NULLS : 0)); page = BufferGetPage(innerBuffer);
if (lsn > PageGetLSN(page)) addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size,
{ xldata->offnumInner);
addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size,
xldata->offnumInner);
/* if inner is also parent, update link while we're here */ /* if inner is also parent, update link while we're here */
if (xldata->blknoInner == xldata->blknoParent) if (xldata->blknoInner == xldata->blknoParent)
{ {
SpGistInnerTuple parent; SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page, parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI, spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner); xldata->blknoInner, xldata->offnumInner);
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
} }
PageSetLSN(page, lsn);
MarkBufferDirty(innerBuffer);
} }
if (BufferIsValid(innerBuffer))
UnlockReleaseBuffer(innerBuffer);
bbi++; bbi++;
/* /*
...@@ -843,31 +789,26 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) ...@@ -843,31 +789,26 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
} }
else if (xldata->blknoInner != xldata->blknoParent) else if (xldata->blknoInner != xldata->blknoParent)
{ {
if (record->xl_info & XLR_BKP_BLOCK(bbi)) Buffer parentBuffer;
(void) RestoreBackupBlock(lsn, record, bbi, false, false);
else
{
Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false);
if (BufferIsValid(buffer)) if (XLogReadBufferForRedo(lsn, record, bbi,
{ xldata->node, xldata->blknoParent,
page = BufferGetPage(buffer); &parentBuffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple parent;
if (lsn > PageGetLSN(page)) page = BufferGetPage(parentBuffer);
{
SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page, parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent)); PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI, spgUpdateNodeLink(parent, xldata->nodeI,
xldata->blknoInner, xldata->offnumInner); xldata->blknoInner, xldata->offnumInner);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
MarkBufferDirty(buffer); MarkBufferDirty(parentBuffer);
}
UnlockReleaseBuffer(buffer);
}
} }
if (BufferIsValid(parentBuffer))
UnlockReleaseBuffer(parentBuffer);
} }
} }
...@@ -902,62 +843,56 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record) ...@@ -902,62 +843,56 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
ptr += sizeof(OffsetNumber) * xldata->nChain; ptr += sizeof(OffsetNumber) * xldata->nChain;
chainDest = (OffsetNumber *) ptr; chainDest = (OffsetNumber *) ptr;
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); page = BufferGetPage(buffer);
if (BufferIsValid(buffer))
spgPageIndexMultiDelete(&state, page,
toDead, xldata->nDead,
SPGIST_DEAD, SPGIST_DEAD,
InvalidBlockNumber,
InvalidOffsetNumber);
spgPageIndexMultiDelete(&state, page,
toPlaceholder, xldata->nPlaceholder,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
/* see comments in vacuumLeafPage() */
for (i = 0; i < xldata->nMove; i++)
{ {
page = BufferGetPage(buffer); ItemId idSrc = PageGetItemId(page, moveSrc[i]);
if (lsn > PageGetLSN(page)) ItemId idDest = PageGetItemId(page, moveDest[i]);
{ ItemIdData tmp;
spgPageIndexMultiDelete(&state, page,
toDead, xldata->nDead,
SPGIST_DEAD, SPGIST_DEAD,
InvalidBlockNumber,
InvalidOffsetNumber);
spgPageIndexMultiDelete(&state, page, tmp = *idSrc;
toPlaceholder, xldata->nPlaceholder, *idSrc = *idDest;
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, *idDest = tmp;
InvalidBlockNumber, }
InvalidOffsetNumber);
/* see comments in vacuumLeafPage() */ spgPageIndexMultiDelete(&state, page,
for (i = 0; i < xldata->nMove; i++) moveSrc, xldata->nMove,
{ SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
ItemId idSrc = PageGetItemId(page, moveSrc[i]); InvalidBlockNumber,
ItemId idDest = PageGetItemId(page, moveDest[i]); InvalidOffsetNumber);
ItemIdData tmp;
tmp = *idSrc;
*idSrc = *idDest;
*idDest = tmp;
}
spgPageIndexMultiDelete(&state, page,
moveSrc, xldata->nMove,
SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
InvalidBlockNumber,
InvalidOffsetNumber);
for (i = 0; i < xldata->nChain; i++) for (i = 0; i < xldata->nChain; i++)
{ {
SpGistLeafTuple lt; SpGistLeafTuple lt;
lt = (SpGistLeafTuple) PageGetItem(page, lt = (SpGistLeafTuple) PageGetItem(page,
PageGetItemId(page, chainSrc[i])); PageGetItemId(page, chainSrc[i]));
Assert(lt->tupstate == SPGIST_LIVE); Assert(lt->tupstate == SPGIST_LIVE);
lt->nextOffset = chainDest[i]; lt->nextOffset = chainDest[i];
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
UnlockReleaseBuffer(buffer);
} }
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -971,25 +906,19 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record) ...@@ -971,25 +906,19 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
toDelete = xldata->offsets; toDelete = xldata->offsets;
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); page = BufferGetPage(buffer);
if (BufferIsValid(buffer))
{
page = BufferGetPage(buffer);
if (lsn > PageGetLSN(page))
{
/* The tuple numbers are in order */
PageIndexMultiDelete(page, toDelete, xldata->nDelete);
PageSetLSN(page, lsn); /* The tuple numbers are in order */
MarkBufferDirty(buffer); PageIndexMultiDelete(page, toDelete, xldata->nDelete);
}
UnlockReleaseBuffer(buffer); PageSetLSN(page, lsn);
} MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
static void static void
...@@ -999,7 +928,6 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) ...@@ -999,7 +928,6 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr; spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
OffsetNumber *itemToPlaceholder; OffsetNumber *itemToPlaceholder;
Buffer buffer; Buffer buffer;
Page page;
itemToPlaceholder = xldata->offsets; itemToPlaceholder = xldata->offsets;
...@@ -1014,64 +942,55 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) ...@@ -1014,64 +942,55 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
xldata->node); xldata->node);
} }
if (record->xl_info & XLR_BKP_BLOCK(0)) if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
(void) RestoreBackupBlock(lsn, record, 0, false, false); &buffer) == BLK_NEEDS_REDO)
else
{ {
buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); Page page = BufferGetPage(buffer);
SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
int i;
if (BufferIsValid(buffer)) /* Convert redirect pointers to plain placeholders */
for (i = 0; i < xldata->nToPlaceholder; i++)
{ {
page = BufferGetPage(buffer); SpGistDeadTuple dt;
if (lsn > PageGetLSN(page))
{
SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
int i;
/* Convert redirect pointers to plain placeholders */ dt = (SpGistDeadTuple) PageGetItem(page,
for (i = 0; i < xldata->nToPlaceholder; i++)
{
SpGistDeadTuple dt;
dt = (SpGistDeadTuple) PageGetItem(page,
PageGetItemId(page, itemToPlaceholder[i])); PageGetItemId(page, itemToPlaceholder[i]));
Assert(dt->tupstate == SPGIST_REDIRECT); Assert(dt->tupstate == SPGIST_REDIRECT);
dt->tupstate = SPGIST_PLACEHOLDER; dt->tupstate = SPGIST_PLACEHOLDER;
ItemPointerSetInvalid(&dt->pointer); ItemPointerSetInvalid(&dt->pointer);
} }
Assert(opaque->nRedirection >= xldata->nToPlaceholder);
opaque->nRedirection -= xldata->nToPlaceholder;
opaque->nPlaceholder += xldata->nToPlaceholder;
/* Remove placeholder tuples at end of page */
if (xldata->firstPlaceholder != InvalidOffsetNumber)
{
int max = PageGetMaxOffsetNumber(page);
OffsetNumber *toDelete;
toDelete = palloc(sizeof(OffsetNumber) * max); Assert(opaque->nRedirection >= xldata->nToPlaceholder);
opaque->nRedirection -= xldata->nToPlaceholder;
opaque->nPlaceholder += xldata->nToPlaceholder;
for (i = xldata->firstPlaceholder; i <= max; i++) /* Remove placeholder tuples at end of page */
toDelete[i - xldata->firstPlaceholder] = i; if (xldata->firstPlaceholder != InvalidOffsetNumber)
{
int max = PageGetMaxOffsetNumber(page);
OffsetNumber *toDelete;
i = max - xldata->firstPlaceholder + 1; toDelete = palloc(sizeof(OffsetNumber) * max);
Assert(opaque->nPlaceholder >= i);
opaque->nPlaceholder -= i;
/* The array is sorted, so can use PageIndexMultiDelete */ for (i = xldata->firstPlaceholder; i <= max; i++)
PageIndexMultiDelete(page, toDelete, i); toDelete[i - xldata->firstPlaceholder] = i;
pfree(toDelete); i = max - xldata->firstPlaceholder + 1;
} Assert(opaque->nPlaceholder >= i);
opaque->nPlaceholder -= i;
PageSetLSN(page, lsn); /* The array is sorted, so can use PageIndexMultiDelete */
MarkBufferDirty(buffer); PageIndexMultiDelete(page, toDelete, i);
}
UnlockReleaseBuffer(buffer); pfree(toDelete);
} }
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
} }
void void
......
...@@ -500,33 +500,28 @@ incrementally update the page, the rdata array *must* mention the buffer ...@@ -500,33 +500,28 @@ incrementally update the page, the rdata array *must* mention the buffer
ID at least once; otherwise there is no defense against torn-page problems. ID at least once; otherwise there is no defense against torn-page problems.
The standard replay-routine pattern for this case is The standard replay-routine pattern for this case is
if (record->xl_info & XLR_BKP_BLOCK(N)) if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
{ {
/* apply the change from the full-page image */ page = (Page) BufferGetPage(buffer);
(void) RestoreBackupBlock(lsn, record, N, false, false);
return;
}
buffer = XLogReadBuffer(rnode, blkno, false); ... apply the change ...
if (!BufferIsValid(buffer))
{
/* page has been deleted, so we need do nothing */
return;
}
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page))) PageSetLSN(page, lsn);
{ MarkBufferDirty(buffer);
/* changes are already applied */
UnlockReleaseBuffer(buffer);
return;
} }
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
... apply the change ... XLogReadBufferForRedo reads the page from disk, and checks what action needs to
be taken to the page. If the XLR_BKP_BLOCK(N) flag is set, it restores the
PageSetLSN(page, lsn); full page image and returns BLK_RESTORED. If there is no full page image, but
MarkBufferDirty(buffer); page cannot be found or if the change has already been replayed (i.e. the
UnlockReleaseBuffer(buffer); page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
respectively. Usually, the redo routine only needs to pay attention to the
BLK_NEEDS_REDO return code, which means that the routine should apply the
incremental change. In any case, the caller is responsible for unlocking and
releasing the buffer. Note that XLogReadBufferForRedo returns the buffer
locked even if no redo is required, unless the page does not exist.
As noted above, for a multi-page update you need to be able to determine As noted above, for a multi-page update you need to be able to determine
which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects
...@@ -539,31 +534,8 @@ per the above discussion, fully-rewritable buffers shouldn't be mentioned in ...@@ -539,31 +534,8 @@ per the above discussion, fully-rewritable buffers shouldn't be mentioned in
When replaying a WAL record that describes changes on multiple pages, you When replaying a WAL record that describes changes on multiple pages, you
must be careful to lock the pages properly to prevent concurrent Hot Standby must be careful to lock the pages properly to prevent concurrent Hot Standby
queries from seeing an inconsistent state. If this requires that two queries from seeing an inconsistent state. If this requires that two
or more buffer locks be held concurrently, the coding pattern shown above or more buffer locks be held concurrently, you must lock the pages in
is too simplistic, since it assumes the routine can exit as soon as it's appropriate order, and not release the locks until all the changes are done.
known the current page requires no modification. Instead, you might have
something like
if (record->xl_info & XLR_BKP_BLOCK(0))
{
/* apply the change from the full-page image */
buffer0 = RestoreBackupBlock(lsn, record, 0, false, true);
}
else
{
buffer0 = XLogReadBuffer(rnode, blkno, false);
if (BufferIsValid(buffer0))
{
... apply the change if not already done ...
MarkBufferDirty(buffer0);
}
}
... similarly apply the changes for remaining pages ...
/* and now we can release the lock on the first page */
if (BufferIsValid(buffer0))
UnlockReleaseBuffer(buffer0);
Note that we must only use PageSetLSN/PageGetLSN() when we know the action Note that we must only use PageSetLSN/PageGetLSN() when we know the action
is serialised. Only Startup process may modify data blocks during recovery, is serialised. Only Startup process may modify data blocks during recovery,
......
...@@ -242,6 +242,87 @@ XLogCheckInvalidPages(void) ...@@ -242,6 +242,87 @@ XLogCheckInvalidPages(void)
invalid_page_tab = NULL; invalid_page_tab = NULL;
} }
/*
* XLogReadBufferForRedo
* Read a page during XLOG replay
*
* Reads a block referenced by a WAL record into shared buffer cache, and
* determines what needs to be done to redo the changes to it. If the WAL
* record includes a full-page image of the page, it is restored.
*
* 'lsn' is the LSN of the record being replayed. It is compared with the
* page's LSN to determine if the record has already been replayed.
* 'rnode' and 'blkno' point to the block being replayed (main fork number
* is implied, use XLogReadBufferForRedoExtended for other forks).
* 'block_index' identifies the backup block in the record for the page.
*
* Returns one of the following:
*
* BLK_NEEDS_REDO - changes from the WAL record need to be applied
* BLK_DONE - block doesn't need replaying
* BLK_RESTORED - block was restored from a full-page image included in
* the record
* BLK_NOTFOUND - block was not found (because it was truncated away by
* an operation later in the WAL stream)
*
* On return, the buffer is locked in exclusive-mode, and returned in *buf.
* Note that the buffer is locked and returned even if it doesn't need
* replaying. (Getting the buffer lock is not really necessary during
* single-process crash recovery, but some subroutines such as MarkBufferDirty
* will complain if we don't have the lock. In hot standby mode it's
* definitely necessary.)
*/
XLogRedoAction
XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
RelFileNode rnode, BlockNumber blkno,
Buffer *buf)
{
return XLogReadBufferForRedoExtended(lsn, record, block_index,
rnode, MAIN_FORKNUM, blkno,
RBM_NORMAL, false, buf);
}
/*
* XLogReadBufferForRedoExtended
* Like XLogReadBufferForRedo, but with extra options.
*
* If mode is RBM_ZERO or RBM_ZERO_ON_ERROR, if the page doesn't exist, the
* relation is extended with all-zeroes pages up to the referenced block
* number. In RBM_ZERO mode, the return value is always BLK_NEEDS_REDO.
*
* If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
* using LockBufferForCleanup(), instead of a regular exclusive lock.
*/
XLogRedoAction
XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
int block_index, RelFileNode rnode,
ForkNumber forkno, BlockNumber blkno,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf)
{
if (record->xl_info & XLR_BKP_BLOCK(block_index))
{
*buf = RestoreBackupBlock(lsn, record, block_index,
get_cleanup_lock, true);
return BLK_RESTORED;
}
else
{
*buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
if (BufferIsValid(*buf))
{
LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
if (lsn <= PageGetLSN(BufferGetPage(*buf)))
return BLK_DONE;
else
return BLK_NEEDS_REDO;
}
else
return BLK_NOTFOUND;
}
}
/* /*
* XLogReadBuffer * XLogReadBuffer
* Read a page during XLOG replay. * Read a page during XLOG replay.
......
/* /*
* xlogutils.h * xlogutils.h
* *
* PostgreSQL transaction log manager utility routines * Utilities for replaying WAL records.
* *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#ifndef XLOG_UTILS_H #ifndef XLOG_UTILS_H
#define XLOG_UTILS_H #define XLOG_UTILS_H
#include "access/xlog.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
...@@ -22,6 +23,26 @@ extern void XLogDropDatabase(Oid dbid); ...@@ -22,6 +23,26 @@ extern void XLogDropDatabase(Oid dbid);
extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks); BlockNumber nblocks);
/* Result codes for XLogReadBufferForRedo[Extended] */
typedef enum
{
BLK_NEEDS_REDO, /* changes from WAL record need to be applied */
BLK_DONE, /* block is already up-to-date */
BLK_RESTORED, /* block was restored from a full-page image */
BLK_NOTFOUND /* block was not found (and hence does not need to be
* replayed) */
} XLogRedoAction;
extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
int block_index, RelFileNode rnode, BlockNumber blkno,
Buffer *buf);
extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
XLogRecord *record, int block_index,
RelFileNode rnode, ForkNumber forkno,
BlockNumber blkno,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf);
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init); extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode); BlockNumber blkno, ReadBufferMode mode);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment