Commit c91a9b5a authored by Heikki Linnakangas

Fix backup-block numbering in redo of b-tree split.

I got the backup block numbers off-by-one in the commit that changed the
way incomplete-splits are handled. I blame the comments, which said
"backup block 1" and "backup block 2", even though the backup blocks
are numbered starting from 0, in the macros and functions used in replay.
Fix the comments and the code.

Per Jeff Janes' bug report about corruption caused by torn page writes.
The incorrect code is new in git master, but backpatch the comment change
down to 9.0, where the numbering in the redo-side macros was changed.
parent eb568121
...@@ -1299,7 +1299,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, ...@@ -1299,7 +1299,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
lastrdata->data = (char *) newitem; lastrdata->data = (char *) newitem;
lastrdata->len = MAXALIGN(newitemsz); lastrdata->len = MAXALIGN(newitemsz);
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true; lastrdata->buffer_std = true;
} }
...@@ -1320,7 +1320,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, ...@@ -1320,7 +1320,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
item = (IndexTuple) PageGetItem(origpage, itemid); item = (IndexTuple) PageGetItem(origpage, itemid);
lastrdata->data = (char *) item; lastrdata->data = (char *) item;
lastrdata->len = MAXALIGN(IndexTupleSize(item)); lastrdata->len = MAXALIGN(IndexTupleSize(item));
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true; lastrdata->buffer_std = true;
} }
...@@ -1333,11 +1333,11 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, ...@@ -1333,11 +1333,11 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* Although we don't need to WAL-log anything on the left page, we * Although we don't need to WAL-log anything on the left page, we
* still need XLogInsert to consider storing a full-page image of * still need XLogInsert to consider storing a full-page image of
* the left page, so make an empty entry referencing that buffer. * the left page, so make an empty entry referencing that buffer.
* This also ensures that the left page is always backup block 1. * This also ensures that the left page is always backup block 0.
*/ */
lastrdata->data = NULL; lastrdata->data = NULL;
lastrdata->len = 0; lastrdata->len = 0;
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = buf; /* backup block 0 */
lastrdata->buffer_std = true; lastrdata->buffer_std = true;
} }
...@@ -1353,7 +1353,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, ...@@ -1353,7 +1353,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
cblkno = BufferGetBlockNumber(cbuf); cblkno = BufferGetBlockNumber(cbuf);
lastrdata->data = (char *) &cblkno; lastrdata->data = (char *) &cblkno;
lastrdata->len = sizeof(BlockNumber); lastrdata->len = sizeof(BlockNumber);
lastrdata->buffer = cbuf; /* backup block 2 */ lastrdata->buffer = cbuf; /* backup block 1 */
lastrdata->buffer_std = true; lastrdata->buffer_std = true;
} }
...@@ -1386,7 +1386,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, ...@@ -1386,7 +1386,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
lastrdata->data = NULL; lastrdata->data = NULL;
lastrdata->len = 0; lastrdata->len = 0;
lastrdata->buffer = sbuf; /* bkp block 2 (leaf) or 3 (non-leaf) */ lastrdata->buffer = sbuf; /* bkp block 1 (leaf) or 2 (non-leaf) */
lastrdata->buffer_std = true; lastrdata->buffer_std = true;
} }
......
...@@ -300,8 +300,8 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -300,8 +300,8 @@ btree_xlog_split(bool onleft, bool isroot,
*/ */
if (!isleaf) if (!isleaf)
{ {
if (record->xl_info & XLR_BKP_BLOCK(2)) if (record->xl_info & XLR_BKP_BLOCK(1))
(void) RestoreBackupBlock(lsn, record, 2, false, false); (void) RestoreBackupBlock(lsn, record, 1, false, false);
else else
_bt_clear_incomplete_split(lsn, record, xlrec->node, cblkno); _bt_clear_incomplete_split(lsn, record, xlrec->node, cblkno);
} }
...@@ -439,10 +439,10 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -439,10 +439,10 @@ btree_xlog_split(bool onleft, bool isroot,
if (xlrec->rnext != P_NONE) if (xlrec->rnext != P_NONE)
{ {
/* /*
* the backup block containing right sibling is 2 or 3, depending * the backup block containing right sibling is 1 or 2, depending
* whether this was a leaf or internal page. * whether this was a leaf or internal page.
*/ */
int rnext_index = isleaf ? 2 : 3; int rnext_index = isleaf ? 1 : 2;
if (record->xl_info & XLR_BKP_BLOCK(rnext_index)) if (record->xl_info & XLR_BKP_BLOCK(rnext_index))
(void) RestoreBackupBlock(lsn, record, rnext_index, false, false); (void) RestoreBackupBlock(lsn, record, rnext_index, false, false);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment