Commit a45c70ac authored by Heikki Linnakangas

Fix double-XLogBeginInsert call in GIN page splits.

If data checksums or wal_log_hints is on, and a GIN page is split, the code
to find a new, empty block was called after XLogBeginInsert() had already
been called. That causes an assertion failure or PANIC if finding the new
block involves updating an FSM page that has not been modified since the last
checkpoint, because that update is WAL-logged, which calls XLogBeginInsert()
again. Nested XLogBeginInsert() calls are not supported.

To fix, rearrange GIN code so that XLogBeginInsert is called later, after
finding the victim buffers.

Reported by Jeff Janes.
parent b36805f3
...@@ -358,20 +358,15 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, ...@@ -358,20 +358,15 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
* placeToPage can register some data to the WAL record. * placeToPage can register some data to the WAL record.
* *
* If placeToPage returns INSERTED, placeToPage has already called * If placeToPage returns INSERTED, placeToPage has already called
* START_CRIT_SECTION(), and we're responsible for calling * START_CRIT_SECTION() and XLogBeginInsert(), and registered any data
* END_CRIT_SECTION. When it returns INSERTED, it is also responsible for * required to replay the operation, in block index 0. We're responsible
* registering any data required to replay the operation with * for filling in the main data portion of the WAL record, calling
* XLogRegisterData(0, ...). It may only add data to block index 0; the * XLogInsert(), and END_CRIT_SECTION.
* main data of the WAL record is reserved for this function.
* *
* If placeToPage returns SPLIT, we're wholly responsible for WAL logging. * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
* Splits happen infrequently, so we just make a full-page image of all * Splits happen infrequently, so we just make a full-page image of all
* the pages involved. * the pages involved.
*/ */
if (RelationNeedsWAL(btree->index))
XLogBeginInsert();
rc = btree->placeToPage(btree, stack->buffer, stack, rc = btree->placeToPage(btree, stack->buffer, stack,
insertdata, updateblkno, insertdata, updateblkno,
&newlpage, &newrpage); &newlpage, &newrpage);
...@@ -558,6 +553,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, ...@@ -558,6 +553,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
XLogBeginInsert();
/* /*
* We just take full page images of all the split pages. Splits * We just take full page images of all the split pages. Splits
* are uncommon enough that it's not worth complicating the code * are uncommon enough that it's not worth complicating the code
......
...@@ -600,7 +600,10 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, ...@@ -600,7 +600,10 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*/ */
MemoryContextSwitchTo(oldCxt); MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index)) if (RelationNeedsWAL(btree->index))
{
XLogBeginInsert();
registerLeafRecompressWALData(buf, leaf); registerLeafRecompressWALData(buf, leaf);
}
START_CRIT_SECTION(); START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buf, leaf); dataPlaceToPageLeafRecompress(buf, leaf);
...@@ -1120,6 +1123,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, ...@@ -1120,6 +1123,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.offset = off; data.offset = off;
data.newitem = *pitem; data.newitem = *pitem;
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data, XLogRegisterBufData(0, (char *) &data,
sizeof(ginxlogInsertDataInternal)); sizeof(ginxlogInsertDataInternal));
......
...@@ -557,6 +557,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, ...@@ -557,6 +557,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.isDelete = insertData->isDelete; data.isDelete = insertData->isDelete;
data.offset = off; data.offset = off;
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data, XLogRegisterBufData(0, (char *) &data,
offsetof(ginxlogInsertEntry, tuple)); offsetof(ginxlogInsertEntry, tuple));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment