Commit 631118fe authored by Heikki Linnakangas's avatar Heikki Linnakangas

Get rid of the post-recovery cleanup step of GIN page splits.

Replace it with an approach similar to what GiST uses: when a page is split,
the left sibling is marked with a flag indicating that the parent hasn't been
updated yet. When the parent is updated, the flag is cleared. If an insertion
steps on a page with the flag set, it will finish the split before proceeding
with the insertion.

The post-recovery cleanup mechanism was never totally reliable, as insertion
to the parent could fail, e.g. because of running out of memory or disk space,
leaving the tree in an inconsistent state.

This also divides the responsibility of WAL-logging more clearly between
the generic ginbtree.c code, and the parts specific to entry and posting
trees. There is now a common WAL record format for insertions and deletions,
which is written by ginbtree.c, followed by tree-specific payload, which is
returned by the placetopage- and split- callbacks.
parent ce5326ee
This diff is collapsed.
...@@ -227,6 +227,7 @@ GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset) ...@@ -227,6 +227,7 @@ GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset)
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
char *ptr; char *ptr;
Assert(ItemPointerIsValid(data));
Assert(GinPageIsLeaf(page)); Assert(GinPageIsLeaf(page));
if (offset == InvalidOffsetNumber) if (offset == InvalidOffsetNumber)
...@@ -255,6 +256,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset) ...@@ -255,6 +256,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
char *ptr; char *ptr;
Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsLeaf(page));
if (offset == InvalidOffsetNumber) if (offset == InvalidOffsetNumber)
...@@ -338,11 +340,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -338,11 +340,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
XLogRecData **prdata) XLogRecData **prdata)
{ {
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
int cnt = 0;
/* these must be static so they can be returned to caller */ /* these must be static so they can be returned to caller */
static XLogRecData rdata[3]; static XLogRecData rdata[2];
static ginxlogInsert data;
/* quick exit if it doesn't fit */ /* quick exit if it doesn't fit */
if (!dataIsEnoughSpace(btree, buf, off, insertdata)) if (!dataIsEnoughSpace(btree, buf, off, insertdata))
...@@ -359,45 +358,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -359,45 +358,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
PostingItemSetBlockNumber(pitem, updateblkno); PostingItemSetBlockNumber(pitem, updateblkno);
} }
data.updateBlkno = updateblkno;
data.node = btree->index->rd_node;
data.blkno = BufferGetBlockNumber(buf);
data.offset = off;
data.nitem = 1;
data.isDelete = FALSE;
data.isData = TRUE;
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
/*
* Prevent full page write if child's split occurs. That is needed to
* remove incomplete splits while replaying WAL
*
* data.updateBlkno contains new block number (of newly created right
* page) for recently splited page.
*/
if (data.updateBlkno == InvalidBlockNumber)
{
rdata[0].buffer = buf;
rdata[0].buffer_std = FALSE;
rdata[0].data = NULL;
rdata[0].len = 0;
rdata[0].next = &rdata[1];
cnt++;
}
rdata[cnt].buffer = InvalidBuffer;
rdata[cnt].data = (char *) &data;
rdata[cnt].len = sizeof(ginxlogInsert);
rdata[cnt].next = &rdata[cnt + 1];
cnt++;
rdata[cnt].buffer = InvalidBuffer;
/* data and len filled in below */
rdata[cnt].next = NULL;
if (GinPageIsLeaf(page)) if (GinPageIsLeaf(page))
{ {
GinBtreeDataLeafInsertData *items = insertdata; GinBtreeDataLeafInsertData *items = insertdata;
static ginxlogInsertDataLeaf data;
uint32 savedPos = items->curitem; uint32 savedPos = items->curitem;
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff) if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
...@@ -415,10 +379,18 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -415,10 +379,18 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
{ {
GinDataPageAddItemPointer(page, items->items + items->curitem, off); GinDataPageAddItemPointer(page, items->items + items->curitem, off);
items->curitem++; items->curitem++;
data.nitem = 1;
} }
rdata[cnt].data = (char *) &items->items[savedPos]; rdata[0].buffer = InvalidBuffer;
rdata[cnt].len = sizeof(ItemPointerData) * data.nitem; rdata[0].data = (char *) &data;
rdata[0].len = offsetof(ginxlogInsertDataLeaf, items);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) &items->items[savedPos];
rdata[1].len = sizeof(ItemPointerData) * data.nitem;
rdata[1].next = NULL;
} }
else else
{ {
...@@ -426,8 +398,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -426,8 +398,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
GinDataPageAddPostingItem(page, pitem, off); GinDataPageAddPostingItem(page, pitem, off);
rdata[cnt].data = (char *) pitem; rdata[0].buffer = InvalidBuffer;
rdata[cnt].len = sizeof(PostingItem); rdata[0].data = (char *) pitem;
rdata[0].len = sizeof(PostingItem);
rdata[0].next = NULL;
} }
return true; return true;
...@@ -456,8 +430,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -456,8 +430,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
Size freeSpace; Size freeSpace;
/* these must be static so they can be returned to caller */ /* these must be static so they can be returned to caller */
static ginxlogSplit data; static ginxlogSplitData data;
static XLogRecData rdata[4]; static XLogRecData rdata[2];
static char vector[2 * BLCKSZ]; static char vector[2 * BLCKSZ];
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
...@@ -488,6 +462,7 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -488,6 +462,7 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff) if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
{ {
/* append new items to the end */
GinBtreeDataLeafInsertData *items = insertdata; GinBtreeDataLeafInsertData *items = insertdata;
while (items->curitem < items->nitem && while (items->curitem < items->nitem &&
...@@ -566,25 +541,18 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -566,25 +541,18 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
bound = GinDataPageGetRightBound(rpage); bound = GinDataPageGetRightBound(rpage);
*bound = oldbound; *bound = oldbound;
data.node = btree->index->rd_node;
data.rootBlkno = InvalidBlockNumber;
data.lblkno = BufferGetBlockNumber(lbuf);
data.rblkno = BufferGetBlockNumber(rbuf);
data.separator = separator; data.separator = separator;
data.nitem = maxoff; data.nitem = maxoff;
data.isData = TRUE;
data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
data.isRootSplit = FALSE;
data.rightbound = oldbound; data.rightbound = oldbound;
rdata[0].buffer = InvalidBuffer; rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data; rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogSplit); rdata[0].len = sizeof(ginxlogSplitData);
rdata[0].next = &rdata[1]; rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
rdata[1].data = vector; rdata[1].data = vector;
rdata[1].len = MAXALIGN(maxoff * sizeofitem); rdata[1].len = maxoff * sizeofitem;
rdata[1].next = NULL; rdata[1].next = NULL;
return lpage; return lpage;
...@@ -610,21 +578,18 @@ dataPrepareDownlink(GinBtree btree, Buffer lbuf) ...@@ -610,21 +578,18 @@ dataPrepareDownlink(GinBtree btree, Buffer lbuf)
* Also called from ginxlog, should not use btree * Also called from ginxlog, should not use btree
*/ */
void void
ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
{ {
Page page = BufferGetPage(root),
lpage = BufferGetPage(lbuf),
rpage = BufferGetPage(rbuf);
PostingItem li, PostingItem li,
ri; ri;
li.key = *GinDataPageGetRightBound(lpage); li.key = *GinDataPageGetRightBound(lpage);
PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf)); PostingItemSetBlockNumber(&li, lblkno);
GinDataPageAddPostingItem(page, &li, InvalidOffsetNumber); GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
ri.key = *GinDataPageGetRightBound(rpage); ri.key = *GinDataPageGetRightBound(rpage);
PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf)); PostingItemSetBlockNumber(&ri, rblkno);
GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber); GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
} }
/* /*
......
...@@ -504,7 +504,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -504,7 +504,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
/* these must be static so they can be returned to caller */ /* these must be static so they can be returned to caller */
static XLogRecData rdata[3]; static XLogRecData rdata[3];
static ginxlogInsert data; static ginxlogInsertEntry data;
/* quick exit if it doesn't fit */ /* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off, insertData)) if (!entryIsEnoughSpace(btree, buf, off, insertData))
...@@ -512,7 +512,6 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -512,7 +512,6 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
*prdata = rdata; *prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno); entryPreparePage(btree, page, off, insertData, updateblkno);
data.updateBlkno = updateblkno;
placed = PageAddItem(page, placed = PageAddItem(page,
(Item) insertData->entry, (Item) insertData->entry,
...@@ -522,34 +521,11 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -522,34 +521,11 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
elog(ERROR, "failed to add item to index page in \"%s\"", elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index)); RelationGetRelationName(btree->index));
data.node = btree->index->rd_node;
data.blkno = BufferGetBlockNumber(buf);
data.offset = off;
data.nitem = 1;
data.isDelete = insertData->isDelete; data.isDelete = insertData->isDelete;
data.isData = false;
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
/*
* Prevent full page write if child's split occurs. That is needed to
* remove incomplete splits while replaying WAL
*
* data.updateBlkno contains new block number (of newly created right
* page) for recently splited page.
*/
if (data.updateBlkno == InvalidBlockNumber)
{
rdata[0].buffer = buf;
rdata[0].buffer_std = TRUE;
rdata[0].data = NULL;
rdata[0].len = 0;
rdata[0].next = &rdata[1];
cnt++;
}
rdata[cnt].buffer = InvalidBuffer; rdata[cnt].buffer = InvalidBuffer;
rdata[cnt].data = (char *) &data; rdata[cnt].data = (char *) &data;
rdata[cnt].len = sizeof(ginxlogInsert); rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
rdata[cnt].next = &rdata[cnt + 1]; rdata[cnt].next = &rdata[cnt + 1];
cnt++; cnt++;
...@@ -577,6 +553,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -577,6 +553,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
maxoff, maxoff,
separator = InvalidOffsetNumber; separator = InvalidOffsetNumber;
Size totalsize = 0; Size totalsize = 0;
Size tupstoresize;
Size lsize = 0, Size lsize = 0,
size; size;
char *ptr; char *ptr;
...@@ -588,18 +565,18 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -588,18 +565,18 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
/* these must be static so they can be returned to caller */ /* these must be static so they can be returned to caller */
static XLogRecData rdata[2]; static XLogRecData rdata[2];
static ginxlogSplit data; static ginxlogSplitEntry data;
static char tupstore[2 * BLCKSZ]; static char tupstore[2 * BLCKSZ];
*prdata = rdata; *prdata = rdata;
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
InvalidOffsetNumber : GinGetDownlink(insertData->entry);
data.updateBlkno = updateblkno;
entryPreparePage(btree, lpage, off, insertData, updateblkno); entryPreparePage(btree, lpage, off, insertData, updateblkno);
/*
* First, append all the existing tuples and the new tuple we're inserting
* one after another in a temporary workspace.
*/
maxoff = PageGetMaxOffsetNumber(lpage); maxoff = PageGetMaxOffsetNumber(lpage);
ptr = tupstore; ptr = tupstore;
for (i = FirstOffsetNumber; i <= maxoff; i++) for (i = FirstOffsetNumber; i <= maxoff; i++)
{ {
if (i == off) if (i == off)
...@@ -624,7 +601,12 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -624,7 +601,12 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
ptr += size; ptr += size;
totalsize += size + sizeof(ItemIdData); totalsize += size + sizeof(ItemIdData);
} }
tupstoresize = ptr - tupstore;
/*
* Initialize the left and right pages, and copy all the tuples back to
* them.
*/
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
...@@ -654,24 +636,17 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, ...@@ -654,24 +636,17 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
ptr += MAXALIGN(IndexTupleSize(itup)); ptr += MAXALIGN(IndexTupleSize(itup));
} }
data.node = btree->index->rd_node;
data.rootBlkno = InvalidBlockNumber;
data.lblkno = BufferGetBlockNumber(lbuf);
data.rblkno = BufferGetBlockNumber(rbuf);
data.separator = separator; data.separator = separator;
data.nitem = maxoff; data.nitem = maxoff;
data.isData = FALSE;
data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
data.isRootSplit = FALSE;
rdata[0].buffer = InvalidBuffer; rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data; rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogSplit); rdata[0].len = sizeof(ginxlogSplitEntry);
rdata[0].next = &rdata[1]; rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
rdata[1].data = tupstore; rdata[1].data = tupstore;
rdata[1].len = MAXALIGN(totalsize); rdata[1].len = tupstoresize;
rdata[1].next = NULL; rdata[1].next = NULL;
return lpage; return lpage;
...@@ -702,24 +677,19 @@ entryPrepareDownlink(GinBtree btree, Buffer lbuf) ...@@ -702,24 +677,19 @@ entryPrepareDownlink(GinBtree btree, Buffer lbuf)
* Also called from ginxlog, should not use btree * Also called from ginxlog, should not use btree
*/ */
void void
ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) ginEntryFillRoot(GinBtree btree, Page root,
BlockNumber lblkno, Page lpage,
BlockNumber rblkno, Page rpage)
{ {
Page page = BufferGetPage(root);
Page lpage = BufferGetPage(lbuf);
Page rpage = BufferGetPage(rbuf);
IndexTuple itup; IndexTuple itup;
itup = GinFormInteriorTuple(getRightMostTuple(lpage), itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno);
lpage, if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
BufferGetBlockNumber(lbuf));
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page"); elog(ERROR, "failed to add item to index root page");
pfree(itup); pfree(itup);
itup = GinFormInteriorTuple(getRightMostTuple(rpage), itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno);
rpage, if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
BufferGetBlockNumber(rbuf));
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page"); elog(ERROR, "failed to add item to index root page");
pfree(itup); pfree(itup);
} }
......
This diff is collapsed.
...@@ -41,20 +41,45 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec) ...@@ -41,20 +41,45 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno); desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
break; break;
case XLOG_GIN_INSERT: case XLOG_GIN_INSERT:
appendStringInfoString(buf, "Insert item, "); {
desc_node(buf, ((ginxlogInsert *) rec)->node, ((ginxlogInsert *) rec)->blkno); ginxlogInsert *xlrec = (ginxlogInsert *) rec;
appendStringInfo(buf, " offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u", char *payload = rec + sizeof(ginxlogInsert);
((ginxlogInsert *) rec)->offset,
((ginxlogInsert *) rec)->nitem, appendStringInfoString(buf, "Insert item, ");
(((ginxlogInsert *) rec)->isData) ? 'T' : 'F', desc_node(buf, xlrec->node, xlrec->blkno);
(((ginxlogInsert *) rec)->isLeaf) ? 'T' : 'F', appendStringInfo(buf, " offset: %u isdata: %c isleaf: %c",
(((ginxlogInsert *) rec)->isDelete) ? 'T' : 'F', xlrec->offset,
((ginxlogInsert *) rec)->updateBlkno); (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
{
BlockNumber leftChildBlkno;
BlockNumber rightChildBlkno;
memcpy(&leftChildBlkno, payload, sizeof(BlockNumber));
payload += sizeof(BlockNumber);
memcpy(&rightChildBlkno, payload, sizeof(BlockNumber));
payload += sizeof(BlockNumber);
appendStringInfo(buf, " children: %u/%u",
leftChildBlkno, rightChildBlkno);
}
if (!(xlrec->flags & GIN_INSERT_ISDATA))
appendStringInfo(buf, " isdelete: %c",
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
else if (xlrec->flags & GIN_INSERT_ISLEAF)
appendStringInfo(buf, " nitem: %u",
(((ginxlogInsertDataLeaf *) payload)->nitem) ? 'T' : 'F');
else
appendStringInfo(buf, " pitem: %u-%u/%u",
PostingItemGetBlockNumber((PostingItem *) payload),
ItemPointerGetBlockNumber(&((PostingItem *) payload)->key),
ItemPointerGetOffsetNumber(&((PostingItem *) payload)->key));
}
break; break;
case XLOG_GIN_SPLIT: case XLOG_GIN_SPLIT:
appendStringInfoString(buf, "Page split, "); appendStringInfoString(buf, "Page split, ");
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno); desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->isRootSplit) ? 'T' : 'F'); appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
break; break;
case XLOG_GIN_VACUUM_PAGE: case XLOG_GIN_VACUUM_PAGE:
appendStringInfoString(buf, "Vacuum page, "); appendStringInfoString(buf, "Vacuum page, ");
......
...@@ -58,6 +58,5 @@ extern void gin_redo(XLogRecPtr lsn, XLogRecord *record); ...@@ -58,6 +58,5 @@ extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec); extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void gin_xlog_startup(void); extern void gin_xlog_startup(void);
extern void gin_xlog_cleanup(void); extern void gin_xlog_cleanup(void);
extern bool gin_safe_restartpoint(void);
#endif /* GIN_H */ #endif /* GIN_H */
...@@ -48,6 +48,7 @@ typedef GinPageOpaqueData *GinPageOpaque; ...@@ -48,6 +48,7 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GIN_META (1 << 3) #define GIN_META (1 << 3)
#define GIN_LIST (1 << 4) #define GIN_LIST (1 << 4)
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */ #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not updated */
/* Page numbers of fixed-location pages */ /* Page numbers of fixed-location pages */
#define GIN_METAPAGE_BLKNO (0) #define GIN_METAPAGE_BLKNO (0)
...@@ -119,6 +120,7 @@ typedef struct GinMetaPageData ...@@ -119,6 +120,7 @@ typedef struct GinMetaPageData
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED) #define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED) #define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED) #define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
#define GinPageIsIncompleteSplit(page) ( GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT)
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
...@@ -336,41 +338,77 @@ typedef struct ginxlogInsert ...@@ -336,41 +338,77 @@ typedef struct ginxlogInsert
{ {
RelFileNode node; RelFileNode node;
BlockNumber blkno; BlockNumber blkno;
BlockNumber updateBlkno; uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
OffsetNumber offset; OffsetNumber offset;
bool isDelete;
bool isData;
bool isLeaf;
OffsetNumber nitem;
/* /*
* follows: tuples or ItemPointerData or PostingItem or list of * FOLLOWS:
* ItemPointerData *
* 1. if not leaf page, block numbers of the left and right child pages
* whose split this insertion finishes. As BlockIdData[2] (beware of adding
* fields before this that would make them not 16-bit aligned)
*
* 2. one of the following structs, depending on tree type.
*
* NB: the below structs are only 16-bit aligned when appended to a
* ginxlogInsert struct! Beware of adding fields to them that require
* stricter alignment.
*/ */
} ginxlogInsert; } ginxlogInsert;
typedef struct
{
bool isDelete;
IndexTupleData tuple; /* variable length */
} ginxlogInsertEntry;
typedef struct
{
OffsetNumber nitem;
ItemPointerData items[1]; /* variable length */
} ginxlogInsertDataLeaf;
/* In an insert to an internal data page, the payload is a PostingItem */
#define XLOG_GIN_SPLIT 0x30 #define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit typedef struct ginxlogSplit
{ {
RelFileNode node; RelFileNode node;
BlockNumber lblkno; BlockNumber lblkno;
BlockNumber rootBlkno;
BlockNumber rblkno; BlockNumber rblkno;
BlockNumber rrlink; BlockNumber rrlink; /* right link, or root's blocknumber if root split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
BlockNumber rightChildBlkno;
uint16 flags;
/* follows: one of the following structs */
} ginxlogSplit;
/*
* Flags used in ginxlogInsert and ginxlogSplit records
*/
#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
#define GIN_INSERT_ISLEAF 0x02 /* .. */
#define GIN_SPLIT_ROOT 0x04 /* only for split records */
typedef struct
{
OffsetNumber separator; OffsetNumber separator;
OffsetNumber nitem; OffsetNumber nitem;
bool isData; /* FOLLOWS: IndexTuples */
bool isLeaf; } ginxlogSplitEntry;
bool isRootSplit;
BlockNumber leftChildBlkno; typedef struct
BlockNumber updateBlkno; {
OffsetNumber separator;
OffsetNumber nitem;
ItemPointerData rightbound;
ItemPointerData rightbound; /* used only in posting tree */ /* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */
/* follows: list of tuple or ItemPointerData or PostingItem */ } ginxlogSplitData;
} ginxlogSplit;
#define XLOG_GIN_VACUUM_PAGE 0x40 #define XLOG_GIN_VACUUM_PAGE 0x40
...@@ -488,7 +526,7 @@ typedef struct GinBtreeData ...@@ -488,7 +526,7 @@ typedef struct GinBtreeData
bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **); bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **); Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
void *(*prepareDownlink) (GinBtree, Buffer); void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer); void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
bool isData; bool isData;
...@@ -535,9 +573,6 @@ extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode); ...@@ -535,9 +573,6 @@ extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
extern void freeGinBtreeStack(GinBtreeStack *stack); extern void freeGinBtreeStack(GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack, extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
void *insertdata, GinStatsData *buildStats); void *insertdata, GinStatsData *buildStats);
extern void ginFindParents(GinBtree btree, GinBtreeStack *stack);
extern void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
GinStatsData *buildStats);
/* ginentrypage.c */ /* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate, extern IndexTuple GinFormTuple(GinState *ginstate,
...@@ -547,7 +582,7 @@ extern void GinShortenTuple(IndexTuple itup, uint32 nipd); ...@@ -547,7 +582,7 @@ extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
Datum key, GinNullCategory category, Datum key, GinNullCategory category,
GinState *ginstate); GinState *ginstate);
extern void ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
/* gindatapage.c */ /* gindatapage.c */
extern BlockNumber createPostingTree(Relation index, extern BlockNumber createPostingTree(Relation index,
...@@ -560,7 +595,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ...@@ -560,7 +595,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
ItemPointerData *items, uint32 nitem, ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats); GinStatsData *buildStats);
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno); extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno); extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
/* ginscan.c */ /* ginscan.c */
......
...@@ -38,7 +38,7 @@ PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL) ...@@ -38,7 +38,7 @@ PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL)
PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, NULL, NULL, NULL) PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, NULL, NULL, NULL)
PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint) PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint)
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, NULL, NULL, NULL) PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, NULL, NULL, NULL)
PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, NULL)
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL)
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL, NULL) PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL, NULL)
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL) PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL)
...@@ -55,7 +55,7 @@ typedef struct BkpBlock ...@@ -55,7 +55,7 @@ typedef struct BkpBlock
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD076 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD077 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment