Commit ce5326ee authored by Heikki Linnakangas

More GIN refactoring.

Separate the insertion payload from the more static portions of GinBtree.
GinBtree now only contains information related to searching the tree, and
the information of what to insert is passed separately.

Add the root block number to GinBtree, instead of passing it around to all
the functions as an argument.

Split off ginFinishSplit() from ginInsertValue(). ginFinishSplit is
responsible for finding the parent and inserting the downlink to it.
parent 4118f7e8
This diff is collapsed.
This diff is collapsed.
...@@ -112,6 +112,7 @@ GinFormTuple(GinState *ginstate, ...@@ -112,6 +112,7 @@ GinFormTuple(GinState *ginstate,
if (newsize != IndexTupleSize(itup)) if (newsize != IndexTupleSize(itup))
{ {
itup = repalloc(itup, newsize); itup = repalloc(itup, newsize);
/* /*
* PostgreSQL 9.3 and earlier did not clear this new space, so we * PostgreSQL 9.3 and earlier did not clear this new space, so we
* might find uninitialized padding when reading tuples from disk. * might find uninitialized padding when reading tuples from disk.
...@@ -431,22 +432,26 @@ entryGetLeftMostPage(GinBtree btree, Page page) ...@@ -431,22 +432,26 @@ entryGetLeftMostPage(GinBtree btree, Page page)
} }
static bool static bool
entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off,
GinBtreeEntryInsertData *insertData)
{ {
Size itupsz = 0; Size releasedsz = 0;
Size addedsz;
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
Assert(btree->entry); Assert(insertData->entry);
Assert(!GinPageIsData(page)); Assert(!GinPageIsData(page));
if (btree->isDelete) if (insertData->isDelete)
{ {
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
} }
if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData)) addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData);
if (PageGetFreeSpace(page) + releasedsz >= addedsz)
return true; return true;
return false; return false;
...@@ -457,42 +462,42 @@ entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) ...@@ -457,42 +462,42 @@ entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
* should update it, update old child blkno to new right page * should update it, update old child blkno to new right page
* if child split occurred * if child split occurred
*/ */
static BlockNumber static void
entryPreparePage(GinBtree btree, Page page, OffsetNumber off) entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
GinBtreeEntryInsertData *insertData, BlockNumber updateblkno)
{ {
BlockNumber ret = InvalidBlockNumber; Assert(insertData->entry);
Assert(btree->entry);
Assert(!GinPageIsData(page)); Assert(!GinPageIsData(page));
if (btree->isDelete) if (insertData->isDelete)
{ {
Assert(GinPageIsLeaf(page)); Assert(GinPageIsLeaf(page));
PageIndexTupleDelete(page, off); PageIndexTupleDelete(page, off);
} }
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber)
{ {
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
GinSetDownlink(itup, btree->rightblkno); GinSetDownlink(itup, updateblkno);
ret = btree->rightblkno;
} }
btree->rightblkno = InvalidBlockNumber;
return ret;
} }
/* /*
* Place tuple on page and fills WAL record * Place tuple on page and fills WAL record
* *
* If the tuple doesn't fit, returns false without modifying the page. * If the tuple doesn't fit, returns false without modifying the page.
*
* On insertion to an internal node, in addition to inserting the given item,
* the downlink of the existing item at 'off' is updated to point to
* 'updateblkno'.
*/ */
static bool static bool
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
void *insertPayload, BlockNumber updateblkno,
XLogRecData **prdata) XLogRecData **prdata)
{ {
GinBtreeEntryInsertData *insertData = insertPayload;
Page page = BufferGetPage(buf); Page page = BufferGetPage(buf);
OffsetNumber placed; OffsetNumber placed;
int cnt = 0; int cnt = 0;
...@@ -502,13 +507,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -502,13 +507,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
static ginxlogInsert data; static ginxlogInsert data;
/* quick exit if it doesn't fit */ /* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off)) if (!entryIsEnoughSpace(btree, buf, off, insertData))
return false; return false;
*prdata = rdata; *prdata = rdata;
data.updateBlkno = entryPreparePage(btree, page, off); entryPreparePage(btree, page, off, insertData, updateblkno);
data.updateBlkno = updateblkno;
placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false); placed = PageAddItem(page,
(Item) insertData->entry,
IndexTupleSize(insertData->entry),
off, false, false);
if (placed != off) if (placed != off)
elog(ERROR, "failed to add item to index page in \"%s\"", elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index)); RelationGetRelationName(btree->index));
...@@ -517,7 +526,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -517,7 +526,7 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
data.blkno = BufferGetBlockNumber(buf); data.blkno = BufferGetBlockNumber(buf);
data.offset = off; data.offset = off;
data.nitem = 1; data.nitem = 1;
data.isDelete = btree->isDelete; data.isDelete = insertData->isDelete;
data.isData = false; data.isData = false;
data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
...@@ -545,12 +554,10 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -545,12 +554,10 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
cnt++; cnt++;
rdata[cnt].buffer = InvalidBuffer; rdata[cnt].buffer = InvalidBuffer;
rdata[cnt].data = (char *) btree->entry; rdata[cnt].data = (char *) insertData->entry;
rdata[cnt].len = IndexTupleSize(btree->entry); rdata[cnt].len = IndexTupleSize(insertData->entry);
rdata[cnt].next = NULL; rdata[cnt].next = NULL;
btree->entry = NULL;
return true; return true;
} }
...@@ -561,8 +568,11 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, ...@@ -561,8 +568,11 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
* an equal number! * an equal number!
*/ */
static Page static Page
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off,
void *insertPayload,
BlockNumber updateblkno, XLogRecData **prdata)
{ {
GinBtreeEntryInsertData *insertData = insertPayload;
OffsetNumber i, OffsetNumber i,
maxoff, maxoff,
separator = InvalidOffsetNumber; separator = InvalidOffsetNumber;
...@@ -583,8 +593,9 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR ...@@ -583,8 +593,9 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
*prdata = rdata; *prdata = rdata;
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
InvalidOffsetNumber : GinGetDownlink(btree->entry); InvalidOffsetNumber : GinGetDownlink(insertData->entry);
data.updateBlkno = entryPreparePage(btree, lpage, off); data.updateBlkno = updateblkno;
entryPreparePage(btree, lpage, off, insertData, updateblkno);
maxoff = PageGetMaxOffsetNumber(lpage); maxoff = PageGetMaxOffsetNumber(lpage);
ptr = tupstore; ptr = tupstore;
...@@ -593,8 +604,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR ...@@ -593,8 +604,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
{ {
if (i == off) if (i == off)
{ {
size = MAXALIGN(IndexTupleSize(btree->entry)); size = MAXALIGN(IndexTupleSize(insertData->entry));
memcpy(ptr, btree->entry, size); memcpy(ptr, insertData->entry, size);
ptr += size; ptr += size;
totalsize += size + sizeof(ItemIdData); totalsize += size + sizeof(ItemIdData);
} }
...@@ -608,8 +619,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR ...@@ -608,8 +619,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
if (off == maxoff + 1) if (off == maxoff + 1)
{ {
size = MAXALIGN(IndexTupleSize(btree->entry)); size = MAXALIGN(IndexTupleSize(insertData->entry));
memcpy(ptr, btree->entry, size); memcpy(ptr, insertData->entry, size);
ptr += size; ptr += size;
totalsize += size + sizeof(ItemIdData); totalsize += size + sizeof(ItemIdData);
} }
...@@ -667,20 +678,23 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR ...@@ -667,20 +678,23 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
} }
/* /*
* Prepare the state in 'btree' for inserting a downlink for given buffer. * Construct insertion payload for inserting the downlink for given buffer.
*/ */
static void static void *
entryPrepareDownlink(GinBtree btree, Buffer lbuf) entryPrepareDownlink(GinBtree btree, Buffer lbuf)
{ {
GinBtreeEntryInsertData *insertData;
Page lpage = BufferGetPage(lbuf); Page lpage = BufferGetPage(lbuf);
BlockNumber lblkno = BufferGetBlockNumber(lbuf);
IndexTuple itup; IndexTuple itup;
itup = getRightMostTuple(lpage); itup = getRightMostTuple(lpage);
btree->entry = GinFormInteriorTuple(itup, insertData = palloc(sizeof(GinBtreeEntryInsertData));
lpage, insertData->entry = GinFormInteriorTuple(itup, lpage, lblkno);
BufferGetBlockNumber(lbuf)); insertData->isDelete = false;
btree->rightblkno = GinPageGetOpaque(lpage)->rightlink;
return insertData;
} }
/* /*
...@@ -724,6 +738,7 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, ...@@ -724,6 +738,7 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
memset(btree, 0, sizeof(GinBtreeData)); memset(btree, 0, sizeof(GinBtreeData));
btree->index = ginstate->index; btree->index = ginstate->index;
btree->rootBlkno = GIN_ROOT_BLKNO;
btree->ginstate = ginstate; btree->ginstate = ginstate;
btree->findChildPage = entryLocateEntry; btree->findChildPage = entryLocateEntry;
...@@ -743,5 +758,4 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, ...@@ -743,5 +758,4 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
btree->entryAttnum = attnum; btree->entryAttnum = attnum;
btree->entryKey = key; btree->entryKey = key;
btree->entryCategory = category; btree->entryCategory = category;
btree->isDelete = FALSE;
} }
...@@ -374,7 +374,7 @@ restartScanEntry: ...@@ -374,7 +374,7 @@ restartScanEntry:
ginPrepareEntryScan(&btreeEntry, entry->attnum, ginPrepareEntryScan(&btreeEntry, entry->attnum,
entry->queryKey, entry->queryCategory, entry->queryKey, entry->queryCategory,
ginstate); ginstate);
stackEntry = ginFindLeafPage(&btreeEntry, GIN_ROOT_BLKNO, true); stackEntry = ginFindLeafPage(&btreeEntry, true);
page = BufferGetPage(stackEntry->buffer); page = BufferGetPage(stackEntry->buffer);
needUnlock = TRUE; needUnlock = TRUE;
......
...@@ -163,17 +163,20 @@ ginEntryInsert(GinState *ginstate, ...@@ -163,17 +163,20 @@ ginEntryInsert(GinState *ginstate,
GinStatsData *buildStats) GinStatsData *buildStats)
{ {
GinBtreeData btree; GinBtreeData btree;
GinBtreeEntryInsertData insertdata;
GinBtreeStack *stack; GinBtreeStack *stack;
IndexTuple itup; IndexTuple itup;
Page page; Page page;
insertdata.isDelete = FALSE;
/* During index build, count the to-be-inserted entry */ /* During index build, count the to-be-inserted entry */
if (buildStats) if (buildStats)
buildStats->nEntries++; buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate); ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
stack = ginFindLeafPage(&btree, GIN_ROOT_BLKNO, false); stack = ginFindLeafPage(&btree, false);
page = BufferGetPage(stack->buffer); page = BufferGetPage(stack->buffer);
if (btree.findItem(&btree, stack)) if (btree.findItem(&btree, stack))
...@@ -201,7 +204,7 @@ ginEntryInsert(GinState *ginstate, ...@@ -201,7 +204,7 @@ ginEntryInsert(GinState *ginstate,
itup = addItemPointersToLeafTuple(ginstate, itup, itup = addItemPointersToLeafTuple(ginstate, itup,
items, nitem, buildStats); items, nitem, buildStats);
btree.isDelete = TRUE; insertdata.isDelete = TRUE;
} }
else else
{ {
...@@ -211,8 +214,8 @@ ginEntryInsert(GinState *ginstate, ...@@ -211,8 +214,8 @@ ginEntryInsert(GinState *ginstate,
} }
/* Insert the new or modified leaf tuple */ /* Insert the new or modified leaf tuple */
btree.entry = itup; insertdata.entry = itup;
ginInsertValue(&btree, stack, buildStats); ginInsertValue(&btree, stack, &insertdata, buildStats);
pfree(itup); pfree(itup);
} }
......
...@@ -774,7 +774,7 @@ ginContinueSplit(ginIncompleteSplit *split) ...@@ -774,7 +774,7 @@ ginContinueSplit(ginIncompleteSplit *split)
GinState ginstate; GinState ginstate;
Relation reln; Relation reln;
Buffer buffer; Buffer buffer;
GinBtreeStack stack; GinBtreeStack *stack;
/* /*
* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno, * elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno,
...@@ -802,22 +802,22 @@ ginContinueSplit(ginIncompleteSplit *split) ...@@ -802,22 +802,22 @@ ginContinueSplit(ginIncompleteSplit *split)
} }
else else
{ {
ginPrepareDataScan(&btree, reln); ginPrepareDataScan(&btree, reln, split->rootBlkno);
} }
stack.blkno = split->leftBlkno; stack = palloc(sizeof(GinBtreeStack));
stack.buffer = buffer; stack->blkno = split->leftBlkno;
stack.off = InvalidOffsetNumber; stack->buffer = buffer;
stack.parent = NULL; stack->off = InvalidOffsetNumber;
stack->parent = NULL;
ginFindParents(&btree, &stack, split->rootBlkno); ginFindParents(&btree, stack);
LockBuffer(stack->parent->buffer, GIN_UNLOCK);
ginFinishSplit(&btree, stack, NULL);
btree.prepareDownlink(&btree, buffer); /* buffer is released by ginFinishSplit */
ginInsertValue(&btree, stack.parent, NULL);
FreeFakeRelcacheEntry(reln); FreeFakeRelcacheEntry(reln);
UnlockReleaseBuffer(buffer);
} }
void void
......
...@@ -485,41 +485,59 @@ typedef struct GinBtreeData ...@@ -485,41 +485,59 @@ typedef struct GinBtreeData
/* insert methods */ /* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber); OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **); bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **); Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
void (*prepareDownlink) (GinBtree, Buffer); void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer); void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);
bool isData; bool isData;
Relation index; Relation index;
BlockNumber rootBlkno;
GinState *ginstate; /* not valid in a data scan */ GinState *ginstate; /* not valid in a data scan */
bool fullScan; bool fullScan;
bool isBuild; bool isBuild;
BlockNumber rightblkno; /* Search key for Entry tree */
/* Entry options */
OffsetNumber entryAttnum; OffsetNumber entryAttnum;
Datum entryKey; Datum entryKey;
GinNullCategory entryCategory; GinNullCategory entryCategory;
IndexTuple entry;
bool isDelete;
/* Data (posting tree) options */ /* Search key for data tree (posting tree) */
ItemPointerData itemptr;
} GinBtreeData;
/* This represents a tuple to be inserted to entry tree. */
typedef struct
{
IndexTuple entry; /* tuple to insert */
bool isDelete; /* delete old tuple at same offset? */
} GinBtreeEntryInsertData;
/*
* This represents an itempointer, or many itempointers, to be inserted to
* a data (posting tree) leaf page
*/
typedef struct
{
ItemPointerData *items; ItemPointerData *items;
uint32 nitem; uint32 nitem;
uint32 curitem; uint32 curitem;
} GinBtreeDataLeafInsertData;
PostingItem pitem; /*
} GinBtreeData; * For internal data (posting tree) pages, the insertion payload is a
* PostingItem
*/
extern GinBtreeStack *ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode); extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode);
extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode); extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
extern void freeGinBtreeStack(GinBtreeStack *stack); extern void freeGinBtreeStack(GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack, extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
void *insertdata, GinStatsData *buildStats);
extern void ginFindParents(GinBtree btree, GinBtreeStack *stack);
extern void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
GinStatsData *buildStats); GinStatsData *buildStats);
extern void ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
/* ginentrypage.c */ /* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate, extern IndexTuple GinFormTuple(GinState *ginstate,
...@@ -543,7 +561,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ...@@ -543,7 +561,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
GinStatsData *buildStats); GinStatsData *buildStats);
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno); extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
extern void ginPrepareDataScan(GinBtree btree, Relation index); extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
/* ginscan.c */ /* ginscan.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment