Commit b3364fc8 authored by Bruce Momjian

pgindent new GIST index code, per request from Tom.

parent 08817bdb
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.125 2005/06/30 17:52:13 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.126 2005/09/22 20:44:36 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,7 +23,7 @@
#include "miscadmin.h"
#include "utils/memutils.h"
const XLogRecPtr XLogRecPtrForTemp = { 1, 1 };
const XLogRecPtr XLogRecPtrForTemp = {1, 1};
/* Working state for gistbuild and its callback */
typedef struct
......@@ -46,7 +46,7 @@ static void gistdoinsert(Relation r,
IndexTuple itup,
GISTSTATE *GISTstate);
static void gistfindleaf(GISTInsertState *state,
GISTSTATE *giststate);
GISTSTATE *giststate);
#define ROTATEDIST(d) do { \
......@@ -55,7 +55,7 @@ static void gistfindleaf(GISTInsertState *state,
tmp->next = (d); \
(d)=tmp; \
} while(0)
/*
* Create and return a temporary memory context for use by GiST. We
......@@ -65,15 +65,15 @@ static void gistfindleaf(GISTInsertState *state,
* GiST code itself, to avoid the need to do some awkward manual
* memory management.
*/
MemoryContext
createTempGistContext(void)
{
return AllocSetContextCreate(CurrentMemoryContext,
"GiST temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
MemoryContext
createTempGistContext(void)
{
return AllocSetContextCreate(CurrentMemoryContext,
"GiST temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
/*
* Routine to build an index. Basically calls insert over and over.
......@@ -92,8 +92,8 @@ gistbuild(PG_FUNCTION_ARGS)
Buffer buffer;
/*
* We expect to be called exactly once for any index relation. If
* that's not the case, big trouble's what we have.
* We expect to be called exactly once for any index relation. If that's
* not the case, big trouble's what we have.
*/
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
......@@ -105,15 +105,16 @@ gistbuild(PG_FUNCTION_ARGS)
/* initialize the root page */
buffer = gistNewBuffer(index);
GISTInitBuffer(buffer, F_LEAF);
if ( !index->rd_istemp ) {
XLogRecPtr recptr;
XLogRecData rdata;
Page page;
if (!index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData rdata;
Page page;
rdata.buffer = InvalidBuffer;
rdata.data = (char*)&(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
rdata.buffer = InvalidBuffer;
rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
page = BufferGetPage(buffer);
......@@ -124,7 +125,8 @@ gistbuild(PG_FUNCTION_ARGS)
PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION();
} else
}
else
PageSetLSN(BufferGetPage(buffer), XLogRecPtrForTemp);
LockBuffer(buffer, GIST_UNLOCK);
WriteBuffer(buffer);
......@@ -132,9 +134,10 @@ gistbuild(PG_FUNCTION_ARGS)
/* build the index */
buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
buildstate.indtuples = 0;
/*
* create a temporary memory context that is reset once for each
* tuple inserted into the index
* create a temporary memory context that is reset once for each tuple
* inserted into the index
*/
buildstate.tmpCtx = createTempGistContext();
......@@ -185,7 +188,7 @@ gistbuildCallback(Relation index,
{
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */, TRUE, FALSE);
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpcentry.key;
}
}
......@@ -195,11 +198,11 @@ gistbuildCallback(Relation index,
itup->t_tid = htup->t_self;
/*
* Since we already have the index relation locked, we call
* gistdoinsert directly. Normal access method calls dispatch through
* gistinsert, which locks the relation for write. This is the right
* thing to do if you're inserting single tups, but not when you're
* initializing the whole index at once.
* Since we already have the index relation locked, we call gistdoinsert
* directly. Normal access method calls dispatch through gistinsert,
* which locks the relation for write. This is the right thing to do if
* you're inserting single tups, but not when you're initializing the
* whole index at once.
*/
gistdoinsert(index, itup, &buildstate->giststate);
......@@ -221,6 +224,7 @@ gistinsert(PG_FUNCTION_ARGS)
Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
bool checkUnique = PG_GETARG_BOOL(5);
......@@ -250,7 +254,7 @@ gistinsert(PG_FUNCTION_ARGS)
{
gistcentryinit(&giststate, i, &tmpentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */, TRUE, FALSE);
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpentry.key;
}
}
......@@ -276,148 +280,167 @@ gistinsert(PG_FUNCTION_ARGS)
static void
gistdoinsert(Relation r, IndexTuple itup, GISTSTATE *giststate)
{
GISTInsertState state;
GISTInsertState state;
memset(&state, 0, sizeof(GISTInsertState));
state.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
state.itup[0] = (IndexTuple) palloc(IndexTupleSize(itup));
memcpy(state.itup[0], itup, IndexTupleSize(itup));
state.ituplen=1;
state.ituplen = 1;
state.r = r;
state.key = itup->t_tid;
state.needInsertComplete = true;
state.needInsertComplete = true;
state.stack = (GISTInsertStack*)palloc0(sizeof(GISTInsertStack));
state.stack->blkno=GIST_ROOT_BLKNO;
state.stack = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
state.stack->blkno = GIST_ROOT_BLKNO;
gistfindleaf(&state, giststate);
gistmakedeal(&state, giststate);
}
static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) {
bool is_splitted = false;
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
{
bool is_splitted = false;
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;
if ( !is_leaf )
if (!is_leaf)
/*
* This node's key has been modified, either because a child
* split occurred or because we needed to adjust our key for
* an insert in a child node. Therefore, remove the old
* version of this node's key.
* This node's key has been modified, either because a child split
* occurred or because we needed to adjust our key for an insert in a
* child node. Therefore, remove the old version of this node's key.
*/
PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);
if (gistnospace(state->stack->page, state->itup, state->ituplen))
{
/* no space for insertion */
IndexTuple *itvec,
*newitup;
int tlen,olen;
SplitedPageLayout *dist=NULL, *ptr;
int tlen,
olen;
SplitedPageLayout *dist = NULL,
*ptr;
is_splitted = true;
itvec = gistextractbuffer(state->stack->buffer, &tlen);
olen=tlen;
olen = tlen;
itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
newitup = gistSplit(state->r, state->stack->buffer, itvec, &tlen, &dist, giststate);
if ( !state->r->rd_istemp ) {
if (!state->r->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData *rdata;
XLogRecData *rdata;
rdata = formSplitRdata(state->r->rd_node, state->stack->blkno,
&(state->key), dist);
&(state->key), dist);
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist;
while(ptr) {
while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr=ptr->next;
ptr = ptr->next;
}
END_CRIT_SECTION();
} else {
}
else
{
ptr = dist;
while(ptr) {
while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr=ptr->next;
ptr = ptr->next;
}
}
state->itup = newitup;
state->ituplen = tlen; /* now tlen >= 2 */
state->ituplen = tlen; /* now tlen >= 2 */
if ( state->stack->blkno == GIST_ROOT_BLKNO ) {
if (state->stack->blkno == GIST_ROOT_BLKNO)
{
gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
state->needInsertComplete=false;
state->needInsertComplete = false;
ptr = dist;
while(ptr) {
Page page = (Page)BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = ( ptr->next ) ?
while (ptr)
{
Page page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
ptr->next->block.blkno : InvalidBlockNumber;
GistPageGetOpaque(page)->nsn = PageGetLSN(page);
LockBuffer( ptr->buffer, GIST_UNLOCK );
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr=ptr->next;
ptr = ptr->next;
}
} else {
Page page;
BlockNumber rightrightlink = InvalidBlockNumber;
SplitedPageLayout *ourpage=NULL;
GistNSN oldnsn;
}
else
{
Page page;
BlockNumber rightrightlink = InvalidBlockNumber;
SplitedPageLayout *ourpage = NULL;
GistNSN oldnsn;
GISTPageOpaque opaque;
/* move origpage to first in chain */
if ( dist->block.blkno != state->stack->blkno ) {
if (dist->block.blkno != state->stack->blkno)
{
ptr = dist;
while(ptr->next) {
if ( ptr->next->block.blkno == state->stack->blkno ) {
while (ptr->next)
{
if (ptr->next->block.blkno == state->stack->blkno)
{
ourpage = ptr->next;
ptr->next = ptr->next->next;
ourpage->next = dist;
dist = ourpage;
break;
}
ptr=ptr->next;
ptr = ptr->next;
}
Assert( ourpage != NULL );
} else
Assert(ourpage != NULL);
}
else
ourpage = dist;
/* now gets all needed data, and sets nsn's */
page = (Page)BufferGetPage(ourpage->buffer);
page = (Page) BufferGetPage(ourpage->buffer);
opaque = GistPageGetOpaque(page);
rightrightlink = opaque->rightlink;
oldnsn = opaque->nsn;
opaque->nsn = PageGetLSN(page);
opaque->rightlink = ourpage->next->block.blkno;
/* fills and write all new pages.
They isn't linked into tree yet */
/*
* fill and write all new pages. They aren't linked into the tree yet
*/
ptr = ourpage->next;
while(ptr) {
page = (Page)BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = ( ptr->next ) ?
while (ptr)
{
page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
ptr->next->block.blkno : rightrightlink;
/* only for last set oldnsn */
GistPageGetOpaque(page)->nsn = ( ptr->next ) ?
GistPageGetOpaque(page)->nsn = (ptr->next) ?
opaque->nsn : oldnsn;
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr=ptr->next;
ptr = ptr->next;
}
}
WriteNoReleaseBuffer( state->stack->buffer );
WriteNoReleaseBuffer(state->stack->buffer);
}
else
{
......@@ -427,20 +450,23 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) {
gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);
oldlsn = PageGetLSN(state->stack->page);
if ( !state->r->rd_istemp ) {
OffsetNumber noffs=0, offs[ MAXALIGN( sizeof(OffsetNumber) ) / sizeof(OffsetNumber) ];
if (!state->r->rd_istemp)
{
OffsetNumber noffs = 0,
offs[MAXALIGN(sizeof(OffsetNumber)) / sizeof(OffsetNumber)];
XLogRecPtr recptr;
XLogRecData *rdata;
if ( !is_leaf ) {
/*only on inner page we should delete previous version */
XLogRecData *rdata;
if (!is_leaf)
{
/* only on inner page we should delete previous version */
offs[0] = state->stack->childoffnum;
noffs=1;
noffs = 1;
}
rdata = formUpdateRdata(state->r->rd_node, state->stack->blkno,
offs, noffs, false, state->itup, state->ituplen,
&(state->key));
offs, noffs, false, state->itup, state->ituplen,
&(state->key));
START_CRIT_SECTION();
......@@ -449,69 +475,84 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) {
PageSetTLI(state->stack->page, ThisTimeLineID);
END_CRIT_SECTION();
} else
}
else
PageSetLSN(state->stack->page, XLogRecPtrForTemp);
if ( state->stack->blkno == GIST_ROOT_BLKNO )
state->needInsertComplete=false;
if (state->stack->blkno == GIST_ROOT_BLKNO)
state->needInsertComplete = false;
WriteNoReleaseBuffer(state->stack->buffer);
if (!is_leaf) /* small optimization: inform scan ablout deleting... */
gistadjscans(state->r, GISTOP_DEL, state->stack->blkno,
state->stack->childoffnum, PageGetLSN(state->stack->page), oldlsn );
if (!is_leaf) /* small optimization: inform scan ablout
* deleting... */
gistadjscans(state->r, GISTOP_DEL, state->stack->blkno,
state->stack->childoffnum, PageGetLSN(state->stack->page), oldlsn);
if (state->ituplen > 1)
{ /* previous is_splitted==true */
/*
* child was split, so we must form a union for insertion in the
* parent
*/
IndexTuple newtup = gistunion(state->r, state->itup, state->ituplen, giststate);
ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
state->itup[0] = newtup;
state->ituplen = 1;
} else if (is_leaf) {
/* itup[0] store key to adjust parent, we set it to valid
to correct check by GistTupleIsInvalid macro in gistgetadjusted() */
}
else if (is_leaf)
{
/*
* itup[0] stores the key used to adjust the parent; we mark it valid so
* that the GistTupleIsInvalid check in gistgetadjusted() works correctly
*/
ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
GistTupleSetValid( state->itup[0] );
GistTupleSetValid(state->itup[0]);
}
}
return is_splitted;
}
/*
* returns stack of pages, all pages in stack are pinned, and
* returns stack of pages, all pages in stack are pinned, and
* leaf is X-locked
*/
*/
static void
gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
{
ItemId iid;
IndexTuple idxtuple;
GISTPageOpaque opaque;
GISTPageOpaque opaque;
/* walk down, We don't lock page for a long time, but so
we should be ready to recheck path in a bad case...
We remember, that page->lsn should never be invalid. */
while( true ) {
/*
* walk down. We don't hold a page lock for a long time, so we should be
* ready to recheck the path in a bad case... Remember that page->lsn
* should never be invalid.
*/
while (true)
{
if ( XLogRecPtrIsInvalid( state->stack->lsn ) )
if (XLogRecPtrIsInvalid(state->stack->lsn))
state->stack->buffer = ReadBuffer(state->r, state->stack->blkno);
LockBuffer( state->stack->buffer, GIST_SHARE );
LockBuffer(state->stack->buffer, GIST_SHARE);
state->stack->page = (Page) BufferGetPage(state->stack->buffer);
opaque = GistPageGetOpaque(state->stack->page);
state->stack->lsn = PageGetLSN(state->stack->page);
Assert( state->r->rd_istemp || !XLogRecPtrIsInvalid( state->stack->lsn ) );
Assert(state->r->rd_istemp || !XLogRecPtrIsInvalid(state->stack->lsn));
if ( state->stack->blkno != GIST_ROOT_BLKNO &&
XLByteLT( state->stack->parent->lsn, opaque->nsn) ) {
/* caused split non-root page is detected, go up to parent to choose best child */
LockBuffer( state->stack->buffer, GIST_UNLOCK );
ReleaseBuffer( state->stack->buffer );
if (state->stack->blkno != GIST_ROOT_BLKNO &&
XLByteLT(state->stack->parent->lsn, opaque->nsn))
{
/*
* a split of a non-root page is detected, go up to the parent to
* choose the best child
*/
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent;
continue;
}
......@@ -519,62 +560,76 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
if (!GistPageIsLeaf(state->stack->page))
{
/*
* This is an internal page, so continue to walk down the
* tree. We find the child node that has the minimum insertion
* penalty and recursively invoke ourselves to modify that
* node. Once the recursive call returns, we may need to
* adjust the parent node for two reasons: the child node
* split, or the key in this node needs to be adjusted for the
* newly inserted key below us.
*/
GISTInsertStack *item=(GISTInsertStack*)palloc0(sizeof(GISTInsertStack));
/*
* This is an internal page, so continue to walk down the tree. We
* find the child node that has the minimum insertion penalty and
* recursively invoke ourselves to modify that node. Once the
* recursive call returns, we may need to adjust the parent node
* for two reasons: the child node split, or the key in this node
* needs to be adjusted for the newly inserted key below us.
*/
GISTInsertStack *item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate);
iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid);
item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
LockBuffer( state->stack->buffer, GIST_UNLOCK );
LockBuffer(state->stack->buffer, GIST_UNLOCK);
item->parent = state->stack;
item->child = NULL;
if ( state->stack )
if (state->stack)
state->stack->child = item;
state->stack = item;
} else {
}
else
{
/* be careful, during unlock/lock page may be changed... */
LockBuffer( state->stack->buffer, GIST_UNLOCK );
LockBuffer( state->stack->buffer, GIST_EXCLUSIVE );
LockBuffer(state->stack->buffer, GIST_UNLOCK);
LockBuffer(state->stack->buffer, GIST_EXCLUSIVE);
state->stack->page = (Page) BufferGetPage(state->stack->buffer);
opaque = GistPageGetOpaque(state->stack->page);
if ( state->stack->blkno == GIST_ROOT_BLKNO ) {
/* the only page can become inner instead of leaf is a root page,
so for root we should recheck it */
if ( !GistPageIsLeaf(state->stack->page) ) {
/* very rarely situation: during unlock/lock index
with number of pages = 1 was increased */
LockBuffer( state->stack->buffer, GIST_UNLOCK );
if (state->stack->blkno == GIST_ROOT_BLKNO)
{
/*
* the only page that can become inner instead of leaf is the root
* page, so for root we should recheck it
*/
if (!GistPageIsLeaf(state->stack->page))
{
/*
* very rare situation: during unlock/lock an index with
* number of pages = 1 was increased
*/
LockBuffer(state->stack->buffer, GIST_UNLOCK);
continue;
}
/* we don't need to check root split, because checking
leaf/inner is enough to recognize split for root */
} else if ( XLByteLT( state->stack->parent->lsn, opaque->nsn) ) {
/* detecting split during unlock/lock, so we should
find better child on parent*/
}
/*
* we don't need to check root split, because checking
* leaf/inner is enough to recognize split for root
*/
}
else if (XLByteLT(state->stack->parent->lsn, opaque->nsn))
{
/*
* a split was detected during unlock/lock, so we should find
* a better child on the parent
*/
/* forget buffer */
LockBuffer( state->stack->buffer, GIST_UNLOCK );
ReleaseBuffer( state->stack->buffer );
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent;
continue;
continue;
}
state->stack->lsn = PageGetLSN( state->stack->page );
state->stack->lsn = PageGetLSN(state->stack->page);
/* ok, we found a leaf page and it is X-locked */
break;
}
......@@ -587,10 +642,12 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
* Should have the same interface as XLogReadBuffer
*/
static Buffer
gistReadAndLockBuffer( Relation r, BlockNumber blkno ) {
Buffer buffer = ReadBuffer( r, blkno );
LockBuffer( buffer, GIST_SHARE );
return buffer;
gistReadAndLockBuffer(Relation r, BlockNumber blkno)
{
Buffer buffer = ReadBuffer(r, blkno);
LockBuffer(buffer, GIST_SHARE);
return buffer;
}
/*
......@@ -598,38 +655,45 @@ gistReadAndLockBuffer( Relation r, BlockNumber blkno ) {
* to prevent deadlocks, it should lock only one page simultaneously.
* Function is used in recovery and usual mode, so should work with different
* read functions (gistReadAndLockBuffer and XLogReadBuffer)
* returns from the begining of closest parent;
* returns from the beginning of closest parent;
*/
GISTInsertStack*
gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, BlockNumber) ) {
Page page;
Buffer buffer;
OffsetNumber i, maxoff;
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack *top, *tail, *ptr;
BlockNumber blkno;
top = tail = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) );
GISTInsertStack *
gistFindPath(Relation r, BlockNumber child, Buffer (*myReadBuffer) (Relation, BlockNumber))
{
Page page;
Buffer buffer;
OffsetNumber i,
maxoff;
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack *top,
*tail,
*ptr;
BlockNumber blkno;
top = tail = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
top->blkno = GIST_ROOT_BLKNO;
while( top && top->blkno != child ) {
buffer = myReadBuffer(r, top->blkno); /* buffer locked */
page = (Page)BufferGetPage( buffer );
while (top && top->blkno != child)
{
buffer = myReadBuffer(r, top->blkno); /* buffer locked */
page = (Page) BufferGetPage(buffer);
if ( GistPageIsLeaf(page) ) {
if (GistPageIsLeaf(page))
{
/* we can safely go away, follows only leaf pages */
LockBuffer( buffer, GIST_UNLOCK );
ReleaseBuffer( buffer );
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
return NULL;
}
top->lsn = PageGetLSN(page);
top->lsn = PageGetLSN(page);
if ( top->parent && XLByteLT( top->parent->lsn, GistPageGetOpaque(page)->nsn) &&
GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */) {
if (top->parent && XLByteLT(top->parent->lsn, GistPageGetOpaque(page)->nsn) &&
GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
{
/* page was split while we were thinking... */
ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) );
ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = GistPageGetOpaque(page)->rightlink;
ptr->childoffnum = InvalidOffsetNumber;
ptr->parent = top;
......@@ -637,119 +701,143 @@ gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, B
tail->next = ptr;
tail = ptr;
}
maxoff = PageGetMaxOffsetNumber(page);
for(i = FirstOffsetNumber; i<= maxoff; i = OffsetNumberNext(i)) {
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid);
blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
if ( blkno == child ) {
if (blkno == child)
{
OffsetNumber poff = InvalidOffsetNumber;
/* make child links */
ptr = top;
while( ptr->parent ) {
while (ptr->parent)
{
/* set child link */
ptr->parent->child = ptr;
/* move childoffnum.. */
if ( ptr == top ) {
/*first iteration*/
if (ptr == top)
{
/* first iteration */
poff = ptr->parent->childoffnum;
ptr->parent->childoffnum = ptr->childoffnum;
} else {
}
else
{
OffsetNumber tmp = ptr->parent->childoffnum;
ptr->parent->childoffnum = poff;
poff = tmp;
}
ptr = ptr->parent;
}
top->childoffnum = i;
LockBuffer( buffer, GIST_UNLOCK );
ReleaseBuffer( buffer );
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
return top;
} else {
}
else
{
/* Install next inner page to the end of stack */
ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) );
ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = blkno;
ptr->childoffnum = i; /* set offsetnumber of child to child !!! */
ptr->childoffnum = i; /* set offsetnumber of child to child
* !!! */
ptr->parent = top;
ptr->next = NULL;
tail->next = ptr;
tail = ptr;
}
}
LockBuffer( buffer, GIST_UNLOCK );
ReleaseBuffer( buffer );
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
top = top->next;
}
return NULL;
return NULL;
}
/*
/*
* Returns X-locked parent of stack page
*/
static void
gistFindCorrectParent( Relation r, GISTInsertStack *child ) {
GISTInsertStack *parent = child->parent;
LockBuffer( parent->buffer, GIST_EXCLUSIVE );
parent->page = (Page)BufferGetPage( parent->buffer );
gistFindCorrectParent(Relation r, GISTInsertStack *child)
{
GISTInsertStack *parent = child->parent;
LockBuffer(parent->buffer, GIST_EXCLUSIVE);
parent->page = (Page) BufferGetPage(parent->buffer);
/* here we don't need to distinguish between split and page update */
if ( parent->childoffnum == InvalidOffsetNumber || !XLByteEQ( parent->lsn, PageGetLSN(parent->page) ) ) {
if (parent->childoffnum == InvalidOffsetNumber || !XLByteEQ(parent->lsn, PageGetLSN(parent->page)))
{
/* parent has changed, look for child in right links until found */
OffsetNumber i, maxoff;
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack *ptr;
while(true) {
OffsetNumber i,
maxoff;
ItemId iid;
IndexTuple idxtuple;
GISTInsertStack *ptr;
while (true)
{
maxoff = PageGetMaxOffsetNumber(parent->page);
for(i = FirstOffsetNumber; i<= maxoff; i = OffsetNumberNext(i)) {
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(parent->page, i);
idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
if ( ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno ) {
if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
{
/* yes!!, found */
parent->childoffnum = i;
return;
}
}
parent->blkno = GistPageGetOpaque( parent->page )->rightlink;
LockBuffer( parent->buffer, GIST_UNLOCK );
ReleaseBuffer( parent->buffer );
if ( parent->blkno == InvalidBlockNumber )
/* end of chain and still didn't found parent,
It's very-very rare situation when root splited */
parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
LockBuffer(parent->buffer, GIST_UNLOCK);
ReleaseBuffer(parent->buffer);
if (parent->blkno == InvalidBlockNumber)
/*
* end of chain and still didn't find parent. It's a very, very
* rare situation when the root was split
*/
break;
parent->buffer = ReadBuffer( r, parent->blkno );
LockBuffer( parent->buffer, GIST_EXCLUSIVE );
parent->page = (Page)BufferGetPage( parent->buffer );
}
parent->buffer = ReadBuffer(r, parent->blkno);
LockBuffer(parent->buffer, GIST_EXCLUSIVE);
parent->page = (Page) BufferGetPage(parent->buffer);
}
/* awful!!, we need search tree to find parent ... ,
but before we should release all old parent */
/*
* awful!!, we need to search the tree to find the parent ... , but before
* that we should release all old parents
*/
ptr = child->parent->parent; /* child->parent already released above */
while(ptr) {
ReleaseBuffer( ptr->buffer );
ptr = child->parent->parent; /* child->parent already released
* above */
while (ptr)
{
ReleaseBuffer(ptr->buffer);
ptr = ptr->parent;
}
/* ok, find new path */
ptr = parent = gistFindPath(r, child->blkno, gistReadAndLockBuffer);
Assert( ptr!=NULL );
Assert(ptr != NULL);
/* read all buffers as supposed in caller */
while( ptr ) {
ptr->buffer = ReadBuffer( r, ptr->blkno );
ptr->page = (Page)BufferGetPage( ptr->buffer );
/* read all buffers as supposed in caller */
while (ptr)
{
ptr->buffer = ReadBuffer(r, ptr->blkno);
ptr->page = (Page) BufferGetPage(ptr->buffer);
ptr = ptr->parent;
}
......@@ -758,78 +846,90 @@ gistFindCorrectParent( Relation r, GISTInsertStack *child ) {
parent->child = child;
/* make recursive call to normal processing */
gistFindCorrectParent( r, child );
}
gistFindCorrectParent(r, child);
}
return;
}
void
gistmakedeal(GISTInsertState *state, GISTSTATE *giststate) {
gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
{
int is_splitted;
ItemId iid;
IndexTuple oldtup, newtup;
IndexTuple oldtup,
newtup;
/* walk up */
while( true ) {
/*
* After this call: 1. if child page was splited, then itup
* contains keys for each page 2. if child page wasn't splited,
* then itup contains additional for adjustment of current key
*/
if ( state->stack->parent ) {
/* X-lock parent page before proceed child,
gistFindCorrectParent should find and lock it */
gistFindCorrectParent( state->r, state->stack );
while (true)
{
/*
* After this call: 1. if child page was split, then itup contains
* keys for each page 2. if child page wasn't split, then itup
* contains the additional key for adjustment of the current key
*/
if (state->stack->parent)
{
/*
* X-lock parent page before processing child, gistFindCorrectParent
* should find and lock it
*/
gistFindCorrectParent(state->r, state->stack);
}
is_splitted = gistplacetopage(state, giststate);
/* parent locked above, so release child buffer */
LockBuffer(state->stack->buffer, GIST_UNLOCK );
ReleaseBuffer( state->stack->buffer );
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
/* pop parent page from stack */
state->stack = state->stack->parent;
/* stack is void */
if ( ! state->stack )
if (!state->stack)
break;
/* child did not split, so we can check is it needed to update parent tuple */
/*
* child did not split, so we can check whether the parent tuple needs
* to be updated
*/
if (!is_splitted)
{
/* parent's tuple */
iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
oldtup = (IndexTuple) PageGetItem(state->stack->page, iid);
newtup = gistgetadjusted(state->r, oldtup, state->itup[0], giststate);
if (!newtup) { /* not need to update key */
LockBuffer( state->stack->buffer, GIST_UNLOCK );
if (!newtup)
{ /* not need to update key */
LockBuffer(state->stack->buffer, GIST_UNLOCK);
break;
}
state->itup[0] = newtup;
}
} /* while */
state->itup[0] = newtup;
}
} /* while */
/* release all parent buffers */
while( state->stack ) {
while (state->stack)
{
ReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent;
}
/* say to xlog that insert is completed */
if ( state->needInsertComplete && !state->r->rd_istemp )
gistxlogInsertCompletion(state->r->rd_node, &(state->key), 1);
if (state->needInsertComplete && !state->r->rd_istemp)
gistxlogInsertCompletion(state->r->rd_node, &(state->key), 1);
}
static void
gistToRealOffset(OffsetNumber *arr, int len, OffsetNumber *reasloffset) {
int i;
static void
gistToRealOffset(OffsetNumber *arr, int len, OffsetNumber *reasloffset)
{
int i;
for(i=0;i<len;i++)
arr[i] = reasloffset[ arr[i] ];
for (i = 0; i < len; i++)
arr[i] = reasloffset[arr[i]];
}
/*
......@@ -840,7 +940,7 @@ gistSplit(Relation r,
Buffer buffer,
IndexTuple *itup, /* contains compressed entry */
int *len,
SplitedPageLayout **dist,
SplitedPageLayout **dist,
GISTSTATE *giststate)
{
Page p;
......@@ -856,24 +956,25 @@ gistSplit(Relation r,
GISTPageOpaque opaque;
GIST_SPLITVEC v;
GistEntryVector *entryvec;
int i, fakeoffset,
int i,
fakeoffset,
nlen;
OffsetNumber *realoffset;
IndexTuple *cleaneditup = itup;
int lencleaneditup = *len;
OffsetNumber *realoffset;
IndexTuple *cleaneditup = itup;
int lencleaneditup = *len;
p = (Page) BufferGetPage(buffer);
opaque = GistPageGetOpaque(p);
/*
* The root of the tree is the first block in the relation. If we're
* about to split the root, we need to do some hocus-pocus to enforce
* this guarantee.
* about to split the root, we need to do some hocus-pocus to enforce this
* guarantee.
*/
if (BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
{
leftbuf = gistNewBuffer(r);
GISTInitBuffer(leftbuf, opaque->flags&F_LEAF);
GISTInitBuffer(leftbuf, opaque->flags & F_LEAF);
lbknum = BufferGetBlockNumber(leftbuf);
left = (Page) BufferGetPage(leftbuf);
}
......@@ -886,7 +987,7 @@ gistSplit(Relation r,
}
rightbuf = gistNewBuffer(r);
GISTInitBuffer(rightbuf, opaque->flags&F_LEAF);
GISTInitBuffer(rightbuf, opaque->flags & F_LEAF);
rbknum = BufferGetBlockNumber(rightbuf);
right = (Page) BufferGetPage(rightbuf);
......@@ -901,10 +1002,11 @@ gistSplit(Relation r,
Datum datum;
bool IsNull;
if (!GistPageIsLeaf(p) && GistTupleIsInvalid( itup[i - 1] )) {
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup[i - 1]))
{
entryvec->n--;
/* remember position of invalid tuple */
realoffset[ entryvec->n ] = i;
realoffset[entryvec->n] = i;
continue;
}
......@@ -913,54 +1015,62 @@ gistSplit(Relation r,
datum, r, p, i,
ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
FALSE, IsNull);
realoffset[ fakeoffset ] = i;
realoffset[fakeoffset] = i;
fakeoffset++;
}
/*
* if it was invalid tuple then we need special processing. If
* it's possible, we move all invalid tuples on right page.
* We should remember, that union with invalid tuples
* is a invalid tuple.
*/
if ( entryvec->n != *len + 1 ) {
lencleaneditup = entryvec->n-1;
cleaneditup = (IndexTuple*)palloc(lencleaneditup * sizeof(IndexTuple));
for(i=1;i<entryvec->n;i++)
cleaneditup[i-1] = itup[ realoffset[ i ]-1 ];
if ( gistnospace( left, cleaneditup, lencleaneditup ) ) {
/* no space on left to put all good tuples, so picksplit */
/*
* if it was invalid tuple then we need special processing. If it's
* possible, we move all invalid tuples on right page. We should remember,
* that union with invalid tuples is a invalid tuple.
*/
if (entryvec->n != *len + 1)
{
lencleaneditup = entryvec->n - 1;
cleaneditup = (IndexTuple *) palloc(lencleaneditup * sizeof(IndexTuple));
for (i = 1; i < entryvec->n; i++)
cleaneditup[i - 1] = itup[realoffset[i] - 1];
if (gistnospace(left, cleaneditup, lencleaneditup))
{
/* no space on left to put all good tuples, so picksplit */
gistUserPicksplit(r, entryvec, &v, cleaneditup, lencleaneditup, giststate);
v.spl_leftvalid = true;
v.spl_rightvalid = false;
gistToRealOffset( v.spl_left, v.spl_nleft, realoffset );
gistToRealOffset( v.spl_right, v.spl_nright, realoffset );
} else {
/* we can try to store all valid tuples on one page */
v.spl_right = (OffsetNumber*)palloc( entryvec->n * sizeof(OffsetNumber) );
v.spl_left = (OffsetNumber*)palloc( entryvec->n * sizeof(OffsetNumber) );
if ( lencleaneditup==0 ) {
gistToRealOffset(v.spl_left, v.spl_nleft, realoffset);
gistToRealOffset(v.spl_right, v.spl_nright, realoffset);
}
else
{
/* we can try to store all valid tuples on one page */
v.spl_right = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
if (lencleaneditup == 0)
{
/* all tuples are invalid, so move half of them to the right */
v.spl_leftvalid = v.spl_rightvalid = false;
v.spl_nright = 0;
v.spl_nleft = 0;
for(i=1;i<=*len;i++)
if ( i-1<*len/2 )
v.spl_left[ v.spl_nleft++ ] = i;
for (i = 1; i <= *len; i++)
if (i - 1 < *len / 2)
v.spl_left[v.spl_nleft++] = i;
else
v.spl_right[ v.spl_nright++ ] = i;
} else {
/* we will not call gistUserPicksplit, just put good
tuples on left and invalid on right */
v.spl_right[v.spl_nright++] = i;
}
else
{
/*
* we will not call gistUserPicksplit, just put good tuples on
* left and invalid on right
*/
v.spl_nleft = lencleaneditup;
v.spl_nright = 0;
for(i=1;i<entryvec->n;i++)
v.spl_left[i-1] = i;
gistToRealOffset( v.spl_left, v.spl_nleft, realoffset );
v.spl_lattr[0] = v.spl_ldatum = (Datum)0;
v.spl_rattr[0] = v.spl_rdatum = (Datum)0;
for (i = 1; i < entryvec->n; i++)
v.spl_left[i - 1] = i;
gistToRealOffset(v.spl_left, v.spl_nleft, realoffset);
v.spl_lattr[0] = v.spl_ldatum = (Datum) 0;
v.spl_rattr[0] = v.spl_rdatum = (Datum) 0;
v.spl_lisnull[0] = true;
v.spl_risnull[0] = true;
gistunionsubkey(r, giststate, itup, &v, true);
......@@ -968,16 +1078,18 @@ gistSplit(Relation r,
v.spl_rightvalid = false;
}
}
} else {
/* there is no invalid tuples, so usial processing */
}
else
{
/* there is no invalid tuples, so usial processing */
gistUserPicksplit(r, entryvec, &v, itup, *len, giststate);
v.spl_leftvalid = v.spl_rightvalid = true;
}
/* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len+1));
rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len+1));
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len + 1));
rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len + 1));
for (i = 0; i < v.spl_nleft; i++)
lvectup[i] = itup[v.spl_left[i] - 1];
......@@ -986,7 +1098,8 @@ gistSplit(Relation r,
rvectup[i] = itup[v.spl_right[i] - 1];
/* place invalid tuples on right page if it isn't done yet */
for (fakeoffset = entryvec->n; fakeoffset < *len+1 && lencleaneditup; fakeoffset++) {
for (fakeoffset = entryvec->n; fakeoffset < *len + 1 && lencleaneditup; fakeoffset++)
{
rvectup[v.spl_nright++] = itup[realoffset[fakeoffset] - 1];
}
......@@ -999,26 +1112,27 @@ gistSplit(Relation r,
}
else
{
char *ptr;
char *ptr;
gistfillbuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
/* XLOG stuff */
ROTATEDIST(*dist);
(*dist)->block.blkno = BufferGetBlockNumber(rightbuf);
(*dist)->block.num = v.spl_nright;
(*dist)->list = (IndexTupleData*)palloc( BLCKSZ );
ptr = (char*) ( (*dist)->list );
for(i=0;i<v.spl_nright;i++) {
memcpy( ptr, rvectup[i], IndexTupleSize( rvectup[i] ) );
ptr += IndexTupleSize( rvectup[i] );
(*dist)->list = (IndexTupleData *) palloc(BLCKSZ);
ptr = (char *) ((*dist)->list);
for (i = 0; i < v.spl_nright; i++)
{
memcpy(ptr, rvectup[i], IndexTupleSize(rvectup[i]));
ptr += IndexTupleSize(rvectup[i]);
}
(*dist)->lenlist = ptr - ( (char*) ( (*dist)->list ) );
(*dist)->lenlist = ptr - ((char *) ((*dist)->list));
(*dist)->buffer = rightbuf;
nlen = 1;
newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
newtup[0] = ( v.spl_rightvalid ) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull)
: gist_form_invalid_tuple( rbknum );
newtup[0] = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull)
: gist_form_invalid_tuple(rbknum);
ItemPointerSetBlockNumber(&(newtup[0]->t_tid), rbknum);
}
......@@ -1034,34 +1148,35 @@ gistSplit(Relation r,
}
else
{
char *ptr;
char *ptr;
gistfillbuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
/* XLOG stuff */
ROTATEDIST(*dist);
(*dist)->block.blkno = BufferGetBlockNumber(leftbuf);
(*dist)->block.num = v.spl_nleft;
(*dist)->list = (IndexTupleData*)palloc( BLCKSZ );
ptr = (char*) ( (*dist)->list );
for(i=0;i<v.spl_nleft;i++) {
memcpy( ptr, lvectup[i], IndexTupleSize( lvectup[i] ) );
ptr += IndexTupleSize( lvectup[i] );
(*dist)->list = (IndexTupleData *) palloc(BLCKSZ);
ptr = (char *) ((*dist)->list);
for (i = 0; i < v.spl_nleft; i++)
{
memcpy(ptr, lvectup[i], IndexTupleSize(lvectup[i]));
ptr += IndexTupleSize(lvectup[i]);
}
(*dist)->lenlist = ptr - ( (char*) ( (*dist)->list ) );
(*dist)->lenlist = ptr - ((char *) ((*dist)->list));
(*dist)->buffer = leftbuf;
if (BufferGetBlockNumber(buffer) != GIST_ROOT_BLKNO)
PageRestoreTempPage(left, p);
nlen += 1;
newtup = (IndexTuple *) repalloc(newtup, sizeof(IndexTuple) * nlen);
newtup[nlen - 1] = ( v.spl_leftvalid ) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull)
: gist_form_invalid_tuple( lbknum );
newtup[nlen - 1] = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull)
: gist_form_invalid_tuple(lbknum);
ItemPointerSetBlockNumber(&(newtup[nlen - 1]->t_tid), lbknum);
}
GistClearTuplesDeleted(p);
*len = nlen;
return newtup;
}
......@@ -1071,18 +1186,19 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
{
Page page;
Assert( BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO );
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = BufferGetPage(buffer);
GISTInitBuffer(buffer, 0);
gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
if ( !r->rd_istemp ) {
XLogRecPtr recptr;
XLogRecData *rdata;
if (!r->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData *rdata;
rdata = formUpdateRdata(r->rd_node, GIST_ROOT_BLKNO,
NULL, 0, false, itup, len, key);
NULL, 0, false, itup, len, key);
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata);
......@@ -1090,7 +1206,8 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION();
} else
}
else
PageSetLSN(page, XLogRecPtrForTemp);
}
......@@ -1136,4 +1253,3 @@ freeGISTstate(GISTSTATE *giststate)
{
/* no work */
}
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.50 2005/06/27 12:45:22 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.51 2005/09/22 20:44:36 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -20,64 +20,71 @@
#include "utils/memutils.h"
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
ScanDirection dir);
static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
ScanDirection dir);
static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
OffsetNumber offset);
OffsetNumber offset);
/*
 * killtuple -- set the LP_DELETE flag on the index item identified by iptr.
 *
 *	r		index relation, used to read pages reached via right links
 *	so		scan state; so->curbuf is the starting buffer and so->stack->lsn
 *			is the page LSN remembered for it
 *	iptr	item pointer identifying the entry to flag
 *
 * If the current page's LSN still equals the remembered one, the item at
 * iptr's offset is flagged directly.  Otherwise the page is searched for a
 * tuple whose t_tid equals *iptr, and if none is found the search continues
 * along the chain of right links until the tuple is found or the chain ends.
 *
 * Every buffer is share-locked while it is examined and unlocked before
 * leaving it.  Buffers read here (anything other than so->curbuf) are also
 * released; so->curbuf itself is never released by this function.
 */
static void
killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr)
{
	Buffer		buffer = so->curbuf;

	for (;;)
	{
		Page		p;
		BlockNumber blkno;
		OffsetNumber offset,
					maxoff;

		LockBuffer(buffer, GIST_SHARE);
		p = (Page) BufferGetPage(buffer);

		if (buffer == so->curbuf && XLByteEQ(so->stack->lsn, PageGetLSN(p)))
		{
			/* page unchanged, so the remembered offset is still correct */
			offset = ItemPointerGetOffsetNumber(iptr);
			PageGetItemId(p, offset)->lp_flags |= LP_DELETE;
			SetBufferCommitInfoNeedsSave(buffer);
			LockBuffer(buffer, GIST_UNLOCK);
			break;
		}

		/* page differs from what we remembered: search it for the tuple */
		maxoff = PageGetMaxOffsetNumber(p);

		for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
		{
			IndexTuple	ituple = (IndexTuple) PageGetItem(p, PageGetItemId(p, offset));

			if (ItemPointerEquals(&(ituple->t_tid), iptr))
			{
				/* found */
				PageGetItemId(p, offset)->lp_flags |= LP_DELETE;
				SetBufferCommitInfoNeedsSave(buffer);
				LockBuffer(buffer, GIST_UNLOCK);
				if (buffer != so->curbuf)
					ReleaseBuffer(buffer);
				return;
			}
		}

		/* follow right link */

		/*
		 * ??? is it good? if tuple dropped by concurrent vacuum, we will read
		 * all leaf pages...
		 */
		blkno = GistPageGetOpaque(p)->rightlink;
		LockBuffer(buffer, GIST_UNLOCK);
		if (buffer != so->curbuf)
			ReleaseBuffer(buffer);

		if (blkno == InvalidBlockNumber)
			/* not found: the tuple was dropped by somebody else */
			return;

		buffer = ReadBuffer(r, blkno);
	}
}
/*
* gistgettuple() -- Get the next tuple in the scan
......@@ -85,27 +92,27 @@ killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr) {
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
	/* argument 0: the scan descriptor; argument 1: the scan direction */
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
	GISTScanOpaque so;
	ItemPointerData tid;
	bool		res;

	so = (GISTScanOpaque) scan->opaque;

	/*
	 * If we have produced an index tuple in the past and the executor has
	 * informed us we need to mark it as "killed", do so now.
	 */
	if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
		killtuple(scan->indexRelation, so, &(scan->currentItemData));

	/*
	 * Get the next tuple that matches the search key.  If asked to skip killed
	 * tuples, continue looping until we find a non-killed tuple that matches
	 * the search key.
	 *
	 * gistnext() is asked for at most one TID and returns how many it stored,
	 * so a nonzero result means a tuple was found.
	 */
	res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false;

	PG_RETURN_BOOL(res);
}
......@@ -114,12 +121,12 @@ Datum
gistgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
*returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false);
PG_RETURN_BOOL(*returned_tids == max_tids);
}
......@@ -128,17 +135,17 @@ gistgetmulti(PG_FUNCTION_ARGS)
* either to fetch the first such tuple or subsequent matching
* tuples. Returns true iff a matching tuple was found.
*/
static int
static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples)
{
Page p;
OffsetNumber n;
GISTScanOpaque so;
GISTSearchStack *stk;
GISTSearchStack *stk;
IndexTuple it;
GISTPageOpaque opaque;
bool resetoffset=false;
int ntids=0;
GISTPageOpaque opaque;
bool resetoffset = false;
int ntids = 0;
so = (GISTScanOpaque) scan->opaque;
......@@ -149,59 +156,67 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
Assert(so->stack == NULL);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
stk = so->stack = (GISTSearchStack*) palloc0( sizeof(GISTSearchStack) );
stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));
stk->next = NULL;
stk->block = GIST_ROOT_BLKNO;
} else if ( so->curbuf == InvalidBuffer ) {
}
else if (so->curbuf == InvalidBuffer)
{
return 0;
}
for(;;) {
for (;;)
{
/* First of all, we need lock buffer */
Assert( so->curbuf != InvalidBuffer );
LockBuffer( so->curbuf, GIST_SHARE );
Assert(so->curbuf != InvalidBuffer);
LockBuffer(so->curbuf, GIST_SHARE);
p = BufferGetPage(so->curbuf);
opaque = GistPageGetOpaque( p );
opaque = GistPageGetOpaque(p);
resetoffset = false;
if ( XLogRecPtrIsInvalid( so->stack->lsn ) || !XLByteEQ( so->stack->lsn, PageGetLSN(p) ) ) {
if (XLogRecPtrIsInvalid(so->stack->lsn) || !XLByteEQ(so->stack->lsn, PageGetLSN(p)))
{
/* page changed from last visit or visit first time , reset offset */
so->stack->lsn = PageGetLSN(p);
resetoffset = true;
/* check for a page split that occurred since the last visit or the visit to the parent */
if ( !XLogRecPtrIsInvalid( so->stack->parentlsn ) &&
XLByteLT( so->stack->parentlsn, opaque->nsn ) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
(so->stack->next==NULL || so->stack->next->block != opaque->rightlink) /* check if already added */) {
if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
XLByteLT(so->stack->parentlsn, opaque->nsn) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
(so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already
added */ )
{
/* detect page split, follow right link to add pages */
stk = (GISTSearchStack*) palloc( sizeof(GISTSearchStack) );
stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
stk->next = so->stack->next;
stk->block = opaque->rightlink;
stk->parentlsn = so->stack->parentlsn;
memset( &(stk->lsn), 0, sizeof(GistNSN) );
memset(&(stk->lsn), 0, sizeof(GistNSN));
so->stack->next = stk;
}
}
/* if page is empty, then just skip it */
if ( PageIsEmpty(p) ) {
LockBuffer( so->curbuf, GIST_UNLOCK );
if (PageIsEmpty(p))
{
LockBuffer(so->curbuf, GIST_UNLOCK);
stk = so->stack->next;
pfree( so->stack );
pfree(so->stack);
so->stack = stk;
if (so->stack == NULL) {
if (so->stack == NULL)
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
return ntids;
}
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
stk->block);
continue;
}
......@@ -215,33 +230,33 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
else
{
n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n);
else
n = OffsetNumberNext(n);
}
/* wonderfull, we can look at page */
/* wonderfull, we can look at page */
for(;;)
for (;;)
{
n = gistfindnext(scan, n, dir);
if (!OffsetNumberIsValid(n))
{
/*
* We ran out of matching index entries on the current
* page, so pop the top stack entry and use it to continue
* the search.
* We ran out of matching index entries on the current page,
* so pop the top stack entry and use it to continue the
* search.
*/
LockBuffer( so->curbuf, GIST_UNLOCK );
LockBuffer(so->curbuf, GIST_UNLOCK);
stk = so->stack->next;
pfree( so->stack );
pfree(so->stack);
so->stack = stk;
/* If we're out of stack entries, we're done */
if (so->stack == NULL)
{
ReleaseBuffer(so->curbuf);
......@@ -250,8 +265,8 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
}
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block);
/* XXX go up */
stk->block);
/* XXX go up */
break;
}
......@@ -259,20 +274,22 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
{
/*
* We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so
* that we can efficiently resume the index scan later.
* return success. Note that we keep "curbuf" pinned so that
* we can efficiently resume the index scan later.
*/
ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n);
BufferGetBlockNumber(so->curbuf), n);
if ( ! ( ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n)) ) ) {
if (!(ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n))))
{
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
tids[ntids] = scan->xs_ctup.t_self = it->t_tid;
ntids++;
if ( ntids == maxtids ) {
LockBuffer( so->curbuf, GIST_UNLOCK );
if (ntids == maxtids)
{
LockBuffer(so->curbuf, GIST_UNLOCK);
return ntids;
}
}
......@@ -281,14 +298,14 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
{
/*
* We've found an entry in an internal node whose key is
* consistent with the search key, so push it to stack
* consistent with the search key, so push it to stack
*/
stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
memset( &(stk->lsn), 0, sizeof(GistNSN) );
memset(&(stk->lsn), 0, sizeof(GistNSN));
stk->parentlsn = so->stack->lsn;
stk->next = so->stack->next;
......@@ -320,12 +337,12 @@ gistindex_keytest(IndexTuple tuple,
IndexScanDesc scan,
OffsetNumber offset)
{
int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData;
Relation r = scan->indexRelation;
int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData;
Relation r = scan->indexRelation;
GISTScanOpaque so;
Page p;
GISTSTATE *giststate;
Page p;
GISTSTATE *giststate;
so = (GISTScanOpaque) scan->opaque;
giststate = so->giststate;
......@@ -334,9 +351,10 @@ gistindex_keytest(IndexTuple tuple,
IncrIndexProcessed();
/*
* Tuple doesn't restore after crash recovery because of inclomplete insert
*/
if ( !GistPageIsLeaf(p) && GistTupleIsInvalid(tuple) )
* Tuple doesn't restore after crash recovery because of inclomplete
* insert
*/
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
return true;
while (keySize > 0)
......@@ -366,13 +384,12 @@ gistindex_keytest(IndexTuple tuple,
FALSE, isNull);
/*
* Call the Consistent function to evaluate the test. The
* arguments are the index datum (as a GISTENTRY*), the comparison
* datum, and the comparison operator's strategy number and
* subtype from pg_amop.
* Call the Consistent function to evaluate the test. The arguments
* are the index datum (as a GISTENTRY*), the comparison datum, and
* the comparison operator's strategy number and subtype from pg_amop.
*
* (Presently there's no need to pass the subtype since it'll always
* be zero, but might as well pass it for possible future use.)
* (Presently there's no need to pass the subtype since it'll always be
* zero, but might as well pass it for possible future use.)
*/
test = FunctionCall4(&key->sk_func,
PointerGetDatum(&de),
......@@ -399,26 +416,26 @@ gistindex_keytest(IndexTuple tuple,
static OffsetNumber
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{
OffsetNumber maxoff;
IndexTuple it;
GISTScanOpaque so;
MemoryContext oldcxt;
Page p;
OffsetNumber maxoff;
IndexTuple it;
GISTScanOpaque so;
MemoryContext oldcxt;
Page p;
so = (GISTScanOpaque) scan->opaque;
p = BufferGetPage(so->curbuf);
maxoff = PageGetMaxOffsetNumber(p);
/*
* Make sure we're in a short-lived memory context when we invoke
* a user-supplied GiST method in gistindex_keytest(), so we don't
* leak memory
* Make sure we're in a short-lived memory context when we invoke a
* user-supplied GiST method in gistindex_keytest(), so we don't leak
* memory
*/
oldcxt = MemoryContextSwitchTo(so->tempCxt);
/*
* If we modified the index during the scan, we may have a pointer to
* a ghost tuple, before the scan. If this is the case, back up one.
* If we modified the index during the scan, we may have a pointer to a
* ghost tuple, before the scan. If this is the case, back up one.
*/
if (so->flags & GS_CURBEFORE)
{
......@@ -442,9 +459,8 @@ gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
MemoryContextReset(so->tempCxt);
/*
* If we found a matching entry, return its offset; otherwise
* return InvalidOffsetNumber to inform the caller to go to the
* next page.
* If we found a matching entry, return its offset; otherwise return
* InvalidOffsetNumber to inform the caller to go to the next page.
*/
if (n >= FirstOffsetNumber && n <= maxoff)
return n;
......
......@@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.1 2005/07/01 19:19:02 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.2 2005/09/22 20:44:36 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,10 +30,10 @@ typedef struct
static int compare_KB(const void *a, const void *b);
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
StrategyNumber strategy);
static double size_box(Datum dbox);
static bool rtree_internal_consistent(BOX *key, BOX *query,
StrategyNumber strategy);
StrategyNumber strategy);
/**************************************************
......@@ -268,11 +268,11 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
#define ADDLIST( list, unionD, pos, num ) do { \
if ( pos ) { \
if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \
if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \
if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \
if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \
if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \
if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \
} else { \
memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \
memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \
} \
(list)[pos] = num; \
(pos)++; \
......@@ -411,62 +411,62 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
case RTLeftStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_left,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverLeftStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overleft,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverlapStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverRightStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overright,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTRightStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_right,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTSameStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_same,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTContainsStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contain,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTContainedByStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contained,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverBelowStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overbelow,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTBelowStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_below,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTAboveStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_above,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverAboveStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overabove,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
default:
retval = FALSE;
......@@ -477,7 +477,7 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
static double
size_box(Datum dbox)
{
BOX *box = DatumGetBoxP(dbox);
BOX *box = DatumGetBoxP(dbox);
if (box == NULL || box->high.x <= box->low.x || box->high.y <= box->low.y)
return 0.0;
......@@ -506,58 +506,58 @@ rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy)
case RTLeftStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overright,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverLeftStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_right,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverlapStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverRightStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_left,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTRightStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overleft,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTSameStrategyNumber:
case RTContainsStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contain,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTContainedByStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverBelowStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_above,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTBelowStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overabove,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTAboveStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overbelow,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
case RTOverAboveStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_below,
PointerGetDatum(key),
PointerGetDatum(query)));
PointerGetDatum(query)));
break;
default:
retval = FALSE;
......@@ -621,8 +621,8 @@ gist_poly_consistent(PG_FUNCTION_ARGS)
/*
* Since the operators are marked lossy anyway, we can just use
* rtree_internal_consistent even at leaf nodes. (This works
* in part because the index entries are bounding boxes not polygons.)
* rtree_internal_consistent even at leaf nodes. (This works in part
* because the index entries are bounding boxes not polygons.)
*/
result = rtree_internal_consistent(DatumGetBoxP(entry->key),
&(query->boundbox), strategy);
......@@ -651,7 +651,7 @@ gist_circle_compress(PG_FUNCTION_ARGS)
retval = palloc(sizeof(GISTENTRY));
if (DatumGetCircleP(entry->key) != NULL)
{
CIRCLE *in = DatumGetCircleP(entry->key);
CIRCLE *in = DatumGetCircleP(entry->key);
BOX *r;
r = (BOX *) palloc(sizeof(BOX));
......@@ -683,7 +683,7 @@ Datum
gist_circle_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
CIRCLE *query = PG_GETARG_CIRCLE_P(1);
CIRCLE *query = PG_GETARG_CIRCLE_P(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
BOX bbox;
bool result;
......@@ -693,8 +693,8 @@ gist_circle_consistent(PG_FUNCTION_ARGS)
/*
* Since the operators are marked lossy anyway, we can just use
* rtree_internal_consistent even at leaf nodes. (This works
* in part because the index entries are bounding boxes not circles.)
* rtree_internal_consistent even at leaf nodes. (This works in part
* because the index entries are bounding boxes not circles.)
*/
bbox.high.x = query->center.x + query->radius;
bbox.low.x = query->center.x - query->radius;
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.60 2005/09/22 18:49:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.61 2005/09/22 20:44:36 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -120,11 +120,11 @@ gistrescan(PG_FUNCTION_ARGS)
scan->numberOfKeys * sizeof(ScanKeyData));
/*
* Modify the scan key so that all the Consistent method is
* called for all comparisons. The original operator is passed
* to the Consistent function in the form of its strategy
* number, which is available from the sk_strategy field, and
* its subtype from the sk_subtype field.
* Modify the scan key so that all the Consistent method is called for
* all comparisons. The original operator is passed to the Consistent
* function in the form of its strategy number, which is available
* from the sk_strategy field, and its subtype from the sk_subtype
* field.
*/
for (i = 0; i < scan->numberOfKeys; i++)
scan->keyData[i].sk_func = so->giststate->consistentFn[scan->keyData[i].sk_attno - 1];
......@@ -138,7 +138,7 @@ gistmarkpos(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so;
GISTSearchStack *o,
GISTSearchStack *o,
*n,
*tmp;
......@@ -187,7 +187,7 @@ gistrestrpos(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so;
GISTSearchStack *o,
GISTSearchStack *o,
*n,
*tmp;
......@@ -308,9 +308,9 @@ ReleaseResources_gist(void)
GISTScanList next;
/*
* Note: this should be a no-op during normal query shutdown. However,
* in an abort situation ExecutorEnd is not called and so there may be
* open index scans to clean up.
* Note: this should be a no-op during normal query shutdown. However, in
* an abort situation ExecutorEnd is not called and so there may be open
* index scans to clean up.
*/
prev = NULL;
......@@ -338,8 +338,8 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum, XLogR
GISTScanList l;
Oid relid;
if ( XLogRecPtrIsInvalid(newlsn) || XLogRecPtrIsInvalid(oldlsn) )
return;
if (XLogRecPtrIsInvalid(newlsn) || XLogRecPtrIsInvalid(oldlsn))
return;
relid = RelationGetRelid(rel);
for (l = GISTScans; l != NULL; l = l->gsl_next)
......@@ -365,7 +365,7 @@ gistadjone(IndexScanDesc scan,
BlockNumber blkno,
OffsetNumber offnum, XLogRecPtr newlsn, XLogRecPtr oldlsn)
{
GISTScanOpaque so = (GISTScanOpaque) scan->opaque ;
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
adjustiptr(scan, &(scan->currentItemData), so->stack, op, blkno, offnum, newlsn, oldlsn);
adjustiptr(scan, &(scan->currentMarkData), so->markstk, op, blkno, offnum, newlsn, oldlsn);
......@@ -399,7 +399,8 @@ adjustiptr(IndexScanDesc scan,
{
case GISTOP_DEL:
/* back up one if we need to */
if (curoff >= offnum && XLByteEQ(stk->lsn, oldlsn) ) /* the same vesrion of page */
if (curoff >= offnum && XLByteEQ(stk->lsn, oldlsn)) /* the same vesrion of
* page */
{
if (curoff > FirstOffsetNumber)
{
......@@ -409,8 +410,7 @@ adjustiptr(IndexScanDesc scan,
else
{
/*
* remember that we're before the current
* tuple
* remember that we're before the current tuple
*/
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(scan->currentItemData))
......@@ -435,6 +435,7 @@ gistfreestack(GISTSearchStack *s)
while (s != NULL)
{
GISTSearchStack *p = s->next;
pfree(s);
s = p;
}
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.6 2005/09/22 18:49:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.7 2005/09/22 20:44:36 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
......@@ -22,9 +22,9 @@
#include "storage/freespace.h"
/*
 * group flags ( in gistadjsubkey )
 *
 * LEFT_ADDED and RIGHT_ADDED are distinct single-bit values; BOTH_ADDED is
 * their bitwise OR, i.e. both bits set.
 */
#define LEFT_ADDED 0x01
#define LEFT_ADDED 0x01
#define RIGHT_ADDED 0x02
#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED )
#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED )
/*
......@@ -47,8 +47,7 @@
} while(0);
static void
gistpenalty(GISTSTATE *giststate, int attno,
static void gistpenalty(GISTSTATE *giststate, int attno,
GISTENTRY *key1, bool isNull1,
GISTENTRY *key2, bool isNull2, float *penalty);
......@@ -57,13 +56,13 @@ gistpenalty(GISTSTATE *giststate, int attno,
*/
OffsetNumber
gistfillbuffer(Relation r, Page page, IndexTuple *itup,
int len, OffsetNumber off)
int len, OffsetNumber off)
{
OffsetNumber l = InvalidOffsetNumber;
int i;
if ( off == InvalidOffsetNumber )
off = ( PageIsEmpty(page) ) ? FirstOffsetNumber :
if (off == InvalidOffsetNumber)
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
for (i = 0; i < len; i++)
......@@ -137,13 +136,13 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
GistEntryVector *evec;
int i;
GISTENTRY centry[INDEX_MAX_KEYS];
IndexTuple res;
IndexTuple res;
evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);
for(i = 0; i<len; i++)
if ( GistTupleIsInvalid( itvec[i] ) )
return gist_form_invalid_tuple( InvalidBlockNumber );
for (i = 0; i < len; i++)
if (GistTupleIsInvalid(itvec[i]))
return gist_form_invalid_tuple(InvalidBlockNumber);
for (i = 0; i < r->rd_att->natts; i++)
{
......@@ -155,6 +154,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
for (j = 0; j < len; j++)
{
bool IsNull;
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
if (IsNull)
continue;
......@@ -176,7 +176,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
}
else
{
int datumsize;
int datumsize;
if (real_len == 1)
{
......@@ -202,7 +202,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
}
res = index_form_tuple(giststate->tupdesc, attr, isnull);
GistTupleSetValid( res );
GistTupleSetValid(res);
return res;
}
......@@ -227,9 +227,9 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
IndexTuple newtup = NULL;
int i;
if ( GistTupleIsInvalid(oldtup) || GistTupleIsInvalid(addtup) )
return gist_form_invalid_tuple( ItemPointerGetBlockNumber( &(oldtup->t_tid) ) );
if (GistTupleIsInvalid(oldtup) || GistTupleIsInvalid(addtup))
return gist_form_invalid_tuple(ItemPointerGetBlockNumber(&(oldtup->t_tid)));
evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
evec->n = 2;
ev0p = &(evec->vector[0]);
......@@ -268,7 +268,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
}
else
{
bool result;
bool result;
FunctionCall3(&giststate->equalFn[i],
ev0p->key,
......@@ -301,7 +301,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
void
gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl, bool isall)
{
int lr;
int lr;
for (lr = 0; lr < 2; lr++)
{
......@@ -309,7 +309,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
int i;
Datum *attr;
int len,
*attrsize;
*attrsize;
bool *isnull;
GistEntryVector *evec;
......@@ -354,7 +354,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
&(evec->vector[real_len]),
datum,
NULL, NULL, (OffsetNumber) 0,
ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull),
ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull),
FALSE, IsNull);
real_len++;
......@@ -402,14 +402,14 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl)
int curid = 1;
/*
* first key is always not null (see gistinsert), so we may not check
* for nulls
* first key is always not null (see gistinsert), so we may not check for
* nulls
*/
for (i = 0; i < spl->spl_nleft; i++)
{
int j;
int len;
bool result;
int j;
int len;
bool result;
if (spl->spl_idgrp[spl->spl_left[i]])
continue;
......@@ -540,12 +540,12 @@ gistadjsubkey(Relation r,
for (j = 1; j < r->rd_att->natts; j++)
{
gistentryinit(entry, v->spl_lattr[j], r, NULL,
(OffsetNumber) 0, v->spl_lattrsize[j], FALSE);
(OffsetNumber) 0, v->spl_lattrsize[j], FALSE);
gistpenalty(giststate, j, &entry, v->spl_lisnull[j],
&identry[j], isnull[j], &lpenalty);
gistentryinit(entry, v->spl_rattr[j], r, NULL,
(OffsetNumber) 0, v->spl_rattrsize[j], FALSE);
(OffsetNumber) 0, v->spl_rattrsize[j], FALSE);
gistpenalty(giststate, j, &entry, v->spl_risnull[j],
&identry[j], isnull[j], &rpenalty);
......@@ -555,8 +555,7 @@ gistadjsubkey(Relation r,
}
/*
* add
* XXX: refactor this to avoid duplicating code
* add XXX: refactor this to avoid duplicating code
*/
if (lpenalty < rpenalty)
{
......@@ -643,12 +642,13 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
{
int j;
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
if ( !GistPageIsLeaf(p) && GistTupleIsInvalid(itup) ) {
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
{
ereport(LOG,
(errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery",
RelationGetRelationName(r))));
continue;
continue;
}
sum_grow = 0;
......@@ -683,7 +683,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
}
}
if ( which == InvalidOffsetNumber )
if (which == InvalidOffsetNumber)
which = FirstOffsetNumber;
return which;
......@@ -775,7 +775,8 @@ gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
for (i = 0; i < r->rd_att->natts; i++)
{
Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
gistdentryinit(giststate, i, &attdata[i],
datum, r, p, o,
ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]),
......@@ -801,8 +802,8 @@ void
GISTInitBuffer(Buffer b, uint32 f)
{
GISTPageOpaque opaque;
Page page;
Size pageSize;
Page page;
Size pageSize;
pageSize = BufferGetPageSize(b);
page = BufferGetPage(b);
......@@ -811,15 +812,16 @@ GISTInitBuffer(Buffer b, uint32 f)
opaque = GistPageGetOpaque(page);
opaque->flags = f;
opaque->rightlink = InvalidBlockNumber;
memset( &(opaque->nsn), 0, sizeof(GistNSN) );
memset(&(opaque->nsn), 0, sizeof(GistNSN));
}
void
gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
IndexTuple *itup, int len, GISTSTATE *giststate) {
gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
IndexTuple *itup, int len, GISTSTATE *giststate)
{
/*
* now let the user-defined picksplit function set up the split
* vector; in entryvec have no null value!!
* now let the user-defined picksplit function set up the split vector; in
* entryvec have no null value!!
*/
FunctionCall2(&giststate->picksplitFn[0],
PointerGetDatum(entryvec),
......@@ -837,8 +839,8 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
v->spl_risnull[0] = false;
/*
* if index is multikey, then we must to try get smaller bounding box
* for subkey(s)
* if index is multikey, then we must to try get smaller bounding box for
* subkey(s)
*/
if (r->rd_att->natts > 1)
{
......@@ -854,35 +856,42 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
gistunionsubkey(r, giststate, itup, v, false);
/*
* if possible, we insert equivalent tuples with control by
* penalty for a subkey(s)
* if possible, we insert equivalent tuples with control by penalty
* for a subkey(s)
*/
if (MaxGrpId > 1)
gistadjsubkey(r, itup, len, v, giststate);
}
}
Buffer
gistNewBuffer(Relation r) {
Buffer buffer = InvalidBuffer;
bool needLock;
Buffer
gistNewBuffer(Relation r)
{
Buffer buffer = InvalidBuffer;
bool needLock;
while(true) {
while (true)
{
BlockNumber blkno = GetFreeIndexPage(&r->rd_node);
if (blkno == InvalidBlockNumber)
break;
buffer = ReadBuffer(r, blkno);
if ( ConditionalLockBuffer(buffer) ) {
Page page = BufferGetPage(buffer);
if ( GistPageIsDeleted( page ) ) {
GistPageSetNonDeleted( page );
if (ConditionalLockBuffer(buffer))
{
Page page = BufferGetPage(buffer);
if (GistPageIsDeleted(page))
{
GistPageSetNonDeleted(page);
return buffer;
} else
}
else
LockBuffer(buffer, GIST_UNLOCK);
}
ReleaseBuffer( buffer );
ReleaseBuffer(buffer);
}
needLock = !RELATION_IS_LOCAL(r);
......@@ -895,6 +904,6 @@ gistNewBuffer(Relation r) {
if (needLock)
UnlockRelationForExtension(r, ExclusiveLock);
return buffer;
}
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.8 2005/09/22 18:49:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.9 2005/09/22 20:44:36 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -25,162 +25,198 @@
#include "storage/freespace.h"
#include "storage/smgr.h"
/*
 * needFullVacuum: filled by gistbulkdelete, cleared by gistvacuumcleanup.
 *
 * gistbulkdelete resets the flag to false when it starts and sets it to true
 * when it meets an invalid index tuple on a non-leaf page.  gistvacuumcleanup
 * emits a NOTICE when the flag is set and VACUUM FULL was not requested, and
 * sets the flag back to false.
 */
static bool needFullVacuum = false;
/* filled by gistbulkdelete, cleared by gistvacuumcleanup */
static bool needFullVacuum = false;
/*
 * GistVacuum -- state handed to every level of the recursive
 * gistVacuumUpdate walk.
 *
 * giststate	passed to gistSplit and gistunion
 * index		relation whose pages gistVacuumUpdate reads and rewrites
 * opCtx		memory context made current around the gistextractbuffer /
 *				gistSplit / gistunion calls and reset once their results
 *				have been copied out
 * result		statistics struct; pages_deleted is incremented for each
 *				empty non-root page that gets marked deleted
 */
typedef struct {
typedef struct
{
GISTSTATE giststate;
Relation index;
MemoryContext opCtx;
IndexBulkDeleteResult *result;
MemoryContext opCtx;
IndexBulkDeleteResult *result;
} GistVacuum;
/*
 * ArrayTuple -- what gistVacuumUpdate returns for one page.
 *
 * itup			palloc'd array of palloc'd index tuples, or NULL when the
 *				page produced none
 * ituplen		number of entries in itup
 * emptypage	true when the (non-root) page turned out to be empty and was
 *				marked deleted
 *
 * When a child reports ituplen != 0 or emptypage, the parent removes its
 * tuple for that child and collects the returned tuples for re-insertion.
 */
typedef struct {
IndexTuple *itup;
int ituplen;
typedef struct
{
IndexTuple *itup;
int ituplen;
bool emptypage;
} ArrayTuple;
static ArrayTuple
gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
{
ArrayTuple res = {NULL, 0, false};
Buffer buffer;
Page page;
OffsetNumber i, maxoff;
OffsetNumber i,
maxoff;
ItemId iid;
int lenaddon=4, curlenaddon=0, ntodelete=0;
IndexTuple idxtuple, *addon=NULL;
bool needwrite=false;
OffsetNumber todelete[MaxOffsetNumber];
ItemPointerData *completed=NULL;
int ncompleted=0, lencompleted=16;
int lenaddon = 4,
curlenaddon = 0,
ntodelete = 0;
IndexTuple idxtuple,
*addon = NULL;
bool needwrite = false;
OffsetNumber todelete[MaxOffsetNumber];
ItemPointerData *completed = NULL;
int ncompleted = 0,
lencompleted = 16;
buffer = ReadBuffer(gv->index, blkno);
page = (Page) BufferGetPage(buffer);
maxoff = PageGetMaxOffsetNumber(page);
if ( GistPageIsLeaf(page) ) {
if ( GistTuplesDeleted(page) ) {
if (GistPageIsLeaf(page))
{
if (GistTuplesDeleted(page))
{
needunion = needwrite = true;
GistClearTuplesDeleted(page);
}
} else {
completed = (ItemPointerData*)palloc( sizeof(ItemPointerData)*lencompleted );
addon=(IndexTuple*)palloc(sizeof(IndexTuple)*lenaddon);
}
else
{
completed = (ItemPointerData *) palloc(sizeof(ItemPointerData) * lencompleted);
addon = (IndexTuple *) palloc(sizeof(IndexTuple) * lenaddon);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
ArrayTuple chldtuple;
bool needchildunion;
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
ArrayTuple chldtuple;
bool needchildunion;
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid);
needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false;
if ( needchildunion )
if (needchildunion)
elog(DEBUG2, "gistVacuumUpdate: need union for block %u",
ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
chldtuple = gistVacuumUpdate( gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)),
needchildunion );
if ( chldtuple.ituplen || chldtuple.emptypage ) {
chldtuple = gistVacuumUpdate(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)),
needchildunion);
if (chldtuple.ituplen || chldtuple.emptypage)
{
PageIndexTupleDelete(page, i);
todelete[ ntodelete++ ] = i;
i--; maxoff--;
needwrite=needunion=true;
if ( chldtuple.ituplen ) {
while( curlenaddon + chldtuple.ituplen >= lenaddon ) {
lenaddon*=2;
addon=(IndexTuple*)repalloc( addon, sizeof(IndexTuple)*lenaddon );
todelete[ntodelete++] = i;
i--;
maxoff--;
needwrite = needunion = true;
if (chldtuple.ituplen)
{
while (curlenaddon + chldtuple.ituplen >= lenaddon)
{
lenaddon *= 2;
addon = (IndexTuple *) repalloc(addon, sizeof(IndexTuple) * lenaddon);
}
memcpy( addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple) );
memcpy(addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple));
curlenaddon += chldtuple.ituplen;
if ( chldtuple.ituplen > 1 ) {
/* child was splitted, so we need mark completion insert(split) */
int j;
while( ncompleted + chldtuple.ituplen > lencompleted ) {
lencompleted*=2;
completed = (ItemPointerData*)repalloc(completed, sizeof(ItemPointerData) * lencompleted);
}
for(j=0;j<chldtuple.ituplen;j++) {
ItemPointerCopy( &(chldtuple.itup[j]->t_tid), completed + ncompleted );
ncompleted++;
if (chldtuple.ituplen > 1)
{
/*
* child was splitted, so we need mark completion
* insert(split)
*/
int j;
while (ncompleted + chldtuple.ituplen > lencompleted)
{
lencompleted *= 2;
completed = (ItemPointerData *) repalloc(completed, sizeof(ItemPointerData) * lencompleted);
}
for (j = 0; j < chldtuple.ituplen; j++)
{
ItemPointerCopy(&(chldtuple.itup[j]->t_tid), completed + ncompleted);
ncompleted++;
}
}
pfree( chldtuple.itup );
pfree(chldtuple.itup);
}
}
}
if ( curlenaddon ) {
if (curlenaddon)
{
/* insert updated tuples */
if (gistnospace(page, addon, curlenaddon)) {
if (gistnospace(page, addon, curlenaddon))
{
/* there is no space on page to insert tuples */
IndexTuple *vec;
SplitedPageLayout *dist=NULL,*ptr;
int i;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
IndexTuple *vec;
SplitedPageLayout *dist = NULL,
*ptr;
int i;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractbuffer(buffer, &(res.ituplen));
vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon);
res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate));
res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate));
MemoryContextSwitchTo(oldCtx);
vec = (IndexTuple*)palloc( sizeof(IndexTuple) * res.ituplen );
for(i=0;i<res.ituplen;i++) {
vec[i] = (IndexTuple)palloc( IndexTupleSize(res.itup[i]) );
memcpy( vec[i], res.itup[i], IndexTupleSize(res.itup[i]) );
vec = (IndexTuple *) palloc(sizeof(IndexTuple) * res.ituplen);
for (i = 0; i < res.ituplen; i++)
{
vec[i] = (IndexTuple) palloc(IndexTupleSize(res.itup[i]));
memcpy(vec[i], res.itup[i], IndexTupleSize(res.itup[i]));
}
res.itup = vec;
res.itup = vec;
if ( !gv->index->rd_istemp ) {
XLogRecPtr recptr;
XLogRecData *rdata;
ItemPointerData key; /* set key for incomplete insert */
char *xlinfo;
if (!gv->index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData *rdata;
ItemPointerData key; /* set key for incomplete
* insert */
char *xlinfo;
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
rdata = formSplitRdata(gv->index->rd_node, blkno,
&key, dist);
&key, dist);
xlinfo = rdata->data;
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist;
while(ptr) {
while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr=ptr->next;
ptr = ptr->next;
}
END_CRIT_SECTION();
pfree( xlinfo );
pfree( rdata );
} else {
pfree(xlinfo);
pfree(rdata);
}
else
{
ptr = dist;
while(ptr) {
while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr=ptr->next;
ptr = ptr->next;
}
}
ptr = dist;
while(ptr) {
if ( BufferGetBlockNumber(ptr->buffer) != blkno )
LockBuffer( ptr->buffer, GIST_UNLOCK );
while (ptr)
{
if (BufferGetBlockNumber(ptr->buffer) != blkno)
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer);
ptr=ptr->next;
ptr = ptr->next;
}
if ( blkno == GIST_ROOT_BLKNO ) {
ItemPointerData key; /* set key for incomplete insert */
if (blkno == GIST_ROOT_BLKNO)
{
ItemPointerData key; /* set key for incomplete
* insert */
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
......@@ -191,82 +227,98 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
WriteNoReleaseBuffer(buffer);
}
needwrite=false;
needwrite = false;
MemoryContextReset(gv->opCtx);
needunion = false; /* gistSplit already forms unions */
} else {
needunion = false; /* gistSplit already forms unions */
}
else
{
/* enough free space */
gistfillbuffer(gv->index, page, addon, curlenaddon, InvalidOffsetNumber);
}
gistfillbuffer(gv->index, page, addon, curlenaddon, InvalidOffsetNumber);
}
}
}
if ( needunion ) {
/* forms union for page or check empty*/
if ( PageIsEmpty(page) ) {
if ( blkno == GIST_ROOT_BLKNO ) {
needwrite=true;
GistPageSetLeaf( page );
} else {
needwrite=true;
res.emptypage=true;
GistPageSetDeleted( page );
if (needunion)
{
/* forms union for page or check empty */
if (PageIsEmpty(page))
{
if (blkno == GIST_ROOT_BLKNO)
{
needwrite = true;
GistPageSetLeaf(page);
}
else
{
needwrite = true;
res.emptypage = true;
GistPageSetDeleted(page);
gv->result->pages_deleted++;
}
} else {
IndexTuple *vec, tmp;
int veclen=0;
}
else
{
IndexTuple *vec,
tmp;
int veclen = 0;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractbuffer(buffer, &veclen);
tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
MemoryContextSwitchTo(oldCtx);
res.itup=(IndexTuple*)palloc( sizeof(IndexTuple) );
res.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
res.ituplen = 1;
res.itup[0] = (IndexTuple)palloc( IndexTupleSize(tmp) );
memcpy( res.itup[0], tmp, IndexTupleSize(tmp) );
res.itup[0] = (IndexTuple) palloc(IndexTupleSize(tmp));
memcpy(res.itup[0], tmp, IndexTupleSize(tmp));
ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno);
GistTupleSetValid( res.itup[0] );
GistTupleSetValid(res.itup[0]);
MemoryContextReset(gv->opCtx);
}
}
if ( needwrite ) {
if ( !gv->index->rd_istemp ) {
if (needwrite)
{
if (!gv->index->rd_istemp)
{
XLogRecData *rdata;
XLogRecPtr recptr;
char *xlinfo;
char *xlinfo;
rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete,
res.emptypage, addon, curlenaddon, NULL );
rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete,
res.emptypage, addon, curlenaddon, NULL);
xlinfo = rdata->data;
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION();
pfree( xlinfo );
pfree( rdata );
} else
pfree(xlinfo);
pfree(rdata);
}
else
PageSetLSN(page, XLogRecPtrForTemp);
WriteBuffer( buffer );
} else
ReleaseBuffer( buffer );
WriteBuffer(buffer);
}
else
ReleaseBuffer(buffer);
if ( ncompleted && !gv->index->rd_istemp )
gistxlogInsertCompletion( gv->index->rd_node, completed, ncompleted );
if (ncompleted && !gv->index->rd_istemp)
gistxlogInsertCompletion(gv->index->rd_node, completed, ncompleted);
for(i=0;i<curlenaddon;i++)
pfree( addon[i] );
if (addon) pfree(addon);
if (completed) pfree(completed);
for (i = 0; i < curlenaddon; i++)
pfree(addon[i]);
if (addon)
pfree(addon);
if (completed)
pfree(completed);
return res;
}
......@@ -278,17 +330,23 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
*/
Datum
gistvacuumcleanup(PG_FUNCTION_ARGS) {
gistvacuumcleanup(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
BlockNumber npages, blkno;
BlockNumber nFreePages, *freePages, maxFreePages;
BlockNumber lastBlock = GIST_ROOT_BLKNO, lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
BlockNumber npages,
blkno;
BlockNumber nFreePages,
*freePages,
maxFreePages;
BlockNumber lastBlock = GIST_ROOT_BLKNO,
lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
/* gistVacuumUpdate may cause hard work */
if ( info->vacuum_full ) {
if (info->vacuum_full)
{
GistVacuum gv;
ArrayTuple res;
......@@ -300,17 +358,20 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
gv.result = stats;
/* walk through the entire index for update tuples */
res = gistVacuumUpdate( &gv, GIST_ROOT_BLKNO, false );
/* cleanup */
if (res.itup) {
int i;
for(i=0;i<res.ituplen;i++)
pfree( res.itup[i] );
pfree( res.itup );
res = gistVacuumUpdate(&gv, GIST_ROOT_BLKNO, false);
/* cleanup */
if (res.itup)
{
int i;
for (i = 0; i < res.ituplen; i++)
pfree(res.itup[i]);
pfree(res.itup);
}
freeGISTstate(&(gv.giststate));
MemoryContextDelete(gv.opCtx);
} else if (needFullVacuum)
freeGISTstate(&(gv.giststate));
MemoryContextDelete(gv.opCtx);
}
else if (needFullVacuum)
ereport(NOTICE,
(errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery",
RelationGetRelationName(rel))));
......@@ -318,8 +379,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
needFullVacuum = false;
needLock = !RELATION_IS_LOCAL(rel);
if ( info->vacuum_full )
needLock = false; /* relation locked with AccessExclusiveLock */
if (info->vacuum_full)
needLock = false; /* relation locked with AccessExclusiveLock */
/* try to find deleted pages */
if (needLock)
......@@ -329,45 +390,52 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
UnlockRelationForExtension(rel, ExclusiveLock);
maxFreePages = npages;
if ( maxFreePages > MaxFSMPages )
if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages;
nFreePages = 0;
freePages = (BlockNumber*) palloc (sizeof(BlockNumber) * maxFreePages);
for(blkno=GIST_ROOT_BLKNO+1;blkno<npages;blkno++) {
Buffer buffer = ReadBuffer(rel, blkno);
Page page;
LockBuffer( buffer, GIST_SHARE );
page=(Page)BufferGetPage(buffer);
if ( GistPageIsDeleted(page) ) {
if (nFreePages < maxFreePages) {
freePages[ nFreePages ] = blkno;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
{
Buffer buffer = ReadBuffer(rel, blkno);
Page page;
LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer);
if (GistPageIsDeleted(page))
{
if (nFreePages < maxFreePages)
{
freePages[nFreePages] = blkno;
nFreePages++;
}
} else
}
else
lastFilledBlock = blkno;
LockBuffer( buffer, GIST_UNLOCK );
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
}
lastBlock = npages-1;
if ( info->vacuum_full && nFreePages>0 ) { /* try to truncate index */
int i;
for(i=0;i<nFreePages;i++)
if ( freePages[i] >= lastFilledBlock ) {
lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0)
{ /* try to truncate index */
int i;
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
nFreePages = i;
break;
}
if ( lastBlock > lastFilledBlock )
RelationTruncate( rel, lastFilledBlock+1 );
if (lastBlock > lastFilledBlock)
RelationTruncate(rel, lastFilledBlock + 1);
stats->pages_removed = lastBlock - lastFilledBlock;
}
RecordIndexFreeSpace( &rel->rd_node, nFreePages, freePages );
pfree( freePages );
RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);
pfree(freePages);
/* return statistics */
stats->pages_free = nFreePages;
......@@ -378,33 +446,37 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
UnlockRelationForExtension(rel, ExclusiveLock);
if (info->vacuum_full)
UnlockRelation(rel, AccessExclusiveLock);
UnlockRelation(rel, AccessExclusiveLock);
PG_RETURN_POINTER(stats);
}
/*
 * GistBDItem -- one entry of the singly linked work list that gistbulkdelete
 * uses to walk the index.
 *
 * parentlsn	LSN of the parent page as read (PageGetLSN) when this entry
 *				was created from one of its tuples; all-zero for the initial
 *				root entry, which is palloc0'd
 * blkno		block number of the page to visit
 * next			next entry to process
 */
typedef struct GistBDItem {
typedef struct GistBDItem
{
GistNSN parentlsn;
BlockNumber blkno;
struct GistBDItem *next;
BlockNumber blkno;
struct GistBDItem *next;
} GistBDItem;
/*
 * pushStackIfSplited -- queue the right sibling of a page that appears to
 * have been split after its parent was looked at.
 *
 * page is the page currently being processed and stack is its work-list
 * entry.  A split is recognised when all of these hold: the entry is not the
 * root block, stack->parentlsn is valid, stack->parentlsn is less than the
 * page's NSN, and the page has a right link.  In that case a new entry for
 * the right-link block is inserted directly after stack, carrying the same
 * parentlsn as stack.
 */
static void
pushStackIfSplited(Page page, GistBDItem *stack) {
pushStackIfSplited(Page page, GistBDItem *stack)
{
GISTPageOpaque opaque = GistPageGetOpaque(page);
if ( stack->blkno!=GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid( stack->parentlsn ) &&
XLByteLT( stack->parentlsn, opaque->nsn) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ ) {
if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) &&
XLByteLT(stack->parentlsn, opaque->nsn) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ )
{
/* split page detected, install right link to the stack */
GistBDItem *ptr = (GistBDItem*) palloc(sizeof(GistBDItem));
GistBDItem *ptr = (GistBDItem *) palloc(sizeof(GistBDItem));
ptr->blkno = opaque->rightlink;
ptr->parentlsn = stack->parentlsn;
/* link the new entry in right behind the current one */
ptr->next = stack->next;
stack->next = ptr;
}
}
}
/*
......@@ -416,38 +488,44 @@ pushStackIfSplited(Page page, GistBDItem *stack) {
* Result: a palloc'd struct containing statistical info for VACUUM displays.
*/
Datum
gistbulkdelete(PG_FUNCTION_ARGS) {
gistbulkdelete(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
void* callback_state = (void *) PG_GETARG_POINTER(2);
IndexBulkDeleteResult *result = (IndexBulkDeleteResult*)palloc0(sizeof(IndexBulkDeleteResult));
GistBDItem *stack, *ptr;
bool needLock;
stack = (GistBDItem*) palloc0(sizeof(GistBDItem));
void *callback_state = (void *) PG_GETARG_POINTER(2);
IndexBulkDeleteResult *result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
GistBDItem *stack,
*ptr;
bool needLock;
stack = (GistBDItem *) palloc0(sizeof(GistBDItem));
stack->blkno = GIST_ROOT_BLKNO;
needFullVacuum = false;
while( stack ) {
Buffer buffer = ReadBuffer(rel, stack->blkno);
Page page;
OffsetNumber i, maxoff;
while (stack)
{
Buffer buffer = ReadBuffer(rel, stack->blkno);
Page page;
OffsetNumber i,
maxoff;
IndexTuple idxtuple;
ItemId iid;
LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer);
LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer);
if ( GistPageIsLeaf(page) ) {
if (GistPageIsLeaf(page))
{
OffsetNumber todelete[MaxOffsetNumber];
int ntodelete = 0;
int ntodelete = 0;
LockBuffer(buffer, GIST_UNLOCK);
LockBuffer(buffer, GIST_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
if ( stack->blkno==GIST_ROOT_BLKNO && !GistPageIsLeaf(page) ) {
page = (Page) BufferGetPage(buffer);
if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page))
{
/* the only root can become non-leaf during relock */
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
......@@ -455,37 +533,46 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
continue;
}
/* check for split proceeded after look at parent,
we should check it after relock */
/*
* check for split proceeded after look at parent, we should check
* it after relock
*/
pushStackIfSplited(page, stack);
maxoff = PageGetMaxOffsetNumber(page);
for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) {
iid = PageGetItemId(page, i);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid);
if ( callback(&(idxtuple->t_tid), callback_state) ) {
if (callback(&(idxtuple->t_tid), callback_state))
{
PageIndexTupleDelete(page, i);
todelete[ ntodelete ] = i;
i--; maxoff--; ntodelete++;
todelete[ntodelete] = i;
i--;
maxoff--;
ntodelete++;
result->tuples_removed += 1;
Assert( maxoff == PageGetMaxOffsetNumber(page) );
} else
Assert(maxoff == PageGetMaxOffsetNumber(page));
}
else
result->num_index_tuples += 1;
}
if ( ntodelete ) {
if (ntodelete)
{
GistMarkTuplesDeleted(page);
if (!rel->rd_istemp ) {
if (!rel->rd_istemp)
{
XLogRecData *rdata;
XLogRecPtr recptr;
XLogRecPtr recptr;
gistxlogEntryUpdate *xlinfo;
rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete,
false, NULL, 0, NULL);
xlinfo = (gistxlogEntryUpdate*)rdata->data;
false, NULL, 0, NULL);
xlinfo = (gistxlogEntryUpdate *) rdata->data;
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
......@@ -493,39 +580,43 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION();
pfree( xlinfo );
pfree( rdata );
} else
pfree(xlinfo);
pfree(rdata);
}
else
PageSetLSN(page, XLogRecPtrForTemp);
WriteNoReleaseBuffer( buffer );
WriteNoReleaseBuffer(buffer);
}
} else {
}
else
{
/* check for split proceeded after look at parent */
pushStackIfSplited(page, stack);
maxoff = PageGetMaxOffsetNumber(page);
for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) {
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid);
ptr = (GistBDItem*) palloc(sizeof(GistBDItem));
ptr->blkno = ItemPointerGetBlockNumber( &(idxtuple->t_tid) );
ptr->parentlsn = PageGetLSN( page );
ptr = (GistBDItem *) palloc(sizeof(GistBDItem));
ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
ptr->parentlsn = PageGetLSN(page);
ptr->next = stack->next;
stack->next = ptr;
if ( GistTupleIsInvalid(idxtuple) )
if (GistTupleIsInvalid(idxtuple))
needFullVacuum = true;
}
}
LockBuffer( buffer, GIST_UNLOCK );
ReleaseBuffer( buffer );
LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer);
ptr = stack->next;
pfree( stack );
pfree(stack);
stack = ptr;
vacuum_delay_point();
......@@ -539,6 +630,5 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock);
PG_RETURN_POINTER( result );
PG_RETURN_POINTER(result);
}
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.8 2005/09/22 18:49:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.9 2005/09/22 20:44:36 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
......@@ -23,34 +23,38 @@
#include "utils/memutils.h"
/*
 * EntryUpdateRecord -- decoded view of an entry-update WAL record, as filled
 * in by decodeEntryUpdateRecord.
 *
 * data			points at the start of the record's data
 * len			number of index tuples found in the record
 * itup			palloc'd array of len pointers to the tuples inside the record
 * todelete		points at the offset-number array that follows the fixed
 *				header, or NULL when data->ntodelete is zero
 */
typedef struct {
gistxlogEntryUpdate *data;
typedef struct
{
gistxlogEntryUpdate *data;
int len;
IndexTuple *itup;
OffsetNumber *todelete;
IndexTuple *itup;
OffsetNumber *todelete;
} EntryUpdateRecord;
/*
 * NewPage -- one page described by a page-split record.
 *
 * header		per-page header; its blkno field gives the page's block number
 * itup			presumably the index tuples belonging to that page -- the code
 *				that fills it is not visible here, TODO confirm
 */
typedef struct {
gistxlogPage *header;
IndexTuple *itup;
typedef struct
{
gistxlogPage *header;
IndexTuple *itup;
} NewPage;
/*
 * PageSplitRecord -- decoded view of a page-split WAL record.
 *
 * data			record header; npage is the number of pages and origblkno the
 *				block that was split
 * page			array of NewPage entries, indexed 0 .. data->npage - 1
 */
typedef struct {
gistxlogPageSplit *data;
NewPage *page;
typedef struct
{
gistxlogPageSplit *data;
NewPage *page;
} PageSplitRecord;
/*
 * Tracks incomplete inserts; the idea was taken from nbtxlog.c.
 *
 * Entries are created by pushIncompleteInsert and removed by
 * forgetIncompleteInsert, which matches on (node, key).
 *
 * node			relation the insert belongs to
 * origblkno	first block of the supplied list, or the split record's
 *				origblkno
 * key			identifies the insert
 * lenblk		number of entries in blkno
 * blkno		palloc'd array of block numbers
 * lsn			LSN supplied by the caller of pushIncompleteInsert
 * path, pathlen	not set by pushIncompleteInsert; presumably filled in
 *				later -- TODO confirm
 */
typedef struct gistIncompleteInsert {
RelFileNode node;
BlockNumber origblkno; /* for splits */
ItemPointerData key;
int lenblk;
BlockNumber *blkno;
typedef struct gistIncompleteInsert
{
RelFileNode node;
BlockNumber origblkno; /* for splits */
ItemPointerData key;
int lenblk;
BlockNumber *blkno;
XLogRecPtr lsn;
BlockNumber *path;
int pathlen;
BlockNumber *path;
int pathlen;
} gistIncompleteInsert;
......@@ -63,84 +67,98 @@ static List *incomplete_inserts;
( \
ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \
ItemPointerGetBlockNumber (a) == ItemPointerGetBlockNumber(b) \
)
)
/*
 * pushIncompleteInsert -- remember an insert that has not been completed yet.
 *
 * A new gistIncompleteInsert is allocated in insertCtx and appended to
 * incomplete_inserts.  node, lsn and key are copied from the arguments.
 *
 * The list of affected blocks comes from one of two places:
 *	- if blkno is non-NULL and lenblk is non-zero, the lenblk block numbers
 *	  at blkno are copied and the first of them becomes origblkno;
 *	- otherwise xlinfo must be non-NULL, and the block numbers are taken from
 *	  the page headers of the decoded split record, with origblkno taken from
 *	  the record header.
 *
 * NOTE(review): the path and pathlen fields of the new entry are left
 * unassigned here (palloc does not zero memory); presumably they are set
 * before use elsewhere -- confirm.
 */
static void
pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
BlockNumber *blkno, int lenblk,
PageSplitRecord *xlinfo /* to extract blkno info */ ) {
BlockNumber *blkno, int lenblk,
PageSplitRecord *xlinfo /* to extract blkno info */ )
{
/* allocate the entry and the list cell in insertCtx */
MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx);
gistIncompleteInsert *ninsert = (gistIncompleteInsert*)palloc( sizeof(gistIncompleteInsert) );
gistIncompleteInsert *ninsert = (gistIncompleteInsert *) palloc(sizeof(gistIncompleteInsert));
ninsert->node = node;
ninsert->key = key;
ninsert->lsn = lsn;
ninsert->key = key;
ninsert->lsn = lsn;
if ( lenblk && blkno ) {
if (lenblk && blkno)
{
/* caller supplied the block list explicitly: take a private copy */
ninsert->lenblk = lenblk;
ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk );
memcpy(ninsert->blkno, blkno, sizeof(BlockNumber)*ninsert->lenblk);
ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
memcpy(ninsert->blkno, blkno, sizeof(BlockNumber) * ninsert->lenblk);
ninsert->origblkno = *blkno;
} else {
int i;
}
else
{
int i;
/* no explicit list: extract the block numbers from the split record */
Assert( xlinfo );
Assert(xlinfo);
ninsert->lenblk = xlinfo->data->npage;
ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk );
for(i=0;i<ninsert->lenblk;i++)
ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
for (i = 0; i < ninsert->lenblk; i++)
ninsert->blkno[i] = xlinfo->page[i].header->blkno;
ninsert->origblkno = xlinfo->data->origblkno;
}
Assert( ninsert->lenblk>0 );
Assert(ninsert->lenblk > 0);
incomplete_inserts = lappend(incomplete_inserts, ninsert);
MemoryContextSwitchTo(oldCxt);
}
/*
 * Drop the first entry of incomplete_inserts that matches both the relation
 * (node) and the item pointer (key).  The entry's block-number array and the
 * entry itself are pfree'd.  Nothing happens if no entry matches.
 */
static void
forgetIncompleteInsert(RelFileNode node, ItemPointerData key) {
forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
{
ListCell *l;
foreach(l, incomplete_inserts) {
gistIncompleteInsert *insert = (gistIncompleteInsert*) lfirst(l);
foreach(l, incomplete_inserts)
{
gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);
if (RelFileNodeEquals(node, insert->node) && ItemPointerEQ(&(insert->key), &(key)))
{
if ( RelFileNodeEquals(node, insert->node) && ItemPointerEQ( &(insert->key), &(key) ) ) {
/* found */
pfree( insert->blkno );
pfree(insert->blkno);
incomplete_inserts = list_delete_ptr(incomplete_inserts, insert);
pfree( insert );
pfree(insert);
/* the list was modified, so stop iterating right away */
break;
}
}
}
}
/*
 * Decode an entry-update WAL record into *decoded.
 *
 * decoded->data points at the gistxlogEntryUpdate header at the start of the
 * record data.  If ntodelete is non-zero, decoded->todelete points at the
 * offset array that follows the header, and addpath is set to that array's
 * MAXALIGN'ed size so the tuple area can be located; otherwise todelete is
 * NULL.  The index tuples that follow are walked twice: once to count them
 * (decoded->len) and once to store a pointer to each in the palloc'd
 * decoded->itup array.  Tuple pointers reference the record data in place.
 */
static void
decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record) {
char *begin = XLogRecGetData(record), *ptr;
int i=0, addpath=0;
decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record)
{
char *begin = XLogRecGetData(record),
*ptr;
int i = 0,
addpath = 0;
decoded->data = (gistxlogEntryUpdate*)begin;
decoded->data = (gistxlogEntryUpdate *) begin;
if ( decoded->data->ntodelete ) {
decoded->todelete = (OffsetNumber*)(begin + sizeof( gistxlogEntryUpdate ) + addpath);
addpath = MAXALIGN( sizeof(OffsetNumber) * decoded->data->ntodelete );
} else
decoded->todelete = NULL;
if (decoded->data->ntodelete)
{
decoded->todelete = (OffsetNumber *) (begin + sizeof(gistxlogEntryUpdate) + addpath);
addpath = MAXALIGN(sizeof(OffsetNumber) * decoded->data->ntodelete);
}
else
decoded->todelete = NULL;
decoded->len=0;
ptr=begin+sizeof( gistxlogEntryUpdate ) + addpath;
while( ptr - begin < record->xl_len ) {
decoded->len = 0;
ptr = begin + sizeof(gistxlogEntryUpdate) + addpath;
while (ptr - begin < record->xl_len)
{
decoded->len++;
ptr += IndexTupleSize( (IndexTuple)ptr );
}
decoded->itup=(IndexTuple*)palloc( sizeof( IndexTuple ) * decoded->len );
ptr=begin+sizeof( gistxlogEntryUpdate ) + addpath;
while( ptr - begin < record->xl_len ) {
decoded->itup[i] = (IndexTuple)ptr;
ptr += IndexTupleSize( decoded->itup[i] );
ptr += IndexTupleSize((IndexTuple) ptr);
}
decoded->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * decoded->len);
ptr = begin + sizeof(gistxlogEntryUpdate) + addpath;
while (ptr - begin < record->xl_len)
{
decoded->itup[i] = (IndexTuple) ptr;
ptr += IndexTupleSize(decoded->itup[i]);
i++;
}
}
......@@ -149,13 +167,14 @@ decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record) {
* redo any page update (except page split)
*/
static void
gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
EntryUpdateRecord xlrec;
Relation reln;
Buffer buffer;
Page page;
gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
{
EntryUpdateRecord xlrec;
Relation reln;
Buffer buffer;
Page page;
decodeEntryUpdateRecord( &xlrec, record );
decodeEntryUpdateRecord(&xlrec, record);
reln = XLogOpenRelation(xlrec.data->node);
if (!RelationIsValid(reln))
......@@ -165,49 +184,61 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
elog(PANIC, "block %u unfound", xlrec.data->blkno);
page = (Page) BufferGetPage(buffer);
if ( isnewroot ) {
if ( !PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)) ) {
if (isnewroot)
{
if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return;
}
} else {
if ( PageIsNew((PageHeader) page) )
}
else
{
if (PageIsNew((PageHeader) page))
elog(PANIC, "uninitialized page %u", xlrec.data->blkno);
if (XLByteLE(lsn, PageGetLSN(page))) {
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return;
}
}
if ( xlrec.data->isemptypage ) {
while( !PageIsEmpty(page) )
PageIndexTupleDelete( page, FirstOffsetNumber );
if ( xlrec.data->blkno == GIST_ROOT_BLKNO )
GistPageSetLeaf( page );
if (xlrec.data->isemptypage)
{
while (!PageIsEmpty(page))
PageIndexTupleDelete(page, FirstOffsetNumber);
if (xlrec.data->blkno == GIST_ROOT_BLKNO)
GistPageSetLeaf(page);
else
GistPageSetDeleted( page );
} else {
if ( isnewroot )
GistPageSetDeleted(page);
}
else
{
if (isnewroot)
GISTInitBuffer(buffer, 0);
else if ( xlrec.data->ntodelete ) {
int i;
for(i=0; i < xlrec.data->ntodelete ; i++)
else if (xlrec.data->ntodelete)
{
int i;
for (i = 0; i < xlrec.data->ntodelete; i++)
PageIndexTupleDelete(page, xlrec.todelete[i]);
if ( GistPageIsLeaf(page) )
if (GistPageIsLeaf(page))
GistMarkTuplesDeleted(page);
}
/* add tuples */
if ( xlrec.len > 0 )
if (xlrec.len > 0)
gistfillbuffer(reln, page, xlrec.itup, xlrec.len, InvalidOffsetNumber);
/* special case: leafpage, nothing to insert, nothing to delete, then
vacuum marks page */
if ( GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0 )
GistClearTuplesDeleted(page);
/*
* special case: leafpage, nothing to insert, nothing to delete, then
* vacuum marks page
*/
if (GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0)
GistClearTuplesDeleted(page);
}
PageSetLSN(page, lsn);
......@@ -216,123 +247,135 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
if ( ItemPointerIsValid( &(xlrec.data->key) ) ) {
if ( incomplete_inserts != NIL )
if (ItemPointerIsValid(&(xlrec.data->key)))
{
if (incomplete_inserts != NIL)
forgetIncompleteInsert(xlrec.data->node, xlrec.data->key);
if ( !isnewroot && xlrec.data->blkno!=GIST_ROOT_BLKNO )
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
&(xlrec.data->blkno), 1,
NULL);
if (!isnewroot && xlrec.data->blkno != GIST_ROOT_BLKNO)
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
&(xlrec.data->blkno), 1,
NULL);
}
}
/*
 * Decode a page-split WAL record into *decoded.
 *
 * decoded->data points at the gistxlogPageSplit header at the start of the
 * record data.  decoded->page is a palloc'd array with one NewPage per
 * data->npage; for each, header points at the gistxlogPage found in the
 * record and itup is a palloc'd array of pointers to the header->num index
 * tuples that follow it.  Headers and tuples are referenced in place inside
 * the record data; the Asserts check that decoding stays within xl_len.
 */
static void
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record) {
char *begin = XLogRecGetData(record), *ptr;
int j,i=0;
decoded->data = (gistxlogPageSplit*)begin;
decoded->page = (NewPage*)palloc( sizeof(NewPage) * decoded->data->npage );
ptr=begin+sizeof( gistxlogPageSplit );
for(i=0;i<decoded->data->npage;i++) {
Assert( ptr - begin < record->xl_len );
decoded->page[i].header = (gistxlogPage*)ptr;
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
{
char *begin = XLogRecGetData(record),
*ptr;
int j,
i = 0;
decoded->data = (gistxlogPageSplit *) begin;
decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
ptr = begin + sizeof(gistxlogPageSplit);
for (i = 0; i < decoded->data->npage; i++)
{
Assert(ptr - begin < record->xl_len);
decoded->page[i].header = (gistxlogPage *) ptr;
ptr += sizeof(gistxlogPage);
decoded->page[i].itup = (IndexTuple*)
palloc( sizeof(IndexTuple) * decoded->page[i].header->num );
j=0;
while(j<decoded->page[i].header->num) {
Assert( ptr - begin < record->xl_len );
decoded->page[i].itup[j] = (IndexTuple)ptr;
ptr += IndexTupleSize((IndexTuple)ptr);
decoded->page[i].itup = (IndexTuple *)
palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
j = 0;
while (j < decoded->page[i].header->num)
{
Assert(ptr - begin < record->xl_len);
decoded->page[i].itup[j] = (IndexTuple) ptr;
ptr += IndexTupleSize((IndexTuple) ptr);
j++;
}
}
}
/*
 * Redo a page split.
 *
 * The record is decoded, the relation opened (silently returning if it is
 * not valid), and the original page read once to learn whether it is a leaf.
 * Then every page listed in the record is re-initialised with that flag and
 * refilled with its tuples, unless the page's LSN shows the change is
 * already applied.  Finally, if the record carries a valid key, any matching
 * incomplete insert is forgotten and a new one is pushed for this split.
 */
static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record ) {
PageSplitRecord xlrec;
Relation reln;
Buffer buffer;
Page page;
int i;
int flags=0;
decodePageSplitRecord( &xlrec, record );
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
{
PageSplitRecord xlrec;
Relation reln;
Buffer buffer;
Page page;
int i;
int flags = 0;
decodePageSplitRecord(&xlrec, record);
reln = XLogOpenRelation(xlrec.data->node);
if (!RelationIsValid(reln))
return;
/* first of all we need to get the F_LEAF flag from the original page */
buffer = XLogReadBuffer( false, reln, xlrec.data->origblkno);
buffer = XLogReadBuffer(false, reln, xlrec.data->origblkno);
if (!BufferIsValid(buffer))
elog(PANIC, "block %u unfound", xlrec.data->origblkno);
page = (Page) BufferGetPage(buffer);
if ( PageIsNew((PageHeader) page) )
if (PageIsNew((PageHeader) page))
elog(PANIC, "uninitialized page %u", xlrec.data->origblkno);
flags = ( GistPageIsLeaf(page) ) ? F_LEAF : 0;
flags = (GistPageIsLeaf(page)) ? F_LEAF : 0;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
/* loop around all pages */
for(i=0;i<xlrec.data->npage;i++) {
NewPage *newpage = xlrec.page + i;
bool isorigpage = (xlrec.data->origblkno == newpage->header->blkno) ? true : false;
buffer = XLogReadBuffer( !isorigpage, reln, newpage->header->blkno);
for (i = 0; i < xlrec.data->npage; i++)
{
NewPage *newpage = xlrec.page + i;
bool isorigpage = (xlrec.data->origblkno == newpage->header->blkno) ? true : false;
buffer = XLogReadBuffer(!isorigpage, reln, newpage->header->blkno);
if (!BufferIsValid(buffer))
elog(PANIC, "block %u unfound", newpage->header->blkno);
page = (Page) BufferGetPage(buffer);
/* page LSN already at or past this record: nothing to redo for this page */
if (XLByteLE(lsn, PageGetLSN(page))) {
if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
continue;
}
/* ok, clear buffer */
GISTInitBuffer(buffer, flags);
GISTInitBuffer(buffer, flags);
/* and fill it */
gistfillbuffer(reln, page, newpage->itup, newpage->header->num, FirstOffsetNumber);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
}
if ( ItemPointerIsValid( &(xlrec.data->key) ) ) {
if ( incomplete_inserts != NIL )
if (ItemPointerIsValid(&(xlrec.data->key)))
{
if (incomplete_inserts != NIL)
forgetIncompleteInsert(xlrec.data->node, xlrec.data->key);
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
NULL, 0,
&xlrec);
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
NULL, 0,
&xlrec);
}
}
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
RelFileNode *node = (RelFileNode*)XLogRecGetData(record);
Relation reln;
Buffer buffer;
Page page;
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Relation reln;
Buffer buffer;
Page page;
reln = XLogOpenRelation(*node);
if (!RelationIsValid(reln))
return;
buffer = XLogReadBuffer( true, reln, GIST_ROOT_BLKNO);
buffer = XLogReadBuffer(true, reln, GIST_ROOT_BLKNO);
if (!BufferIsValid(buffer))
elog(PANIC, "root block unfound");
page = (Page) BufferGetPage(buffer);
if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page))) {
if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return;
......@@ -343,46 +386,51 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
WriteBuffer(buffer);
}
/*
 * Redo an insert-complete record: the record data is a
 * gistxlogInsertComplete header followed by an array of ItemPointerData
 * keys; each key is removed from the incomplete-insert list for the
 * relation named in the header.  The lsn argument is not used here.
 */
static void
gistRedoCompleteInsert(XLogRecPtr lsn, XLogRecord *record) {
char *begin = XLogRecGetData(record), *ptr;
gistxlogInsertComplete *xlrec;
gistRedoCompleteInsert(XLogRecPtr lsn, XLogRecord *record)
{
char *begin = XLogRecGetData(record),
*ptr;
gistxlogInsertComplete *xlrec;
xlrec = (gistxlogInsertComplete*)begin;
xlrec = (gistxlogInsertComplete *) begin;
ptr = begin + sizeof( gistxlogInsertComplete );
while( ptr - begin < record->xl_len ) {
Assert( record->xl_len - (ptr - begin) >= sizeof(ItemPointerData) );
forgetIncompleteInsert( xlrec->node, *((ItemPointerData*)ptr) );
ptr = begin + sizeof(gistxlogInsertComplete);
while (ptr - begin < record->xl_len)
{
Assert(record->xl_len - (ptr - begin) >= sizeof(ItemPointerData));
forgetIncompleteInsert(xlrec->node, *((ItemPointerData *) ptr));
ptr += sizeof(ItemPointerData);
}
}
}
void
gist_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
uint8 info = record->xl_info & ~XLR_INFO_MASK;
MemoryContext oldCxt;
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info) {
case XLOG_GIST_ENTRY_UPDATE:
case XLOG_GIST_ENTRY_DELETE:
gistRedoEntryUpdateRecord(lsn, record,false);
switch (info)
{
case XLOG_GIST_ENTRY_UPDATE:
case XLOG_GIST_ENTRY_DELETE:
gistRedoEntryUpdateRecord(lsn, record, false);
break;
case XLOG_GIST_NEW_ROOT:
gistRedoEntryUpdateRecord(lsn, record,true);
case XLOG_GIST_NEW_ROOT:
gistRedoEntryUpdateRecord(lsn, record, true);
break;
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(lsn, record);
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(lsn, record);
break;
case XLOG_GIST_CREATE_INDEX:
case XLOG_GIST_CREATE_INDEX:
gistRedoCreateIndex(lsn, record);
break;
case XLOG_GIST_INSERT_COMPLETE:
case XLOG_GIST_INSERT_COMPLETE:
gistRedoCompleteInsert(lsn, record);
break;
default:
......@@ -396,422 +444,478 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
/*
 * Append "rel <spc>/<db>/<rel>; tid <block>/<offset>" to the string already
 * in buf.  The write is an unbounded sprintf at buf + strlen(buf), so the
 * caller must supply a buffer with enough room left.
 */
static void
out_target(char *buf, RelFileNode node, ItemPointerData key)
{
sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u",
node.spcNode, node.dbNode, node.relNode,
ItemPointerGetBlockNumber(&key),
ItemPointerGetOffsetNumber(&key));
sprintf(buf + strlen(buf), "rel %u/%u/%u; tid %u/%u",
node.spcNode, node.dbNode, node.relNode,
ItemPointerGetBlockNumber(&key),
ItemPointerGetOffsetNumber(&key));
}
/*
 * Append a description of an entry-update record to buf: the target
 * (relation and tid, via out_target) followed by "; block number <blkno>".
 */
static void
out_gistxlogEntryUpdate(char *buf, gistxlogEntryUpdate *xlrec) {
out_gistxlogEntryUpdate(char *buf, gistxlogEntryUpdate *xlrec)
{
out_target(buf, xlrec->node, xlrec->key);
sprintf(buf + strlen(buf), "; block number %u",
xlrec->blkno);
sprintf(buf + strlen(buf), "; block number %u",
xlrec->blkno);
}
/*
 * Append a description of a page-split record to buf: the "page_split: "
 * tag, the target (via out_target), the original block number and the
 * number of pages it was split into.
 */
static void
out_gistxlogPageSplit(char *buf, gistxlogPageSplit *xlrec) {
out_gistxlogPageSplit(char *buf, gistxlogPageSplit *xlrec)
{
strcat(buf, "page_split: ");
out_target(buf, xlrec->node, xlrec->key);
sprintf(buf + strlen(buf), "; block number %u splits to %d pages",
xlrec->origblkno, xlrec->npage);
sprintf(buf + strlen(buf), "; block number %u splits to %d pages",
xlrec->origblkno, xlrec->npage);
}
/*
 * Append a human-readable description of a GiST WAL record to buf.
 *
 * xl_info is masked with ~XLR_INFO_MASK to obtain the GiST operation code,
 * and rec (the record data) is interpreted according to that code.  An
 * unrecognised code raises PANIC.
 */
void
gist_desc(char *buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
uint8 info = xl_info & ~XLR_INFO_MASK;
switch (info) {
case XLOG_GIST_ENTRY_UPDATE:
switch (info)
{
case XLOG_GIST_ENTRY_UPDATE:
strcat(buf, "entry_update: ");
out_gistxlogEntryUpdate(buf, (gistxlogEntryUpdate*)rec);
out_gistxlogEntryUpdate(buf, (gistxlogEntryUpdate *) rec);
break;
case XLOG_GIST_ENTRY_DELETE:
case XLOG_GIST_ENTRY_DELETE:
strcat(buf, "entry_delete: ");
out_gistxlogEntryUpdate(buf, (gistxlogEntryUpdate*)rec);
out_gistxlogEntryUpdate(buf, (gistxlogEntryUpdate *) rec);
break;
case XLOG_GIST_NEW_ROOT:
case XLOG_GIST_NEW_ROOT:
strcat(buf, "new_root: ");
out_target(buf, ((gistxlogEntryUpdate*)rec)->node, ((gistxlogEntryUpdate*)rec)->key);
out_target(buf, ((gistxlogEntryUpdate *) rec)->node, ((gistxlogEntryUpdate *) rec)->key);
break;
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit*)rec);
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
/* for create-index records the data is just the RelFileNode */
case XLOG_GIST_CREATE_INDEX:
sprintf(buf + strlen(buf), "create_index: rel %u/%u/%u",
((RelFileNode*)rec)->spcNode,
((RelFileNode*)rec)->dbNode,
((RelFileNode*)rec)->relNode);
case XLOG_GIST_CREATE_INDEX:
sprintf(buf + strlen(buf), "create_index: rel %u/%u/%u",
((RelFileNode *) rec)->spcNode,
((RelFileNode *) rec)->dbNode,
((RelFileNode *) rec)->relNode);
break;
case XLOG_GIST_INSERT_COMPLETE:
sprintf(buf + strlen(buf), "complete_insert: rel %u/%u/%u",
((gistxlogInsertComplete*)rec)->node.spcNode,
((gistxlogInsertComplete*)rec)->node.dbNode,
((gistxlogInsertComplete*)rec)->node.relNode);
case XLOG_GIST_INSERT_COMPLETE:
sprintf(buf + strlen(buf), "complete_insert: rel %u/%u/%u",
((gistxlogInsertComplete *) rec)->node.spcNode,
((gistxlogInsertComplete *) rec)->node.dbNode,
((gistxlogInsertComplete *) rec)->node.relNode);
break;
default:
elog(PANIC, "gist_desc: unknown op code %u", info);
}
}
/*
 * Build an "invalid" index tuple that points at block blkno.
 *
 * The tuple is palloc0'd with only the size returned by
 * IndexInfoFindDataOffset(0); that size is recorded in t_info, the block
 * number is stored in t_tid, and the tuple is flagged with
 * GistTupleSetInvalid.  The caller owns the returned tuple.
 */
IndexTuple
gist_form_invalid_tuple(BlockNumber blkno) {
/* we don't alloc space for null's bitmap, this is invalid tuple,
be careful in read and write code */
Size size = IndexInfoFindDataOffset(0);
IndexTuple tuple=(IndexTuple)palloc0( size );
IndexTuple
gist_form_invalid_tuple(BlockNumber blkno)
{
/*
* we don't alloc space for null's bitmap, this is invalid tuple, be
* careful in read and write code
*/
Size size = IndexInfoFindDataOffset(0);
IndexTuple tuple = (IndexTuple) palloc0(size);
tuple->t_info |= size;
ItemPointerSetBlockNumber(&(tuple->t_tid), blkno);
GistTupleSetInvalid( tuple );
GistTupleSetInvalid(tuple);
return tuple;
}
/*
 * Read block blkno of relation r through XLogReadBuffer and return the
 * buffer.  PANICs if the buffer is invalid or the page is still new
 * (uninitialized).  Passed to gistFindPath as its buffer-reading callback.
 *
 * NOTE(review): no explicit lock call appears here; presumably
 * XLogReadBuffer returns the buffer already locked -- confirm.
 */
static Buffer
gistXLogReadAndLockBuffer( Relation r, BlockNumber blkno ) {
Buffer buffer = XLogReadBuffer( false, r, blkno );
gistXLogReadAndLockBuffer(Relation r, BlockNumber blkno)
{
Buffer buffer = XLogReadBuffer(false, r, blkno);
if (!BufferIsValid(buffer))
elog(PANIC, "block %u unfound", blkno);
if ( PageIsNew( (PageHeader)(BufferGetPage(buffer)) ) )
if (PageIsNew((PageHeader) (BufferGetPage(buffer))))
elog(PANIC, "uninitialized page %u", blkno);
return buffer;
}
/*
 * Compute insert->path and insert->pathlen for an incomplete insert.
 *
 * gistFindPath is asked for the stack leading to insert->origblkno.  If one
 * is returned, its parent links are walked twice: first to count the
 * entries, then to copy each entry's blkno into a palloc'd array, starting
 * with the stack top at path[0].  If no stack is returned, a LOG message is
 * emitted and the path stays empty (pathlen 0, path NULL).
 */
static void
gixtxlogFindPath( Relation index, gistIncompleteInsert *insert ) {
gixtxlogFindPath(Relation index, gistIncompleteInsert *insert)
{
GISTInsertStack *top;
insert->pathlen = 0;
insert->path = NULL;
if ( (top=gistFindPath(index, insert->origblkno, gistXLogReadAndLockBuffer)) != NULL ) {
int i;
GISTInsertStack *ptr=top;
while(ptr) {
if ((top = gistFindPath(index, insert->origblkno, gistXLogReadAndLockBuffer)) != NULL)
{
int i;
GISTInsertStack *ptr = top;
while (ptr)
{
insert->pathlen++;
ptr = ptr->parent;
}
insert->path=(BlockNumber*)palloc( sizeof(BlockNumber) * insert->pathlen );
insert->path = (BlockNumber *) palloc(sizeof(BlockNumber) * insert->pathlen);
i=0;
i = 0;
ptr = top;
while(ptr) {
while (ptr)
{
insert->path[i] = ptr->blkno;
i++;
ptr = ptr->parent;
}
} else
}
else
elog(LOG, "lost parent for block %u", insert->origblkno);
}
/*
* Continue insert after crash. In normal situation, there isn't any incomplete
* Continue insert after crash. In normal situation, there isn't any incomplete
* inserts, but after a crash WAL may not have a record of completion.
*
* Although stored LSN in gistIncompleteInsert is a LSN of child page,
* we can compare it with LSN of parent, because parent is always locked
* while we change child page (look at gistmakedeal). So if parent's LSN is
*
* Although stored LSN in gistIncompleteInsert is a LSN of child page,
* we can compare it with LSN of parent, because parent is always locked
* while we change child page (look at gistmakedeal). So if parent's LSN is
* less than the stored LSN, then the changes in the parent have not been made yet.
*/
*/
/*
 * Outline of the steps below:
 *
 * 1. Open the relation (return silently if it is not valid) and build one
 *    invalid tuple per block listed in insert->blkno.
 * 2. If the original block is the root, rebuild the root page from those
 *    tuples, unless the root's LSN is already at or past insert->lsn.
 * 3. Otherwise compute the path with gixtxlogFindPath and, for each page on
 *    it: stop if the page LSN is already at or past insert->lsn; delete the
 *    old tuples that point at the child blocks; add the new tuples, placing
 *    them on a new page when there is no space (and, when the page being
 *    split is the root, moving the old root contents to another new page
 *    and refilling the root).  The pages touched at this level then become
 *    the set of invalid tuples to install at the next level.
 * 4. Emit a LOG message asking for VACUUM or REINDEX.
 */
static void
gistContinueInsert(gistIncompleteInsert *insert) {
IndexTuple *itup;
int i, lenitup;
Relation index;
gistContinueInsert(gistIncompleteInsert *insert)
{
IndexTuple *itup;
int i,
lenitup;
Relation index;
index = XLogOpenRelation(insert->node);
if (!RelationIsValid(index))
if (!RelationIsValid(index))
return;
/* needed vector itup never will be more than initial lenblkno+2,
because during this processing Indextuple can be only smaller */
lenitup = insert->lenblk;
itup = (IndexTuple*)palloc(sizeof(IndexTuple)*(lenitup+2 /*guarantee root split*/));
for(i=0;i<insert->lenblk;i++)
itup[i] = gist_form_invalid_tuple( insert->blkno[i] );
if ( insert->origblkno==GIST_ROOT_BLKNO ) {
/*it was split root, so we should only make new root.
it can't be simple insert into root, look at call
pushIncompleteInsert in gistRedoPageSplitRecord */
Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO);
Page page;
/*
* needed vector itup never will be more than initial lenblkno+2, because
* during this processing Indextuple can be only smaller
*/
lenitup = insert->lenblk;
itup = (IndexTuple *) palloc(sizeof(IndexTuple) * (lenitup + 2 /* guarantee root split */ ));
for (i = 0; i < insert->lenblk; i++)
itup[i] = gist_form_invalid_tuple(insert->blkno[i]);
if (insert->origblkno == GIST_ROOT_BLKNO)
{
/*
* it was split root, so we should only make new root. it can't be
* simple insert into root, look at call pushIncompleteInsert in
* gistRedoPageSplitRecord
*/
Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO);
Page page;
if (!BufferIsValid(buffer))
elog(PANIC, "root block unfound");
page = BufferGetPage(buffer);
if (XLByteLE(insert->lsn, PageGetLSN(page))) {
page = BufferGetPage(buffer);
if (XLByteLE(insert->lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return;
}
GISTInitBuffer(buffer, 0);
page = BufferGetPage(buffer);
gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber);
GISTInitBuffer(buffer, 0);
page = BufferGetPage(buffer);
gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber);
PageSetLSN(page, insert->lsn);
PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
} else {
Buffer *buffers;
Page *pages;
int numbuffer;
WriteBuffer(buffer);
}
else
{
Buffer *buffers;
Page *pages;
int numbuffer;
/* construct path */
gixtxlogFindPath( index, insert );
gixtxlogFindPath(index, insert);
Assert(insert->pathlen > 0);
Assert( insert->pathlen > 0 );
buffers = (Buffer *) palloc(sizeof(Buffer) * (insert->lenblk + 2 /* guarantee root split */ ));
pages = (Page *) palloc(sizeof(Page) * (insert->lenblk + 2 /* guarantee root split */ ));
buffers= (Buffer*) palloc( sizeof(Buffer) * (insert->lenblk+2/*guarantee root split*/) );
pages = (Page*) palloc( sizeof(Page ) * (insert->lenblk+2/*guarantee root split*/) );
for (i = 0; i < insert->pathlen; i++)
{
int j,
k,
pituplen = 0,
childfound = 0;
for(i=0;i<insert->pathlen;i++) {
int j, k, pituplen=0, childfound=0;
numbuffer=1;
buffers[numbuffer-1] = XLogReadBuffer(false, index, insert->path[i]);
if (!BufferIsValid(buffers[numbuffer-1]))
numbuffer = 1;
buffers[numbuffer - 1] = XLogReadBuffer(false, index, insert->path[i]);
if (!BufferIsValid(buffers[numbuffer - 1]))
elog(PANIC, "block %u unfound", insert->path[i]);
pages[numbuffer-1] = BufferGetPage( buffers[numbuffer-1] );
if ( PageIsNew((PageHeader)(pages[numbuffer-1])) )
pages[numbuffer - 1] = BufferGetPage(buffers[numbuffer - 1]);
if (PageIsNew((PageHeader) (pages[numbuffer - 1])))
elog(PANIC, "uninitialized page %u", insert->path[i]);
if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer-1]))) {
LockBuffer(buffers[numbuffer-1], BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffers[numbuffer-1]);
if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer - 1])))
{
LockBuffer(buffers[numbuffer - 1], BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffers[numbuffer - 1]);
return;
}
pituplen = PageGetMaxOffsetNumber(pages[numbuffer-1]);
pituplen = PageGetMaxOffsetNumber(pages[numbuffer - 1]);
/* remove old IndexTuples */
for(j=0;j<pituplen && childfound<lenitup;j++) {
for (j = 0; j < pituplen && childfound < lenitup; j++)
{
BlockNumber blkno;
ItemId iid = PageGetItemId(pages[numbuffer-1], j+FirstOffsetNumber);
IndexTuple idxtup = (IndexTuple) PageGetItem(pages[numbuffer-1], iid);
ItemId iid = PageGetItemId(pages[numbuffer - 1], j + FirstOffsetNumber);
IndexTuple idxtup = (IndexTuple) PageGetItem(pages[numbuffer - 1], iid);
blkno = ItemPointerGetBlockNumber( &(idxtup->t_tid) );
blkno = ItemPointerGetBlockNumber(&(idxtup->t_tid));
for(k=0;k<lenitup;k++)
if ( ItemPointerGetBlockNumber( &(itup[k]->t_tid) ) == blkno ) {
PageIndexTupleDelete(pages[numbuffer-1], j+FirstOffsetNumber);
j--; pituplen--;
for (k = 0; k < lenitup; k++)
if (ItemPointerGetBlockNumber(&(itup[k]->t_tid)) == blkno)
{
PageIndexTupleDelete(pages[numbuffer - 1], j + FirstOffsetNumber);
j--;
pituplen--;
childfound++;
break;
}
}
if ( gistnospace(pages[numbuffer-1], itup, lenitup) ) {
if (gistnospace(pages[numbuffer - 1], itup, lenitup))
{
/* no space left on page, so we should split */
buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW);
if (!BufferIsValid(buffers[numbuffer]))
elog(PANIC, "could not obtain new block");
GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage( buffers[numbuffer] );
gistfillbuffer( index, pages[numbuffer], itup, lenitup, FirstOffsetNumber );
GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage(buffers[numbuffer]);
gistfillbuffer(index, pages[numbuffer], itup, lenitup, FirstOffsetNumber);
numbuffer++;
if ( BufferGetBlockNumber( buffers[0] ) == GIST_ROOT_BLKNO ) {
if (BufferGetBlockNumber(buffers[0]) == GIST_ROOT_BLKNO)
{
IndexTuple *parentitup;
/* we split root, just copy tuples from old root to new page */
parentitup = gistextractbuffer(buffers[numbuffer-1], &pituplen);
/*
* we split root, just copy tuples from old root to new
* page
*/
parentitup = gistextractbuffer(buffers[numbuffer - 1], &pituplen);
/* sanity check */
if ( i+1 != insert->pathlen )
elog(PANIC,"unexpected pathlen in index \"%s\"",
RelationGetRelationName( index ));
if (i + 1 != insert->pathlen)
elog(PANIC, "unexpected pathlen in index \"%s\"",
RelationGetRelationName(index));
/* fill new page */
/* fill new page */
buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW);
if (!BufferIsValid(buffers[numbuffer]))
elog(PANIC, "could not obtain new block");
GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage( buffers[numbuffer] );
GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage(buffers[numbuffer]);
gistfillbuffer(index, pages[numbuffer], parentitup, pituplen, FirstOffsetNumber);
numbuffer++;
/* fill root page */
GISTInitBuffer(buffers[0], 0);
for(j=1;j<numbuffer;j++) {
IndexTuple tuple = gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) );
if (PageAddItem(pages[0],
(Item)tuple,
IndexTupleSize( tuple ),
(OffsetNumber)j,
LP_USED) == InvalidOffsetNumber)
for (j = 1; j < numbuffer; j++)
{
IndexTuple tuple = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
if (PageAddItem(pages[0],
(Item) tuple,
IndexTupleSize(tuple),
(OffsetNumber) j,
LP_USED) == InvalidOffsetNumber)
elog(PANIC, "failed to add item to index page in \"%s\"",
RelationGetRelationName( index ));
}
RelationGetRelationName(index));
}
}
} else
gistfillbuffer( index, pages[numbuffer-1], itup, lenitup, InvalidOffsetNumber);
}
else
gistfillbuffer(index, pages[numbuffer - 1], itup, lenitup, InvalidOffsetNumber);
/* every page touched at this level becomes an invalid tuple for the next level */
lenitup=numbuffer;
for(j=0;j<numbuffer;j++) {
itup[j]=gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) );
lenitup = numbuffer;
for (j = 0; j < numbuffer; j++)
{
itup[j] = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
PageSetLSN(pages[j], insert->lsn);
PageSetTLI(pages[j], ThisTimeLineID);
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK);
WriteBuffer( buffers[j] );
WriteBuffer(buffers[j]);
}
}
}
ereport(LOG,
(errmsg("index %u/%u/%u needs VACUUM or REINDEX to finish crash recovery",
insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
errdetail("Incomplete insertion detected during crash replay.")));
(errmsg("index %u/%u/%u needs VACUUM or REINDEX to finish crash recovery",
insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
errdetail("Incomplete insertion detected during crash replay.")));
}
/*
 * Set up GiST recovery state: empty the incomplete-insert list, create
 * insertCtx (the context pushIncompleteInsert allocates list entries in) as
 * a child of CurrentMemoryContext, and create opCtx via
 * createTempGistContext().
 */
void
gist_xlog_startup(void) {
incomplete_inserts=NIL;
gist_xlog_startup(void)
{
incomplete_inserts = NIL;
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"GiST recovery temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"GiST recovery temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
opCtx = createTempGistContext();
}
/*
 * Finish GiST recovery: run gistContinueInsert for every remaining
 * incomplete insert, resetting opCtx after each one, then restore the
 * caller's memory context and delete both opCtx and insertCtx.
 */
void
gist_xlog_cleanup(void) {
gist_xlog_cleanup(void)
{
ListCell *l;
List *reverse=NIL;
List *reverse = NIL;
MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx);
/* we should call gistContinueInsert in reverse order */
/*
 * NOTE(review): the loop below builds the copy with lappend, which keeps
 * the elements in their original order rather than reversing them --
 * confirm whether a prepend (lcons) was intended.
 */
foreach(l, incomplete_inserts)
foreach(l, incomplete_inserts)
reverse = lappend(reverse, lfirst(l));
MemoryContextSwitchTo(opCtx);
foreach(l, reverse) {
gistIncompleteInsert *insert = (gistIncompleteInsert*) lfirst(l);
foreach(l, reverse)
{
gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);
gistContinueInsert(insert);
MemoryContextReset(opCtx);
}
MemoryContextSwitchTo(oldCxt);
MemoryContextDelete(opCtx);
MemoryContextDelete(insertCtx);
MemoryContextDelete(insertCtx);
}
/*
 * Build the XLogRecData chain describing a page split.
 *
 * node/blkno identify the relation and the original block; key, if not
 * NULL, is copied into the record header, otherwise the header's key is set
 * invalid.  dist is a linked list with one SplitedPageLayout per resulting
 * page.  rdata[0] carries the palloc'd gistxlogPageSplit header; each page
 * then contributes two chained entries: its gistxlogPage block header and
 * its tuple list (ptr->lenlist bytes).  Returns the palloc'd array, whose
 * first element is the head of the chain.
 */
XLogRecData *
formSplitRdata(RelFileNode node, BlockNumber blkno,
ItemPointer key, SplitedPageLayout *dist ) {
XLogRecData *rdata;
gistxlogPageSplit *xlrec = (gistxlogPageSplit*)palloc(sizeof(gistxlogPageSplit));
SplitedPageLayout *ptr;
int npage = 0, cur=1;
ptr=dist;
while( ptr ) {
formSplitRdata(RelFileNode node, BlockNumber blkno,
ItemPointer key, SplitedPageLayout *dist)
{
XLogRecData *rdata;
gistxlogPageSplit *xlrec = (gistxlogPageSplit *) palloc(sizeof(gistxlogPageSplit));
SplitedPageLayout *ptr;
int npage = 0,
cur = 1;
ptr = dist;
while (ptr)
{
npage++;
ptr=ptr->next;
ptr = ptr->next;
}
rdata = (XLogRecData*)palloc(sizeof(XLogRecData)*(npage*2 + 2));
rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (npage * 2 + 2));
xlrec->node = node;
xlrec->origblkno = blkno;
xlrec->npage = (uint16)npage;
if ( key )
xlrec->npage = (uint16) npage;
if (key)
xlrec->key = *key;
else
ItemPointerSetInvalid( &(xlrec->key) );
ItemPointerSetInvalid(&(xlrec->key));
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) xlrec;
rdata[0].len = sizeof( gistxlogPageSplit );
rdata[0].next = NULL;
rdata[0].data = (char *) xlrec;
rdata[0].len = sizeof(gistxlogPageSplit);
rdata[0].next = NULL;
/* each new entry is linked from its predecessor; the last one keeps next == NULL */
ptr=dist;
while(ptr) {
ptr = dist;
while (ptr)
{
rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)&(ptr->block);
rdata[cur].len = sizeof(gistxlogPage);
rdata[cur-1].next = &(rdata[cur]);
rdata[cur].data = (char *) &(ptr->block);
rdata[cur].len = sizeof(gistxlogPage);
rdata[cur - 1].next = &(rdata[cur]);
cur++;
rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)(ptr->list);
rdata[cur].len = ptr->lenlist;
rdata[cur-1].next = &(rdata[cur]);
rdata[cur].next=NULL;
rdata[cur].data = (char *) (ptr->list);
rdata[cur].len = ptr->lenlist;
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].next = NULL;
cur++;
ptr=ptr->next;
ptr = ptr->next;
}
return rdata;
return rdata;
}
/*
 * formUpdateRdata -- build the XLogRecData chain for a GiST entry-update
 * WAL record.
 *
 * NOTE(review): this span is a rendered diff, so every statement appears
 * twice (old formatting first, pgindent'ed formatting second).  The logic
 * of both copies is the same; the comments below describe it once.
 *
 * A gistxlogEntryUpdate header is palloc'd and filled from node, blkno and
 * key (the key is set invalid when "key" is NULL).
 *
 * If emptypage is true, the header gets isemptypage = true and
 * ntodelete = 0, and the result is a single XLogRecData pointing at the
 * header; todelete and itup are not referenced in that case.
 *
 * Otherwise the result is an array of (2 + ituplen) XLogRecData entries
 * chained through ->next in this order:
 *		[0]		the gistxlogEntryUpdate header
 *		[1]		the todelete[] offsets, present only when ntodelete != 0
 *		...		one entry per tuple itup[0 .. ituplen-1]
 * Every entry has buffer = InvalidBuffer, and the last one has next = NULL.
 *
 * The entries store pointers to todelete and to the tuples; nothing is
 * copied here.  Returns the palloc'd array, whose first element is the head
 * of the chain.  Neither xlrec nor rdata is freed in this function.
 */
XLogRecData *
formUpdateRdata(RelFileNode node, BlockNumber blkno,
OffsetNumber *todelete, int ntodelete, bool emptypage,
IndexTuple *itup, int ituplen, ItemPointer key ) {
XLogRecData *rdata;
gistxlogEntryUpdate *xlrec = (gistxlogEntryUpdate*)palloc(sizeof(gistxlogEntryUpdate));
formUpdateRdata(RelFileNode node, BlockNumber blkno,
OffsetNumber *todelete, int ntodelete, bool emptypage,
IndexTuple *itup, int ituplen, ItemPointer key)
{
XLogRecData *rdata;
gistxlogEntryUpdate *xlrec = (gistxlogEntryUpdate *) palloc(sizeof(gistxlogEntryUpdate));
xlrec->node = node;
xlrec->blkno = blkno;
/* "key" is optional: a NULL pointer is recorded as an invalid item pointer */
if ( key )
if (key)
xlrec->key = *key;
else
ItemPointerSetInvalid( &(xlrec->key) );
if ( emptypage ) {
ItemPointerSetInvalid(&(xlrec->key));
if (emptypage)
{
/* empty-page case: the record consists of the header only */
xlrec->isemptypage = true;
xlrec->ntodelete = 0;
rdata = (XLogRecData*)palloc( sizeof(XLogRecData) );
rdata = (XLogRecData *) palloc(sizeof(XLogRecData));
rdata->buffer = InvalidBuffer;
rdata->data = (char*)xlrec;
rdata->data = (char *) xlrec;
rdata->len = sizeof(gistxlogEntryUpdate);
rdata->next = NULL;
} else {
int cur=1,i;
}
else
{
/* cur is the index of the next free rdata slot; slot 0 is the header */
int cur = 1,
i;
xlrec->isemptypage = false;
xlrec->ntodelete = ntodelete;
/* 2 + ituplen slots: header, optional todelete array, one per tuple */
rdata = (XLogRecData*) palloc( sizeof(XLogRecData) * ( 2 + ituplen ) );
rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (2 + ituplen));
rdata->buffer = InvalidBuffer;
rdata->data = (char*)xlrec;
rdata->data = (char *) xlrec;
rdata->len = sizeof(gistxlogEntryUpdate);
rdata->next = NULL;
/*
 * Offsets to delete, if any, follow the header.
 * NOTE(review): len is MAXALIGN'ed, so it is presumably rounded up and may
 * exceed sizeof(OffsetNumber) * ntodelete -- confirm that the todelete
 * buffer is large enough for the padded length.
 */
if ( ntodelete ) {
rdata[cur-1].next = &(rdata[cur]);
if (ntodelete)
{
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)todelete;
rdata[cur].len = MAXALIGN(sizeof(OffsetNumber)*ntodelete);
rdata[cur].data = (char *) todelete;
rdata[cur].len = MAXALIGN(sizeof(OffsetNumber) * ntodelete);
rdata[cur].next = NULL;
cur++;
}
/* new tuples */
/* each tuple gets its own entry, linked onto the previous entry */
for(i=0;i<ituplen;i++) {
for (i = 0; i < ituplen; i++)
{
rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)(itup[i]);
rdata[cur].len = IndexTupleSize(itup[i]);
rdata[cur].next = NULL;
rdata[cur-1].next = &(rdata[cur]);
rdata[cur].data = (char *) (itup[i]);
rdata[cur].len = IndexTupleSize(itup[i]);
rdata[cur].next = NULL;
rdata[cur - 1].next = &(rdata[cur]);
cur++;
}
}
......@@ -819,29 +923,30 @@ formUpdateRdata(RelFileNode node, BlockNumber blkno,
return rdata;
}
XLogRecPtr
gistxlogInsertCompletion(RelFileNode node, ItemPointerData *keys, int len) {
gistxlogInsertComplete xlrec;
XLogRecData rdata[2];
XLogRecPtr recptr;
XLogRecPtr
gistxlogInsertCompletion(RelFileNode node, ItemPointerData *keys, int len)
{
gistxlogInsertComplete xlrec;
XLogRecData rdata[2];
XLogRecPtr recptr;
Assert(len>0);
Assert(len > 0);
xlrec.node = node;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof( gistxlogInsertComplete );
rdata[0].next = &(rdata[1]);
rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof(gistxlogInsertComplete);
rdata[0].next = &(rdata[1]);
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) keys;
rdata[1].len = sizeof( ItemPointerData ) * len;
rdata[1].next = NULL;
rdata[1].data = (char *) keys;
rdata[1].len = sizeof(ItemPointerData) * len;
rdata[1].next = NULL;
START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_INSERT_COMPLETE, rdata);
END_CRIT_SECTION();
return recptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment