Commit b3364fc8 authored by Bruce Momjian's avatar Bruce Momjian

pgindent new GIST index code, per request from Tom.

parent 08817bdb
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.125 2005/06/30 17:52:13 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.126 2005/09/22 20:44:36 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/memutils.h" #include "utils/memutils.h"
const XLogRecPtr XLogRecPtrForTemp = { 1, 1 }; const XLogRecPtr XLogRecPtrForTemp = {1, 1};
/* Working state for gistbuild and its callback */ /* Working state for gistbuild and its callback */
typedef struct typedef struct
...@@ -46,7 +46,7 @@ static void gistdoinsert(Relation r, ...@@ -46,7 +46,7 @@ static void gistdoinsert(Relation r,
IndexTuple itup, IndexTuple itup,
GISTSTATE *GISTstate); GISTSTATE *GISTstate);
static void gistfindleaf(GISTInsertState *state, static void gistfindleaf(GISTInsertState *state,
GISTSTATE *giststate); GISTSTATE *giststate);
#define ROTATEDIST(d) do { \ #define ROTATEDIST(d) do { \
...@@ -55,7 +55,7 @@ static void gistfindleaf(GISTInsertState *state, ...@@ -55,7 +55,7 @@ static void gistfindleaf(GISTInsertState *state,
tmp->next = (d); \ tmp->next = (d); \
(d)=tmp; \ (d)=tmp; \
} while(0) } while(0)
/* /*
* Create and return a temporary memory context for use by GiST. We * Create and return a temporary memory context for use by GiST. We
...@@ -65,15 +65,15 @@ static void gistfindleaf(GISTInsertState *state, ...@@ -65,15 +65,15 @@ static void gistfindleaf(GISTInsertState *state,
* GiST code itself, to avoid the need to do some awkward manual * GiST code itself, to avoid the need to do some awkward manual
* memory management. * memory management.
*/ */
MemoryContext MemoryContext
createTempGistContext(void) createTempGistContext(void)
{ {
return AllocSetContextCreate(CurrentMemoryContext, return AllocSetContextCreate(CurrentMemoryContext,
"GiST temporary context", "GiST temporary context",
ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE); ALLOCSET_DEFAULT_MAXSIZE);
} }
/* /*
* Routine to build an index. Basically calls insert over and over. * Routine to build an index. Basically calls insert over and over.
...@@ -92,8 +92,8 @@ gistbuild(PG_FUNCTION_ARGS) ...@@ -92,8 +92,8 @@ gistbuild(PG_FUNCTION_ARGS)
Buffer buffer; Buffer buffer;
/* /*
* We expect to be called exactly once for any index relation. If * We expect to be called exactly once for any index relation. If that's
* that's not the case, big trouble's what we have. * not the case, big trouble's what we have.
*/ */
if (RelationGetNumberOfBlocks(index) != 0) if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data", elog(ERROR, "index \"%s\" already contains data",
...@@ -105,15 +105,16 @@ gistbuild(PG_FUNCTION_ARGS) ...@@ -105,15 +105,16 @@ gistbuild(PG_FUNCTION_ARGS)
/* initialize the root page */ /* initialize the root page */
buffer = gistNewBuffer(index); buffer = gistNewBuffer(index);
GISTInitBuffer(buffer, F_LEAF); GISTInitBuffer(buffer, F_LEAF);
if ( !index->rd_istemp ) { if (!index->rd_istemp)
XLogRecPtr recptr; {
XLogRecData rdata; XLogRecPtr recptr;
Page page; XLogRecData rdata;
Page page;
rdata.buffer = InvalidBuffer; rdata.buffer = InvalidBuffer;
rdata.data = (char*)&(index->rd_node); rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode); rdata.len = sizeof(RelFileNode);
rdata.next = NULL; rdata.next = NULL;
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
...@@ -124,7 +125,8 @@ gistbuild(PG_FUNCTION_ARGS) ...@@ -124,7 +125,8 @@ gistbuild(PG_FUNCTION_ARGS)
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
} else }
else
PageSetLSN(BufferGetPage(buffer), XLogRecPtrForTemp); PageSetLSN(BufferGetPage(buffer), XLogRecPtrForTemp);
LockBuffer(buffer, GIST_UNLOCK); LockBuffer(buffer, GIST_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
...@@ -132,9 +134,10 @@ gistbuild(PG_FUNCTION_ARGS) ...@@ -132,9 +134,10 @@ gistbuild(PG_FUNCTION_ARGS)
/* build the index */ /* build the index */
buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs; buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
buildstate.indtuples = 0; buildstate.indtuples = 0;
/* /*
* create a temporary memory context that is reset once for each * create a temporary memory context that is reset once for each tuple
* tuple inserted into the index * inserted into the index
*/ */
buildstate.tmpCtx = createTempGistContext(); buildstate.tmpCtx = createTempGistContext();
...@@ -185,7 +188,7 @@ gistbuildCallback(Relation index, ...@@ -185,7 +188,7 @@ gistbuildCallback(Relation index,
{ {
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i], gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
NULL, NULL, (OffsetNumber) 0, NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */, TRUE, FALSE); -1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpcentry.key; values[i] = tmpcentry.key;
} }
} }
...@@ -195,11 +198,11 @@ gistbuildCallback(Relation index, ...@@ -195,11 +198,11 @@ gistbuildCallback(Relation index,
itup->t_tid = htup->t_self; itup->t_tid = htup->t_self;
/* /*
* Since we already have the index relation locked, we call * Since we already have the index relation locked, we call gistdoinsert
* gistdoinsert directly. Normal access method calls dispatch through * directly. Normal access method calls dispatch through gistinsert,
* gistinsert, which locks the relation for write. This is the right * which locks the relation for write. This is the right thing to do if
* thing to do if you're inserting single tups, but not when you're * you're inserting single tups, but not when you're initializing the
* initializing the whole index at once. * whole index at once.
*/ */
gistdoinsert(index, itup, &buildstate->giststate); gistdoinsert(index, itup, &buildstate->giststate);
...@@ -221,6 +224,7 @@ gistinsert(PG_FUNCTION_ARGS) ...@@ -221,6 +224,7 @@ gistinsert(PG_FUNCTION_ARGS)
Datum *values = (Datum *) PG_GETARG_POINTER(1); Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2); bool *isnull = (bool *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED #ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4); Relation heapRel = (Relation) PG_GETARG_POINTER(4);
bool checkUnique = PG_GETARG_BOOL(5); bool checkUnique = PG_GETARG_BOOL(5);
...@@ -250,7 +254,7 @@ gistinsert(PG_FUNCTION_ARGS) ...@@ -250,7 +254,7 @@ gistinsert(PG_FUNCTION_ARGS)
{ {
gistcentryinit(&giststate, i, &tmpentry, values[i], gistcentryinit(&giststate, i, &tmpentry, values[i],
NULL, NULL, (OffsetNumber) 0, NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */, TRUE, FALSE); -1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpentry.key; values[i] = tmpentry.key;
} }
} }
...@@ -276,148 +280,167 @@ gistinsert(PG_FUNCTION_ARGS) ...@@ -276,148 +280,167 @@ gistinsert(PG_FUNCTION_ARGS)
static void static void
gistdoinsert(Relation r, IndexTuple itup, GISTSTATE *giststate) gistdoinsert(Relation r, IndexTuple itup, GISTSTATE *giststate)
{ {
GISTInsertState state; GISTInsertState state;
memset(&state, 0, sizeof(GISTInsertState)); memset(&state, 0, sizeof(GISTInsertState));
state.itup = (IndexTuple *) palloc(sizeof(IndexTuple)); state.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
state.itup[0] = (IndexTuple) palloc(IndexTupleSize(itup)); state.itup[0] = (IndexTuple) palloc(IndexTupleSize(itup));
memcpy(state.itup[0], itup, IndexTupleSize(itup)); memcpy(state.itup[0], itup, IndexTupleSize(itup));
state.ituplen=1; state.ituplen = 1;
state.r = r; state.r = r;
state.key = itup->t_tid; state.key = itup->t_tid;
state.needInsertComplete = true; state.needInsertComplete = true;
state.stack = (GISTInsertStack*)palloc0(sizeof(GISTInsertStack)); state.stack = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
state.stack->blkno=GIST_ROOT_BLKNO; state.stack->blkno = GIST_ROOT_BLKNO;
gistfindleaf(&state, giststate); gistfindleaf(&state, giststate);
gistmakedeal(&state, giststate); gistmakedeal(&state, giststate);
} }
static bool static bool
gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) { gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
bool is_splitted = false; {
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false; bool is_splitted = false;
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;
if ( !is_leaf ) if (!is_leaf)
/* /*
* This node's key has been modified, either because a child * This node's key has been modified, either because a child split
* split occurred or because we needed to adjust our key for * occurred or because we needed to adjust our key for an insert in a
* an insert in a child node. Therefore, remove the old * child node. Therefore, remove the old version of this node's key.
* version of this node's key.
*/ */
PageIndexTupleDelete(state->stack->page, state->stack->childoffnum); PageIndexTupleDelete(state->stack->page, state->stack->childoffnum);
if (gistnospace(state->stack->page, state->itup, state->ituplen)) if (gistnospace(state->stack->page, state->itup, state->ituplen))
{ {
/* no space for insertion */ /* no space for insertion */
IndexTuple *itvec, IndexTuple *itvec,
*newitup; *newitup;
int tlen,olen; int tlen,
SplitedPageLayout *dist=NULL, *ptr; olen;
SplitedPageLayout *dist = NULL,
*ptr;
is_splitted = true; is_splitted = true;
itvec = gistextractbuffer(state->stack->buffer, &tlen); itvec = gistextractbuffer(state->stack->buffer, &tlen);
olen=tlen; olen = tlen;
itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen); itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
newitup = gistSplit(state->r, state->stack->buffer, itvec, &tlen, &dist, giststate); newitup = gistSplit(state->r, state->stack->buffer, itvec, &tlen, &dist, giststate);
if ( !state->r->rd_istemp ) { if (!state->r->rd_istemp)
{
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData *rdata; XLogRecData *rdata;
rdata = formSplitRdata(state->r->rd_node, state->stack->blkno, rdata = formSplitRdata(state->r->rd_node, state->stack->blkno,
&(state->key), dist); &(state->key), dist);
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr); PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID); PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr=ptr->next; ptr = ptr->next;
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
} else { }
else
{
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp); PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr=ptr->next; ptr = ptr->next;
} }
} }
state->itup = newitup; state->itup = newitup;
state->ituplen = tlen; /* now tlen >= 2 */ state->ituplen = tlen; /* now tlen >= 2 */
if ( state->stack->blkno == GIST_ROOT_BLKNO ) { if (state->stack->blkno == GIST_ROOT_BLKNO)
{
gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key)); gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
state->needInsertComplete=false; state->needInsertComplete = false;
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
Page page = (Page)BufferGetPage(ptr->buffer); {
GistPageGetOpaque(page)->rightlink = ( ptr->next ) ? Page page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
ptr->next->block.blkno : InvalidBlockNumber; ptr->next->block.blkno : InvalidBlockNumber;
GistPageGetOpaque(page)->nsn = PageGetLSN(page); GistPageGetOpaque(page)->nsn = PageGetLSN(page);
LockBuffer( ptr->buffer, GIST_UNLOCK ); LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer); WriteBuffer(ptr->buffer);
ptr=ptr->next; ptr = ptr->next;
} }
} else { }
Page page; else
BlockNumber rightrightlink = InvalidBlockNumber; {
SplitedPageLayout *ourpage=NULL; Page page;
GistNSN oldnsn; BlockNumber rightrightlink = InvalidBlockNumber;
SplitedPageLayout *ourpage = NULL;
GistNSN oldnsn;
GISTPageOpaque opaque; GISTPageOpaque opaque;
/* move origpage to first in chain */ /* move origpage to first in chain */
if ( dist->block.blkno != state->stack->blkno ) { if (dist->block.blkno != state->stack->blkno)
{
ptr = dist; ptr = dist;
while(ptr->next) { while (ptr->next)
if ( ptr->next->block.blkno == state->stack->blkno ) { {
if (ptr->next->block.blkno == state->stack->blkno)
{
ourpage = ptr->next; ourpage = ptr->next;
ptr->next = ptr->next->next; ptr->next = ptr->next->next;
ourpage->next = dist; ourpage->next = dist;
dist = ourpage; dist = ourpage;
break; break;
} }
ptr=ptr->next; ptr = ptr->next;
} }
Assert( ourpage != NULL ); Assert(ourpage != NULL);
} else }
else
ourpage = dist; ourpage = dist;
/* now gets all needed data, and sets nsn's */ /* now gets all needed data, and sets nsn's */
page = (Page)BufferGetPage(ourpage->buffer); page = (Page) BufferGetPage(ourpage->buffer);
opaque = GistPageGetOpaque(page); opaque = GistPageGetOpaque(page);
rightrightlink = opaque->rightlink; rightrightlink = opaque->rightlink;
oldnsn = opaque->nsn; oldnsn = opaque->nsn;
opaque->nsn = PageGetLSN(page); opaque->nsn = PageGetLSN(page);
opaque->rightlink = ourpage->next->block.blkno; opaque->rightlink = ourpage->next->block.blkno;
/* fills and writes all new pages. /*
They aren't linked into tree yet */ * fills and writes all new pages. They aren't linked into tree yet
*/
ptr = ourpage->next; ptr = ourpage->next;
while(ptr) { while (ptr)
page = (Page)BufferGetPage(ptr->buffer); {
GistPageGetOpaque(page)->rightlink = ( ptr->next ) ? page = (Page) BufferGetPage(ptr->buffer);
GistPageGetOpaque(page)->rightlink = (ptr->next) ?
ptr->next->block.blkno : rightrightlink; ptr->next->block.blkno : rightrightlink;
/* only for last set oldnsn */ /* only for last set oldnsn */
GistPageGetOpaque(page)->nsn = ( ptr->next ) ? GistPageGetOpaque(page)->nsn = (ptr->next) ?
opaque->nsn : oldnsn; opaque->nsn : oldnsn;
LockBuffer(ptr->buffer, GIST_UNLOCK); LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer); WriteBuffer(ptr->buffer);
ptr=ptr->next; ptr = ptr->next;
} }
} }
WriteNoReleaseBuffer( state->stack->buffer ); WriteNoReleaseBuffer(state->stack->buffer);
} }
else else
{ {
...@@ -427,20 +450,23 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) { ...@@ -427,20 +450,23 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) {
gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber); gistfillbuffer(state->r, state->stack->page, state->itup, state->ituplen, InvalidOffsetNumber);
oldlsn = PageGetLSN(state->stack->page); oldlsn = PageGetLSN(state->stack->page);
if ( !state->r->rd_istemp ) { if (!state->r->rd_istemp)
OffsetNumber noffs=0, offs[ MAXALIGN( sizeof(OffsetNumber) ) / sizeof(OffsetNumber) ]; {
OffsetNumber noffs = 0,
offs[MAXALIGN(sizeof(OffsetNumber)) / sizeof(OffsetNumber)];
XLogRecPtr recptr; XLogRecPtr recptr;
XLogRecData *rdata; XLogRecData *rdata;
if ( !is_leaf ) { if (!is_leaf)
/*only on inner page we should delete previous version */ {
/* only on inner page we should delete previous version */
offs[0] = state->stack->childoffnum; offs[0] = state->stack->childoffnum;
noffs=1; noffs = 1;
} }
rdata = formUpdateRdata(state->r->rd_node, state->stack->blkno, rdata = formUpdateRdata(state->r->rd_node, state->stack->blkno,
offs, noffs, false, state->itup, state->ituplen, offs, noffs, false, state->itup, state->ituplen,
&(state->key)); &(state->key));
START_CRIT_SECTION(); START_CRIT_SECTION();
...@@ -449,69 +475,84 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) { ...@@ -449,69 +475,84 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate) {
PageSetTLI(state->stack->page, ThisTimeLineID); PageSetTLI(state->stack->page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
} else }
else
PageSetLSN(state->stack->page, XLogRecPtrForTemp); PageSetLSN(state->stack->page, XLogRecPtrForTemp);
if ( state->stack->blkno == GIST_ROOT_BLKNO ) if (state->stack->blkno == GIST_ROOT_BLKNO)
state->needInsertComplete=false; state->needInsertComplete = false;
WriteNoReleaseBuffer(state->stack->buffer); WriteNoReleaseBuffer(state->stack->buffer);
if (!is_leaf) /* small optimization: inform scan about deleting... */ if (!is_leaf) /* small optimization: inform scan about
gistadjscans(state->r, GISTOP_DEL, state->stack->blkno, * deleting... */
state->stack->childoffnum, PageGetLSN(state->stack->page), oldlsn ); gistadjscans(state->r, GISTOP_DEL, state->stack->blkno,
state->stack->childoffnum, PageGetLSN(state->stack->page), oldlsn);
if (state->ituplen > 1) if (state->ituplen > 1)
{ /* previous is_splitted==true */ { /* previous is_splitted==true */
/* /*
* child was split, so we must form union for insertion in * child was split, so we must form union for insertion in
* parent * parent
*/ */
IndexTuple newtup = gistunion(state->r, state->itup, state->ituplen, giststate); IndexTuple newtup = gistunion(state->r, state->itup, state->ituplen, giststate);
ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno); ItemPointerSetBlockNumber(&(newtup->t_tid), state->stack->blkno);
state->itup[0] = newtup; state->itup[0] = newtup;
state->ituplen = 1; state->ituplen = 1;
} else if (is_leaf) { }
/* itup[0] store key to adjust parent, we set it to valid else if (is_leaf)
to correct check by GistTupleIsInvalid macro in gistgetadjusted() */ {
/*
* itup[0] store key to adjust parent, we set it to valid to
* correct check by GistTupleIsInvalid macro in gistgetadjusted()
*/
ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno); ItemPointerSetBlockNumber(&(state->itup[0]->t_tid), state->stack->blkno);
GistTupleSetValid( state->itup[0] ); GistTupleSetValid(state->itup[0]);
} }
} }
return is_splitted; return is_splitted;
} }
/* /*
* returns stack of pages, all pages in stack are pinned, and * returns stack of pages, all pages in stack are pinned, and
* leaf is X-locked * leaf is X-locked
*/ */
static void static void
gistfindleaf(GISTInsertState *state, GISTSTATE *giststate) gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
{ {
ItemId iid; ItemId iid;
IndexTuple idxtuple; IndexTuple idxtuple;
GISTPageOpaque opaque; GISTPageOpaque opaque;
/* walk down, We don't lock page for a long time, but so /*
we should be ready to recheck path in a bad case... * walk down, We don't lock page for a long time, but so we should be
We remember, that page->lsn should never be invalid. */ * ready to recheck path in a bad case... We remember, that page->lsn
while( true ) { * should never be invalid.
*/
while (true)
{
if ( XLogRecPtrIsInvalid( state->stack->lsn ) ) if (XLogRecPtrIsInvalid(state->stack->lsn))
state->stack->buffer = ReadBuffer(state->r, state->stack->blkno); state->stack->buffer = ReadBuffer(state->r, state->stack->blkno);
LockBuffer( state->stack->buffer, GIST_SHARE ); LockBuffer(state->stack->buffer, GIST_SHARE);
state->stack->page = (Page) BufferGetPage(state->stack->buffer); state->stack->page = (Page) BufferGetPage(state->stack->buffer);
opaque = GistPageGetOpaque(state->stack->page); opaque = GistPageGetOpaque(state->stack->page);
state->stack->lsn = PageGetLSN(state->stack->page); state->stack->lsn = PageGetLSN(state->stack->page);
Assert( state->r->rd_istemp || !XLogRecPtrIsInvalid( state->stack->lsn ) ); Assert(state->r->rd_istemp || !XLogRecPtrIsInvalid(state->stack->lsn));
if ( state->stack->blkno != GIST_ROOT_BLKNO && if (state->stack->blkno != GIST_ROOT_BLKNO &&
XLByteLT( state->stack->parent->lsn, opaque->nsn) ) { XLByteLT(state->stack->parent->lsn, opaque->nsn))
/* caused split non-root page is detected, go up to parent to choose best child */ {
LockBuffer( state->stack->buffer, GIST_UNLOCK ); /*
ReleaseBuffer( state->stack->buffer ); * caused split non-root page is detected, go up to parent to
* choose best child
*/
LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent; state->stack = state->stack->parent;
continue; continue;
} }
...@@ -519,62 +560,76 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate) ...@@ -519,62 +560,76 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
if (!GistPageIsLeaf(state->stack->page)) if (!GistPageIsLeaf(state->stack->page))
{ {
/* /*
* This is an internal page, so continue to walk down the * This is an internal page, so continue to walk down the tree. We
* tree. We find the child node that has the minimum insertion * find the child node that has the minimum insertion penalty and
* penalty and recursively invoke ourselves to modify that * recursively invoke ourselves to modify that node. Once the
* node. Once the recursive call returns, we may need to * recursive call returns, we may need to adjust the parent node
* adjust the parent node for two reasons: the child node * for two reasons: the child node split, or the key in this node
* split, or the key in this node needs to be adjusted for the * needs to be adjusted for the newly inserted key below us.
* newly inserted key below us. */
*/ GISTInsertStack *item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
GISTInsertStack *item=(GISTInsertStack*)palloc0(sizeof(GISTInsertStack));
state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate); state->stack->childoffnum = gistchoose(state->r, state->stack->page, state->itup[0], giststate);
iid = PageGetItemId(state->stack->page, state->stack->childoffnum); iid = PageGetItemId(state->stack->page, state->stack->childoffnum);
idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid); idxtuple = (IndexTuple) PageGetItem(state->stack->page, iid);
item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); item->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
LockBuffer( state->stack->buffer, GIST_UNLOCK ); LockBuffer(state->stack->buffer, GIST_UNLOCK);
item->parent = state->stack; item->parent = state->stack;
item->child = NULL; item->child = NULL;
if ( state->stack ) if (state->stack)
state->stack->child = item; state->stack->child = item;
state->stack = item; state->stack = item;
} else { }
else
{
/* be careful, during unlock/lock page may be changed... */ /* be careful, during unlock/lock page may be changed... */
LockBuffer( state->stack->buffer, GIST_UNLOCK ); LockBuffer(state->stack->buffer, GIST_UNLOCK);
LockBuffer( state->stack->buffer, GIST_EXCLUSIVE ); LockBuffer(state->stack->buffer, GIST_EXCLUSIVE);
state->stack->page = (Page) BufferGetPage(state->stack->buffer); state->stack->page = (Page) BufferGetPage(state->stack->buffer);
opaque = GistPageGetOpaque(state->stack->page); opaque = GistPageGetOpaque(state->stack->page);
if ( state->stack->blkno == GIST_ROOT_BLKNO ) { if (state->stack->blkno == GIST_ROOT_BLKNO)
/* the only page can become inner instead of leaf is a root page, {
so for root we should recheck it */ /*
if ( !GistPageIsLeaf(state->stack->page) ) { * the only page can become inner instead of leaf is a root
/* very rarely situation: during unlock/lock index * page, so for root we should recheck it
with number of pages = 1 was increased */ */
LockBuffer( state->stack->buffer, GIST_UNLOCK ); if (!GistPageIsLeaf(state->stack->page))
{
/*
* very rarely situation: during unlock/lock index with
* number of pages = 1 was increased
*/
LockBuffer(state->stack->buffer, GIST_UNLOCK);
continue; continue;
} }
/* we don't need to check root split, because checking
leaf/inner is enough to recognize split for root */ /*
* we don't need to check root split, because checking
} else if ( XLByteLT( state->stack->parent->lsn, opaque->nsn) ) { * leaf/inner is enough to recognize split for root
/* detecting split during unlock/lock, so we should */
find better child on parent*/
}
else if (XLByteLT(state->stack->parent->lsn, opaque->nsn))
{
/*
* detecting split during unlock/lock, so we should find
* better child on parent
*/
/* forget buffer */ /* forget buffer */
LockBuffer( state->stack->buffer, GIST_UNLOCK ); LockBuffer(state->stack->buffer, GIST_UNLOCK);
ReleaseBuffer( state->stack->buffer ); ReleaseBuffer(state->stack->buffer);
state->stack = state->stack->parent; state->stack = state->stack->parent;
continue; continue;
} }
state->stack->lsn = PageGetLSN( state->stack->page ); state->stack->lsn = PageGetLSN(state->stack->page);
/* ok we found a leaf page and it X-locked */ /* ok we found a leaf page and it X-locked */
break; break;
} }
...@@ -587,10 +642,12 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate) ...@@ -587,10 +642,12 @@ gistfindleaf(GISTInsertState *state, GISTSTATE *giststate)
* Should have the same interface as XLogReadBuffer * Should have the same interface as XLogReadBuffer
*/ */
static Buffer static Buffer
gistReadAndLockBuffer( Relation r, BlockNumber blkno ) { gistReadAndLockBuffer(Relation r, BlockNumber blkno)
Buffer buffer = ReadBuffer( r, blkno ); {
LockBuffer( buffer, GIST_SHARE ); Buffer buffer = ReadBuffer(r, blkno);
return buffer;
LockBuffer(buffer, GIST_SHARE);
return buffer;
} }
/* /*
...@@ -598,38 +655,45 @@ gistReadAndLockBuffer( Relation r, BlockNumber blkno ) { ...@@ -598,38 +655,45 @@ gistReadAndLockBuffer( Relation r, BlockNumber blkno ) {
* to prevent deadlocks, it should lock only one page simultaneously. * to prevent deadlocks, it should lock only one page simultaneously.
* Function is used in recovery and usual mode, so it should work with different * Function is used in recovery and usual mode, so it should work with different
* read functions (gistReadAndLockBuffer and XLogReadBuffer) * read functions (gistReadAndLockBuffer and XLogReadBuffer)
* returns from the beginning of closest parent; * returns from the beginning of closest parent;
*/ */
GISTInsertStack* GISTInsertStack *
gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, BlockNumber) ) { gistFindPath(Relation r, BlockNumber child, Buffer (*myReadBuffer) (Relation, BlockNumber))
Page page; {
Buffer buffer; Page page;
OffsetNumber i, maxoff; Buffer buffer;
ItemId iid; OffsetNumber i,
IndexTuple idxtuple; maxoff;
GISTInsertStack *top, *tail, *ptr; ItemId iid;
BlockNumber blkno; IndexTuple idxtuple;
GISTInsertStack *top,
top = tail = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) ); *tail,
*ptr;
BlockNumber blkno;
top = tail = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
top->blkno = GIST_ROOT_BLKNO; top->blkno = GIST_ROOT_BLKNO;
while( top && top->blkno != child ) { while (top && top->blkno != child)
buffer = myReadBuffer(r, top->blkno); /* buffer locked */ {
page = (Page)BufferGetPage( buffer ); buffer = myReadBuffer(r, top->blkno); /* buffer locked */
page = (Page) BufferGetPage(buffer);
if ( GistPageIsLeaf(page) ) { if (GistPageIsLeaf(page))
{
/* we can safely go away, only leaf pages follow */ /* we can safely go away, only leaf pages follow */
LockBuffer( buffer, GIST_UNLOCK ); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer( buffer ); ReleaseBuffer(buffer);
return NULL; return NULL;
} }
top->lsn = PageGetLSN(page); top->lsn = PageGetLSN(page);
if ( top->parent && XLByteLT( top->parent->lsn, GistPageGetOpaque(page)->nsn) && if (top->parent && XLByteLT(top->parent->lsn, GistPageGetOpaque(page)->nsn) &&
GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */) { GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
{
/* page was split while we were thinking... */ /* page was split while we were thinking... */
ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) ); ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = GistPageGetOpaque(page)->rightlink; ptr->blkno = GistPageGetOpaque(page)->rightlink;
ptr->childoffnum = InvalidOffsetNumber; ptr->childoffnum = InvalidOffsetNumber;
ptr->parent = top; ptr->parent = top;
...@@ -637,119 +701,143 @@ gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, B ...@@ -637,119 +701,143 @@ gistFindPath( Relation r, BlockNumber child, Buffer (*myReadBuffer)(Relation, B
tail->next = ptr; tail->next = ptr;
tail = ptr; tail = ptr;
} }
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
for(i = FirstOffsetNumber; i<= maxoff; i = OffsetNumberNext(i)) { for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i); iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid); idxtuple = (IndexTuple) PageGetItem(page, iid);
blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
if ( blkno == child ) { if (blkno == child)
{
OffsetNumber poff = InvalidOffsetNumber; OffsetNumber poff = InvalidOffsetNumber;
/* make childs links */ /* make childs links */
ptr = top; ptr = top;
while( ptr->parent ) { while (ptr->parent)
{
/* set child link */ /* set child link */
ptr->parent->child = ptr; ptr->parent->child = ptr;
/* move childoffnum.. */ /* move childoffnum.. */
if ( ptr == top ) { if (ptr == top)
/*first iteration*/ {
/* first iteration */
poff = ptr->parent->childoffnum; poff = ptr->parent->childoffnum;
ptr->parent->childoffnum = ptr->childoffnum; ptr->parent->childoffnum = ptr->childoffnum;
} else { }
else
{
OffsetNumber tmp = ptr->parent->childoffnum; OffsetNumber tmp = ptr->parent->childoffnum;
ptr->parent->childoffnum = poff; ptr->parent->childoffnum = poff;
poff = tmp; poff = tmp;
} }
ptr = ptr->parent; ptr = ptr->parent;
} }
top->childoffnum = i; top->childoffnum = i;
LockBuffer( buffer, GIST_UNLOCK ); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer( buffer ); ReleaseBuffer(buffer);
return top; return top;
} else { }
else
{
/* Install next inner page to the end of stack */ /* Install next inner page to the end of stack */
ptr = (GISTInsertStack*)palloc0( sizeof(GISTInsertStack) ); ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
ptr->blkno = blkno; ptr->blkno = blkno;
ptr->childoffnum = i; /* set offsetnumber of child to child !!! */ ptr->childoffnum = i; /* set offsetnumber of child to child
* !!! */
ptr->parent = top; ptr->parent = top;
ptr->next = NULL; ptr->next = NULL;
tail->next = ptr; tail->next = ptr;
tail = ptr; tail = ptr;
} }
} }
LockBuffer( buffer, GIST_UNLOCK ); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer( buffer ); ReleaseBuffer(buffer);
top = top->next; top = top->next;
} }
return NULL; return NULL;
} }
/* /*
* Returns X-locked parent of stack page * Returns X-locked parent of stack page
*/ */
static void static void
gistFindCorrectParent( Relation r, GISTInsertStack *child ) { gistFindCorrectParent(Relation r, GISTInsertStack *child)
GISTInsertStack *parent = child->parent; {
GISTInsertStack *parent = child->parent;
LockBuffer( parent->buffer, GIST_EXCLUSIVE );
parent->page = (Page)BufferGetPage( parent->buffer ); LockBuffer(parent->buffer, GIST_EXCLUSIVE);
parent->page = (Page) BufferGetPage(parent->buffer);
/* here we don't need to distinguish between split and page update */ /* here we don't need to distinguish between split and page update */
if ( parent->childoffnum == InvalidOffsetNumber || !XLByteEQ( parent->lsn, PageGetLSN(parent->page) ) ) { if (parent->childoffnum == InvalidOffsetNumber || !XLByteEQ(parent->lsn, PageGetLSN(parent->page)))
{
/* parent is changed, look child in right links until found */ /* parent is changed, look child in right links until found */
OffsetNumber i, maxoff; OffsetNumber i,
ItemId iid; maxoff;
IndexTuple idxtuple; ItemId iid;
GISTInsertStack *ptr; IndexTuple idxtuple;
GISTInsertStack *ptr;
while(true) {
while (true)
{
maxoff = PageGetMaxOffsetNumber(parent->page); maxoff = PageGetMaxOffsetNumber(parent->page);
for(i = FirstOffsetNumber; i<= maxoff; i = OffsetNumberNext(i)) { for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(parent->page, i); iid = PageGetItemId(parent->page, i);
idxtuple = (IndexTuple) PageGetItem(parent->page, iid); idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
if ( ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno ) { if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
{
/* yes!!, found */ /* yes!!, found */
parent->childoffnum = i; parent->childoffnum = i;
return; return;
} }
} }
parent->blkno = GistPageGetOpaque( parent->page )->rightlink; parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
LockBuffer( parent->buffer, GIST_UNLOCK ); LockBuffer(parent->buffer, GIST_UNLOCK);
ReleaseBuffer( parent->buffer ); ReleaseBuffer(parent->buffer);
if ( parent->blkno == InvalidBlockNumber ) if (parent->blkno == InvalidBlockNumber)
/* end of chain and still didn't found parent,
It's very-very rare situation when root splited */ /*
* end of chain and still didn't found parent, It's very-very
* rare situation when root splited
*/
break; break;
parent->buffer = ReadBuffer( r, parent->blkno ); parent->buffer = ReadBuffer(r, parent->blkno);
LockBuffer( parent->buffer, GIST_EXCLUSIVE ); LockBuffer(parent->buffer, GIST_EXCLUSIVE);
parent->page = (Page)BufferGetPage( parent->buffer ); parent->page = (Page) BufferGetPage(parent->buffer);
} }
/* awful!!, we need search tree to find parent ... , /*
but before we should release all old parent */ * awful!!, we need search tree to find parent ... , but before we
* should release all old parent
*/
ptr = child->parent->parent; /* child->parent already released above */ ptr = child->parent->parent; /* child->parent already released
while(ptr) { * above */
ReleaseBuffer( ptr->buffer ); while (ptr)
{
ReleaseBuffer(ptr->buffer);
ptr = ptr->parent; ptr = ptr->parent;
} }
/* ok, find new path */ /* ok, find new path */
ptr = parent = gistFindPath(r, child->blkno, gistReadAndLockBuffer); ptr = parent = gistFindPath(r, child->blkno, gistReadAndLockBuffer);
Assert( ptr!=NULL ); Assert(ptr != NULL);
/* read all buffers as supposed in caller */ /* read all buffers as supposed in caller */
while( ptr ) { while (ptr)
ptr->buffer = ReadBuffer( r, ptr->blkno ); {
ptr->page = (Page)BufferGetPage( ptr->buffer ); ptr->buffer = ReadBuffer(r, ptr->blkno);
ptr->page = (Page) BufferGetPage(ptr->buffer);
ptr = ptr->parent; ptr = ptr->parent;
} }
...@@ -758,78 +846,90 @@ gistFindCorrectParent( Relation r, GISTInsertStack *child ) { ...@@ -758,78 +846,90 @@ gistFindCorrectParent( Relation r, GISTInsertStack *child ) {
parent->child = child; parent->child = child;
/* make recursive call to normal processing */ /* make recursive call to normal processing */
gistFindCorrectParent( r, child ); gistFindCorrectParent(r, child);
} }
return; return;
} }
/*
 * gistmakedeal() -- push an insertion up the insert stack.
 *
 * Starting at state->stack, each pass places the pending tuple(s) on the
 * current page with gistplacetopage(), drops that page's lock and pin, and
 * moves to its parent.  The climb ends when the stack is exhausted or when
 * the parent's downlink key needs no adjustment.  Any buffers still on the
 * stack are then unpinned and, for non-temp relations that asked for it,
 * an insert-completion WAL record is written.
 */
void
gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
{
	int			is_splitted;

	for (;;)
	{
		GISTInsertStack *finished;
		ItemId		parentlp;
		IndexTuple	parentkey,
					adjusted;

		/*
		 * The parent must be X-locked before the child is processed;
		 * gistFindCorrectParent is expected to find it and lock it.
		 */
		if (state->stack->parent != NULL)
			gistFindCorrectParent(state->r, state->stack);

		/*
		 * Per the original author's note: after this call, if the page was
		 * split, itup holds one key per resulting page; otherwise itup
		 * holds what is needed to adjust the current key.
		 */
		is_splitted = gistplacetopage(state, giststate);

		/*
		 * Re-read the stack top only now, after the calls above, and let go
		 * of the child: its parent is already locked.
		 */
		finished = state->stack;
		LockBuffer(finished->buffer, GIST_UNLOCK);
		ReleaseBuffer(finished->buffer);

		/* pop: continue with the parent page */
		state->stack = finished->parent;

		/* nothing above us, we are done climbing */
		if (state->stack == NULL)
			break;

		/* after a split the new keys must always be placed on the parent */
		if (is_splitted)
			continue;

		/*
		 * No split: check whether the parent's tuple for this child has to
		 * be enlarged at all.
		 */
		parentlp = PageGetItemId(state->stack->page, state->stack->childoffnum);
		parentkey = (IndexTuple) PageGetItem(state->stack->page, parentlp);
		adjusted = gistgetadjusted(state->r, parentkey, state->itup[0], giststate);

		if (adjusted == NULL)
		{
			/* key is already good enough: unlock the parent and stop */
			LockBuffer(state->stack->buffer, GIST_UNLOCK);
			break;
		}

		state->itup[0] = adjusted;
	}

	/* drop the pins on every buffer still left on the stack */
	for (; state->stack != NULL; state->stack = state->stack->parent)
		ReleaseBuffer(state->stack->buffer);

	/* tell xlog that the insert is complete, when that is required */
	if (!state->needInsertComplete || state->r->rd_istemp)
		return;

	gistxlogInsertCompletion(state->r->rd_node, &(state->key), 1);
}
/*
 * Rewrite, in place, each of the first len entries of arr with the value
 * found in reasloffset[] at that entry's index.  Entries are processed
 * front to back.
 */
static void
gistToRealOffset(OffsetNumber *arr, int len, OffsetNumber *reasloffset)
{
	int			pos = 0;

	while (pos < len)
	{
		OffsetNumber fake = arr[pos];

		arr[pos] = reasloffset[fake];
		pos++;
	}
}
/* /*
...@@ -840,7 +940,7 @@ gistSplit(Relation r, ...@@ -840,7 +940,7 @@ gistSplit(Relation r,
Buffer buffer, Buffer buffer,
IndexTuple *itup, /* contains compressed entry */ IndexTuple *itup, /* contains compressed entry */
int *len, int *len,
SplitedPageLayout **dist, SplitedPageLayout **dist,
GISTSTATE *giststate) GISTSTATE *giststate)
{ {
Page p; Page p;
...@@ -856,24 +956,25 @@ gistSplit(Relation r, ...@@ -856,24 +956,25 @@ gistSplit(Relation r,
GISTPageOpaque opaque; GISTPageOpaque opaque;
GIST_SPLITVEC v; GIST_SPLITVEC v;
GistEntryVector *entryvec; GistEntryVector *entryvec;
int i, fakeoffset, int i,
fakeoffset,
nlen; nlen;
OffsetNumber *realoffset; OffsetNumber *realoffset;
IndexTuple *cleaneditup = itup; IndexTuple *cleaneditup = itup;
int lencleaneditup = *len; int lencleaneditup = *len;
p = (Page) BufferGetPage(buffer); p = (Page) BufferGetPage(buffer);
opaque = GistPageGetOpaque(p); opaque = GistPageGetOpaque(p);
/* /*
* The root of the tree is the first block in the relation. If we're * The root of the tree is the first block in the relation. If we're
* about to split the root, we need to do some hocus-pocus to enforce * about to split the root, we need to do some hocus-pocus to enforce this
* this guarantee. * guarantee.
*/ */
if (BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO) if (BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
{ {
leftbuf = gistNewBuffer(r); leftbuf = gistNewBuffer(r);
GISTInitBuffer(leftbuf, opaque->flags&F_LEAF); GISTInitBuffer(leftbuf, opaque->flags & F_LEAF);
lbknum = BufferGetBlockNumber(leftbuf); lbknum = BufferGetBlockNumber(leftbuf);
left = (Page) BufferGetPage(leftbuf); left = (Page) BufferGetPage(leftbuf);
} }
...@@ -886,7 +987,7 @@ gistSplit(Relation r, ...@@ -886,7 +987,7 @@ gistSplit(Relation r,
} }
rightbuf = gistNewBuffer(r); rightbuf = gistNewBuffer(r);
GISTInitBuffer(rightbuf, opaque->flags&F_LEAF); GISTInitBuffer(rightbuf, opaque->flags & F_LEAF);
rbknum = BufferGetBlockNumber(rightbuf); rbknum = BufferGetBlockNumber(rightbuf);
right = (Page) BufferGetPage(rightbuf); right = (Page) BufferGetPage(rightbuf);
...@@ -901,10 +1002,11 @@ gistSplit(Relation r, ...@@ -901,10 +1002,11 @@ gistSplit(Relation r,
Datum datum; Datum datum;
bool IsNull; bool IsNull;
if (!GistPageIsLeaf(p) && GistTupleIsInvalid( itup[i - 1] )) { if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup[i - 1]))
{
entryvec->n--; entryvec->n--;
/* remember position of invalid tuple */ /* remember position of invalid tuple */
realoffset[ entryvec->n ] = i; realoffset[entryvec->n] = i;
continue; continue;
} }
...@@ -913,54 +1015,62 @@ gistSplit(Relation r, ...@@ -913,54 +1015,62 @@ gistSplit(Relation r,
datum, r, p, i, datum, r, p, i,
ATTSIZE(datum, giststate->tupdesc, 1, IsNull), ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
FALSE, IsNull); FALSE, IsNull);
realoffset[ fakeoffset ] = i; realoffset[fakeoffset] = i;
fakeoffset++; fakeoffset++;
} }
/* /*
* if it was invalid tuple then we need special processing. If * if it was invalid tuple then we need special processing. If it's
* it's possible, we move all invalid tuples on right page. * possible, we move all invalid tuples on right page. We should remember,
* We should remember, that union with invalid tuples * that union with invalid tuples is a invalid tuple.
* is a invalid tuple. */
*/ if (entryvec->n != *len + 1)
if ( entryvec->n != *len + 1 ) { {
lencleaneditup = entryvec->n-1; lencleaneditup = entryvec->n - 1;
cleaneditup = (IndexTuple*)palloc(lencleaneditup * sizeof(IndexTuple)); cleaneditup = (IndexTuple *) palloc(lencleaneditup * sizeof(IndexTuple));
for(i=1;i<entryvec->n;i++) for (i = 1; i < entryvec->n; i++)
cleaneditup[i-1] = itup[ realoffset[ i ]-1 ]; cleaneditup[i - 1] = itup[realoffset[i] - 1];
if ( gistnospace( left, cleaneditup, lencleaneditup ) ) { if (gistnospace(left, cleaneditup, lencleaneditup))
/* no space on left to put all good tuples, so picksplit */ {
/* no space on left to put all good tuples, so picksplit */
gistUserPicksplit(r, entryvec, &v, cleaneditup, lencleaneditup, giststate); gistUserPicksplit(r, entryvec, &v, cleaneditup, lencleaneditup, giststate);
v.spl_leftvalid = true; v.spl_leftvalid = true;
v.spl_rightvalid = false; v.spl_rightvalid = false;
gistToRealOffset( v.spl_left, v.spl_nleft, realoffset ); gistToRealOffset(v.spl_left, v.spl_nleft, realoffset);
gistToRealOffset( v.spl_right, v.spl_nright, realoffset ); gistToRealOffset(v.spl_right, v.spl_nright, realoffset);
} else { }
/* we can try to store all valid tuples on one page */ else
v.spl_right = (OffsetNumber*)palloc( entryvec->n * sizeof(OffsetNumber) ); {
v.spl_left = (OffsetNumber*)palloc( entryvec->n * sizeof(OffsetNumber) ); /* we can try to store all valid tuples on one page */
v.spl_right = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
if ( lencleaneditup==0 ) { v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
if (lencleaneditup == 0)
{
/* all tuples are invalid, so moves half of its to right */ /* all tuples are invalid, so moves half of its to right */
v.spl_leftvalid = v.spl_rightvalid = false; v.spl_leftvalid = v.spl_rightvalid = false;
v.spl_nright = 0; v.spl_nright = 0;
v.spl_nleft = 0; v.spl_nleft = 0;
for(i=1;i<=*len;i++) for (i = 1; i <= *len; i++)
if ( i-1<*len/2 ) if (i - 1 < *len / 2)
v.spl_left[ v.spl_nleft++ ] = i; v.spl_left[v.spl_nleft++] = i;
else else
v.spl_right[ v.spl_nright++ ] = i; v.spl_right[v.spl_nright++] = i;
} else { }
/* we will not call gistUserPicksplit, just put good else
tuples on left and invalid on right */ {
/*
* we will not call gistUserPicksplit, just put good tuples on
* left and invalid on right
*/
v.spl_nleft = lencleaneditup; v.spl_nleft = lencleaneditup;
v.spl_nright = 0; v.spl_nright = 0;
for(i=1;i<entryvec->n;i++) for (i = 1; i < entryvec->n; i++)
v.spl_left[i-1] = i; v.spl_left[i - 1] = i;
gistToRealOffset( v.spl_left, v.spl_nleft, realoffset ); gistToRealOffset(v.spl_left, v.spl_nleft, realoffset);
v.spl_lattr[0] = v.spl_ldatum = (Datum)0; v.spl_lattr[0] = v.spl_ldatum = (Datum) 0;
v.spl_rattr[0] = v.spl_rdatum = (Datum)0; v.spl_rattr[0] = v.spl_rdatum = (Datum) 0;
v.spl_lisnull[0] = true; v.spl_lisnull[0] = true;
v.spl_risnull[0] = true; v.spl_risnull[0] = true;
gistunionsubkey(r, giststate, itup, &v, true); gistunionsubkey(r, giststate, itup, &v, true);
...@@ -968,16 +1078,18 @@ gistSplit(Relation r, ...@@ -968,16 +1078,18 @@ gistSplit(Relation r,
v.spl_rightvalid = false; v.spl_rightvalid = false;
} }
} }
} else { }
/* there is no invalid tuples, so usial processing */ else
{
/* there is no invalid tuples, so usial processing */
gistUserPicksplit(r, entryvec, &v, itup, *len, giststate); gistUserPicksplit(r, entryvec, &v, itup, *len, giststate);
v.spl_leftvalid = v.spl_rightvalid = true; v.spl_leftvalid = v.spl_rightvalid = true;
} }
/* form left and right vector */ /* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len+1)); lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len + 1));
rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len+1)); rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (*len + 1));
for (i = 0; i < v.spl_nleft; i++) for (i = 0; i < v.spl_nleft; i++)
lvectup[i] = itup[v.spl_left[i] - 1]; lvectup[i] = itup[v.spl_left[i] - 1];
...@@ -986,7 +1098,8 @@ gistSplit(Relation r, ...@@ -986,7 +1098,8 @@ gistSplit(Relation r,
rvectup[i] = itup[v.spl_right[i] - 1]; rvectup[i] = itup[v.spl_right[i] - 1];
/* place invalid tuples on right page if itsn't done yet */ /* place invalid tuples on right page if itsn't done yet */
for (fakeoffset = entryvec->n; fakeoffset < *len+1 && lencleaneditup; fakeoffset++) { for (fakeoffset = entryvec->n; fakeoffset < *len + 1 && lencleaneditup; fakeoffset++)
{
rvectup[v.spl_nright++] = itup[realoffset[fakeoffset] - 1]; rvectup[v.spl_nright++] = itup[realoffset[fakeoffset] - 1];
} }
...@@ -999,26 +1112,27 @@ gistSplit(Relation r, ...@@ -999,26 +1112,27 @@ gistSplit(Relation r,
} }
else else
{ {
char *ptr; char *ptr;
gistfillbuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber); gistfillbuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
/* XLOG stuff */ /* XLOG stuff */
ROTATEDIST(*dist); ROTATEDIST(*dist);
(*dist)->block.blkno = BufferGetBlockNumber(rightbuf); (*dist)->block.blkno = BufferGetBlockNumber(rightbuf);
(*dist)->block.num = v.spl_nright; (*dist)->block.num = v.spl_nright;
(*dist)->list = (IndexTupleData*)palloc( BLCKSZ ); (*dist)->list = (IndexTupleData *) palloc(BLCKSZ);
ptr = (char*) ( (*dist)->list ); ptr = (char *) ((*dist)->list);
for(i=0;i<v.spl_nright;i++) { for (i = 0; i < v.spl_nright; i++)
memcpy( ptr, rvectup[i], IndexTupleSize( rvectup[i] ) ); {
ptr += IndexTupleSize( rvectup[i] ); memcpy(ptr, rvectup[i], IndexTupleSize(rvectup[i]));
ptr += IndexTupleSize(rvectup[i]);
} }
(*dist)->lenlist = ptr - ( (char*) ( (*dist)->list ) ); (*dist)->lenlist = ptr - ((char *) ((*dist)->list));
(*dist)->buffer = rightbuf; (*dist)->buffer = rightbuf;
nlen = 1; nlen = 1;
newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1); newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
newtup[0] = ( v.spl_rightvalid ) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull) newtup[0] = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull)
: gist_form_invalid_tuple( rbknum ); : gist_form_invalid_tuple(rbknum);
ItemPointerSetBlockNumber(&(newtup[0]->t_tid), rbknum); ItemPointerSetBlockNumber(&(newtup[0]->t_tid), rbknum);
} }
...@@ -1034,34 +1148,35 @@ gistSplit(Relation r, ...@@ -1034,34 +1148,35 @@ gistSplit(Relation r,
} }
else else
{ {
char *ptr; char *ptr;
gistfillbuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber); gistfillbuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
/* XLOG stuff */ /* XLOG stuff */
ROTATEDIST(*dist); ROTATEDIST(*dist);
(*dist)->block.blkno = BufferGetBlockNumber(leftbuf); (*dist)->block.blkno = BufferGetBlockNumber(leftbuf);
(*dist)->block.num = v.spl_nleft; (*dist)->block.num = v.spl_nleft;
(*dist)->list = (IndexTupleData*)palloc( BLCKSZ ); (*dist)->list = (IndexTupleData *) palloc(BLCKSZ);
ptr = (char*) ( (*dist)->list ); ptr = (char *) ((*dist)->list);
for(i=0;i<v.spl_nleft;i++) { for (i = 0; i < v.spl_nleft; i++)
memcpy( ptr, lvectup[i], IndexTupleSize( lvectup[i] ) ); {
ptr += IndexTupleSize( lvectup[i] ); memcpy(ptr, lvectup[i], IndexTupleSize(lvectup[i]));
ptr += IndexTupleSize(lvectup[i]);
} }
(*dist)->lenlist = ptr - ( (char*) ( (*dist)->list ) ); (*dist)->lenlist = ptr - ((char *) ((*dist)->list));
(*dist)->buffer = leftbuf; (*dist)->buffer = leftbuf;
if (BufferGetBlockNumber(buffer) != GIST_ROOT_BLKNO) if (BufferGetBlockNumber(buffer) != GIST_ROOT_BLKNO)
PageRestoreTempPage(left, p); PageRestoreTempPage(left, p);
nlen += 1; nlen += 1;
newtup = (IndexTuple *) repalloc(newtup, sizeof(IndexTuple) * nlen); newtup = (IndexTuple *) repalloc(newtup, sizeof(IndexTuple) * nlen);
newtup[nlen - 1] = ( v.spl_leftvalid ) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull) newtup[nlen - 1] = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull)
: gist_form_invalid_tuple( lbknum ); : gist_form_invalid_tuple(lbknum);
ItemPointerSetBlockNumber(&(newtup[nlen - 1]->t_tid), lbknum); ItemPointerSetBlockNumber(&(newtup[nlen - 1]->t_tid), lbknum);
} }
GistClearTuplesDeleted(p); GistClearTuplesDeleted(p);
*len = nlen; *len = nlen;
return newtup; return newtup;
} }
...@@ -1071,18 +1186,19 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke ...@@ -1071,18 +1186,19 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
{ {
Page page; Page page;
Assert( BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO ); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
GISTInitBuffer(buffer, 0); GISTInitBuffer(buffer, 0);
gistfillbuffer(r, page, itup, len, FirstOffsetNumber); gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
if ( !r->rd_istemp ) { if (!r->rd_istemp)
XLogRecPtr recptr; {
XLogRecData *rdata; XLogRecPtr recptr;
XLogRecData *rdata;
rdata = formUpdateRdata(r->rd_node, GIST_ROOT_BLKNO, rdata = formUpdateRdata(r->rd_node, GIST_ROOT_BLKNO,
NULL, 0, false, itup, len, key); NULL, 0, false, itup, len, key);
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata);
...@@ -1090,7 +1206,8 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke ...@@ -1090,7 +1206,8 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
} else }
else
PageSetLSN(page, XLogRecPtrForTemp); PageSetLSN(page, XLogRecPtrForTemp);
} }
...@@ -1136,4 +1253,3 @@ freeGISTstate(GISTSTATE *giststate) ...@@ -1136,4 +1253,3 @@ freeGISTstate(GISTSTATE *giststate)
{ {
/* no work */ /* no work */
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.50 2005/06/27 12:45:22 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.51 2005/09/22 20:44:36 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,64 +20,71 @@ ...@@ -20,64 +20,71 @@
#include "utils/memutils.h" #include "utils/memutils.h"
static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n, static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
ScanDirection dir); ScanDirection dir);
static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples); static int gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan, static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
OffsetNumber offset); OffsetNumber offset);
/*
 * killtuple() -- set LP_DELETE on the index item that refers to *iptr.
 *
 * The search starts on the scan's current buffer.  If that page's LSN still
 * equals the one remembered in so->stack, the offset stored in iptr is used
 * directly.  Otherwise the page is scanned for a tuple whose t_tid equals
 * *iptr, and on a miss the search continues along the rightlink chain until
 * the tuple is found or the chain ends.
 *
 * Buffers read here while following rightlinks are released before
 * returning; so->curbuf itself is only locked and unlocked, never released.
 */
static void
killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr)
{
	Buffer		buf = so->curbuf;

	while (true)
	{
		bool		is_scan_buf = (buf == so->curbuf);
		Page		pg;
		OffsetNumber off,
					last;
		BlockNumber next;

		LockBuffer(buf, GIST_SHARE);
		pg = (Page) BufferGetPage(buf);

		if (is_scan_buf && XLByteEQ(so->stack->lsn, PageGetLSN(pg)))
		{
			/* page is unchanged since we read it: the offset is still good */
			off = ItemPointerGetOffsetNumber(iptr);
			PageGetItemId(pg, off)->lp_flags |= LP_DELETE;
			SetBufferCommitInfoNeedsSave(buf);
			LockBuffer(buf, GIST_UNLOCK);
			return;
		}

		/* page has changed (or is a right sibling): look for the tuple */
		last = PageGetMaxOffsetNumber(pg);
		off = FirstOffsetNumber;
		while (off <= last)
		{
			ItemId		lp = PageGetItemId(pg, off);
			IndexTuple	tup = (IndexTuple) PageGetItem(pg, lp);

			if (ItemPointerEquals(&(tup->t_tid), iptr))
			{
				/* found it */
				lp->lp_flags |= LP_DELETE;
				SetBufferCommitInfoNeedsSave(buf);
				LockBuffer(buf, GIST_UNLOCK);
				if (!is_scan_buf)
					ReleaseBuffer(buf);
				return;
			}
			off = OffsetNumberNext(off);
		}

		/*
		 * Not on this page: move to the right sibling.
		 *
		 * Open question carried over from the original author: if the tuple
		 * was removed by a concurrent vacuum, this walk ends up reading
		 * every page along the rightlink chain.
		 */
		next = GistPageGetOpaque(pg)->rightlink;
		LockBuffer(buf, GIST_UNLOCK);
		if (!is_scan_buf)
			ReleaseBuffer(buf);

		if (next == InvalidBlockNumber)
			return;				/* not found: dropped by somebody else */

		buf = ReadBuffer(r, next);
	}
}
/* /*
* gistgettuple() -- Get the next tuple in the scan * gistgettuple() -- Get the next tuple in the scan
...@@ -85,27 +92,27 @@ killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr) { ...@@ -85,27 +92,27 @@ killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr) {
Datum Datum
gistgettuple(PG_FUNCTION_ARGS) gistgettuple(PG_FUNCTION_ARGS)
{ {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
GISTScanOpaque so; GISTScanOpaque so;
ItemPointerData tid; ItemPointerData tid;
bool res; bool res;
so = (GISTScanOpaque) scan->opaque; so = (GISTScanOpaque) scan->opaque;
/* /*
* If we have produced an index tuple in the past and the executor * If we have produced an index tuple in the past and the executor has
* has informed us we need to mark it as "killed", do so now. * informed us we need to mark it as "killed", do so now.
*/ */
if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData))) if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
killtuple(scan->indexRelation, so, &(scan->currentItemData)); killtuple(scan->indexRelation, so, &(scan->currentItemData));
/* /*
* Get the next tuple that matches the search key. If asked to * Get the next tuple that matches the search key. If asked to skip killed
* skip killed tuples, continue looping until we find a non-killed * tuples, continue looping until we find a non-killed tuple that matches
* tuple that matches the search key. * the search key.
*/ */
res = ( gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples) ) ? true : false; res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false;
PG_RETURN_BOOL(res); PG_RETURN_BOOL(res);
} }
...@@ -114,12 +121,12 @@ Datum ...@@ -114,12 +121,12 @@ Datum
gistgetmulti(PG_FUNCTION_ARGS) gistgetmulti(PG_FUNCTION_ARGS)
{ {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1); ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2); int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3); int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
*returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false); *returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false);
PG_RETURN_BOOL(*returned_tids == max_tids); PG_RETURN_BOOL(*returned_tids == max_tids);
} }
...@@ -128,17 +135,17 @@ gistgetmulti(PG_FUNCTION_ARGS) ...@@ -128,17 +135,17 @@ gistgetmulti(PG_FUNCTION_ARGS)
* either to fetch the first such tuple or subsequent matching * either to fetch the first such tuple or subsequent matching
* tuples. Returns true iff a matching tuple was found. * tuples. Returns true iff a matching tuple was found.
*/ */
static int static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples) gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples)
{ {
Page p; Page p;
OffsetNumber n; OffsetNumber n;
GISTScanOpaque so; GISTScanOpaque so;
GISTSearchStack *stk; GISTSearchStack *stk;
IndexTuple it; IndexTuple it;
GISTPageOpaque opaque; GISTPageOpaque opaque;
bool resetoffset=false; bool resetoffset = false;
int ntids=0; int ntids = 0;
so = (GISTScanOpaque) scan->opaque; so = (GISTScanOpaque) scan->opaque;
...@@ -149,59 +156,67 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b ...@@ -149,59 +156,67 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
Assert(so->stack == NULL); Assert(so->stack == NULL);
so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO); so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);
stk = so->stack = (GISTSearchStack*) palloc0( sizeof(GISTSearchStack) ); stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));
stk->next = NULL; stk->next = NULL;
stk->block = GIST_ROOT_BLKNO; stk->block = GIST_ROOT_BLKNO;
} else if ( so->curbuf == InvalidBuffer ) { }
else if (so->curbuf == InvalidBuffer)
{
return 0; return 0;
} }
for(;;) { for (;;)
{
/* First of all, we need to lock the buffer */ /* First of all, we need to lock the buffer */
Assert( so->curbuf != InvalidBuffer ); Assert(so->curbuf != InvalidBuffer);
LockBuffer( so->curbuf, GIST_SHARE ); LockBuffer(so->curbuf, GIST_SHARE);
p = BufferGetPage(so->curbuf); p = BufferGetPage(so->curbuf);
opaque = GistPageGetOpaque( p ); opaque = GistPageGetOpaque(p);
resetoffset = false; resetoffset = false;
if ( XLogRecPtrIsInvalid( so->stack->lsn ) || !XLByteEQ( so->stack->lsn, PageGetLSN(p) ) ) { if (XLogRecPtrIsInvalid(so->stack->lsn) || !XLByteEQ(so->stack->lsn, PageGetLSN(p)))
{
/* page changed since last visit, or this is the first visit: reset offset */ /* page changed since last visit, or this is the first visit: reset offset */
so->stack->lsn = PageGetLSN(p); so->stack->lsn = PageGetLSN(p);
resetoffset = true; resetoffset = true;
/* check for a page split that occurred since the last visit or the visit to the parent */ /* check for a page split that occurred since the last visit or the visit to the parent */
if ( !XLogRecPtrIsInvalid( so->stack->parentlsn ) && if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
XLByteLT( so->stack->parentlsn, opaque->nsn ) && XLByteLT(so->stack->parentlsn, opaque->nsn) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ && opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
(so->stack->next==NULL || so->stack->next->block != opaque->rightlink) /* check if already added */) { (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already
added */ )
{
/* detect page split, follow right link to add pages */ /* detect page split, follow right link to add pages */
stk = (GISTSearchStack*) palloc( sizeof(GISTSearchStack) ); stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
stk->next = so->stack->next; stk->next = so->stack->next;
stk->block = opaque->rightlink; stk->block = opaque->rightlink;
stk->parentlsn = so->stack->parentlsn; stk->parentlsn = so->stack->parentlsn;
memset( &(stk->lsn), 0, sizeof(GistNSN) ); memset(&(stk->lsn), 0, sizeof(GistNSN));
so->stack->next = stk; so->stack->next = stk;
} }
} }
/* if page is empty, then just skip it */ /* if page is empty, then just skip it */
if ( PageIsEmpty(p) ) { if (PageIsEmpty(p))
LockBuffer( so->curbuf, GIST_UNLOCK ); {
LockBuffer(so->curbuf, GIST_UNLOCK);
stk = so->stack->next; stk = so->stack->next;
pfree( so->stack ); pfree(so->stack);
so->stack = stk; so->stack = stk;
if (so->stack == NULL) { if (so->stack == NULL)
{
ReleaseBuffer(so->curbuf); ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer; so->curbuf = InvalidBuffer;
return ntids; return ntids;
} }
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation, so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block); stk->block);
continue; continue;
} }
...@@ -215,33 +230,33 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b ...@@ -215,33 +230,33 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
else else
{ {
n = ItemPointerGetOffsetNumber(&(scan->currentItemData)); n = ItemPointerGetOffsetNumber(&(scan->currentItemData));
if (ScanDirectionIsBackward(dir)) if (ScanDirectionIsBackward(dir))
n = OffsetNumberPrev(n); n = OffsetNumberPrev(n);
else else
n = OffsetNumberNext(n); n = OffsetNumberNext(n);
} }
/* wonderful, we can look at the page */ /* wonderful, we can look at the page */
for(;;) for (;;)
{ {
n = gistfindnext(scan, n, dir); n = gistfindnext(scan, n, dir);
if (!OffsetNumberIsValid(n)) if (!OffsetNumberIsValid(n))
{ {
/* /*
* We ran out of matching index entries on the current * We ran out of matching index entries on the current page,
* page, so pop the top stack entry and use it to continue * so pop the top stack entry and use it to continue the
* the search. * search.
*/ */
LockBuffer( so->curbuf, GIST_UNLOCK ); LockBuffer(so->curbuf, GIST_UNLOCK);
stk = so->stack->next; stk = so->stack->next;
pfree( so->stack ); pfree(so->stack);
so->stack = stk; so->stack = stk;
/* If we're out of stack entries, we're done */ /* If we're out of stack entries, we're done */
if (so->stack == NULL) if (so->stack == NULL)
{ {
ReleaseBuffer(so->curbuf); ReleaseBuffer(so->curbuf);
...@@ -250,8 +265,8 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b ...@@ -250,8 +265,8 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
} }
so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation, so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
stk->block); stk->block);
/* XXX go up */ /* XXX go up */
break; break;
} }
...@@ -259,20 +274,22 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b ...@@ -259,20 +274,22 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
{ {
/* /*
* We've found a matching index entry in a leaf page, so * We've found a matching index entry in a leaf page, so
* return success. Note that we keep "curbuf" pinned so * return success. Note that we keep "curbuf" pinned so that
* that we can efficiently resume the index scan later. * we can efficiently resume the index scan later.
*/ */
ItemPointerSet(&(scan->currentItemData), ItemPointerSet(&(scan->currentItemData),
BufferGetBlockNumber(so->curbuf), n); BufferGetBlockNumber(so->curbuf), n);
if ( ! ( ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n)) ) ) { if (!(ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n))))
{
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
tids[ntids] = scan->xs_ctup.t_self = it->t_tid; tids[ntids] = scan->xs_ctup.t_self = it->t_tid;
ntids++; ntids++;
if ( ntids == maxtids ) { if (ntids == maxtids)
LockBuffer( so->curbuf, GIST_UNLOCK ); {
LockBuffer(so->curbuf, GIST_UNLOCK);
return ntids; return ntids;
} }
} }
...@@ -281,14 +298,14 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b ...@@ -281,14 +298,14 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, b
{ {
/* /*
* We've found an entry in an internal node whose key is * We've found an entry in an internal node whose key is
* consistent with the search key, so push it to stack * consistent with the search key, so push it to stack
*/ */
stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack)); stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
stk->block = ItemPointerGetBlockNumber(&(it->t_tid)); stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
memset( &(stk->lsn), 0, sizeof(GistNSN) ); memset(&(stk->lsn), 0, sizeof(GistNSN));
stk->parentlsn = so->stack->lsn; stk->parentlsn = so->stack->lsn;
stk->next = so->stack->next; stk->next = so->stack->next;
...@@ -320,12 +337,12 @@ gistindex_keytest(IndexTuple tuple, ...@@ -320,12 +337,12 @@ gistindex_keytest(IndexTuple tuple,
IndexScanDesc scan, IndexScanDesc scan,
OffsetNumber offset) OffsetNumber offset)
{ {
int keySize = scan->numberOfKeys; int keySize = scan->numberOfKeys;
ScanKey key = scan->keyData; ScanKey key = scan->keyData;
Relation r = scan->indexRelation; Relation r = scan->indexRelation;
GISTScanOpaque so; GISTScanOpaque so;
Page p; Page p;
GISTSTATE *giststate; GISTSTATE *giststate;
so = (GISTScanOpaque) scan->opaque; so = (GISTScanOpaque) scan->opaque;
giststate = so->giststate; giststate = so->giststate;
...@@ -334,9 +351,10 @@ gistindex_keytest(IndexTuple tuple, ...@@ -334,9 +351,10 @@ gistindex_keytest(IndexTuple tuple,
IncrIndexProcessed(); IncrIndexProcessed();
/* /*
* Tuple doesn't restore after crash recovery because of incomplete insert * Tuple doesn't restore after crash recovery because of incomplete
*/ * insert
if ( !GistPageIsLeaf(p) && GistTupleIsInvalid(tuple) ) */
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
return true; return true;
while (keySize > 0) while (keySize > 0)
...@@ -366,13 +384,12 @@ gistindex_keytest(IndexTuple tuple, ...@@ -366,13 +384,12 @@ gistindex_keytest(IndexTuple tuple,
FALSE, isNull); FALSE, isNull);
/* /*
* Call the Consistent function to evaluate the test. The * Call the Consistent function to evaluate the test. The arguments
* arguments are the index datum (as a GISTENTRY*), the comparison * are the index datum (as a GISTENTRY*), the comparison datum, and
* datum, and the comparison operator's strategy number and * the comparison operator's strategy number and subtype from pg_amop.
* subtype from pg_amop.
* *
* (Presently there's no need to pass the subtype since it'll always * (Presently there's no need to pass the subtype since it'll always be
* be zero, but might as well pass it for possible future use.) * zero, but might as well pass it for possible future use.)
*/ */
test = FunctionCall4(&key->sk_func, test = FunctionCall4(&key->sk_func,
PointerGetDatum(&de), PointerGetDatum(&de),
...@@ -399,26 +416,26 @@ gistindex_keytest(IndexTuple tuple, ...@@ -399,26 +416,26 @@ gistindex_keytest(IndexTuple tuple,
static OffsetNumber static OffsetNumber
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir) gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{ {
OffsetNumber maxoff; OffsetNumber maxoff;
IndexTuple it; IndexTuple it;
GISTScanOpaque so; GISTScanOpaque so;
MemoryContext oldcxt; MemoryContext oldcxt;
Page p; Page p;
so = (GISTScanOpaque) scan->opaque; so = (GISTScanOpaque) scan->opaque;
p = BufferGetPage(so->curbuf); p = BufferGetPage(so->curbuf);
maxoff = PageGetMaxOffsetNumber(p); maxoff = PageGetMaxOffsetNumber(p);
/* /*
* Make sure we're in a short-lived memory context when we invoke * Make sure we're in a short-lived memory context when we invoke a
* a user-supplied GiST method in gistindex_keytest(), so we don't * user-supplied GiST method in gistindex_keytest(), so we don't leak
* leak memory * memory
*/ */
oldcxt = MemoryContextSwitchTo(so->tempCxt); oldcxt = MemoryContextSwitchTo(so->tempCxt);
/* /*
* If we modified the index during the scan, we may have a pointer to * If we modified the index during the scan, we may have a pointer to a
* a ghost tuple, before the scan. If this is the case, back up one. * ghost tuple, before the scan. If this is the case, back up one.
*/ */
if (so->flags & GS_CURBEFORE) if (so->flags & GS_CURBEFORE)
{ {
...@@ -442,9 +459,8 @@ gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir) ...@@ -442,9 +459,8 @@ gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
MemoryContextReset(so->tempCxt); MemoryContextReset(so->tempCxt);
/* /*
* If we found a matching entry, return its offset; otherwise * If we found a matching entry, return its offset; otherwise return
* return InvalidOffsetNumber to inform the caller to go to the * InvalidOffsetNumber to inform the caller to go to the next page.
* next page.
*/ */
if (n >= FirstOffsetNumber && n <= maxoff) if (n >= FirstOffsetNumber && n <= maxoff)
return n; return n;
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.1 2005/07/01 19:19:02 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.2 2005/09/22 20:44:36 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,10 +30,10 @@ typedef struct ...@@ -30,10 +30,10 @@ typedef struct
static int compare_KB(const void *a, const void *b); static int compare_KB(const void *a, const void *b);
static bool gist_box_leaf_consistent(BOX *key, BOX *query, static bool gist_box_leaf_consistent(BOX *key, BOX *query,
StrategyNumber strategy); StrategyNumber strategy);
static double size_box(Datum dbox); static double size_box(Datum dbox);
static bool rtree_internal_consistent(BOX *key, BOX *query, static bool rtree_internal_consistent(BOX *key, BOX *query,
StrategyNumber strategy); StrategyNumber strategy);
/************************************************** /**************************************************
...@@ -268,11 +268,11 @@ gist_box_picksplit(PG_FUNCTION_ARGS) ...@@ -268,11 +268,11 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
#define ADDLIST( list, unionD, pos, num ) do { \ #define ADDLIST( list, unionD, pos, num ) do { \
if ( pos ) { \ if ( pos ) { \
if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \ if ( (unionD)->high.x < cur->high.x ) (unionD)->high.x = cur->high.x; \
if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \ if ( (unionD)->low.x > cur->low.x ) (unionD)->low.x = cur->low.x; \
if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \ if ( (unionD)->high.y < cur->high.y ) (unionD)->high.y = cur->high.y; \
if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \ if ( (unionD)->low.y > cur->low.y ) (unionD)->low.y = cur->low.y; \
} else { \ } else { \
memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \ memcpy( (void*)(unionD), (void*) cur, sizeof( BOX ) ); \
} \ } \
(list)[pos] = num; \ (list)[pos] = num; \
(pos)++; \ (pos)++; \
...@@ -411,62 +411,62 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy) ...@@ -411,62 +411,62 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
case RTLeftStrategyNumber: case RTLeftStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_left, retval = DatumGetBool(DirectFunctionCall2(box_left,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverLeftStrategyNumber: case RTOverLeftStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overleft, retval = DatumGetBool(DirectFunctionCall2(box_overleft,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverlapStrategyNumber: case RTOverlapStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap, retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverRightStrategyNumber: case RTOverRightStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overright, retval = DatumGetBool(DirectFunctionCall2(box_overright,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTRightStrategyNumber: case RTRightStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_right, retval = DatumGetBool(DirectFunctionCall2(box_right,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTSameStrategyNumber: case RTSameStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_same, retval = DatumGetBool(DirectFunctionCall2(box_same,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTContainsStrategyNumber: case RTContainsStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contain, retval = DatumGetBool(DirectFunctionCall2(box_contain,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTContainedByStrategyNumber: case RTContainedByStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contained, retval = DatumGetBool(DirectFunctionCall2(box_contained,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverBelowStrategyNumber: case RTOverBelowStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overbelow, retval = DatumGetBool(DirectFunctionCall2(box_overbelow,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTBelowStrategyNumber: case RTBelowStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_below, retval = DatumGetBool(DirectFunctionCall2(box_below,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTAboveStrategyNumber: case RTAboveStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_above, retval = DatumGetBool(DirectFunctionCall2(box_above,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverAboveStrategyNumber: case RTOverAboveStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overabove, retval = DatumGetBool(DirectFunctionCall2(box_overabove,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
default: default:
retval = FALSE; retval = FALSE;
...@@ -477,7 +477,7 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy) ...@@ -477,7 +477,7 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
static double static double
size_box(Datum dbox) size_box(Datum dbox)
{ {
BOX *box = DatumGetBoxP(dbox); BOX *box = DatumGetBoxP(dbox);
if (box == NULL || box->high.x <= box->low.x || box->high.y <= box->low.y) if (box == NULL || box->high.x <= box->low.x || box->high.y <= box->low.y)
return 0.0; return 0.0;
...@@ -506,58 +506,58 @@ rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy) ...@@ -506,58 +506,58 @@ rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy)
case RTLeftStrategyNumber: case RTLeftStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overright, retval = !DatumGetBool(DirectFunctionCall2(box_overright,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverLeftStrategyNumber: case RTOverLeftStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_right, retval = !DatumGetBool(DirectFunctionCall2(box_right,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverlapStrategyNumber: case RTOverlapStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap, retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverRightStrategyNumber: case RTOverRightStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_left, retval = !DatumGetBool(DirectFunctionCall2(box_left,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTRightStrategyNumber: case RTRightStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overleft, retval = !DatumGetBool(DirectFunctionCall2(box_overleft,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTSameStrategyNumber: case RTSameStrategyNumber:
case RTContainsStrategyNumber: case RTContainsStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_contain, retval = DatumGetBool(DirectFunctionCall2(box_contain,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTContainedByStrategyNumber: case RTContainedByStrategyNumber:
retval = DatumGetBool(DirectFunctionCall2(box_overlap, retval = DatumGetBool(DirectFunctionCall2(box_overlap,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverBelowStrategyNumber: case RTOverBelowStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_above, retval = !DatumGetBool(DirectFunctionCall2(box_above,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTBelowStrategyNumber: case RTBelowStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overabove, retval = !DatumGetBool(DirectFunctionCall2(box_overabove,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTAboveStrategyNumber: case RTAboveStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_overbelow, retval = !DatumGetBool(DirectFunctionCall2(box_overbelow,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
case RTOverAboveStrategyNumber: case RTOverAboveStrategyNumber:
retval = !DatumGetBool(DirectFunctionCall2(box_below, retval = !DatumGetBool(DirectFunctionCall2(box_below,
PointerGetDatum(key), PointerGetDatum(key),
PointerGetDatum(query))); PointerGetDatum(query)));
break; break;
default: default:
retval = FALSE; retval = FALSE;
...@@ -621,8 +621,8 @@ gist_poly_consistent(PG_FUNCTION_ARGS) ...@@ -621,8 +621,8 @@ gist_poly_consistent(PG_FUNCTION_ARGS)
/* /*
* Since the operators are marked lossy anyway, we can just use * Since the operators are marked lossy anyway, we can just use
* rtree_internal_consistent even at leaf nodes. (This works * rtree_internal_consistent even at leaf nodes. (This works in part
* in part because the index entries are bounding boxes not polygons.) * because the index entries are bounding boxes not polygons.)
*/ */
result = rtree_internal_consistent(DatumGetBoxP(entry->key), result = rtree_internal_consistent(DatumGetBoxP(entry->key),
&(query->boundbox), strategy); &(query->boundbox), strategy);
...@@ -651,7 +651,7 @@ gist_circle_compress(PG_FUNCTION_ARGS) ...@@ -651,7 +651,7 @@ gist_circle_compress(PG_FUNCTION_ARGS)
retval = palloc(sizeof(GISTENTRY)); retval = palloc(sizeof(GISTENTRY));
if (DatumGetCircleP(entry->key) != NULL) if (DatumGetCircleP(entry->key) != NULL)
{ {
CIRCLE *in = DatumGetCircleP(entry->key); CIRCLE *in = DatumGetCircleP(entry->key);
BOX *r; BOX *r;
r = (BOX *) palloc(sizeof(BOX)); r = (BOX *) palloc(sizeof(BOX));
...@@ -683,7 +683,7 @@ Datum ...@@ -683,7 +683,7 @@ Datum
gist_circle_consistent(PG_FUNCTION_ARGS) gist_circle_consistent(PG_FUNCTION_ARGS)
{ {
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
CIRCLE *query = PG_GETARG_CIRCLE_P(1); CIRCLE *query = PG_GETARG_CIRCLE_P(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
BOX bbox; BOX bbox;
bool result; bool result;
...@@ -693,8 +693,8 @@ gist_circle_consistent(PG_FUNCTION_ARGS) ...@@ -693,8 +693,8 @@ gist_circle_consistent(PG_FUNCTION_ARGS)
/* /*
* Since the operators are marked lossy anyway, we can just use * Since the operators are marked lossy anyway, we can just use
* rtree_internal_consistent even at leaf nodes. (This works * rtree_internal_consistent even at leaf nodes. (This works in part
* in part because the index entries are bounding boxes not circles.) * because the index entries are bounding boxes not circles.)
*/ */
bbox.high.x = query->center.x + query->radius; bbox.high.x = query->center.x + query->radius;
bbox.low.x = query->center.x - query->radius; bbox.low.x = query->center.x - query->radius;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.60 2005/09/22 18:49:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.61 2005/09/22 20:44:36 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -120,11 +120,11 @@ gistrescan(PG_FUNCTION_ARGS) ...@@ -120,11 +120,11 @@ gistrescan(PG_FUNCTION_ARGS)
scan->numberOfKeys * sizeof(ScanKeyData)); scan->numberOfKeys * sizeof(ScanKeyData));
/* /*
* Modify the scan key so that the Consistent method is * Modify the scan key so that the Consistent method is called for
* called for all comparisons. The original operator is passed * all comparisons. The original operator is passed to the Consistent
* to the Consistent function in the form of its strategy * function in the form of its strategy number, which is available
* number, which is available from the sk_strategy field, and * from the sk_strategy field, and its subtype from the sk_subtype
* its subtype from the sk_subtype field. * field.
*/ */
for (i = 0; i < scan->numberOfKeys; i++) for (i = 0; i < scan->numberOfKeys; i++)
scan->keyData[i].sk_func = so->giststate->consistentFn[scan->keyData[i].sk_attno - 1]; scan->keyData[i].sk_func = so->giststate->consistentFn[scan->keyData[i].sk_attno - 1];
...@@ -138,7 +138,7 @@ gistmarkpos(PG_FUNCTION_ARGS) ...@@ -138,7 +138,7 @@ gistmarkpos(PG_FUNCTION_ARGS)
{ {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so; GISTScanOpaque so;
GISTSearchStack *o, GISTSearchStack *o,
*n, *n,
*tmp; *tmp;
...@@ -187,7 +187,7 @@ gistrestrpos(PG_FUNCTION_ARGS) ...@@ -187,7 +187,7 @@ gistrestrpos(PG_FUNCTION_ARGS)
{ {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so; GISTScanOpaque so;
GISTSearchStack *o, GISTSearchStack *o,
*n, *n,
*tmp; *tmp;
...@@ -308,9 +308,9 @@ ReleaseResources_gist(void) ...@@ -308,9 +308,9 @@ ReleaseResources_gist(void)
GISTScanList next; GISTScanList next;
/* /*
* Note: this should be a no-op during normal query shutdown. However, * Note: this should be a no-op during normal query shutdown. However, in
* in an abort situation ExecutorEnd is not called and so there may be * an abort situation ExecutorEnd is not called and so there may be open
* open index scans to clean up. * index scans to clean up.
*/ */
prev = NULL; prev = NULL;
...@@ -338,8 +338,8 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum, XLogR ...@@ -338,8 +338,8 @@ gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum, XLogR
GISTScanList l; GISTScanList l;
Oid relid; Oid relid;
if ( XLogRecPtrIsInvalid(newlsn) || XLogRecPtrIsInvalid(oldlsn) ) if (XLogRecPtrIsInvalid(newlsn) || XLogRecPtrIsInvalid(oldlsn))
return; return;
relid = RelationGetRelid(rel); relid = RelationGetRelid(rel);
for (l = GISTScans; l != NULL; l = l->gsl_next) for (l = GISTScans; l != NULL; l = l->gsl_next)
...@@ -365,7 +365,7 @@ gistadjone(IndexScanDesc scan, ...@@ -365,7 +365,7 @@ gistadjone(IndexScanDesc scan,
BlockNumber blkno, BlockNumber blkno,
OffsetNumber offnum, XLogRecPtr newlsn, XLogRecPtr oldlsn) OffsetNumber offnum, XLogRecPtr newlsn, XLogRecPtr oldlsn)
{ {
GISTScanOpaque so = (GISTScanOpaque) scan->opaque ; GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
adjustiptr(scan, &(scan->currentItemData), so->stack, op, blkno, offnum, newlsn, oldlsn); adjustiptr(scan, &(scan->currentItemData), so->stack, op, blkno, offnum, newlsn, oldlsn);
adjustiptr(scan, &(scan->currentMarkData), so->markstk, op, blkno, offnum, newlsn, oldlsn); adjustiptr(scan, &(scan->currentMarkData), so->markstk, op, blkno, offnum, newlsn, oldlsn);
...@@ -399,7 +399,8 @@ adjustiptr(IndexScanDesc scan, ...@@ -399,7 +399,8 @@ adjustiptr(IndexScanDesc scan,
{ {
case GISTOP_DEL: case GISTOP_DEL:
/* back up one if we need to */ /* back up one if we need to */
if (curoff >= offnum && XLByteEQ(stk->lsn, oldlsn) ) /* the same version of page */ if (curoff >= offnum && XLByteEQ(stk->lsn, oldlsn)) /* the same version of
* page */
{ {
if (curoff > FirstOffsetNumber) if (curoff > FirstOffsetNumber)
{ {
...@@ -409,8 +410,7 @@ adjustiptr(IndexScanDesc scan, ...@@ -409,8 +410,7 @@ adjustiptr(IndexScanDesc scan,
else else
{ {
/* /*
* remember that we're before the current * remember that we're before the current tuple
* tuple
*/ */
ItemPointerSet(iptr, blkno, FirstOffsetNumber); ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(scan->currentItemData)) if (iptr == &(scan->currentItemData))
...@@ -435,6 +435,7 @@ gistfreestack(GISTSearchStack *s) ...@@ -435,6 +435,7 @@ gistfreestack(GISTSearchStack *s)
while (s != NULL) while (s != NULL)
{ {
GISTSearchStack *p = s->next; GISTSearchStack *p = s->next;
pfree(s); pfree(s);
s = p; s = p;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.6 2005/09/22 18:49:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.7 2005/09/22 20:44:36 momjian Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
#include "storage/freespace.h" #include "storage/freespace.h"
/* group flags ( in gistadjsubkey ) */ /* group flags ( in gistadjsubkey ) */
#define LEFT_ADDED 0x01 #define LEFT_ADDED 0x01
#define RIGHT_ADDED 0x02 #define RIGHT_ADDED 0x02
#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED ) #define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED )
/* /*
...@@ -47,8 +47,7 @@ ...@@ -47,8 +47,7 @@
} while(0); } while(0);
static void static void gistpenalty(GISTSTATE *giststate, int attno,
gistpenalty(GISTSTATE *giststate, int attno,
GISTENTRY *key1, bool isNull1, GISTENTRY *key1, bool isNull1,
GISTENTRY *key2, bool isNull2, float *penalty); GISTENTRY *key2, bool isNull2, float *penalty);
...@@ -57,13 +56,13 @@ gistpenalty(GISTSTATE *giststate, int attno, ...@@ -57,13 +56,13 @@ gistpenalty(GISTSTATE *giststate, int attno,
*/ */
OffsetNumber OffsetNumber
gistfillbuffer(Relation r, Page page, IndexTuple *itup, gistfillbuffer(Relation r, Page page, IndexTuple *itup,
int len, OffsetNumber off) int len, OffsetNumber off)
{ {
OffsetNumber l = InvalidOffsetNumber; OffsetNumber l = InvalidOffsetNumber;
int i; int i;
if ( off == InvalidOffsetNumber ) if (off == InvalidOffsetNumber)
off = ( PageIsEmpty(page) ) ? FirstOffsetNumber : off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page)); OffsetNumberNext(PageGetMaxOffsetNumber(page));
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
...@@ -137,13 +136,13 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) ...@@ -137,13 +136,13 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
GistEntryVector *evec; GistEntryVector *evec;
int i; int i;
GISTENTRY centry[INDEX_MAX_KEYS]; GISTENTRY centry[INDEX_MAX_KEYS];
IndexTuple res; IndexTuple res;
evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ); evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);
for(i = 0; i<len; i++) for (i = 0; i < len; i++)
if ( GistTupleIsInvalid( itvec[i] ) ) if (GistTupleIsInvalid(itvec[i]))
return gist_form_invalid_tuple( InvalidBlockNumber ); return gist_form_invalid_tuple(InvalidBlockNumber);
for (i = 0; i < r->rd_att->natts; i++) for (i = 0; i < r->rd_att->natts; i++)
{ {
...@@ -155,6 +154,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) ...@@ -155,6 +154,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
for (j = 0; j < len; j++) for (j = 0; j < len; j++)
{ {
bool IsNull; bool IsNull;
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull); datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
if (IsNull) if (IsNull)
continue; continue;
...@@ -176,7 +176,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) ...@@ -176,7 +176,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
} }
else else
{ {
int datumsize; int datumsize;
if (real_len == 1) if (real_len == 1)
{ {
...@@ -202,7 +202,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) ...@@ -202,7 +202,7 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
} }
res = index_form_tuple(giststate->tupdesc, attr, isnull); res = index_form_tuple(giststate->tupdesc, attr, isnull);
GistTupleSetValid( res ); GistTupleSetValid(res);
return res; return res;
} }
...@@ -227,9 +227,9 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis ...@@ -227,9 +227,9 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
IndexTuple newtup = NULL; IndexTuple newtup = NULL;
int i; int i;
if ( GistTupleIsInvalid(oldtup) || GistTupleIsInvalid(addtup) ) if (GistTupleIsInvalid(oldtup) || GistTupleIsInvalid(addtup))
return gist_form_invalid_tuple( ItemPointerGetBlockNumber( &(oldtup->t_tid) ) ); return gist_form_invalid_tuple(ItemPointerGetBlockNumber(&(oldtup->t_tid)));
evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ); evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
evec->n = 2; evec->n = 2;
ev0p = &(evec->vector[0]); ev0p = &(evec->vector[0]);
...@@ -268,7 +268,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis ...@@ -268,7 +268,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
} }
else else
{ {
bool result; bool result;
FunctionCall3(&giststate->equalFn[i], FunctionCall3(&giststate->equalFn[i],
ev0p->key, ev0p->key,
...@@ -301,7 +301,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis ...@@ -301,7 +301,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
void void
gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl, bool isall) gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl, bool isall)
{ {
int lr; int lr;
for (lr = 0; lr < 2; lr++) for (lr = 0; lr < 2; lr++)
{ {
...@@ -309,7 +309,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV ...@@ -309,7 +309,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
int i; int i;
Datum *attr; Datum *attr;
int len, int len,
*attrsize; *attrsize;
bool *isnull; bool *isnull;
GistEntryVector *evec; GistEntryVector *evec;
...@@ -354,7 +354,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV ...@@ -354,7 +354,7 @@ gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITV
&(evec->vector[real_len]), &(evec->vector[real_len]),
datum, datum,
NULL, NULL, (OffsetNumber) 0, NULL, NULL, (OffsetNumber) 0,
ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull), ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull),
FALSE, IsNull); FALSE, IsNull);
real_len++; real_len++;
...@@ -402,14 +402,14 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl) ...@@ -402,14 +402,14 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl)
int curid = 1; int curid = 1;
/* /*
* first key is always not null (see gistinsert), so we may not check * first key is always not null (see gistinsert), so we may not check for
* for nulls * nulls
*/ */
for (i = 0; i < spl->spl_nleft; i++) for (i = 0; i < spl->spl_nleft; i++)
{ {
int j; int j;
int len; int len;
bool result; bool result;
if (spl->spl_idgrp[spl->spl_left[i]]) if (spl->spl_idgrp[spl->spl_left[i]])
continue; continue;
...@@ -540,12 +540,12 @@ gistadjsubkey(Relation r, ...@@ -540,12 +540,12 @@ gistadjsubkey(Relation r,
for (j = 1; j < r->rd_att->natts; j++) for (j = 1; j < r->rd_att->natts; j++)
{ {
gistentryinit(entry, v->spl_lattr[j], r, NULL, gistentryinit(entry, v->spl_lattr[j], r, NULL,
(OffsetNumber) 0, v->spl_lattrsize[j], FALSE); (OffsetNumber) 0, v->spl_lattrsize[j], FALSE);
gistpenalty(giststate, j, &entry, v->spl_lisnull[j], gistpenalty(giststate, j, &entry, v->spl_lisnull[j],
&identry[j], isnull[j], &lpenalty); &identry[j], isnull[j], &lpenalty);
gistentryinit(entry, v->spl_rattr[j], r, NULL, gistentryinit(entry, v->spl_rattr[j], r, NULL,
(OffsetNumber) 0, v->spl_rattrsize[j], FALSE); (OffsetNumber) 0, v->spl_rattrsize[j], FALSE);
gistpenalty(giststate, j, &entry, v->spl_risnull[j], gistpenalty(giststate, j, &entry, v->spl_risnull[j],
&identry[j], isnull[j], &rpenalty); &identry[j], isnull[j], &rpenalty);
...@@ -555,8 +555,7 @@ gistadjsubkey(Relation r, ...@@ -555,8 +555,7 @@ gistadjsubkey(Relation r,
} }
/* /*
* add * add XXX: refactor this to avoid duplicating code
* XXX: refactor this to avoid duplicating code
*/ */
if (lpenalty < rpenalty) if (lpenalty < rpenalty)
{ {
...@@ -643,12 +642,13 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ ...@@ -643,12 +642,13 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
{ {
int j; int j;
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
if ( !GistPageIsLeaf(p) && GistTupleIsInvalid(itup) ) { if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
{
ereport(LOG, ereport(LOG,
(errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery", (errmsg("index \"%s\" needs VACUUM or REINDEX to finish crash recovery",
RelationGetRelationName(r)))); RelationGetRelationName(r))));
continue; continue;
} }
sum_grow = 0; sum_grow = 0;
...@@ -683,7 +683,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ ...@@ -683,7 +683,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
} }
} }
if ( which == InvalidOffsetNumber ) if (which == InvalidOffsetNumber)
which = FirstOffsetNumber; which = FirstOffsetNumber;
return which; return which;
...@@ -775,7 +775,8 @@ gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, ...@@ -775,7 +775,8 @@ gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
for (i = 0; i < r->rd_att->natts; i++) for (i = 0; i < r->rd_att->natts; i++)
{ {
Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
gistdentryinit(giststate, i, &attdata[i], gistdentryinit(giststate, i, &attdata[i],
datum, r, p, o, datum, r, p, o,
ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]), ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]),
...@@ -801,8 +802,8 @@ void ...@@ -801,8 +802,8 @@ void
GISTInitBuffer(Buffer b, uint32 f) GISTInitBuffer(Buffer b, uint32 f)
{ {
GISTPageOpaque opaque; GISTPageOpaque opaque;
Page page; Page page;
Size pageSize; Size pageSize;
pageSize = BufferGetPageSize(b); pageSize = BufferGetPageSize(b);
page = BufferGetPage(b); page = BufferGetPage(b);
...@@ -811,15 +812,16 @@ GISTInitBuffer(Buffer b, uint32 f) ...@@ -811,15 +812,16 @@ GISTInitBuffer(Buffer b, uint32 f)
opaque = GistPageGetOpaque(page); opaque = GistPageGetOpaque(page);
opaque->flags = f; opaque->flags = f;
opaque->rightlink = InvalidBlockNumber; opaque->rightlink = InvalidBlockNumber;
memset( &(opaque->nsn), 0, sizeof(GistNSN) ); memset(&(opaque->nsn), 0, sizeof(GistNSN));
} }
void void
gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v, gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
IndexTuple *itup, int len, GISTSTATE *giststate) { IndexTuple *itup, int len, GISTSTATE *giststate)
{
/* /*
* now let the user-defined picksplit function set up the split * now let the user-defined picksplit function set up the split vector; in
* vector; in entryvec have no null value!! * entryvec have no null value!!
*/ */
FunctionCall2(&giststate->picksplitFn[0], FunctionCall2(&giststate->picksplitFn[0],
PointerGetDatum(entryvec), PointerGetDatum(entryvec),
...@@ -837,8 +839,8 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v, ...@@ -837,8 +839,8 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
v->spl_risnull[0] = false; v->spl_risnull[0] = false;
/* /*
* if index is multikey, then we must to try get smaller bounding box * if index is multikey, then we must to try get smaller bounding box for
* for subkey(s) * subkey(s)
*/ */
if (r->rd_att->natts > 1) if (r->rd_att->natts > 1)
{ {
...@@ -854,35 +856,42 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v, ...@@ -854,35 +856,42 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
gistunionsubkey(r, giststate, itup, v, false); gistunionsubkey(r, giststate, itup, v, false);
/* /*
* if possible, we insert equivalent tuples with control by * if possible, we insert equivalent tuples with control by penalty
* penalty for a subkey(s) * for a subkey(s)
*/ */
if (MaxGrpId > 1) if (MaxGrpId > 1)
gistadjsubkey(r, itup, len, v, giststate); gistadjsubkey(r, itup, len, v, giststate);
} }
} }
Buffer Buffer
gistNewBuffer(Relation r) { gistNewBuffer(Relation r)
Buffer buffer = InvalidBuffer; {
bool needLock; Buffer buffer = InvalidBuffer;
bool needLock;
while(true) { while (true)
{
BlockNumber blkno = GetFreeIndexPage(&r->rd_node); BlockNumber blkno = GetFreeIndexPage(&r->rd_node);
if (blkno == InvalidBlockNumber) if (blkno == InvalidBlockNumber)
break; break;
buffer = ReadBuffer(r, blkno); buffer = ReadBuffer(r, blkno);
if ( ConditionalLockBuffer(buffer) ) { if (ConditionalLockBuffer(buffer))
Page page = BufferGetPage(buffer); {
if ( GistPageIsDeleted( page ) ) { Page page = BufferGetPage(buffer);
GistPageSetNonDeleted( page );
if (GistPageIsDeleted(page))
{
GistPageSetNonDeleted(page);
return buffer; return buffer;
} else }
else
LockBuffer(buffer, GIST_UNLOCK); LockBuffer(buffer, GIST_UNLOCK);
} }
ReleaseBuffer( buffer ); ReleaseBuffer(buffer);
} }
needLock = !RELATION_IS_LOCAL(r); needLock = !RELATION_IS_LOCAL(r);
...@@ -895,6 +904,6 @@ gistNewBuffer(Relation r) { ...@@ -895,6 +904,6 @@ gistNewBuffer(Relation r) {
if (needLock) if (needLock)
UnlockRelationForExtension(r, ExclusiveLock); UnlockRelationForExtension(r, ExclusiveLock);
return buffer; return buffer;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.8 2005/09/22 18:49:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.9 2005/09/22 20:44:36 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -25,162 +25,198 @@ ...@@ -25,162 +25,198 @@
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/smgr.h" #include "storage/smgr.h"
/* filled by gistbulkdelete, cleared by gistvacuumpcleanup */ /* filled by gistbulkdelete, cleared by gistvacuumpcleanup */
static bool needFullVacuum = false; static bool needFullVacuum = false;
typedef struct { typedef struct
{
GISTSTATE giststate; GISTSTATE giststate;
Relation index; Relation index;
MemoryContext opCtx; MemoryContext opCtx;
IndexBulkDeleteResult *result; IndexBulkDeleteResult *result;
} GistVacuum; } GistVacuum;
typedef struct { typedef struct
IndexTuple *itup; {
int ituplen; IndexTuple *itup;
int ituplen;
bool emptypage; bool emptypage;
} ArrayTuple; } ArrayTuple;
static ArrayTuple static ArrayTuple
gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) { gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
{
ArrayTuple res = {NULL, 0, false}; ArrayTuple res = {NULL, 0, false};
Buffer buffer; Buffer buffer;
Page page; Page page;
OffsetNumber i, maxoff; OffsetNumber i,
maxoff;
ItemId iid; ItemId iid;
int lenaddon=4, curlenaddon=0, ntodelete=0; int lenaddon = 4,
IndexTuple idxtuple, *addon=NULL; curlenaddon = 0,
bool needwrite=false; ntodelete = 0;
OffsetNumber todelete[MaxOffsetNumber]; IndexTuple idxtuple,
ItemPointerData *completed=NULL; *addon = NULL;
int ncompleted=0, lencompleted=16; bool needwrite = false;
OffsetNumber todelete[MaxOffsetNumber];
ItemPointerData *completed = NULL;
int ncompleted = 0,
lencompleted = 16;
buffer = ReadBuffer(gv->index, blkno); buffer = ReadBuffer(gv->index, blkno);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
if ( GistPageIsLeaf(page) ) { if (GistPageIsLeaf(page))
if ( GistTuplesDeleted(page) ) { {
if (GistTuplesDeleted(page))
{
needunion = needwrite = true; needunion = needwrite = true;
GistClearTuplesDeleted(page); GistClearTuplesDeleted(page);
} }
} else { }
completed = (ItemPointerData*)palloc( sizeof(ItemPointerData)*lencompleted ); else
addon=(IndexTuple*)palloc(sizeof(IndexTuple)*lenaddon); {
completed = (ItemPointerData *) palloc(sizeof(ItemPointerData) * lencompleted);
addon = (IndexTuple *) palloc(sizeof(IndexTuple) * lenaddon);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
ArrayTuple chldtuple; {
bool needchildunion; ArrayTuple chldtuple;
bool needchildunion;
iid = PageGetItemId(page, i); iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid); idxtuple = (IndexTuple) PageGetItem(page, iid);
needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false; needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false;
if ( needchildunion ) if (needchildunion)
elog(DEBUG2, "gistVacuumUpdate: need union for block %u", elog(DEBUG2, "gistVacuumUpdate: need union for block %u",
ItemPointerGetBlockNumber(&(idxtuple->t_tid))); ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
chldtuple = gistVacuumUpdate( gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)), chldtuple = gistVacuumUpdate(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)),
needchildunion ); needchildunion);
if ( chldtuple.ituplen || chldtuple.emptypage ) { if (chldtuple.ituplen || chldtuple.emptypage)
{
PageIndexTupleDelete(page, i); PageIndexTupleDelete(page, i);
todelete[ ntodelete++ ] = i; todelete[ntodelete++] = i;
i--; maxoff--; i--;
needwrite=needunion=true; maxoff--;
needwrite = needunion = true;
if ( chldtuple.ituplen ) {
while( curlenaddon + chldtuple.ituplen >= lenaddon ) { if (chldtuple.ituplen)
lenaddon*=2; {
addon=(IndexTuple*)repalloc( addon, sizeof(IndexTuple)*lenaddon ); while (curlenaddon + chldtuple.ituplen >= lenaddon)
{
lenaddon *= 2;
addon = (IndexTuple *) repalloc(addon, sizeof(IndexTuple) * lenaddon);
} }
memcpy( addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple) ); memcpy(addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple));
curlenaddon += chldtuple.ituplen; curlenaddon += chldtuple.ituplen;
if ( chldtuple.ituplen > 1 ) { if (chldtuple.ituplen > 1)
/* child was splitted, so we need mark completion insert(split) */ {
int j; /*
* child was splitted, so we need mark completion
while( ncompleted + chldtuple.ituplen > lencompleted ) { * insert(split)
lencompleted*=2; */
completed = (ItemPointerData*)repalloc(completed, sizeof(ItemPointerData) * lencompleted); int j;
}
for(j=0;j<chldtuple.ituplen;j++) { while (ncompleted + chldtuple.ituplen > lencompleted)
ItemPointerCopy( &(chldtuple.itup[j]->t_tid), completed + ncompleted ); {
ncompleted++; lencompleted *= 2;
completed = (ItemPointerData *) repalloc(completed, sizeof(ItemPointerData) * lencompleted);
}
for (j = 0; j < chldtuple.ituplen; j++)
{
ItemPointerCopy(&(chldtuple.itup[j]->t_tid), completed + ncompleted);
ncompleted++;
} }
} }
pfree( chldtuple.itup ); pfree(chldtuple.itup);
} }
} }
} }
if ( curlenaddon ) { if (curlenaddon)
{
/* insert updated tuples */ /* insert updated tuples */
if (gistnospace(page, addon, curlenaddon)) { if (gistnospace(page, addon, curlenaddon))
{
/* there is no space on page to insert tuples */ /* there is no space on page to insert tuples */
IndexTuple *vec; IndexTuple *vec;
SplitedPageLayout *dist=NULL,*ptr; SplitedPageLayout *dist = NULL,
int i; *ptr;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx); int i;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractbuffer(buffer, &(res.ituplen)); vec = gistextractbuffer(buffer, &(res.ituplen));
vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon); vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon);
res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate)); res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate));
MemoryContextSwitchTo(oldCtx); MemoryContextSwitchTo(oldCtx);
vec = (IndexTuple*)palloc( sizeof(IndexTuple) * res.ituplen ); vec = (IndexTuple *) palloc(sizeof(IndexTuple) * res.ituplen);
for(i=0;i<res.ituplen;i++) { for (i = 0; i < res.ituplen; i++)
vec[i] = (IndexTuple)palloc( IndexTupleSize(res.itup[i]) ); {
memcpy( vec[i], res.itup[i], IndexTupleSize(res.itup[i]) ); vec[i] = (IndexTuple) palloc(IndexTupleSize(res.itup[i]));
memcpy(vec[i], res.itup[i], IndexTupleSize(res.itup[i]));
} }
res.itup = vec; res.itup = vec;
if ( !gv->index->rd_istemp ) { if (!gv->index->rd_istemp)
XLogRecPtr recptr; {
XLogRecData *rdata; XLogRecPtr recptr;
ItemPointerData key; /* set key for incomplete insert */ XLogRecData *rdata;
char *xlinfo; ItemPointerData key; /* set key for incomplete
* insert */
char *xlinfo;
ItemPointerSet(&key, blkno, TUPLE_IS_VALID); ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
rdata = formSplitRdata(gv->index->rd_node, blkno, rdata = formSplitRdata(gv->index->rd_node, blkno,
&key, dist); &key, dist);
xlinfo = rdata->data; xlinfo = rdata->data;
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), recptr); PageSetLSN(BufferGetPage(ptr->buffer), recptr);
PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID); PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
ptr=ptr->next; ptr = ptr->next;
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
pfree( xlinfo ); pfree(xlinfo);
pfree( rdata ); pfree(rdata);
} else { }
else
{
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
{
PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp); PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
ptr=ptr->next; ptr = ptr->next;
} }
} }
ptr = dist; ptr = dist;
while(ptr) { while (ptr)
if ( BufferGetBlockNumber(ptr->buffer) != blkno ) {
LockBuffer( ptr->buffer, GIST_UNLOCK ); if (BufferGetBlockNumber(ptr->buffer) != blkno)
LockBuffer(ptr->buffer, GIST_UNLOCK);
WriteBuffer(ptr->buffer); WriteBuffer(ptr->buffer);
ptr=ptr->next; ptr = ptr->next;
} }
if ( blkno == GIST_ROOT_BLKNO ) { if (blkno == GIST_ROOT_BLKNO)
ItemPointerData key; /* set key for incomplete insert */ {
ItemPointerData key; /* set key for incomplete
* insert */
ItemPointerSet(&key, blkno, TUPLE_IS_VALID); ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
...@@ -191,82 +227,98 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) { ...@@ -191,82 +227,98 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
WriteNoReleaseBuffer(buffer); WriteNoReleaseBuffer(buffer);
} }
needwrite=false; needwrite = false;
MemoryContextReset(gv->opCtx); MemoryContextReset(gv->opCtx);
needunion = false; /* gistSplit already forms unions */ needunion = false; /* gistSplit already forms unions */
} else { }
else
{
/* enough free space */ /* enough free space */
gistfillbuffer(gv->index, page, addon, curlenaddon, InvalidOffsetNumber); gistfillbuffer(gv->index, page, addon, curlenaddon, InvalidOffsetNumber);
} }
} }
} }
if ( needunion ) { if (needunion)
/* forms union for page or check empty*/ {
if ( PageIsEmpty(page) ) { /* forms union for page or check empty */
if ( blkno == GIST_ROOT_BLKNO ) { if (PageIsEmpty(page))
needwrite=true; {
GistPageSetLeaf( page ); if (blkno == GIST_ROOT_BLKNO)
} else { {
needwrite=true; needwrite = true;
res.emptypage=true; GistPageSetLeaf(page);
GistPageSetDeleted( page ); }
else
{
needwrite = true;
res.emptypage = true;
GistPageSetDeleted(page);
gv->result->pages_deleted++; gv->result->pages_deleted++;
} }
} else { }
IndexTuple *vec, tmp; else
int veclen=0; {
IndexTuple *vec,
tmp;
int veclen = 0;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx); MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractbuffer(buffer, &veclen); vec = gistextractbuffer(buffer, &veclen);
tmp = gistunion(gv->index, vec, veclen, &(gv->giststate)); tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
MemoryContextSwitchTo(oldCtx); MemoryContextSwitchTo(oldCtx);
res.itup=(IndexTuple*)palloc( sizeof(IndexTuple) ); res.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
res.ituplen = 1; res.ituplen = 1;
res.itup[0] = (IndexTuple)palloc( IndexTupleSize(tmp) ); res.itup[0] = (IndexTuple) palloc(IndexTupleSize(tmp));
memcpy( res.itup[0], tmp, IndexTupleSize(tmp) ); memcpy(res.itup[0], tmp, IndexTupleSize(tmp));
ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno); ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno);
GistTupleSetValid( res.itup[0] ); GistTupleSetValid(res.itup[0]);
MemoryContextReset(gv->opCtx); MemoryContextReset(gv->opCtx);
} }
} }
if ( needwrite ) { if (needwrite)
if ( !gv->index->rd_istemp ) { {
if (!gv->index->rd_istemp)
{
XLogRecData *rdata; XLogRecData *rdata;
XLogRecPtr recptr; XLogRecPtr recptr;
char *xlinfo; char *xlinfo;
rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete, rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete,
res.emptypage, addon, curlenaddon, NULL ); res.emptypage, addon, curlenaddon, NULL);
xlinfo = rdata->data; xlinfo = rdata->data;
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
pfree( xlinfo ); pfree(xlinfo);
pfree( rdata ); pfree(rdata);
} else }
else
PageSetLSN(page, XLogRecPtrForTemp); PageSetLSN(page, XLogRecPtrForTemp);
WriteBuffer( buffer ); WriteBuffer(buffer);
} else }
ReleaseBuffer( buffer ); else
ReleaseBuffer(buffer);
if ( ncompleted && !gv->index->rd_istemp ) if (ncompleted && !gv->index->rd_istemp)
gistxlogInsertCompletion( gv->index->rd_node, completed, ncompleted ); gistxlogInsertCompletion(gv->index->rd_node, completed, ncompleted);
for(i=0;i<curlenaddon;i++) for (i = 0; i < curlenaddon; i++)
pfree( addon[i] ); pfree(addon[i]);
if (addon) pfree(addon); if (addon)
if (completed) pfree(completed); pfree(addon);
if (completed)
pfree(completed);
return res; return res;
} }
...@@ -278,17 +330,23 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) { ...@@ -278,17 +330,23 @@ gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
*/ */
Datum Datum
gistvacuumcleanup(PG_FUNCTION_ARGS) { gistvacuumcleanup(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0); Relation rel = (Relation) PG_GETARG_POINTER(0);
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1); IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
BlockNumber npages, blkno; BlockNumber npages,
BlockNumber nFreePages, *freePages, maxFreePages; blkno;
BlockNumber lastBlock = GIST_ROOT_BLKNO, lastFilledBlock = GIST_ROOT_BLKNO; BlockNumber nFreePages,
bool needLock; *freePages,
maxFreePages;
BlockNumber lastBlock = GIST_ROOT_BLKNO,
lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
/* gistVacuumUpdate may cause hard work */ /* gistVacuumUpdate may cause hard work */
if ( info->vacuum_full ) { if (info->vacuum_full)
{
GistVacuum gv; GistVacuum gv;
ArrayTuple res; ArrayTuple res;
...@@ -300,17 +358,20 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) { ...@@ -300,17 +358,20 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
gv.result = stats; gv.result = stats;
/* walk through the entire index for update tuples */ /* walk through the entire index for update tuples */
res = gistVacuumUpdate( &gv, GIST_ROOT_BLKNO, false ); res = gistVacuumUpdate(&gv, GIST_ROOT_BLKNO, false);
/* cleanup */ /* cleanup */
if (res.itup) { if (res.itup)
int i; {
for(i=0;i<res.ituplen;i++) int i;
pfree( res.itup[i] );
pfree( res.itup ); for (i = 0; i < res.ituplen; i++)
pfree(res.itup[i]);
pfree(res.itup);
} }
freeGISTstate(&(gv.giststate)); freeGISTstate(&(gv.giststate));
MemoryContextDelete(gv.opCtx); MemoryContextDelete(gv.opCtx);
} else if (needFullVacuum) }
else if (needFullVacuum)
ereport(NOTICE, ereport(NOTICE,
(errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery", (errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery",
RelationGetRelationName(rel)))); RelationGetRelationName(rel))));
...@@ -318,8 +379,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) { ...@@ -318,8 +379,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
needFullVacuum = false; needFullVacuum = false;
needLock = !RELATION_IS_LOCAL(rel); needLock = !RELATION_IS_LOCAL(rel);
if ( info->vacuum_full ) if (info->vacuum_full)
needLock = false; /* relation locked with AccessExclusiveLock */ needLock = false; /* relation locked with AccessExclusiveLock */
/* try to find deleted pages */ /* try to find deleted pages */
if (needLock) if (needLock)
...@@ -329,45 +390,52 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) { ...@@ -329,45 +390,52 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
UnlockRelationForExtension(rel, ExclusiveLock); UnlockRelationForExtension(rel, ExclusiveLock);
maxFreePages = npages; maxFreePages = npages;
if ( maxFreePages > MaxFSMPages ) if (maxFreePages > MaxFSMPages)
maxFreePages = MaxFSMPages; maxFreePages = MaxFSMPages;
nFreePages = 0; nFreePages = 0;
freePages = (BlockNumber*) palloc (sizeof(BlockNumber) * maxFreePages); freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
for(blkno=GIST_ROOT_BLKNO+1;blkno<npages;blkno++) { for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
Buffer buffer = ReadBuffer(rel, blkno); {
Page page; Buffer buffer = ReadBuffer(rel, blkno);
Page page;
LockBuffer( buffer, GIST_SHARE );
page=(Page)BufferGetPage(buffer); LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer);
if ( GistPageIsDeleted(page) ) {
if (nFreePages < maxFreePages) { if (GistPageIsDeleted(page))
freePages[ nFreePages ] = blkno; {
if (nFreePages < maxFreePages)
{
freePages[nFreePages] = blkno;
nFreePages++; nFreePages++;
} }
} else }
else
lastFilledBlock = blkno; lastFilledBlock = blkno;
LockBuffer( buffer, GIST_UNLOCK ); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
} }
lastBlock = npages-1; lastBlock = npages - 1;
if ( info->vacuum_full && nFreePages>0 ) { /* try to truncate index */ if (info->vacuum_full && nFreePages > 0)
int i; { /* try to truncate index */
for(i=0;i<nFreePages;i++) int i;
if ( freePages[i] >= lastFilledBlock ) {
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
nFreePages = i; nFreePages = i;
break; break;
} }
if ( lastBlock > lastFilledBlock ) if (lastBlock > lastFilledBlock)
RelationTruncate( rel, lastFilledBlock+1 ); RelationTruncate(rel, lastFilledBlock + 1);
stats->pages_removed = lastBlock - lastFilledBlock; stats->pages_removed = lastBlock - lastFilledBlock;
} }
RecordIndexFreeSpace( &rel->rd_node, nFreePages, freePages ); RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);
pfree( freePages ); pfree(freePages);
/* return statistics */ /* return statistics */
stats->pages_free = nFreePages; stats->pages_free = nFreePages;
...@@ -378,33 +446,37 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) { ...@@ -378,33 +446,37 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) {
UnlockRelationForExtension(rel, ExclusiveLock); UnlockRelationForExtension(rel, ExclusiveLock);
if (info->vacuum_full) if (info->vacuum_full)
UnlockRelation(rel, AccessExclusiveLock); UnlockRelation(rel, AccessExclusiveLock);
PG_RETURN_POINTER(stats); PG_RETURN_POINTER(stats);
} }
typedef struct GistBDItem { typedef struct GistBDItem
{
GistNSN parentlsn; GistNSN parentlsn;
BlockNumber blkno; BlockNumber blkno;
struct GistBDItem *next; struct GistBDItem *next;
} GistBDItem; } GistBDItem;
static void static void
pushStackIfSplited(Page page, GistBDItem *stack) { pushStackIfSplited(Page page, GistBDItem *stack)
{
GISTPageOpaque opaque = GistPageGetOpaque(page); GISTPageOpaque opaque = GistPageGetOpaque(page);
if ( stack->blkno!=GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid( stack->parentlsn ) && if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) &&
XLByteLT( stack->parentlsn, opaque->nsn) && XLByteLT(stack->parentlsn, opaque->nsn) &&
opaque->rightlink != InvalidBlockNumber /* sanity check */ ) { opaque->rightlink != InvalidBlockNumber /* sanity check */ )
{
/* split page detected, install right link to the stack */ /* split page detected, install right link to the stack */
GistBDItem *ptr = (GistBDItem*) palloc(sizeof(GistBDItem)); GistBDItem *ptr = (GistBDItem *) palloc(sizeof(GistBDItem));
ptr->blkno = opaque->rightlink; ptr->blkno = opaque->rightlink;
ptr->parentlsn = stack->parentlsn; ptr->parentlsn = stack->parentlsn;
ptr->next = stack->next; ptr->next = stack->next;
stack->next = ptr; stack->next = ptr;
} }
} }
/* /*
...@@ -416,38 +488,44 @@ pushStackIfSplited(Page page, GistBDItem *stack) { ...@@ -416,38 +488,44 @@ pushStackIfSplited(Page page, GistBDItem *stack) {
* Result: a palloc'd struct containing statistical info for VACUUM displays. * Result: a palloc'd struct containing statistical info for VACUUM displays.
*/ */
Datum Datum
gistbulkdelete(PG_FUNCTION_ARGS) { gistbulkdelete(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0); Relation rel = (Relation) PG_GETARG_POINTER(0);
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
void* callback_state = (void *) PG_GETARG_POINTER(2); void *callback_state = (void *) PG_GETARG_POINTER(2);
IndexBulkDeleteResult *result = (IndexBulkDeleteResult*)palloc0(sizeof(IndexBulkDeleteResult)); IndexBulkDeleteResult *result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
GistBDItem *stack, *ptr; GistBDItem *stack,
bool needLock; *ptr;
bool needLock;
stack = (GistBDItem*) palloc0(sizeof(GistBDItem));
stack = (GistBDItem *) palloc0(sizeof(GistBDItem));
stack->blkno = GIST_ROOT_BLKNO; stack->blkno = GIST_ROOT_BLKNO;
needFullVacuum = false; needFullVacuum = false;
while( stack ) { while (stack)
Buffer buffer = ReadBuffer(rel, stack->blkno); {
Page page; Buffer buffer = ReadBuffer(rel, stack->blkno);
OffsetNumber i, maxoff; Page page;
OffsetNumber i,
maxoff;
IndexTuple idxtuple; IndexTuple idxtuple;
ItemId iid; ItemId iid;
LockBuffer(buffer, GIST_SHARE); LockBuffer(buffer, GIST_SHARE);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if ( GistPageIsLeaf(page) ) { if (GistPageIsLeaf(page))
{
OffsetNumber todelete[MaxOffsetNumber]; OffsetNumber todelete[MaxOffsetNumber];
int ntodelete = 0; int ntodelete = 0;
LockBuffer(buffer, GIST_UNLOCK); LockBuffer(buffer, GIST_UNLOCK);
LockBuffer(buffer, GIST_EXCLUSIVE); LockBuffer(buffer, GIST_EXCLUSIVE);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if ( stack->blkno==GIST_ROOT_BLKNO && !GistPageIsLeaf(page) ) { if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page))
{
/* the only root can become non-leaf during relock */ /* the only root can become non-leaf during relock */
LockBuffer(buffer, GIST_UNLOCK); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
...@@ -455,37 +533,46 @@ gistbulkdelete(PG_FUNCTION_ARGS) { ...@@ -455,37 +533,46 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
continue; continue;
} }
/* check for split proceeded after look at parent, /*
we should check it after relock */ * check for split proceeded after look at parent, we should check
* it after relock
*/
pushStackIfSplited(page, stack); pushStackIfSplited(page, stack);
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) { for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
iid = PageGetItemId(page, i); {
iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid); idxtuple = (IndexTuple) PageGetItem(page, iid);
if ( callback(&(idxtuple->t_tid), callback_state) ) { if (callback(&(idxtuple->t_tid), callback_state))
{
PageIndexTupleDelete(page, i); PageIndexTupleDelete(page, i);
todelete[ ntodelete ] = i; todelete[ntodelete] = i;
i--; maxoff--; ntodelete++; i--;
maxoff--;
ntodelete++;
result->tuples_removed += 1; result->tuples_removed += 1;
Assert( maxoff == PageGetMaxOffsetNumber(page) ); Assert(maxoff == PageGetMaxOffsetNumber(page));
} else }
else
result->num_index_tuples += 1; result->num_index_tuples += 1;
} }
if ( ntodelete ) { if (ntodelete)
{
GistMarkTuplesDeleted(page); GistMarkTuplesDeleted(page);
if (!rel->rd_istemp ) { if (!rel->rd_istemp)
{
XLogRecData *rdata; XLogRecData *rdata;
XLogRecPtr recptr; XLogRecPtr recptr;
gistxlogEntryUpdate *xlinfo; gistxlogEntryUpdate *xlinfo;
rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete, rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete,
false, NULL, 0, NULL); false, NULL, 0, NULL);
xlinfo = (gistxlogEntryUpdate*)rdata->data; xlinfo = (gistxlogEntryUpdate *) rdata->data;
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
...@@ -493,39 +580,43 @@ gistbulkdelete(PG_FUNCTION_ARGS) { ...@@ -493,39 +580,43 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
pfree( xlinfo ); pfree(xlinfo);
pfree( rdata ); pfree(rdata);
} else }
else
PageSetLSN(page, XLogRecPtrForTemp); PageSetLSN(page, XLogRecPtrForTemp);
WriteNoReleaseBuffer( buffer ); WriteNoReleaseBuffer(buffer);
} }
} else { }
else
{
/* check for split proceeded after look at parent */ /* check for split proceeded after look at parent */
pushStackIfSplited(page, stack); pushStackIfSplited(page, stack);
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) { for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
iid = PageGetItemId(page, i); iid = PageGetItemId(page, i);
idxtuple = (IndexTuple) PageGetItem(page, iid); idxtuple = (IndexTuple) PageGetItem(page, iid);
ptr = (GistBDItem*) palloc(sizeof(GistBDItem)); ptr = (GistBDItem *) palloc(sizeof(GistBDItem));
ptr->blkno = ItemPointerGetBlockNumber( &(idxtuple->t_tid) ); ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
ptr->parentlsn = PageGetLSN( page ); ptr->parentlsn = PageGetLSN(page);
ptr->next = stack->next; ptr->next = stack->next;
stack->next = ptr; stack->next = ptr;
if ( GistTupleIsInvalid(idxtuple) ) if (GistTupleIsInvalid(idxtuple))
needFullVacuum = true; needFullVacuum = true;
} }
} }
LockBuffer( buffer, GIST_UNLOCK ); LockBuffer(buffer, GIST_UNLOCK);
ReleaseBuffer( buffer ); ReleaseBuffer(buffer);
ptr = stack->next; ptr = stack->next;
pfree( stack ); pfree(stack);
stack = ptr; stack = ptr;
vacuum_delay_point(); vacuum_delay_point();
...@@ -539,6 +630,5 @@ gistbulkdelete(PG_FUNCTION_ARGS) { ...@@ -539,6 +630,5 @@ gistbulkdelete(PG_FUNCTION_ARGS) {
if (needLock) if (needLock)
UnlockRelationForExtension(rel, ExclusiveLock); UnlockRelationForExtension(rel, ExclusiveLock);
PG_RETURN_POINTER( result ); PG_RETURN_POINTER(result);
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.8 2005/09/22 18:49:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.9 2005/09/22 20:44:36 momjian Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -23,34 +23,38 @@ ...@@ -23,34 +23,38 @@
#include "utils/memutils.h" #include "utils/memutils.h"
typedef struct { typedef struct
gistxlogEntryUpdate *data; {
gistxlogEntryUpdate *data;
int len; int len;
IndexTuple *itup; IndexTuple *itup;
OffsetNumber *todelete; OffsetNumber *todelete;
} EntryUpdateRecord; } EntryUpdateRecord;
typedef struct { typedef struct
gistxlogPage *header; {
IndexTuple *itup; gistxlogPage *header;
IndexTuple *itup;
} NewPage; } NewPage;
typedef struct { typedef struct
gistxlogPageSplit *data; {
NewPage *page; gistxlogPageSplit *data;
NewPage *page;
} PageSplitRecord; } PageSplitRecord;
/* track for incomplete inserts, idea was taken from nbtxlog.c */ /* track for incomplete inserts, idea was taken from nbtxlog.c */
typedef struct gistIncompleteInsert { typedef struct gistIncompleteInsert
RelFileNode node; {
BlockNumber origblkno; /* for splits */ RelFileNode node;
ItemPointerData key; BlockNumber origblkno; /* for splits */
int lenblk; ItemPointerData key;
BlockNumber *blkno; int lenblk;
BlockNumber *blkno;
XLogRecPtr lsn; XLogRecPtr lsn;
BlockNumber *path; BlockNumber *path;
int pathlen; int pathlen;
} gistIncompleteInsert; } gistIncompleteInsert;
...@@ -63,84 +67,98 @@ static List *incomplete_inserts; ...@@ -63,84 +67,98 @@ static List *incomplete_inserts;
( \ ( \
ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \ ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \
ItemPointerGetBlockNumber (a) == ItemPointerGetBlockNumber(b) \ ItemPointerGetBlockNumber (a) == ItemPointerGetBlockNumber(b) \
) )
static void static void
pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key, pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
BlockNumber *blkno, int lenblk, BlockNumber *blkno, int lenblk,
PageSplitRecord *xlinfo /* to extract blkno info */ ) { PageSplitRecord *xlinfo /* to extract blkno info */ )
{
MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx); MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx);
gistIncompleteInsert *ninsert = (gistIncompleteInsert*)palloc( sizeof(gistIncompleteInsert) ); gistIncompleteInsert *ninsert = (gistIncompleteInsert *) palloc(sizeof(gistIncompleteInsert));
ninsert->node = node; ninsert->node = node;
ninsert->key = key; ninsert->key = key;
ninsert->lsn = lsn; ninsert->lsn = lsn;
if ( lenblk && blkno ) { if (lenblk && blkno)
{
ninsert->lenblk = lenblk; ninsert->lenblk = lenblk;
ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk ); ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
memcpy(ninsert->blkno, blkno, sizeof(BlockNumber)*ninsert->lenblk); memcpy(ninsert->blkno, blkno, sizeof(BlockNumber) * ninsert->lenblk);
ninsert->origblkno = *blkno; ninsert->origblkno = *blkno;
} else { }
int i; else
{
int i;
Assert( xlinfo ); Assert(xlinfo);
ninsert->lenblk = xlinfo->data->npage; ninsert->lenblk = xlinfo->data->npage;
ninsert->blkno = (BlockNumber*)palloc( sizeof(BlockNumber)*ninsert->lenblk ); ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
for(i=0;i<ninsert->lenblk;i++) for (i = 0; i < ninsert->lenblk; i++)
ninsert->blkno[i] = xlinfo->page[i].header->blkno; ninsert->blkno[i] = xlinfo->page[i].header->blkno;
ninsert->origblkno = xlinfo->data->origblkno; ninsert->origblkno = xlinfo->data->origblkno;
} }
Assert( ninsert->lenblk>0 ); Assert(ninsert->lenblk > 0);
incomplete_inserts = lappend(incomplete_inserts, ninsert); incomplete_inserts = lappend(incomplete_inserts, ninsert);
MemoryContextSwitchTo(oldCxt); MemoryContextSwitchTo(oldCxt);
} }
/*
 * Drop the first entry of incomplete_inserts that matches (node, key),
 * freeing the entry and its block-number array.  Does nothing when no
 * entry matches.
 */
static void
forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
{
	ListCell   *cell;

	foreach(cell, incomplete_inserts)
	{
		gistIncompleteInsert *pending = (gistIncompleteInsert *) lfirst(cell);

		if (!RelFileNodeEquals(node, pending->node))
			continue;
		if (!ItemPointerEQ(&(pending->key), &(key)))
			continue;

		/* found: release it and stop, the list was modified under us */
		pfree(pending->blkno);
		incomplete_inserts = list_delete_ptr(incomplete_inserts, pending);
		pfree(pending);
		break;
	}
}
/*
 * Split an entry-update WAL record into its parts.
 *
 * Layout as read here: a gistxlogEntryUpdate header, then (only when
 * ntodelete is non-zero) a MAXALIGN'ed array of offsets to delete, then
 * index tuples up to xl_len.  Pointers in *decoded reference the record
 * data directly; only the itup[] array is allocated.
 */
static void
decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record)
{
	char	   *base = XLogRecGetData(record);
	char	   *tuples;
	char	   *cur;
	int			skip = 0;
	int			n;

	decoded->data = (gistxlogEntryUpdate *) base;

	decoded->todelete = NULL;
	if (decoded->data->ntodelete)
	{
		decoded->todelete = (OffsetNumber *) (base + sizeof(gistxlogEntryUpdate));
		skip = MAXALIGN(sizeof(OffsetNumber) * decoded->data->ntodelete);
	}
	tuples = base + sizeof(gistxlogEntryUpdate) + skip;

	/* first pass: count the tuples so the pointer array can be sized */
	decoded->len = 0;
	for (cur = tuples; cur - base < record->xl_len; cur += IndexTupleSize((IndexTuple) cur))
		decoded->len++;

	decoded->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * decoded->len);

	/* second pass: same walk, this time recording each tuple's address */
	cur = tuples;
	for (n = 0; n < decoded->len; n++)
	{
		decoded->itup[n] = (IndexTuple) cur;
		cur += IndexTupleSize(decoded->itup[n]);
	}
}
...@@ -149,13 +167,14 @@ decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record) { ...@@ -149,13 +167,14 @@ decodeEntryUpdateRecord(EntryUpdateRecord *decoded, XLogRecord *record) {
* redo any page update (except page split) * redo any page update (except page split)
*/ */
static void static void
gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) { gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
EntryUpdateRecord xlrec; {
Relation reln; EntryUpdateRecord xlrec;
Buffer buffer; Relation reln;
Page page; Buffer buffer;
Page page;
decodeEntryUpdateRecord( &xlrec, record ); decodeEntryUpdateRecord(&xlrec, record);
reln = XLogOpenRelation(xlrec.data->node); reln = XLogOpenRelation(xlrec.data->node);
if (!RelationIsValid(reln)) if (!RelationIsValid(reln))
...@@ -165,49 +184,61 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) { ...@@ -165,49 +184,61 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
elog(PANIC, "block %u unfound", xlrec.data->blkno); elog(PANIC, "block %u unfound", xlrec.data->blkno);
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if ( isnewroot ) { if (isnewroot)
if ( !PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)) ) { {
if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return; return;
} }
} else { }
if ( PageIsNew((PageHeader) page) ) else
{
if (PageIsNew((PageHeader) page))
elog(PANIC, "uninitialized page %u", xlrec.data->blkno); elog(PANIC, "uninitialized page %u", xlrec.data->blkno);
if (XLByteLE(lsn, PageGetLSN(page))) { if (XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return; return;
} }
} }
if ( xlrec.data->isemptypage ) { if (xlrec.data->isemptypage)
while( !PageIsEmpty(page) ) {
PageIndexTupleDelete( page, FirstOffsetNumber ); while (!PageIsEmpty(page))
PageIndexTupleDelete(page, FirstOffsetNumber);
if ( xlrec.data->blkno == GIST_ROOT_BLKNO )
GistPageSetLeaf( page ); if (xlrec.data->blkno == GIST_ROOT_BLKNO)
GistPageSetLeaf(page);
else else
GistPageSetDeleted( page ); GistPageSetDeleted(page);
} else { }
if ( isnewroot ) else
{
if (isnewroot)
GISTInitBuffer(buffer, 0); GISTInitBuffer(buffer, 0);
else if ( xlrec.data->ntodelete ) { else if (xlrec.data->ntodelete)
int i; {
for(i=0; i < xlrec.data->ntodelete ; i++) int i;
for (i = 0; i < xlrec.data->ntodelete; i++)
PageIndexTupleDelete(page, xlrec.todelete[i]); PageIndexTupleDelete(page, xlrec.todelete[i]);
if ( GistPageIsLeaf(page) ) if (GistPageIsLeaf(page))
GistMarkTuplesDeleted(page); GistMarkTuplesDeleted(page);
} }
/* add tuples */ /* add tuples */
if ( xlrec.len > 0 ) if (xlrec.len > 0)
gistfillbuffer(reln, page, xlrec.itup, xlrec.len, InvalidOffsetNumber); gistfillbuffer(reln, page, xlrec.itup, xlrec.len, InvalidOffsetNumber);
/* special case: leafpage, nothing to insert, nothing to delete, then /*
vacuum marks page */ * special case: leafpage, nothing to insert, nothing to delete, then
if ( GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0 ) * vacuum marks page
GistClearTuplesDeleted(page); */
if (GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0)
GistClearTuplesDeleted(page);
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
...@@ -216,123 +247,135 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) { ...@@ -216,123 +247,135 @@ gistRedoEntryUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot) {
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
if ( ItemPointerIsValid( &(xlrec.data->key) ) ) { if (ItemPointerIsValid(&(xlrec.data->key)))
if ( incomplete_inserts != NIL ) {
if (incomplete_inserts != NIL)
forgetIncompleteInsert(xlrec.data->node, xlrec.data->key); forgetIncompleteInsert(xlrec.data->node, xlrec.data->key);
if ( !isnewroot && xlrec.data->blkno!=GIST_ROOT_BLKNO ) if (!isnewroot && xlrec.data->blkno != GIST_ROOT_BLKNO)
pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key, pushIncompleteInsert(xlrec.data->node, lsn, xlrec.data->key,
&(xlrec.data->blkno), 1, &(xlrec.data->blkno), 1,
NULL); NULL);
} }
} }
/*
 * Split a page-split WAL record into its parts.
 *
 * Layout as read here: a gistxlogPageSplit header followed by npage groups,
 * each a gistxlogPage header and then header->num index tuples.  Pointers
 * in *decoded reference the record data directly; the page[] array and each
 * page's itup[] array are allocated.
 */
static void
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
{
	char	   *base = XLogRecGetData(record);
	char	   *cur = base + sizeof(gistxlogPageSplit);
	int			pageno;

	decoded->data = (gistxlogPageSplit *) base;
	decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);

	for (pageno = 0; pageno < decoded->data->npage; pageno++)
	{
		NewPage    *np = &decoded->page[pageno];
		int			t;

		Assert(cur - base < record->xl_len);
		np->header = (gistxlogPage *) cur;
		cur += sizeof(gistxlogPage);

		np->itup = (IndexTuple *)
			palloc(sizeof(IndexTuple) * np->header->num);
		for (t = 0; t < np->header->num; t++)
		{
			Assert(cur - base < record->xl_len);
			np->itup[t] = (IndexTuple) cur;
			cur += IndexTupleSize((IndexTuple) cur);
		}
	}
}
/*
 * Redo a page split.
 *
 * Reads the leaf flag from the original page, then re-initializes and
 * refills every page named in the record whose LSN is older than the
 * record's.  If the record carries a valid key, the insert is (re)registered
 * as incomplete.
 */
static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
{
	PageSplitRecord split;
	Relation	rel;
	Buffer		buf;
	Page		pg;
	int			pageno;
	int			pageflags = 0;

	decodePageSplitRecord(&split, record);

	rel = XLogOpenRelation(split.data->node);
	if (!RelationIsValid(rel))
		return;

	/* first of all we need to get the F_LEAF flag from the original page */
	buf = XLogReadBuffer(false, rel, split.data->origblkno);
	if (!BufferIsValid(buf))
		elog(PANIC, "block %u unfound", split.data->origblkno);
	pg = (Page) BufferGetPage(buf);
	if (PageIsNew((PageHeader) pg))
		elog(PANIC, "uninitialized page %u", split.data->origblkno);

	if (GistPageIsLeaf(pg))
		pageflags = F_LEAF;
	else
		pageflags = 0;

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);

	/* loop around all pages */
	for (pageno = 0; pageno < split.data->npage; pageno++)
	{
		NewPage    *np = &split.page[pageno];

		/* every page except the original one is read with the first flag set */
		buf = XLogReadBuffer(np->header->blkno != split.data->origblkno,
							 rel, np->header->blkno);
		if (!BufferIsValid(buf))
			elog(PANIC, "block %u unfound", np->header->blkno);
		pg = (Page) BufferGetPage(buf);

		if (XLByteLE(lsn, PageGetLSN(pg)))
		{
			/* page LSN is not older than this record: leave it alone */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			ReleaseBuffer(buf);
			continue;
		}

		/* ok, clear buffer */
		GISTInitBuffer(buf, pageflags);

		/* and fill it */
		gistfillbuffer(rel, pg, np->itup, np->header->num, FirstOffsetNumber);

		PageSetLSN(pg, lsn);
		PageSetTLI(pg, ThisTimeLineID);
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		WriteBuffer(buf);
	}

	if (!ItemPointerIsValid(&(split.data->key)))
		return;

	if (incomplete_inserts != NIL)
		forgetIncompleteInsert(split.data->node, split.data->key);

	pushIncompleteInsert(split.data->node, lsn, split.data->key,
						 NULL, 0,
						 &split);
}
static void static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
RelFileNode *node = (RelFileNode*)XLogRecGetData(record); {
Relation reln; RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Buffer buffer; Relation reln;
Page page; Buffer buffer;
Page page;
reln = XLogOpenRelation(*node); reln = XLogOpenRelation(*node);
if (!RelationIsValid(reln)) if (!RelationIsValid(reln))
return; return;
buffer = XLogReadBuffer( true, reln, GIST_ROOT_BLKNO); buffer = XLogReadBuffer(true, reln, GIST_ROOT_BLKNO);
if (!BufferIsValid(buffer)) if (!BufferIsValid(buffer))
elog(PANIC, "root block unfound"); elog(PANIC, "root block unfound");
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page))) { if (!PageIsNew((PageHeader) page) && XLByteLE(lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return; return;
...@@ -343,46 +386,51 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { ...@@ -343,46 +386,51 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
/*
 * Redo an insert-complete record: the record is a gistxlogInsertComplete
 * header followed by ItemPointerData keys up to xl_len; each key's pending
 * entry for the header's relation is forgotten.
 */
static void
gistRedoCompleteInsert(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *base = XLogRecGetData(record);
	gistxlogInsertComplete *xlrec = (gistxlogInsertComplete *) base;
	char	   *cur;

	for (cur = base + sizeof(gistxlogInsertComplete);
		 cur - base < record->xl_len;
		 cur += sizeof(ItemPointerData))
	{
		/* a whole key must still fit in what is left of the record */
		Assert(record->xl_len - (cur - base) >= sizeof(ItemPointerData));
		forgetIncompleteInsert(xlrec->node, *((ItemPointerData *) cur));
	}
}
void void
gist_redo(XLogRecPtr lsn, XLogRecord *record) gist_redo(XLogRecPtr lsn, XLogRecord *record)
{ {
uint8 info = record->xl_info & ~XLR_INFO_MASK; uint8 info = record->xl_info & ~XLR_INFO_MASK;
MemoryContext oldCxt; MemoryContext oldCxt;
oldCxt = MemoryContextSwitchTo(opCtx); oldCxt = MemoryContextSwitchTo(opCtx);
switch (info) { switch (info)
case XLOG_GIST_ENTRY_UPDATE: {
case XLOG_GIST_ENTRY_DELETE: case XLOG_GIST_ENTRY_UPDATE:
gistRedoEntryUpdateRecord(lsn, record,false); case XLOG_GIST_ENTRY_DELETE:
gistRedoEntryUpdateRecord(lsn, record, false);
break; break;
case XLOG_GIST_NEW_ROOT: case XLOG_GIST_NEW_ROOT:
gistRedoEntryUpdateRecord(lsn, record,true); gistRedoEntryUpdateRecord(lsn, record, true);
break; break;
case XLOG_GIST_PAGE_SPLIT: case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(lsn, record); gistRedoPageSplitRecord(lsn, record);
break; break;
case XLOG_GIST_CREATE_INDEX: case XLOG_GIST_CREATE_INDEX:
gistRedoCreateIndex(lsn, record); gistRedoCreateIndex(lsn, record);
break; break;
case XLOG_GIST_INSERT_COMPLETE: case XLOG_GIST_INSERT_COMPLETE:
gistRedoCompleteInsert(lsn, record); gistRedoCompleteInsert(lsn, record);
break; break;
default: default:
...@@ -396,422 +444,478 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -396,422 +444,478 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
/*
 * Append "rel spc/db/rel; tid block/offset" for the given relation and
 * key to the NUL-terminated string already in buf.
 */
static void
out_target(char *buf, RelFileNode node, ItemPointerData key)
{
	char	   *tail = buf + strlen(buf);

	sprintf(tail, "rel %u/%u/%u; tid %u/%u",
			node.spcNode, node.dbNode, node.relNode,
			ItemPointerGetBlockNumber(&key),
			ItemPointerGetOffsetNumber(&key));
}
/*
 * Append the target description of an entry-update record to buf,
 * followed by the block number it applies to.
 */
static void
out_gistxlogEntryUpdate(char *buf, gistxlogEntryUpdate *xlrec)
{
	char	   *tail;

	out_target(buf, xlrec->node, xlrec->key);

	/* continue writing where out_target() stopped */
	tail = buf + strlen(buf);
	sprintf(tail, "; block number %u",
			xlrec->blkno);
}
/*
 * Append a description of a page-split record to buf: a "page_split: "
 * tag, the target, the original block number and the resulting page count.
 */
static void
out_gistxlogPageSplit(char *buf, gistxlogPageSplit *xlrec)
{
	char	   *tail;

	strcat(buf, "page_split: ");
	out_target(buf, xlrec->node, xlrec->key);

	/* continue writing where out_target() stopped */
	tail = buf + strlen(buf);
	sprintf(tail, "; block number %u splits to %d pages",
			xlrec->origblkno, xlrec->npage);
}
/*
 * Append a human-readable description of a GiST WAL record to buf.
 *
 * xl_info selects the record type after masking with ~XLR_INFO_MASK; rec
 * points at the record data.  An unrecognized type raises PANIC.
 */
void
gist_desc(char *buf, uint8 xl_info, char *rec)
{
	uint8		info = xl_info & ~XLR_INFO_MASK;

	switch (info)
	{
		case XLOG_GIST_ENTRY_UPDATE:
		case XLOG_GIST_ENTRY_DELETE:
			/* same payload, only the tag differs */
			strcat(buf, (info == XLOG_GIST_ENTRY_UPDATE) ? "entry_update: " : "entry_delete: ");
			out_gistxlogEntryUpdate(buf, (gistxlogEntryUpdate *) rec);
			break;
		case XLOG_GIST_NEW_ROOT:
			{
				gistxlogEntryUpdate *upd = (gistxlogEntryUpdate *) rec;

				strcat(buf, "new_root: ");
				out_target(buf, upd->node, upd->key);
			}
			break;
		case XLOG_GIST_PAGE_SPLIT:
			out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
			break;
		case XLOG_GIST_CREATE_INDEX:
			{
				RelFileNode *rnode = (RelFileNode *) rec;

				sprintf(buf + strlen(buf), "create_index: rel %u/%u/%u",
						rnode->spcNode,
						rnode->dbNode,
						rnode->relNode);
			}
			break;
		case XLOG_GIST_INSERT_COMPLETE:
			{
				gistxlogInsertComplete *done = (gistxlogInsertComplete *) rec;

				sprintf(buf + strlen(buf), "complete_insert: rel %u/%u/%u",
						done->node.spcNode,
						done->node.dbNode,
						done->node.relNode);
			}
			break;
		default:
			elog(PANIC, "gist_desc: unknown op code %u", info);
	}
}
IndexTuple IndexTuple
gist_form_invalid_tuple(BlockNumber blkno) { gist_form_invalid_tuple(BlockNumber blkno)
/* we don't alloc space for null's bitmap, this is invalid tuple, {
be carefull in read and write code */ /*
Size size = IndexInfoFindDataOffset(0); * we don't alloc space for null's bitmap, this is invalid tuple, be
IndexTuple tuple=(IndexTuple)palloc0( size ); * carefull in read and write code
*/
Size size = IndexInfoFindDataOffset(0);
IndexTuple tuple = (IndexTuple) palloc0(size);
tuple->t_info |= size; tuple->t_info |= size;
ItemPointerSetBlockNumber(&(tuple->t_tid), blkno); ItemPointerSetBlockNumber(&(tuple->t_tid), blkno);
GistTupleSetInvalid( tuple ); GistTupleSetInvalid(tuple);
return tuple; return tuple;
} }
/*
 * Read block blkno of r through XLogReadBuffer() and return the buffer.
 * Raises PANIC if the block cannot be read or the page is uninitialized.
 */
static Buffer
gistXLogReadAndLockBuffer(Relation r, BlockNumber blkno)
{
	Buffer		buf = XLogReadBuffer(false, r, blkno);
	Page		pg;

	if (!BufferIsValid(buf))
		elog(PANIC, "block %u unfound", blkno);

	pg = BufferGetPage(buf);
	if (PageIsNew((PageHeader) pg))
		elog(PANIC, "uninitialized page %u", blkno);

	return buf;
}
/*
 * Fill insert->path / insert->pathlen with the block numbers of the stack
 * returned by gistFindPath() for insert->origblkno, following ->parent
 * links from the returned element.  When no stack is returned, the path
 * stays empty and a LOG message is emitted.
 */
static void
gixtxlogFindPath(Relation index, gistIncompleteInsert *insert)
{
	GISTInsertStack *top;
	GISTInsertStack *node;
	int			depth;

	insert->pathlen = 0;
	insert->path = NULL;

	top = gistFindPath(index, insert->origblkno, gistXLogReadAndLockBuffer);
	if (top == NULL)
	{
		elog(LOG, "lost parent for block %u", insert->origblkno);
		return;
	}

	/* count the chain first so the array can be sized exactly */
	for (node = top; node; node = node->parent)
		insert->pathlen++;

	insert->path = (BlockNumber *) palloc(sizeof(BlockNumber) * insert->pathlen);

	depth = 0;
	for (node = top; node; node = node->parent)
		insert->path[depth++] = node->blkno;
}
/*
 * Continue an insert after a crash.  In the normal situation there are no
 * incomplete inserts, but after a crash the WAL may lack the record of
 * completion.
 *
 * Although the LSN stored in gistIncompleteInsert is the LSN of the child
 * page, we can compare it with the LSN of the parent, because the parent is
 * always locked while we change the child page (look at gistmakedeal).  So if
 * the parent's LSN is less than the stored lsn, the changes to the parent
 * have not been made yet.
 */
static void static void
gistContinueInsert(gistIncompleteInsert *insert) { gistContinueInsert(gistIncompleteInsert *insert)
IndexTuple *itup; {
int i, lenitup; IndexTuple *itup;
Relation index; int i,
lenitup;
Relation index;
index = XLogOpenRelation(insert->node); index = XLogOpenRelation(insert->node);
if (!RelationIsValid(index)) if (!RelationIsValid(index))
return; return;
/* needed vector itup never will be more than initial lenblkno+2, /*
because during this processing Indextuple can be only smaller */ * needed vector itup never will be more than initial lenblkno+2, because
lenitup = insert->lenblk; * during this processing Indextuple can be only smaller
itup = (IndexTuple*)palloc(sizeof(IndexTuple)*(lenitup+2 /*guarantee root split*/)); */
lenitup = insert->lenblk;
for(i=0;i<insert->lenblk;i++) itup = (IndexTuple *) palloc(sizeof(IndexTuple) * (lenitup + 2 /* guarantee root split */ ));
itup[i] = gist_form_invalid_tuple( insert->blkno[i] );
for (i = 0; i < insert->lenblk; i++)
if ( insert->origblkno==GIST_ROOT_BLKNO ) { itup[i] = gist_form_invalid_tuple(insert->blkno[i]);
/*it was split root, so we should only make new root.
it can't be simple insert into root, look at call if (insert->origblkno == GIST_ROOT_BLKNO)
pushIncompleteInsert in gistRedoPageSplitRecord */ {
Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO); /*
Page page; * it was split root, so we should only make new root. it can't be
* simple insert into root, look at call pushIncompleteInsert in
* gistRedoPageSplitRecord
*/
Buffer buffer = XLogReadBuffer(true, index, GIST_ROOT_BLKNO);
Page page;
if (!BufferIsValid(buffer)) if (!BufferIsValid(buffer))
elog(PANIC, "root block unfound"); elog(PANIC, "root block unfound");
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
if (XLByteLE(insert->lsn, PageGetLSN(page))) { if (XLByteLE(insert->lsn, PageGetLSN(page)))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return; return;
} }
GISTInitBuffer(buffer, 0); GISTInitBuffer(buffer, 0);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber); gistfillbuffer(index, page, itup, lenitup, FirstOffsetNumber);
PageSetLSN(page, insert->lsn); PageSetLSN(page, insert->lsn);
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} else { }
Buffer *buffers; else
Page *pages; {
int numbuffer; Buffer *buffers;
Page *pages;
int numbuffer;
/* construct path */ /* construct path */
gixtxlogFindPath( index, insert ); gixtxlogFindPath(index, insert);
Assert(insert->pathlen > 0);
Assert( insert->pathlen > 0 ); buffers = (Buffer *) palloc(sizeof(Buffer) * (insert->lenblk + 2 /* guarantee root split */ ));
pages = (Page *) palloc(sizeof(Page) * (insert->lenblk + 2 /* guarantee root split */ ));
buffers= (Buffer*) palloc( sizeof(Buffer) * (insert->lenblk+2/*guarantee root split*/) ); for (i = 0; i < insert->pathlen; i++)
pages = (Page*) palloc( sizeof(Page ) * (insert->lenblk+2/*guarantee root split*/) ); {
int j,
k,
pituplen = 0,
childfound = 0;
for(i=0;i<insert->pathlen;i++) { numbuffer = 1;
int j, k, pituplen=0, childfound=0; buffers[numbuffer - 1] = XLogReadBuffer(false, index, insert->path[i]);
if (!BufferIsValid(buffers[numbuffer - 1]))
numbuffer=1;
buffers[numbuffer-1] = XLogReadBuffer(false, index, insert->path[i]);
if (!BufferIsValid(buffers[numbuffer-1]))
elog(PANIC, "block %u unfound", insert->path[i]); elog(PANIC, "block %u unfound", insert->path[i]);
pages[numbuffer-1] = BufferGetPage( buffers[numbuffer-1] ); pages[numbuffer - 1] = BufferGetPage(buffers[numbuffer - 1]);
if ( PageIsNew((PageHeader)(pages[numbuffer-1])) ) if (PageIsNew((PageHeader) (pages[numbuffer - 1])))
elog(PANIC, "uninitialized page %u", insert->path[i]); elog(PANIC, "uninitialized page %u", insert->path[i]);
if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer-1]))) { if (XLByteLE(insert->lsn, PageGetLSN(pages[numbuffer - 1])))
LockBuffer(buffers[numbuffer-1], BUFFER_LOCK_UNLOCK); {
ReleaseBuffer(buffers[numbuffer-1]); LockBuffer(buffers[numbuffer - 1], BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffers[numbuffer - 1]);
return; return;
} }
pituplen = PageGetMaxOffsetNumber(pages[numbuffer-1]); pituplen = PageGetMaxOffsetNumber(pages[numbuffer - 1]);
/* remove old IndexTuples */ /* remove old IndexTuples */
for(j=0;j<pituplen && childfound<lenitup;j++) { for (j = 0; j < pituplen && childfound < lenitup; j++)
{
BlockNumber blkno; BlockNumber blkno;
ItemId iid = PageGetItemId(pages[numbuffer-1], j+FirstOffsetNumber); ItemId iid = PageGetItemId(pages[numbuffer - 1], j + FirstOffsetNumber);
IndexTuple idxtup = (IndexTuple) PageGetItem(pages[numbuffer-1], iid); IndexTuple idxtup = (IndexTuple) PageGetItem(pages[numbuffer - 1], iid);
blkno = ItemPointerGetBlockNumber( &(idxtup->t_tid) ); blkno = ItemPointerGetBlockNumber(&(idxtup->t_tid));
for(k=0;k<lenitup;k++) for (k = 0; k < lenitup; k++)
if ( ItemPointerGetBlockNumber( &(itup[k]->t_tid) ) == blkno ) { if (ItemPointerGetBlockNumber(&(itup[k]->t_tid)) == blkno)
PageIndexTupleDelete(pages[numbuffer-1], j+FirstOffsetNumber); {
j--; pituplen--; PageIndexTupleDelete(pages[numbuffer - 1], j + FirstOffsetNumber);
j--;
pituplen--;
childfound++; childfound++;
break; break;
} }
} }
if ( gistnospace(pages[numbuffer-1], itup, lenitup) ) { if (gistnospace(pages[numbuffer - 1], itup, lenitup))
{
/* no space left on page, so we should split */ /* no space left on page, so we should split */
buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW); buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW);
if (!BufferIsValid(buffers[numbuffer])) if (!BufferIsValid(buffers[numbuffer]))
elog(PANIC, "could not obtain new block"); elog(PANIC, "could not obtain new block");
GISTInitBuffer(buffers[numbuffer], 0); GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage( buffers[numbuffer] ); pages[numbuffer] = BufferGetPage(buffers[numbuffer]);
gistfillbuffer( index, pages[numbuffer], itup, lenitup, FirstOffsetNumber ); gistfillbuffer(index, pages[numbuffer], itup, lenitup, FirstOffsetNumber);
numbuffer++; numbuffer++;
if ( BufferGetBlockNumber( buffers[0] ) == GIST_ROOT_BLKNO ) { if (BufferGetBlockNumber(buffers[0]) == GIST_ROOT_BLKNO)
{
IndexTuple *parentitup; IndexTuple *parentitup;
/* we split root, just copy tuples from old root to new page */ /*
parentitup = gistextractbuffer(buffers[numbuffer-1], &pituplen); * we split root, just copy tuples from old root to new
* page
*/
parentitup = gistextractbuffer(buffers[numbuffer - 1], &pituplen);
/* sanity check */ /* sanity check */
if ( i+1 != insert->pathlen ) if (i + 1 != insert->pathlen)
elog(PANIC,"unexpected pathlen in index \"%s\"", elog(PANIC, "unexpected pathlen in index \"%s\"",
RelationGetRelationName( index )); RelationGetRelationName(index));
/* fill new page */ /* fill new page */
buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW); buffers[numbuffer] = XLogReadBuffer(true, index, P_NEW);
if (!BufferIsValid(buffers[numbuffer])) if (!BufferIsValid(buffers[numbuffer]))
elog(PANIC, "could not obtain new block"); elog(PANIC, "could not obtain new block");
GISTInitBuffer(buffers[numbuffer], 0); GISTInitBuffer(buffers[numbuffer], 0);
pages[numbuffer] = BufferGetPage( buffers[numbuffer] ); pages[numbuffer] = BufferGetPage(buffers[numbuffer]);
gistfillbuffer(index, pages[numbuffer], parentitup, pituplen, FirstOffsetNumber); gistfillbuffer(index, pages[numbuffer], parentitup, pituplen, FirstOffsetNumber);
numbuffer++; numbuffer++;
/* fill root page */ /* fill root page */
GISTInitBuffer(buffers[0], 0); GISTInitBuffer(buffers[0], 0);
for(j=1;j<numbuffer;j++) { for (j = 1; j < numbuffer; j++)
IndexTuple tuple = gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) ); {
if (PageAddItem(pages[0], IndexTuple tuple = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
(Item)tuple,
IndexTupleSize( tuple ), if (PageAddItem(pages[0],
(OffsetNumber)j, (Item) tuple,
LP_USED) == InvalidOffsetNumber) IndexTupleSize(tuple),
(OffsetNumber) j,
LP_USED) == InvalidOffsetNumber)
elog(PANIC, "failed to add item to index page in \"%s\"", elog(PANIC, "failed to add item to index page in \"%s\"",
RelationGetRelationName( index )); RelationGetRelationName(index));
} }
} }
} else }
gistfillbuffer( index, pages[numbuffer-1], itup, lenitup, InvalidOffsetNumber); else
gistfillbuffer(index, pages[numbuffer - 1], itup, lenitup, InvalidOffsetNumber);
lenitup=numbuffer; lenitup = numbuffer;
for(j=0;j<numbuffer;j++) { for (j = 0; j < numbuffer; j++)
itup[j]=gist_form_invalid_tuple( BufferGetBlockNumber( buffers[j] ) ); {
itup[j] = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
PageSetLSN(pages[j], insert->lsn); PageSetLSN(pages[j], insert->lsn);
PageSetTLI(pages[j], ThisTimeLineID); PageSetTLI(pages[j], ThisTimeLineID);
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber; GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK); LockBuffer(buffers[j], BUFFER_LOCK_UNLOCK);
WriteBuffer( buffers[j] ); WriteBuffer(buffers[j]);
} }
} }
} }
ereport(LOG, ereport(LOG,
(errmsg("index %u/%u/%u needs VACUUM or REINDEX to finish crash recovery", (errmsg("index %u/%u/%u needs VACUUM or REINDEX to finish crash recovery",
insert->node.spcNode, insert->node.dbNode, insert->node.relNode), insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
errdetail("Incomplete insertion detected during crash replay."))); errdetail("Incomplete insertion detected during crash replay.")));
} }
/*
 * Initialize the state used while replaying GiST WAL records: empty the
 * list of incomplete inserts and create the two memory contexts that
 * gist_xlog_cleanup later switches into and deletes.
 */
void
gist_xlog_startup(void)
{
	/* no incomplete inserts are known yet */
	incomplete_inserts = NIL;

	insertCtx = AllocSetContextCreate(CurrentMemoryContext,
									  "GiST recovery temporary context",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);

	opCtx = createTempGistContext();
}
void void
gist_xlog_cleanup(void) { gist_xlog_cleanup(void)
{
ListCell *l; ListCell *l;
List *reverse=NIL; List *reverse = NIL;
MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx); MemoryContext oldCxt = MemoryContextSwitchTo(insertCtx);
/* we should call gistContinueInsert in reverse order */ /* we should call gistContinueInsert in reverse order */
foreach(l, incomplete_inserts) foreach(l, incomplete_inserts)
reverse = lappend(reverse, lfirst(l)); reverse = lappend(reverse, lfirst(l));
MemoryContextSwitchTo(opCtx); MemoryContextSwitchTo(opCtx);
foreach(l, reverse) { foreach(l, reverse)
gistIncompleteInsert *insert = (gistIncompleteInsert*) lfirst(l); {
gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);
gistContinueInsert(insert); gistContinueInsert(insert);
MemoryContextReset(opCtx); MemoryContextReset(opCtx);
} }
MemoryContextSwitchTo(oldCxt); MemoryContextSwitchTo(oldCxt);
MemoryContextDelete(opCtx); MemoryContextDelete(opCtx);
MemoryContextDelete(insertCtx); MemoryContextDelete(insertCtx);
} }
XLogRecData * XLogRecData *
formSplitRdata(RelFileNode node, BlockNumber blkno, formSplitRdata(RelFileNode node, BlockNumber blkno,
ItemPointer key, SplitedPageLayout *dist ) { ItemPointer key, SplitedPageLayout *dist)
{
XLogRecData *rdata;
gistxlogPageSplit *xlrec = (gistxlogPageSplit*)palloc(sizeof(gistxlogPageSplit)); XLogRecData *rdata;
SplitedPageLayout *ptr; gistxlogPageSplit *xlrec = (gistxlogPageSplit *) palloc(sizeof(gistxlogPageSplit));
int npage = 0, cur=1; SplitedPageLayout *ptr;
int npage = 0,
ptr=dist; cur = 1;
while( ptr ) {
ptr = dist;
while (ptr)
{
npage++; npage++;
ptr=ptr->next; ptr = ptr->next;
} }
rdata = (XLogRecData*)palloc(sizeof(XLogRecData)*(npage*2 + 2)); rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (npage * 2 + 2));
xlrec->node = node; xlrec->node = node;
xlrec->origblkno = blkno; xlrec->origblkno = blkno;
xlrec->npage = (uint16)npage; xlrec->npage = (uint16) npage;
if ( key ) if (key)
xlrec->key = *key; xlrec->key = *key;
else else
ItemPointerSetInvalid( &(xlrec->key) ); ItemPointerSetInvalid(&(xlrec->key));
rdata[0].buffer = InvalidBuffer; rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) xlrec; rdata[0].data = (char *) xlrec;
rdata[0].len = sizeof( gistxlogPageSplit ); rdata[0].len = sizeof(gistxlogPageSplit);
rdata[0].next = NULL; rdata[0].next = NULL;
ptr=dist; ptr = dist;
while(ptr) { while (ptr)
{
rdata[cur].buffer = InvalidBuffer; rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)&(ptr->block); rdata[cur].data = (char *) &(ptr->block);
rdata[cur].len = sizeof(gistxlogPage); rdata[cur].len = sizeof(gistxlogPage);
rdata[cur-1].next = &(rdata[cur]); rdata[cur - 1].next = &(rdata[cur]);
cur++; cur++;
rdata[cur].buffer = InvalidBuffer; rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)(ptr->list); rdata[cur].data = (char *) (ptr->list);
rdata[cur].len = ptr->lenlist; rdata[cur].len = ptr->lenlist;
rdata[cur-1].next = &(rdata[cur]); rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].next=NULL; rdata[cur].next = NULL;
cur++; cur++;
ptr=ptr->next; ptr = ptr->next;
} }
return rdata; return rdata;
} }
XLogRecData * XLogRecData *
formUpdateRdata(RelFileNode node, BlockNumber blkno, formUpdateRdata(RelFileNode node, BlockNumber blkno,
OffsetNumber *todelete, int ntodelete, bool emptypage, OffsetNumber *todelete, int ntodelete, bool emptypage,
IndexTuple *itup, int ituplen, ItemPointer key ) { IndexTuple *itup, int ituplen, ItemPointer key)
XLogRecData *rdata; {
gistxlogEntryUpdate *xlrec = (gistxlogEntryUpdate*)palloc(sizeof(gistxlogEntryUpdate)); XLogRecData *rdata;
gistxlogEntryUpdate *xlrec = (gistxlogEntryUpdate *) palloc(sizeof(gistxlogEntryUpdate));
xlrec->node = node; xlrec->node = node;
xlrec->blkno = blkno; xlrec->blkno = blkno;
if ( key ) if (key)
xlrec->key = *key; xlrec->key = *key;
else else
ItemPointerSetInvalid( &(xlrec->key) ); ItemPointerSetInvalid(&(xlrec->key));
if ( emptypage ) { if (emptypage)
{
xlrec->isemptypage = true; xlrec->isemptypage = true;
xlrec->ntodelete = 0; xlrec->ntodelete = 0;
rdata = (XLogRecData*)palloc( sizeof(XLogRecData) ); rdata = (XLogRecData *) palloc(sizeof(XLogRecData));
rdata->buffer = InvalidBuffer; rdata->buffer = InvalidBuffer;
rdata->data = (char*)xlrec; rdata->data = (char *) xlrec;
rdata->len = sizeof(gistxlogEntryUpdate); rdata->len = sizeof(gistxlogEntryUpdate);
rdata->next = NULL; rdata->next = NULL;
} else { }
int cur=1,i; else
{
int cur = 1,
i;
xlrec->isemptypage = false; xlrec->isemptypage = false;
xlrec->ntodelete = ntodelete; xlrec->ntodelete = ntodelete;
rdata = (XLogRecData*) palloc( sizeof(XLogRecData) * ( 2 + ituplen ) ); rdata = (XLogRecData *) palloc(sizeof(XLogRecData) * (2 + ituplen));
rdata->buffer = InvalidBuffer; rdata->buffer = InvalidBuffer;
rdata->data = (char*)xlrec; rdata->data = (char *) xlrec;
rdata->len = sizeof(gistxlogEntryUpdate); rdata->len = sizeof(gistxlogEntryUpdate);
rdata->next = NULL; rdata->next = NULL;
if ( ntodelete ) { if (ntodelete)
rdata[cur-1].next = &(rdata[cur]); {
rdata[cur - 1].next = &(rdata[cur]);
rdata[cur].buffer = InvalidBuffer; rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)todelete; rdata[cur].data = (char *) todelete;
rdata[cur].len = MAXALIGN(sizeof(OffsetNumber)*ntodelete); rdata[cur].len = MAXALIGN(sizeof(OffsetNumber) * ntodelete);
rdata[cur].next = NULL; rdata[cur].next = NULL;
cur++; cur++;
} }
/* new tuples */ /* new tuples */
for(i=0;i<ituplen;i++) { for (i = 0; i < ituplen; i++)
{
rdata[cur].buffer = InvalidBuffer; rdata[cur].buffer = InvalidBuffer;
rdata[cur].data = (char*)(itup[i]); rdata[cur].data = (char *) (itup[i]);
rdata[cur].len = IndexTupleSize(itup[i]); rdata[cur].len = IndexTupleSize(itup[i]);
rdata[cur].next = NULL; rdata[cur].next = NULL;
rdata[cur-1].next = &(rdata[cur]); rdata[cur - 1].next = &(rdata[cur]);
cur++; cur++;
} }
} }
...@@ -819,29 +923,30 @@ formUpdateRdata(RelFileNode node, BlockNumber blkno, ...@@ -819,29 +923,30 @@ formUpdateRdata(RelFileNode node, BlockNumber blkno,
return rdata; return rdata;
} }
XLogRecPtr XLogRecPtr
gistxlogInsertCompletion(RelFileNode node, ItemPointerData *keys, int len) { gistxlogInsertCompletion(RelFileNode node, ItemPointerData *keys, int len)
gistxlogInsertComplete xlrec; {
XLogRecData rdata[2]; gistxlogInsertComplete xlrec;
XLogRecPtr recptr; XLogRecData rdata[2];
XLogRecPtr recptr;
Assert(len>0); Assert(len > 0);
xlrec.node = node; xlrec.node = node;
rdata[0].buffer = InvalidBuffer; rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &xlrec; rdata[0].data = (char *) &xlrec;
rdata[0].len = sizeof( gistxlogInsertComplete ); rdata[0].len = sizeof(gistxlogInsertComplete);
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) keys; rdata[1].data = (char *) keys;
rdata[1].len = sizeof( ItemPointerData ) * len; rdata[1].len = sizeof(ItemPointerData) * len;
rdata[1].next = NULL; rdata[1].next = NULL;
START_CRIT_SECTION(); START_CRIT_SECTION();
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_INSERT_COMPLETE, rdata); recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_INSERT_COMPLETE, rdata);
END_CRIT_SECTION(); END_CRIT_SECTION();
return recptr; return recptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment