Commit 0454f131 authored by Tom Lane

Rewrite the rbtree routines so that an RBNode is the first field of the
struct representing a tree entry, rather than being a separately allocated
piece of storage.  This API is at least as clean as the old one (if not
more so --- there were some bizarre choices in there) and it permits a
very substantial memory savings, on the order of 2X in ginbulk.c's usage.

Also, fix minor memory leaks in code called by ginEntryInsert, in
particular in ginInsertValue and entryFillRoot, as well as ginEntryInsert
itself.  These leaks resulted in the GIN index build context continuing
to bloat even after we'd filled it to maintenance_work_mem and started
to dump data out to the index.

In combination these fixes restore the GIN index build code to honoring
the maintenance_work_mem limit about as well as it did in 8.4.  Speed
seems on par with 8.4 too, maybe even a bit faster, for a non-pathological
case in which HEAD was formerly slower.

Back-patch to 9.0 so we don't have a performance regression from 8.4.
parent afc2900f
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.15 2010/01/02 16:57:33 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.16 2010/08/01 02:12:42 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -267,6 +267,8 @@ findParents(GinBtree btree, GinBtreeStack *stack, ...@@ -267,6 +267,8 @@ findParents(GinBtree btree, GinBtreeStack *stack,
/* /*
* Insert value (stored in GinBtree) to tree described by stack * Insert value (stored in GinBtree) to tree described by stack
*
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
*/ */
void void
ginInsertValue(GinBtree btree, GinBtreeStack *stack) ginInsertValue(GinBtree btree, GinBtreeStack *stack)
...@@ -308,10 +310,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) ...@@ -308,10 +310,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
PageSetTLI(page, ThisTimeLineID); PageSetTLI(page, ThisTimeLineID);
} }
UnlockReleaseBuffer(stack->buffer); LockBuffer(stack->buffer, GIN_UNLOCK);
END_CRIT_SECTION(); END_CRIT_SECTION();
freeGinBtreeStack(stack->parent); freeGinBtreeStack(stack);
return; return;
} }
else else
...@@ -325,7 +328,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) ...@@ -325,7 +328,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
*/ */
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata); newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno; ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
parent = stack->parent; parent = stack->parent;
...@@ -341,7 +343,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) ...@@ -341,7 +343,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE; ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber; ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
page = BufferGetPage(stack->buffer); page = BufferGetPage(stack->buffer);
lpage = BufferGetPage(lbuffer); lpage = BufferGetPage(lbuffer);
rpage = BufferGetPage(rbuffer); rpage = BufferGetPage(rbuffer);
...@@ -375,10 +376,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) ...@@ -375,10 +376,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
UnlockReleaseBuffer(rbuffer); UnlockReleaseBuffer(rbuffer);
UnlockReleaseBuffer(lbuffer); UnlockReleaseBuffer(lbuffer);
UnlockReleaseBuffer(stack->buffer); LockBuffer(stack->buffer, GIN_UNLOCK);
END_CRIT_SECTION(); END_CRIT_SECTION();
freeGinBtreeStack(stack);
return; return;
} }
else else
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.19 2010/02/26 02:00:33 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.20 2010/08/01 02:12:42 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,17 +19,21 @@ ...@@ -19,17 +19,21 @@
#include "utils/memutils.h" #include "utils/memutils.h"
#define DEF_NENTRY 2048 #define DEF_NENTRY 2048 /* EntryAccumulator allocation quantum */
#define DEF_NPTR 4 #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
static void *
ginAppendData(void *old, void *new, void *arg)
{
EntryAccumulator *eo = (EntryAccumulator *) old,
*en = (EntryAccumulator *) new;
/* Combiner function for rbtree.c */
static void
ginCombineData(RBNode *existing, const RBNode *newdata, void *arg)
{
EntryAccumulator *eo = (EntryAccumulator *) existing;
const EntryAccumulator *en = (const EntryAccumulator *) newdata;
BuildAccumulator *accum = (BuildAccumulator *) arg; BuildAccumulator *accum = (BuildAccumulator *) arg;
/*
* Note this code assumes that newdata contains only one itempointer.
*/
if (eo->number >= eo->length) if (eo->number >= eo->length)
{ {
accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
...@@ -53,29 +57,57 @@ ginAppendData(void *old, void *new, void *arg) ...@@ -53,29 +57,57 @@ ginAppendData(void *old, void *new, void *arg)
eo->list[eo->number] = en->list[0]; eo->list[eo->number] = en->list[0];
eo->number++; eo->number++;
return old;
} }
/* Comparator function for rbtree.c */
static int static int
cmpEntryAccumulator(const void *a, const void *b, void *arg) cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg)
{ {
EntryAccumulator *ea = (EntryAccumulator *) a; const EntryAccumulator *ea = (const EntryAccumulator *) a;
EntryAccumulator *eb = (EntryAccumulator *) b; const EntryAccumulator *eb = (const EntryAccumulator *) b;
BuildAccumulator *accum = (BuildAccumulator *) arg; BuildAccumulator *accum = (BuildAccumulator *) arg;
return compareAttEntries(accum->ginstate, ea->attnum, ea->value, return compareAttEntries(accum->ginstate, ea->attnum, ea->value,
eb->attnum, eb->value); eb->attnum, eb->value);
} }
/*
 * Node allocator callback for rbtree.c.
 *
 * "arg" is the BuildAccumulator that owns the tree.  Tree entries are carved
 * out of arrays of DEF_NENTRY EntryAccumulators instead of being palloc'd one
 * at a time, which cuts per-allocation overhead.  Individual entries are
 * never given back (the tree is created without a freefunc), so handing out
 * slots sequentially is sufficient.
 */
static RBNode *
ginAllocEntryAccumulator(void *arg)
{
	BuildAccumulator *ba = (BuildAccumulator *) arg;

	/* Start a fresh chunk if there is none yet or the current one is used up */
	if (ba->entryallocator == NULL || ba->length >= DEF_NENTRY)
	{
		EntryAccumulator *chunk;

		chunk = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);

		ba->entryallocator = chunk;
		/* charge the whole chunk against the accumulator's memory budget */
		ba->allocatedMemory += GetMemoryChunkSpace(chunk);
		ba->length = 0;
	}

	/* Hand out the next unused slot of the current chunk */
	return (RBNode *) &ba->entryallocator[ba->length++];
}
void void
ginInitBA(BuildAccumulator *accum) ginInitBA(BuildAccumulator *accum)
{ {
accum->allocatedMemory = 0; accum->allocatedMemory = 0;
accum->length = 0;
accum->entryallocator = NULL; accum->entryallocator = NULL;
accum->tree = rb_create(cmpEntryAccumulator, ginAppendData, NULL, accum); accum->tree = rb_create(sizeof(EntryAccumulator),
accum->iterator = NULL; cmpEntryAccumulator,
accum->tmpList = NULL; ginCombineData,
ginAllocEntryAccumulator,
NULL, /* no freefunc needed */
(void *) accum);
} }
/* /*
...@@ -104,55 +136,41 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) ...@@ -104,55 +136,41 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
static void static void
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry) ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry)
{ {
EntryAccumulator *key, EntryAccumulator key;
*ea; EntryAccumulator *ea;
bool isNew;
/* /*
* Allocate memory by rather big chunk to decrease overhead, we don't keep * For the moment, fill only the fields of key that will be looked at
* pointer to previously allocated chunks because they will free by * by cmpEntryAccumulator or ginCombineData.
* MemoryContextReset() call.
*/ */
if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY) key.attnum = attnum;
{ key.value = entry;
accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY); /* temporarily set up single-entry itempointer list */
accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); key.list = heapptr;
accum->length = 0;
}
/* "Allocate" new key in chunk */ ea = (EntryAccumulator *) rb_insert(accum->tree, (RBNode *) &key, &isNew);
key = accum->entryallocator + accum->length;
accum->length++;
key->attnum = attnum; if (isNew)
key->value = entry;
/* To prevent multiple palloc/pfree cycles, we reuse array */
if (accum->tmpList == NULL)
accum->tmpList =
(ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
key->list = accum->tmpList;
key->list[0] = *heapptr;
ea = rb_insert(accum->tree, key);
if (ea == NULL)
{ {
/* /*
* The key has been inserted, so continue initialization. * Finish initializing new tree entry, including making permanent
* copies of the datum and itempointer.
*/ */
key->value = getDatumCopy(accum, attnum, entry); ea->value = getDatumCopy(accum, attnum, entry);
key->length = DEF_NPTR; ea->length = DEF_NPTR;
key->number = 1; ea->number = 1;
key->shouldSort = FALSE; ea->shouldSort = FALSE;
accum->allocatedMemory += GetMemoryChunkSpace(key->list); ea->list =
accum->tmpList = NULL; (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
ea->list[0] = *heapptr;
accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
} }
else else
{ {
/* /*
* The key has been appended, so "free" allocated key by decrementing * ginCombineData did everything needed.
* chunk's counter.
*/ */
accum->length--;
} }
} }
...@@ -214,16 +232,20 @@ qsortCompareItemPointers(const void *a, const void *b) ...@@ -214,16 +232,20 @@ qsortCompareItemPointers(const void *a, const void *b)
return res; return res;
} }
/*
 * Prepare to read out the rbtree contents using ginGetEntry.
 *
 * Starts a LeftRightWalk iteration over accum->tree.  ginGetEntry only
 * fetches the next entry from the tree and does not begin the iteration
 * itself, so call this once before each ginGetEntry read-out loop.
 */
void
ginBeginBAScan(BuildAccumulator *accum)
{
rb_begin_iterate(accum->tree, LeftRightWalk);
}
ItemPointerData * ItemPointerData *
ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n) ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n)
{ {
EntryAccumulator *entry; EntryAccumulator *entry;
ItemPointerData *list; ItemPointerData *list;
if (accum->iterator == NULL) entry = (EntryAccumulator *) rb_iterate(accum->tree);
accum->iterator = rb_begin_iterate(accum->tree, LeftRightWalk);
entry = rb_iterate(accum->iterator);
if (entry == NULL) if (entry == NULL)
return NULL; return NULL;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.24 2010/02/26 02:00:33 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.25 2010/08/01 02:12:42 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -615,7 +615,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR ...@@ -615,7 +615,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
} }
/* /*
* return newly allocate rightmost tuple * return newly allocated rightmost tuple
*/ */
IndexTuple IndexTuple
ginPageGetLinkItup(Buffer buf) ginPageGetLinkItup(Buffer buf)
...@@ -646,10 +646,12 @@ entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) ...@@ -646,10 +646,12 @@ entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
itup = ginPageGetLinkItup(lbuf); itup = ginPageGetLinkItup(lbuf);
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page"); elog(ERROR, "failed to add item to index root page");
pfree(itup);
itup = ginPageGetLinkItup(rbuf); itup = ginPageGetLinkItup(rbuf);
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page"); elog(ERROR, "failed to add item to index root page");
pfree(itup);
} }
void void
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.7 2010/02/11 14:29:50 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.8 2010/08/01 02:12:42 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -786,6 +786,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, ...@@ -786,6 +786,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
* significant amount of time - so, run it without locking pending * significant amount of time - so, run it without locking pending
* list. * list.
*/ */
ginBeginBAScan(&accum);
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
{ {
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
...@@ -820,6 +821,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, ...@@ -820,6 +821,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
ginInitBA(&accum); ginInitBA(&accum);
processPendingPage(&accum, &datums, page, maxoff + 1); processPendingPage(&accum, &datums, page, maxoff + 1);
ginBeginBAScan(&accum);
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE); ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.26 2010/02/11 14:29:50 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.27 2010/08/01 02:12:42 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -176,6 +176,7 @@ ginEntryInsert(Relation index, GinState *ginstate, ...@@ -176,6 +176,7 @@ ginEntryInsert(Relation index, GinState *ginstate,
gdi = prepareScanPostingTree(index, rootPostingTree, FALSE); gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
gdi->btree.isBuild = isBuild; gdi->btree.isBuild = isBuild;
insertItemPointer(gdi, items, nitem); insertItemPointer(gdi, items, nitem);
pfree(gdi);
return; return;
} }
...@@ -254,6 +255,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, ...@@ -254,6 +255,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
uint32 nlist; uint32 nlist;
OffsetNumber attnum; OffsetNumber attnum;
ginBeginBAScan(&buildstate->accum);
while ((list = ginGetEntry(&buildstate->accum, &attnum, &entry, &nlist)) != NULL) while ((list = ginGetEntry(&buildstate->accum, &attnum, &entry, &nlist)) != NULL)
{ {
/* there could be many entries, so be willing to abort here */ /* there could be many entries, so be willing to abort here */
...@@ -360,6 +362,7 @@ ginbuild(PG_FUNCTION_ARGS) ...@@ -360,6 +362,7 @@ ginbuild(PG_FUNCTION_ARGS)
/* dump remaining entries to the index */ /* dump remaining entries to the index */
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
ginBeginBAScan(&buildstate.accum);
while ((list = ginGetEntry(&buildstate.accum, &attnum, &entry, &nlist)) != NULL) while ((list = ginGetEntry(&buildstate.accum, &attnum, &entry, &nlist)) != NULL)
{ {
/* there could be many entries, so be willing to abort here */ /* there could be many entries, so be willing to abort here */
......
...@@ -17,10 +17,10 @@ ...@@ -17,10 +17,10 @@
* longest path from root to leaf is only about twice as long as the shortest, * longest path from root to leaf is only about twice as long as the shortest,
* so lookups are guaranteed to run in O(lg n) time. * so lookups are guaranteed to run in O(lg n) time.
* *
* Copyright (c) 1996-2009, PostgreSQL Global Development Group * Copyright (c) 2009-2010, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.3 2010/02/26 02:01:14 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.4 2010/08/01 02:12:42 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -28,12 +28,12 @@ ...@@ -28,12 +28,12 @@
#include "utils/rbtree.h" #include "utils/rbtree.h"
/**********************************************************************
* Declarations *
**********************************************************************/
/* /*
* Values for RBNode->iteratorState * Values of RBNode.iteratorState
*
* Note that iteratorState has an undefined value except in nodes that are
* currently being visited by an active iteration.
*/ */
#define InitialState (0) #define InitialState (0)
#define FirstStepDone (1) #define FirstStepDone (1)
...@@ -41,81 +41,130 @@ ...@@ -41,81 +41,130 @@
#define ThirdStepDone (3) #define ThirdStepDone (3)
/* /*
* Colors of node * Colors of nodes (values of RBNode.color)
*/ */
#define RBBLACK (0) #define RBBLACK (0)
#define RBRED (1) #define RBRED (1)
typedef struct RBNode /*
{ * RBTree control structure
uint32 iteratorState:2, */
color: 1,
unused:29;
struct RBNode *left;
struct RBNode *right;
struct RBNode *parent;
void *data;
} RBNode;
struct RBTree struct RBTree
{ {
RBNode *root; RBNode *root; /* root node, or RBNIL if tree is empty */
/* Iteration state */
RBNode *cur; /* current iteration node */
RBNode *(*iterate) (RBTree *rb);
/* Remaining fields are constant after rb_create */
Size node_size; /* actual size of tree nodes */
/* The caller-supplied manipulation functions */
rb_comparator comparator; rb_comparator comparator;
rb_appendator appendator; rb_combiner combiner;
rb_allocfunc allocfunc;
rb_freefunc freefunc; rb_freefunc freefunc;
/* Passthrough arg passed to all manipulation functions */
void *arg; void *arg;
}; };
struct RBTreeIterator
{
RBNode *node;
void *(*iterate) (RBTreeIterator *iterator);
};
/* /*
* all leafs are sentinels, use customized NIL name to prevent * all leafs are sentinels, use customized NIL name to prevent
* collision with sytem-wide NIL which is actually NULL * collision with system-wide constant NIL which is actually NULL
*/ */
#define RBNIL &sentinel #define RBNIL (&sentinel)
RBNode sentinel = {InitialState, RBBLACK, 0, RBNIL, RBNIL, NULL, NULL}; static RBNode sentinel = {InitialState, RBBLACK, RBNIL, RBNIL, NULL};
/**********************************************************************
* Create *
**********************************************************************/
/*
* rb_create: create an empty RBTree
*
* Arguments are:
* node_size: actual size of tree nodes (> sizeof(RBNode))
* The manipulation functions:
* comparator: compare two RBNodes for less/equal/greater
* combiner: merge an existing tree entry with a new one
* allocfunc: allocate a new RBNode
* freefunc: free an old RBNode
* arg: passthrough pointer that will be passed to the manipulation functions
*
* Note that the combiner's righthand argument will be a "proposed" tree node,
* ie the input to rb_insert, in which the RBNode fields themselves aren't
* valid. Similarly, either input to the comparator may be a "proposed" node.
* This shouldn't matter since the functions aren't supposed to look at the
* RBNode fields, only the extra fields of the struct the RBNode is embedded
* in.
*
* The freefunc should just be pfree or equivalent; it should NOT attempt
* to free any subsidiary data, because the node passed to it may not contain
* valid data! freefunc can be NULL if caller doesn't require retail
* space reclamation.
*
* The RBTree node is palloc'd in the caller's memory context. Note that
* all contents of the tree are actually allocated by the caller, not here.
*
* Since tree contents are managed by the caller, there is currently not
* an explicit "destroy" operation; typically a tree would be freed by
* resetting or deleting the memory context it's stored in. You can pfree
* the RBTree node if you feel the urge.
*/
RBTree * RBTree *
rb_create(rb_comparator comparator, rb_appendator appendator, rb_create(Size node_size,
rb_freefunc freefunc, void *arg) rb_comparator comparator,
rb_combiner combiner,
rb_allocfunc allocfunc,
rb_freefunc freefunc,
void *arg)
{ {
RBTree *tree = palloc(sizeof(RBTree)); RBTree *tree = (RBTree *) palloc(sizeof(RBTree));
Assert(node_size > sizeof(RBNode));
tree->root = RBNIL; tree->root = RBNIL;
tree->cur = RBNIL;
tree->iterate = NULL;
tree->node_size = node_size;
tree->comparator = comparator; tree->comparator = comparator;
tree->appendator = appendator; tree->combiner = combiner;
tree->allocfunc = allocfunc;
tree->freefunc = freefunc; tree->freefunc = freefunc;
tree->arg = arg; tree->arg = arg;
return tree; return tree;
} }
/*
 * Copy the caller-defined payload of one tree node into another.
 *
 * Only the bytes following the embedded RBNode header are transferred
 * (rb->node_size minus sizeof(RBNode) of them); the link, color and iterator
 * fields of "dest" are left as they are.
 */
static inline void
rb_copy_data(RBTree *rb, RBNode *dest, const RBNode *src)
{
	char	   *to = (char *) dest + sizeof(RBNode);
	const char *from = (const char *) src + sizeof(RBNode);

	memcpy(to, from, rb->node_size - sizeof(RBNode));
}
/********************************************************************** /**********************************************************************
* Search * * Search *
**********************************************************************/ **********************************************************************/
void * /*
rb_find(RBTree *rb, void *data) * rb_find: search for a value in an RBTree
*
* data represents the value to try to find. Its RBNode fields need not
* be valid, it's the extra data in the larger struct that is of interest.
*
* Returns the matching tree entry, or NULL if no match is found.
*/
RBNode *
rb_find(RBTree *rb, const RBNode *data)
{ {
RBNode *node = rb->root; RBNode *node = rb->root;
int cmp;
while (node != RBNIL) while (node != RBNIL)
{ {
cmp = rb->comparator(data, node->data, rb->arg); int cmp = rb->comparator(data, node, rb->arg);
if (cmp == 0) if (cmp == 0)
return node->data; return node;
else if (cmp < 0) else if (cmp < 0)
node = node->left; node = node->left;
else else
...@@ -125,6 +174,32 @@ rb_find(RBTree *rb, void *data) ...@@ -125,6 +174,32 @@ rb_find(RBTree *rb, void *data)
return NULL; return NULL;
} }
/*
 * rb_leftmost: return the leftmost (smallest-valued) node of the tree,
 * or NULL when the tree has no entries.
 *
 * The node stays in the tree.  The original implementation also unlinked
 * it, which was awkward; callers wanting that behavior should pass the
 * result to rb_delete themselves.
 */
RBNode *
rb_leftmost(RBTree *rb)
{
	RBNode	   *cur = rb->root;

	/* An empty tree has the sentinel as its root */
	if (cur == RBNIL)
		return NULL;

	/* Follow left links until the next one would be the sentinel */
	while (cur->left != RBNIL)
		cur = cur->left;

	return cur;
}
/********************************************************************** /**********************************************************************
* Insertion * * Insertion *
**********************************************************************/ **********************************************************************/
...@@ -309,13 +384,24 @@ rb_insert_fixup(RBTree *rb, RBNode *x) ...@@ -309,13 +384,24 @@ rb_insert_fixup(RBTree *rb, RBNode *x)
} }
/* /*
* Allocate node for data and insert in tree. * rb_insert: insert a new value into the tree.
* *
* Return old data (or result of appendator method) if it exists and NULL * data represents the value to insert. Its RBNode fields need not
* otherwise. * be valid, it's the extra data in the larger struct that is of interest.
*
* If the value represented by "data" is not present in the tree, then
* we copy "data" into a new tree entry and return that node, setting *isNew
* to true.
*
* If the value represented by "data" is already present, then we call the
* combiner function to merge data into the existing node, and return the
* existing node, setting *isNew to false.
*
* "data" is unmodified in either case; it's typically just a local
* variable in the caller.
*/ */
void * RBNode *
rb_insert(RBTree *rb, void *data) rb_insert(RBTree *rb, const RBNode *data, bool *isNew)
{ {
RBNode *current, RBNode *current,
*parent, *parent,
...@@ -325,43 +411,37 @@ rb_insert(RBTree *rb, void *data) ...@@ -325,43 +411,37 @@ rb_insert(RBTree *rb, void *data)
/* find where node belongs */ /* find where node belongs */
current = rb->root; current = rb->root;
parent = NULL; parent = NULL;
cmp = 0; cmp = 0; /* just to prevent compiler warning */
while (current != RBNIL) while (current != RBNIL)
{ {
cmp = rb->comparator(data, current->data, rb->arg); cmp = rb->comparator(data, current, rb->arg);
if (cmp == 0) if (cmp == 0)
{ {
/* /*
* Found node with given key. If appendator method is provided, * Found node with given key. Apply combiner.
* call it to join old and new data; else, new data replaces old
* data.
*/ */
if (rb->appendator) rb->combiner(current, data, rb->arg);
{ *isNew = false;
current->data = rb->appendator(current->data, data, rb->arg); return current;
return current->data;
}
else
{
void *old = current->data;
current->data = data;
return old;
}
} }
parent = current; parent = current;
current = (cmp < 0) ? current->left : current->right; current = (cmp < 0) ? current->left : current->right;
} }
/* setup new node in tree */ /*
x = palloc(sizeof(RBNode)); * Value is not present, so create a new node containing data.
x->data = data; */
x->parent = parent; *isNew = true;
x->left = RBNIL;
x->right = RBNIL; x = rb->allocfunc(rb->arg);
x->color = RBRED;
x->iteratorState = InitialState; x->iteratorState = InitialState;
x->color = RBRED;
x->left = RBNIL;
x->right = RBNIL;
x->parent = parent;
rb_copy_data(rb, x, data);
/* insert node in tree */ /* insert node in tree */
if (parent) if (parent)
...@@ -377,7 +457,8 @@ rb_insert(RBTree *rb, void *data) ...@@ -377,7 +457,8 @@ rb_insert(RBTree *rb, void *data)
} }
rb_insert_fixup(rb, x); rb_insert_fixup(rb, x);
return NULL;
return x;
} }
/********************************************************************** /**********************************************************************
...@@ -533,11 +614,11 @@ rb_delete_node(RBTree *rb, RBNode *z) ...@@ -533,11 +614,11 @@ rb_delete_node(RBTree *rb, RBNode *z)
} }
/* /*
* If we removed the tree successor of z rather than z itself, then attach * If we removed the tree successor of z rather than z itself, then move
* the data for the removed node to the one we were supposed to remove. * the data for the removed node to the one we were supposed to remove.
*/ */
if (y != z) if (y != z)
z->data = y->data; rb_copy_data(rb, z, y);
/* /*
* Removing a black node might make some paths from root to leaf contain * Removing a black node might make some paths from root to leaf contain
...@@ -546,260 +627,245 @@ rb_delete_node(RBTree *rb, RBNode *z) ...@@ -546,260 +627,245 @@ rb_delete_node(RBTree *rb, RBNode *z)
if (y->color == RBBLACK) if (y->color == RBBLACK)
rb_delete_fixup(rb, x); rb_delete_fixup(rb, x);
pfree(y); /* Now we can recycle the y node */
} if (rb->freefunc)
rb->freefunc(y, rb->arg);
extern void
rb_delete(RBTree *rb, void *data)
{
RBNode *node = rb->root;
int cmp;
while (node != RBNIL)
{
cmp = rb->comparator(data, node->data, rb->arg);
if (cmp == 0)
{
/* found node to delete */
if (rb->freefunc)
rb->freefunc (node->data);
node->data = NULL;
rb_delete_node(rb, node);
return;
}
else if (cmp < 0)
node = node->left;
else
node = node->right;
}
} }
/* /*
* Return data on left most node and delete * rb_delete: remove the given tree entry
* that node *
* "node" must have previously been found via rb_find or rb_leftmost.
* It is caller's responsibility to free any subsidiary data attached
* to the node before calling rb_delete. (Do *not* try to push that
* responsibility off to the freefunc, as some other physical node
* may be the one actually freed!)
*/ */
extern void * void
rb_leftmost(RBTree *rb) rb_delete(RBTree *rb, RBNode *node)
{ {
RBNode *node = rb->root; rb_delete_node(rb, node);
RBNode *leftmost = rb->root;
void *res = NULL;
while (node != RBNIL)
{
leftmost = node;
node = node->left;
}
if (leftmost != RBNIL)
{
res = leftmost->data;
leftmost->data = NULL;
rb_delete_node(rb, leftmost);
}
return res;
} }
/********************************************************************** /**********************************************************************
* Traverse * * Traverse *
**********************************************************************/ **********************************************************************/
static void * /*
rb_next_node(RBTreeIterator *iterator, RBNode *node) * The iterator routines were originally coded in tail-recursion style,
{ * which is nice to look at, but is trouble if your compiler isn't smart
node->iteratorState = InitialState; * enough to optimize it. Now we just use looping.
iterator->node = node; */
return iterator->iterate(iterator); #define descend(next_node) \
} do { \
(next_node)->iteratorState = InitialState; \
static void * node = rb->cur = (next_node); \
rb_left_right_iterator(RBTreeIterator *iterator) goto restart; \
} while (0)
#define ascend(next_node) \
do { \
node = rb->cur = (next_node); \
goto restart; \
} while (0)
static RBNode *
rb_left_right_iterator(RBTree *rb)
{ {
RBNode *node = iterator->node; RBNode *node = rb->cur;
restart:
switch (node->iteratorState) switch (node->iteratorState)
{ {
case InitialState: case InitialState:
if (node->left != RBNIL) if (node->left != RBNIL)
{ {
node->iteratorState = FirstStepDone; node->iteratorState = FirstStepDone;
return rb_next_node(iterator, node->left); descend(node->left);
} }
/* FALL THROUGH */
case FirstStepDone: case FirstStepDone:
node->iteratorState = SecondStepDone; node->iteratorState = SecondStepDone;
return node->data; return node;
case SecondStepDone: case SecondStepDone:
if (node->right != RBNIL) if (node->right != RBNIL)
{ {
node->iteratorState = ThirdStepDone; node->iteratorState = ThirdStepDone;
return rb_next_node(iterator, node->right); descend(node->right);
} }
/* FALL THROUGH */
case ThirdStepDone: case ThirdStepDone:
if (node->parent) if (node->parent)
{ ascend(node->parent);
iterator->node = node->parent;
return iterator->iterate(iterator);
}
break; break;
default: default:
elog(ERROR, "Unknow node state: %d", node->iteratorState); elog(ERROR, "unrecognized rbtree node state: %d",
node->iteratorState);
} }
return NULL; return NULL;
} }
static void * static RBNode *
rb_right_left_iterator(RBTreeIterator *iterator) rb_right_left_iterator(RBTree *rb)
{ {
RBNode *node = iterator->node; RBNode *node = rb->cur;
restart:
switch (node->iteratorState) switch (node->iteratorState)
{ {
case InitialState: case InitialState:
if (node->right != RBNIL) if (node->right != RBNIL)
{ {
node->iteratorState = FirstStepDone; node->iteratorState = FirstStepDone;
return rb_next_node(iterator, node->right); descend(node->right);
} }
/* FALL THROUGH */
case FirstStepDone: case FirstStepDone:
node->iteratorState = SecondStepDone; node->iteratorState = SecondStepDone;
return node->data; return node;
case SecondStepDone: case SecondStepDone:
if (node->left != RBNIL) if (node->left != RBNIL)
{ {
node->iteratorState = ThirdStepDone; node->iteratorState = ThirdStepDone;
return rb_next_node(iterator, node->left); descend(node->left);
} }
/* FALL THROUGH */
case ThirdStepDone: case ThirdStepDone:
if (node->parent) if (node->parent)
{ ascend(node->parent);
iterator->node = node->parent;
return iterator->iterate(iterator);
}
break; break;
default: default:
elog(ERROR, "Unknow node state: %d", node->iteratorState); elog(ERROR, "unrecognized rbtree node state: %d",
node->iteratorState);
} }
return NULL; return NULL;
} }
static void * static RBNode *
rb_direct_iterator(RBTreeIterator *iterator) rb_direct_iterator(RBTree *rb)
{ {
RBNode *node = iterator->node; RBNode *node = rb->cur;
restart:
switch (node->iteratorState) switch (node->iteratorState)
{ {
case InitialState: case InitialState:
node->iteratorState = FirstStepDone; node->iteratorState = FirstStepDone;
return node->data; return node;
case FirstStepDone: case FirstStepDone:
if (node->left != RBNIL) if (node->left != RBNIL)
{ {
node->iteratorState = SecondStepDone; node->iteratorState = SecondStepDone;
return rb_next_node(iterator, node->left); descend(node->left);
} }
/* FALL THROUGH */
case SecondStepDone: case SecondStepDone:
if (node->right != RBNIL) if (node->right != RBNIL)
{ {
node->iteratorState = ThirdStepDone; node->iteratorState = ThirdStepDone;
return rb_next_node(iterator, node->right); descend(node->right);
} }
/* FALL THROUGH */
case ThirdStepDone: case ThirdStepDone:
if (node->parent) if (node->parent)
{ ascend(node->parent);
iterator->node = node->parent;
return iterator->iterate(iterator);
}
break; break;
default: default:
elog(ERROR, "Unknow node state: %d", node->iteratorState); elog(ERROR, "unrecognized rbtree node state: %d",
node->iteratorState);
} }
return NULL; return NULL;
} }
static void * static RBNode *
rb_inverted_iterator(RBTreeIterator *iterator) rb_inverted_iterator(RBTree *rb)
{ {
RBNode *node = iterator->node; RBNode *node = rb->cur;
restart:
switch (node->iteratorState) switch (node->iteratorState)
{ {
case InitialState: case InitialState:
if (node->left != RBNIL) if (node->left != RBNIL)
{ {
node->iteratorState = FirstStepDone; node->iteratorState = FirstStepDone;
return rb_next_node(iterator, node->left); descend(node->left);
} }
/* FALL THROUGH */
case FirstStepDone: case FirstStepDone:
if (node->right != RBNIL) if (node->right != RBNIL)
{ {
node->iteratorState = SecondStepDone; node->iteratorState = SecondStepDone;
return rb_next_node(iterator, node->right); descend(node->right);
} }
/* FALL THROUGH */
case SecondStepDone: case SecondStepDone:
node->iteratorState = ThirdStepDone; node->iteratorState = ThirdStepDone;
return node->data; return node;
case ThirdStepDone: case ThirdStepDone:
if (node->parent) if (node->parent)
{ ascend(node->parent);
iterator->node = node->parent;
return iterator->iterate(iterator);
}
break; break;
default: default:
elog(ERROR, "Unknow node state: %d", node->iteratorState); elog(ERROR, "unrecognized rbtree node state: %d",
node->iteratorState);
} }
return NULL; return NULL;
} }
RBTreeIterator * /*
* rb_begin_iterate: prepare to traverse the tree in any of several orders
*
* After calling rb_begin_iterate, call rb_iterate repeatedly until it
* returns NULL or the traversal stops being of interest.
*
* If the tree is changed during traversal, results of further calls to
* rb_iterate are unspecified.
*
* Note: this used to return a separately palloc'd iterator control struct,
* but that's a bit pointless since the data structure is incapable of
* supporting multiple concurrent traversals. Now we just keep the state
* in RBTree.
*/
void
rb_begin_iterate(RBTree *rb, RBOrderControl ctrl) rb_begin_iterate(RBTree *rb, RBOrderControl ctrl)
{ {
RBTreeIterator *iterator = palloc(sizeof(RBTreeIterator)); rb->cur = rb->root;
if (rb->cur != RBNIL)
iterator->node = rb->root; rb->cur->iteratorState = InitialState;
if (iterator->node != RBNIL)
iterator->node->iteratorState = InitialState;
switch (ctrl) switch (ctrl)
{ {
case LeftRightWalk: /* visit left, then self, then right */ case LeftRightWalk: /* visit left, then self, then right */
iterator->iterate = rb_left_right_iterator; rb->iterate = rb_left_right_iterator;
break; break;
case RightLeftWalk: /* visit right, then self, then left */ case RightLeftWalk: /* visit right, then self, then left */
iterator->iterate = rb_right_left_iterator; rb->iterate = rb_right_left_iterator;
break; break;
case DirectWalk: /* visit self, then left, then right */ case DirectWalk: /* visit self, then left, then right */
iterator->iterate = rb_direct_iterator; rb->iterate = rb_direct_iterator;
break; break;
case InvertedWalk: /* visit left, then right, then self */ case InvertedWalk: /* visit left, then right, then self */
iterator->iterate = rb_inverted_iterator; rb->iterate = rb_inverted_iterator;
break; break;
default: default:
elog(ERROR, "Unknown iterator order: %d", ctrl); elog(ERROR, "unrecognized rbtree iteration order: %d", ctrl);
} }
return iterator;
} }
void * /*
rb_iterate(RBTreeIterator *iterator) * rb_iterate: return the next node in traversal order, or NULL if no more
*/
RBNode *
rb_iterate(RBTree *rb)
{ {
if (iterator->node == RBNIL) if (rb->cur == RBNIL)
return NULL; return NULL;
return iterator->iterate(iterator); return rb->iterate(rb);
}
void
rb_free_iterator(RBTreeIterator *iterator)
{
pfree(iterator);
} }
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* Copyright (c) 2006-2010, PostgreSQL Global Development Group * Copyright (c) 2006-2010, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.39 2010/07/31 00:30:54 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.40 2010/08/01 02:12:42 tgl Exp $
*-------------------------------------------------------------------------- *--------------------------------------------------------------------------
*/ */
#ifndef GIN_H #ifndef GIN_H
...@@ -565,6 +565,7 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); ...@@ -565,6 +565,7 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
/* ginbulk.c */ /* ginbulk.c */
typedef struct EntryAccumulator typedef struct EntryAccumulator
{ {
RBNode rbnode;
Datum value; Datum value;
uint32 length; uint32 length;
uint32 number; uint32 number;
...@@ -579,15 +580,14 @@ typedef struct ...@@ -579,15 +580,14 @@ typedef struct
long allocatedMemory; long allocatedMemory;
uint32 length; uint32 length;
EntryAccumulator *entryallocator; EntryAccumulator *entryallocator;
ItemPointerData *tmpList;
RBTree *tree; RBTree *tree;
RBTreeIterator *iterator;
} BuildAccumulator; } BuildAccumulator;
extern void ginInitBA(BuildAccumulator *accum); extern void ginInitBA(BuildAccumulator *accum);
extern void ginInsertRecordBA(BuildAccumulator *accum, extern void ginInsertRecordBA(BuildAccumulator *accum,
ItemPointer heapptr, ItemPointer heapptr,
OffsetNumber attnum, Datum *entries, int32 nentry); OffsetNumber attnum, Datum *entries, int32 nentry);
extern void ginBeginBAScan(BuildAccumulator *accum);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n); extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
/* ginfast.c */ /* ginfast.c */
......
...@@ -3,44 +3,64 @@ ...@@ -3,44 +3,64 @@
* rbtree.h * rbtree.h
* interface for PostgreSQL generic Red-Black binary tree package * interface for PostgreSQL generic Red-Black binary tree package
* *
* Copyright (c) 1996-2009, PostgreSQL Global Development Group * Copyright (c) 2009-2010, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.3 2010/05/11 18:14:01 rhaas Exp $ * $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.4 2010/08/01 02:12:42 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef RBTREE_H #ifndef RBTREE_H
#define RBTREE_H #define RBTREE_H
/*
* RBNode is intended to be used as the first field of a larger struct,
* whose additional fields carry whatever payload data the caller needs
* for a tree entry. (The total size of that larger struct is passed to
* rb_create.) RBNode is declared here to support this usage, but
* callers must treat it as an opaque struct.
*/
typedef struct RBNode
{
char iteratorState; /* workspace for iterating through tree */
char color; /* node's current color, red or black */
struct RBNode *left; /* left child, or RBNIL if none */
struct RBNode *right; /* right child, or RBNIL if none */
struct RBNode *parent; /* parent, or NULL (not RBNIL!) if none */
} RBNode;
/* Opaque struct representing a whole tree */
typedef struct RBTree RBTree; typedef struct RBTree RBTree;
typedef struct RBTreeIterator RBTreeIterator;
typedef int (*rb_comparator) (const void *a, const void *b, void *arg); /* Available tree iteration orderings */
typedef void *(*rb_appendator) (void *currentdata, void *newval, void *arg); typedef enum RBOrderControl
typedef void (*rb_freefunc) (void *a); {
LeftRightWalk, /* inorder: left child, node, right child */
RightLeftWalk, /* reverse inorder: right, node, left */
DirectWalk, /* preorder: node, left child, right child */
InvertedWalk /* postorder: left child, right child, node */
} RBOrderControl;
/* Support functions to be provided by caller */
typedef int (*rb_comparator) (const RBNode *a, const RBNode *b, void *arg);
typedef void (*rb_combiner) (RBNode *existing, const RBNode *newdata, void *arg);
typedef RBNode *(*rb_allocfunc) (void *arg);
typedef void (*rb_freefunc) (RBNode *x, void *arg);
extern RBTree *rb_create(rb_comparator comparator, extern RBTree *rb_create(Size node_size,
rb_appendator appendator, rb_comparator comparator,
rb_combiner combiner,
rb_allocfunc allocfunc,
rb_freefunc freefunc, rb_freefunc freefunc,
void *arg); void *arg);
extern void *rb_find(RBTree *rb, void *data); extern RBNode *rb_find(RBTree *rb, const RBNode *data);
extern void *rb_insert(RBTree *rb, void *data); extern RBNode *rb_leftmost(RBTree *rb);
extern void rb_delete(RBTree *rb, void *data);
extern void *rb_leftmost(RBTree *rb);
typedef enum RBOrderControl extern RBNode *rb_insert(RBTree *rb, const RBNode *data, bool *isNew);
{ extern void rb_delete(RBTree *rb, RBNode *node);
LeftRightWalk,
RightLeftWalk,
DirectWalk,
InvertedWalk
} RBOrderControl;
extern RBTreeIterator *rb_begin_iterate(RBTree *rb, RBOrderControl ctrl); extern void rb_begin_iterate(RBTree *rb, RBOrderControl ctrl);
extern void *rb_iterate(RBTreeIterator *iterator); extern RBNode *rb_iterate(RBTree *rb);
extern void rb_free_iterator(RBTreeIterator *iterator);
#endif #endif /* RBTREE_H */
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment