Commit 23416364 authored by Teodor Sigaev

GIN improvements

- Replace the sorted array of entries kept in maintenance_work_mem with a
  binary tree; this should improve index creation performance.
- Calculate allocated memory more precisely and eliminate leaks
  with user-defined extractValue()
- Improve wording in tsearch2
parent fa601357
...@@ -313,12 +313,12 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) ...@@ -313,12 +313,12 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
#ifdef IGNORE_LONGLEXEME #ifdef IGNORE_LONGLEXEME
ereport(NOTICE, ereport(NOTICE,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long"))); errmsg("A word you are indexing is too long. It will be ignored.")));
continue; continue;
#else #else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long"))); errmsg("A word you are indexing is too long")));
#endif #endif
} }
...@@ -470,12 +470,12 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 ...@@ -470,12 +470,12 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
#ifdef IGNORE_LONGLEXEME #ifdef IGNORE_LONGLEXEME
ereport(NOTICE, ereport(NOTICE,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long"))); errmsg("A word you are indexing is too long. It will be ignored.")));
continue; continue;
#else #else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long"))); errmsg("A word you are indexing is too long")));
#endif #endif
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.2 2006/07/11 16:55:34 teodor Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,17 +22,29 @@ ...@@ -22,17 +22,29 @@
#include "utils/memutils.h" #include "utils/memutils.h"
#include "access/tuptoaster.h" #include "access/tuptoaster.h"
#define DEF_NENTRY 128 #define DEF_NENTRY 2048
#define DEF_NPTR 4 #define DEF_NPTR 4
void void
ginInitBA(BuildAccumulator *accum) { ginInitBA(BuildAccumulator *accum) {
accum->maxdepth = 1;
accum->number = 0; accum->stackpos = 0;
accum->curget = 0; accum->entries = NULL;
accum->length = DEF_NENTRY; accum->stack = NULL;
accum->entries = (EntryAccumulator*)palloc0( sizeof(EntryAccumulator) * DEF_NENTRY ); accum->allocatedMemory = 0;
accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY; accum->entryallocator = NULL;
}
/*
 * Hand out the next unused EntryAccumulator slot.
 *
 * Slots are carved out of chunks of DEF_NENTRY elements.  A new chunk is
 * obtained with palloc() when there is no current chunk or when the current
 * one has been used up; its size is added to accum->allocatedMemory.
 * accum->length counts the slots already handed out from the current chunk.
 * The returned slot is not initialized here.
 */
static EntryAccumulator *
EAAllocate(BuildAccumulator *accum)
{
	EntryAccumulator *slot;

	/* accum->length is only examined once a chunk exists */
	if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
	{
		accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
		accum->allocatedMemory += sizeof(EntryAccumulator) * DEF_NENTRY;
		accum->length = 0;
	}

	slot = accum->entryallocator + accum->length;
	accum->length++;

	return slot;
}
/* /*
...@@ -61,64 +73,133 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap ...@@ -61,64 +73,133 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
entry->number++; entry->number++;
} }
/*
 * Return a copy of 'value' for storing in the accumulator.
 *
 * The layout of the value is taken from the first attribute of the index
 * tuple descriptor (accum->ginstate->tupdesc->attrs[0]):
 *	 - pass-by-value: the Datum itself is the value and is returned as is;
 *	 - varlena (attlen == -1): VARATT_SIZE() bytes are copied;
 *	 - C string (attlen == -2): strlen() + 1 bytes are copied;
 *	 - fixed-length pass-by-reference: attlen bytes are copied.
 *
 * For pass-by-reference values the copy is palloc'ed in the current memory
 * context and the number of bytes copied is added to accum->allocatedMemory.
 * Nothing is added for pass-by-value values.
 */
static Datum
getDatumCopy(BuildAccumulator *accum, Datum value)
{
	Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
	Datum		newvalue;

	if (att[0]->attbyval)
	{
		/*
		 * A pass-by-value Datum is self-contained, so plain assignment is a
		 * complete copy.  store_att_byval() must not be used here: it writes
		 * only attlen bytes at the target address, which leaves the rest of
		 * an uninitialized Datum undefined and places the value in the wrong
		 * bytes of the Datum on big-endian machines.
		 */
		newvalue = value;
	}
	else
	{
		/* pass-by-reference: work out how many bytes to duplicate */
		int			data_length;
		void	   *ptr;

		if (att[0]->attlen == -1)
		{
			/* varlena */
			data_length = VARATT_SIZE(DatumGetPointer(value));
		}
		else if (att[0]->attlen == -2)
		{
			/* c-string, including the terminating NUL */
			data_length = strlen(DatumGetCString(value)) + 1;
		}
		else
		{
			/* fixed-length pass-by-reference */
			Assert(att[0]->attlen > 0);
			data_length = att[0]->attlen;
		}

		ptr = palloc(data_length);
		memcpy(ptr, DatumGetPointer(value), data_length);
		newvalue = PointerGetDatum(ptr);

		accum->allocatedMemory += data_length;
	}

	return newvalue;
}
/* /*
* Find/store one entry from indexed value. * Find/store one entry from indexed value.
* It supposes, that entry should be located between low and end of array of
* entries. Returns position of found/inserted entry
*/ */
static uint32 static void
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry, uint32 low) { ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
uint32 high = accum->number, mid; EntryAccumulator *ea = accum->entries, *pea = NULL;
int res; int res = 0;
uint32 depth = 1;
while(high>low) {
mid = low + ((high - low) / 2); while( ea ) {
res = compareEntries(accum->ginstate, entry, ea->value);
res = compareEntries(accum->ginstate, entry, accum->entries[mid].value); if ( res == 0 )
break; /* found */
if ( res == 0 ) { else {
ginInsertData( accum, accum->entries+mid, heapptr ); pea = ea;
return mid; if ( res < 0 )
} else if ( res > 0 ) ea = ea->left;
low = mid + 1; else
else ea = ea->right;
high = mid; }
depth++;
} }
/* did not find an entry, insert */ if ( depth > accum->maxdepth )
if ( accum->number >= accum->length ) { accum->maxdepth = depth;
accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length;
accum->length *= 2;
accum->entries = (EntryAccumulator*)repalloc( accum->entries,
sizeof(EntryAccumulator) * accum->length );
}
if ( high != accum->number )
memmove( accum->entries+high+1, accum->entries+high, sizeof(EntryAccumulator) * (accum->number-high) );
accum->entries[high].value = entry; if ( ea == NULL ) {
accum->entries[high].length = DEF_NPTR; ea = EAAllocate(accum);
accum->entries[high].number = 1;
accum->entries[high].shouldSort = FALSE;
accum->entries[high].list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
accum->entries[high].list[0] = *heapptr;
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR; ea->left = ea->right = NULL;
accum->number++; ea->value = getDatumCopy(accum, entry);
ea->length = DEF_NPTR;
ea->number = 1;
ea->shouldSort = FALSE;
ea->list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
ea->list[0] = *heapptr;
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
return high; if ( pea == NULL )
accum->entries = ea;
else {
Assert( res != 0 );
if ( res < 0 )
pea->left = ea;
else
pea->right = ea;
}
} else
ginInsertData( accum, ea, heapptr );
} }
/*
 * Insert the entry at the midpoint of the left half [low, middle] and the
 * entry at the midpoint of the right half [middle+1, high], then recurse
 * into each of the two halves.
 *
 * Positions are computed in a range that is shifted by 'offset' relative to
 * the entries array: a position is inserted only if it is >= offset and
 * pos - offset is a valid index into entries[] (i.e. less than nentry).
 * A half that has collapsed (low == middle, or middle + 1 == high) is
 * neither inserted from nor recursed into.
 */
static void
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
			  uint32 low, uint32 high, uint32 offset)
{
	uint32		middle = (low + high) >> 1;
	uint32		leftPos = (low + middle) >> 1;
	uint32		rightPos = (high + middle + 1) >> 1;

	/* midpoint of the left half */
	if (low != middle && leftPos >= offset && leftPos - offset < nentry)
		ginInsertEntry(accum, heapptr, entries[leftPos - offset]);

	/* midpoint of the right half */
	if (middle + 1 != high && rightPos >= offset && rightPos - offset < nentry)
		ginInsertEntry(accum, heapptr, entries[rightPos - offset]);

	/* descend into the left half first, then the right half */
	if (low != middle)
		ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset);

	if (high != middle + 1)
		ginChooseElem(accum, heapptr, entries, nentry, middle + 1, high, offset);
}
/* /*
* Insert one heap pointer. Requires entries to be sorted! * Insert one heap pointer. Suppose entries is sorted.
* The insertion order tries to keep the binary tree balanced: first insert the
* middle value, then the middle of the left part and the middle of the right part.
*/ */
void void
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) { ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
uint32 start=0,i; uint32 i, nbit=0, offset;
if (nentry==0)
return;
i=nentry-1;
for(;i>0;i>>=1) nbit++;
for(i=0;i<nentry;i++) nbit = 1<<nbit;
start = ginInsertEntry( accum, heapptr, entries[i], start); offset = (nbit-nentry)/2;
ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
} }
static int static int
...@@ -128,28 +209,79 @@ qsortCompareItemPointers( const void *a, const void *b ) { ...@@ -128,28 +209,79 @@ qsortCompareItemPointers( const void *a, const void *b ) {
return res; return res;
} }
/*
 * Advance the traversal of the binary tree and return the next node in
 * order, or NULL when the traversal is finished.
 *
 * The traversal state is the path kept in accum->stack[0 .. accum->stackpos].
 * A node whose list pointer is still non-NULL has not been returned yet; the
 * caller (ginGetEntry) sets list to NULL on a node after it has been
 * returned.  accum->stack[accum->stackpos + 1] is compared with the right
 * child to tell whether the right subtree was already entered.
 */
static EntryAccumulator *
walkTree(BuildAccumulator *accum)
{
	for (;;)
	{
		EntryAccumulator *node = accum->stack[accum->stackpos];

		/* left subtree is done and the node itself is still pending */
		if (node->list != NULL)
			return node;

		if (node->right && node->right != accum->stack[accum->stackpos + 1])
		{
			/* step into the right subtree ... */
			node = node->right;
			accum->stackpos++;
			accum->stack[accum->stackpos] = node;

			/* ... and run down to its left-most node, recording the path */
			while (node->left)
			{
				node = node->left;
				accum->stackpos++;
				accum->stack[accum->stackpos] = node;
			}
			return node;
		}

		/* left subtree, the node and its right subtree are all done */
		if (accum->stackpos == 0)
			return NULL;

		/* pop one level and examine the parent */
		accum->stackpos--;
	}
}
ItemPointerData* ItemPointerData*
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) { ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
EntryAccumulator *entry; EntryAccumulator *entry;
ItemPointerData *list; ItemPointerData *list;
if ( accum->curget >= accum->number )
if ( accum->stack == NULL ) {
/* first call */
accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
entry = accum->entries;
/* find most-left value */
for(;;) {
accum->stack[ accum->stackpos ] = entry;
if ( entry->left ) {
accum->stackpos++;
entry = entry->left;
} else
break;
}
} else {
pfree( accum->stack[ accum->stackpos ]->list );
accum->stack[ accum->stackpos ]->list = NULL;
entry = walkTree( accum );
}
if ( entry == NULL )
return NULL; return NULL;
else if ( accum->curget > 0 )
pfree( accum->entries[ accum->curget-1 ].list );
entry = accum->entries + accum->curget;
*n = entry->number; *n = entry->number;
*value = entry->value; *value = entry->value;
list = entry->list; list = entry->list;
accum->curget++;
Assert(list != NULL);
if ( entry->shouldSort && entry->number > 1 ) if ( entry->shouldSort && entry->number > 1 )
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers); qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
return list; return list;
} }
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.2 2006/05/10 23:18:38 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.3 2006/07/11 16:55:34 teodor Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,6 +26,7 @@ typedef struct { ...@@ -26,6 +26,7 @@ typedef struct {
GinState ginstate; GinState ginstate;
double indtuples; double indtuples;
MemoryContext tmpCtx; MemoryContext tmpCtx;
MemoryContext funcCtx;
BuildAccumulator accum; BuildAccumulator accum;
} GinBuildState; } GinBuildState;
...@@ -189,19 +190,22 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData ...@@ -189,19 +190,22 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
* Function isnt use during normal insert * Function isnt use during normal insert
*/ */
static uint32 static uint32
ginHeapTupleBulkInsert(BuildAccumulator *accum, Datum value, ItemPointer heapptr) { ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) {
Datum *entries; Datum *entries;
uint32 nentries; uint32 nentries;
MemoryContext oldCtx;
entries = extractEntriesSU( accum->ginstate, value, &nentries); oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
entries = extractEntriesSU( buildstate->accum.ginstate, value, &nentries);
MemoryContextSwitchTo(oldCtx);
if ( nentries==0 ) if ( nentries==0 )
/* nothing to insert */ /* nothing to insert */
return 0; return 0;
ginInsertRecordBA( accum, heapptr, entries, nentries); ginInsertRecordBA( &buildstate->accum, heapptr, entries, nentries);
pfree( entries ); MemoryContextReset(buildstate->funcCtx);
return nentries; return nentries;
} }
...@@ -218,7 +222,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, ...@@ -218,7 +222,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
buildstate->indtuples += ginHeapTupleBulkInsert(&buildstate->accum, *values, &htup->t_self); buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, *values, &htup->t_self);
/* we use only half maintenance_work_mem, because there is some leaks /* we use only half maintenance_work_mem, because there is some leaks
during insertion and extract values */ during insertion and extract values */
...@@ -297,6 +301,12 @@ ginbuild(PG_FUNCTION_ARGS) { ...@@ -297,6 +301,12 @@ ginbuild(PG_FUNCTION_ARGS) {
ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE); ALLOCSET_DEFAULT_MAXSIZE);
buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
"Gin build temporary context for user-defined function",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.accum.ginstate = &buildstate.ginstate; buildstate.accum.ginstate = &buildstate.ginstate;
ginInitBA( &buildstate.accum ); ginInitBA( &buildstate.accum );
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* header file for postgres inverted index access method implementation. * header file for postgres inverted index access method implementation.
* *
* Copyright (c) 2006, PostgreSQL Global Development Group * Copyright (c) 2006, PostgreSQL Global Development Group
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.4 2006/07/11 13:54:24 momjian Exp $ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.5 2006/07/11 16:55:34 teodor Exp $
*-------------------------------------------------------------------------- *--------------------------------------------------------------------------
*/ */
...@@ -414,21 +414,26 @@ extern Datum arraycontains(PG_FUNCTION_ARGS); ...@@ -414,21 +414,26 @@ extern Datum arraycontains(PG_FUNCTION_ARGS);
extern Datum arraycontained(PG_FUNCTION_ARGS); extern Datum arraycontained(PG_FUNCTION_ARGS);
/* ginbulk.c */ /* ginbulk.c */
typedef struct { typedef struct EntryAccumulator {
Datum value; Datum value;
uint32 length; uint32 length;
uint32 number; uint32 number;
ItemPointerData *list; ItemPointerData *list;
bool shouldSort; bool shouldSort;
struct EntryAccumulator *left;
struct EntryAccumulator *right;
} EntryAccumulator; } EntryAccumulator;
typedef struct { typedef struct {
GinState *ginstate; GinState *ginstate;
EntryAccumulator *entries; EntryAccumulator *entries;
uint32 length; uint32 maxdepth;
uint32 number; EntryAccumulator **stack;
uint32 curget; uint32 stackpos;
uint32 allocatedMemory; uint32 allocatedMemory;
uint32 length;
EntryAccumulator *entryallocator;
} BuildAccumulator; } BuildAccumulator;
extern void ginInitBA(BuildAccumulator *accum); extern void ginInitBA(BuildAccumulator *accum);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment