Commit fe1a9c33 authored by Tom Lane's avatar Tom Lane

Repair some problems in GIST-index contrib modules. Patch from

Teodor Sigaev <teodor@stack.net>.
parent e206ff59
...@@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec, ...@@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
v->spl_nleft = 0; v->spl_nleft = 0;
right = v->spl_right; right = v->spl_right;
v->spl_nright = 0; v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key); datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
datum_l = copy_intArrayType(datum_alpha); datum_l = copy_intArrayType(datum_alpha);
......
...@@ -198,23 +198,6 @@ Don't forget to do ...@@ -198,23 +198,6 @@ Don't forget to do
make clean; make; make install make clean; make; make install
2. 2.
As mentioned above, we don't explicitly use lexeme IDs
as OpenFTS does; instead we use a hash function (crc32) to map each lexeme to
an integer. Our experiments show that the probability of collision is quite small:
about 10**(-6) for English text and 10**(-5) for a Russian collection.
The default installation doesn't check for collisions, but if your application
needs to guarantee an exact (collision-free) search, you need
to update the system table to mark the index as lossy:
update pg_amop set amopreqcheck = true where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
If you are not concerned about collisions:
update pg_amop set amopreqcheck = false where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
3.
txtidx doesn't preserve words ordering (this is not critical for searching) txtidx doesn't preserve words ordering (this is not critical for searching)
for performance reason, for example: for performance reason, for example:
...@@ -224,7 +207,7 @@ test=# select 'page two'::txtidx; ...@@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
'two' 'page' 'two' 'page'
(1 row) (1 row)
4. 3.
Indexed access provided by txtidx data type isn't always good Indexed access provided by txtidx data type isn't always good
because of internal data structure we use (RD-Tree). Particularly, because of internal data structure we use (RD-Tree). Particularly,
queries like '!gist' will be slower than just a sequential scan, queries like '!gist' will be slower than just a sequential scan,
...@@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt ); ...@@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
These two queries will be processed by scanning of full index ! These two queries will be processed by scanning of full index !
Very slow ! Very slow !
5. 4.
Following selects produce the same result Following selects produce the same result
select title from titles where titleidx @@ 'patch&gist'; select title from titles where titleidx @@ 'patch&gist';
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "utils/array.h" #include "utils/array.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "storage/bufpage.h" #include "storage/bufpage.h"
#include "access/tuptoaster.h"
#include "txtidx.h" #include "txtidx.h"
#include "query.h" #include "query.h"
...@@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) { ...@@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
return res + 1 - a; return res + 1 - a;
} }
/*
 * makesign
 *		Fill the bit vector "sign" from the array-type key "a".
 *
 * The whole BITVEC is zeroed first; HASH() is then applied to "sign"
 * once for each of the ARRNELEM(a) values found at GETARR(a).
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
	int4	nvalues = ARRNELEM( a );
	int4   *values = GETARR( a );
	int4	i = 0;

	/* start from an empty signature */
	MemSet( (void*)sign, 0, sizeof(BITVEC) );

	while ( i < nvalues ) {
		HASH( sign, values[i] );
		i++;
	}
}
Datum Datum
gtxtidx_compress(PG_FUNCTION_ARGS) { gtxtidx_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0); GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
...@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) { ...@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len ); *arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
arr++; ptr++; arr++; ptr++;
} }
if ( val != toastedval )
pfree(val);
len = uniqueint( GETARR(res), val->size ); len = uniqueint( GETARR(res), val->size );
if ( len != val->size ) { if ( len != val->size ) {
...@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) { ...@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
len = CALCGTSIZE( ARRKEY, len ); len = CALCGTSIZE( ARRKEY, len );
res = (GISTTYPE*)repalloc( (void*)res, len ); res = (GISTTYPE*)repalloc( (void*)res, len );
res->len = len; res->len = len;
} }
if ( val != toastedval )
pfree(val);
/* make signature, if array is too long */
if ( res->len > TOAST_INDEX_TARGET ) {
GISTTYPE *ressign;
len = CALCGTSIZE( SIGNKEY, 0 );
ressign = (GISTTYPE*)palloc( len );
ressign->len = len;
ressign->flag = SIGNKEY;
makesign( GETSIGN(ressign), res );
pfree(res);
res = ressign;
}
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY)); retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res), gistentryinit(*retval, PointerGetDatum(res),
...@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) { ...@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER( penalty ); PG_RETURN_POINTER( penalty );
} }
/*
 * makesign
 *		Fill the bit vector "sign" from the array-type key "a".
 *
 * The whole BITVEC is zeroed first; HASH() is then applied to "sign"
 * once for each of the ARRNELEM(a) values found at GETARR(a).
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
	int4	nvalues = ARRNELEM( a );
	int4   *values = GETARR( a );
	int4	i = 0;

	/* start from an empty signature */
	MemSet( (void*)sign, 0, sizeof(BITVEC) );

	while ( i < nvalues ) {
		HASH( sign, values[i] );
		i++;
	}
}
typedef struct { typedef struct {
bool allistrue; bool allistrue;
BITVEC sign; BITVEC sign;
...@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) { ...@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
right = v->spl_right; right = v->spl_right;
v->spl_nright = 0; v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */ /* form initial .. */
if ( cache[seed_1].allistrue ) { if ( cache[seed_1].allistrue ) {
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) ); datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
......
...@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid ...@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
and ( tq.typname='query_txt' or tq.typname='mquery_txt' ); and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 1, false, c.opoid SELECT opcl.oid, 1, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
...@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) ...@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
and c.oprname = '@@'; and c.oprname = '@@';
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 2, false, c.opoid SELECT opcl.oid, 2, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment