Commit fe1a9c33 authored by Tom Lane's avatar Tom Lane

Repair some problems in GIST-index contrib modules. Patch from

Teodor Sigaev <teodor@stack.net>.
parent e206ff59
......@@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
datum_l = copy_intArrayType(datum_alpha);
......
......@@ -198,23 +198,6 @@ Don't forget to do
make clean; make; make install
2.
As mentioned above, we don't explicitly use lexeme IDs
as OpenFTS does, but use a hash function (crc32) instead to map each lexeme
to an integer. Our experiments show that the probability of a collision is quite small:
about 10**(-6) for English text and 10**(-5) for a Russian collection.
The default installation doesn't check for collisions, but if your application
needs to guarantee an exact (collision-free) search, you need
to update the system table to mark the index as lossy:
update pg_amop set amopreqcheck = true where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
If you don't care about collisions:
update pg_amop set amopreqcheck = false where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
3.
For performance reasons, txtidx doesn't preserve word order (this is not
critical for searching), for example:
......@@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
'two' 'page'
(1 row)
4.
3.
Indexed access provided by the txtidx data type isn't always efficient
because of the internal data structure we use (RD-Tree). In particular,
queries like '!gist' will be slower than just a sequential scan,
......@@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
These two queries will be processed by scanning the full index!
Very slow!
5.
4.
The following selects produce the same result:
select title from titles where titleidx @@ 'patch&gist';
......
......@@ -10,6 +10,7 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "access/tuptoaster.h"
#include "txtidx.h"
#include "query.h"
......@@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
return res + 1 - a;
}
/*
 * makesign
 *		Build a signature from an array-form key.
 *
 * sign - destination signature; sizeof(BITVEC) bytes of it are zeroed
 *		  before anything is hashed in.
 * a	- key whose element array (GETARR) of ARRNELEM(a) entries is
 *		  folded into the signature, one HASH call per element.
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
	int4	nelems = ARRNELEM( a );
	int4   *elems = GETARR( a );
	int4	i = 0;

	/* start from an empty signature */
	MemSet( (void*)sign, 0, sizeof(BITVEC) );

	/* hash the elements in array order */
	while ( i < nelems ) {
		HASH( sign, elems[i] );
		i++;
	}
}
Datum
gtxtidx_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
......@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
arr++; ptr++;
}
if ( val != toastedval )
pfree(val);
len = uniqueint( GETARR(res), val->size );
if ( len != val->size ) {
......@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
len = CALCGTSIZE( ARRKEY, len );
res = (GISTTYPE*)repalloc( (void*)res, len );
res->len = len;
}
}
if ( val != toastedval )
pfree(val);
/* make signature, if array is too long */
if ( res->len > TOAST_INDEX_TARGET ) {
GISTTYPE *ressign;
len = CALCGTSIZE( SIGNKEY, 0 );
ressign = (GISTTYPE*)palloc( len );
ressign->len = len;
ressign->flag = SIGNKEY;
makesign( GETSIGN(ressign), res );
pfree(res);
res = ressign;
}
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
......@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER( penalty );
}
/*
 * Fill the signature `sign` from the array stored in `a`: zero
 * sizeof(BITVEC) bytes of `sign`, then apply HASH to each of the
 * ARRNELEM(a) elements obtained through GETARR(a).
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
int4 k,len = ARRNELEM( a );
int4 *ptr = GETARR( a );
/* start from an all-zero signature */
MemSet( (void*)sign, 0, sizeof(BITVEC) );
/* fold every array element into the signature */
for(k=0;k<len;k++)
HASH( sign, ptr[k] );
}
typedef struct {
bool allistrue;
BITVEC sign;
......@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
right = v->spl_right;
v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if ( cache[seed_1].allistrue ) {
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
......
......@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 1, false, c.opoid
SELECT opcl.oid, 1, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
......@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
and c.oprname = '@@';
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 2, false, c.opoid
SELECT opcl.oid, 2, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment