Commit 47d5c3d5 authored by Bruce Momjian

Changes:

1 intarray: bugfix for int[]-int[] operation
2 intarray: split _int.c to several files (_int.c now is unused)
3 intarray (gist__intbig_ops opclass): use special type for index storage
4 ltree (gist__ltree_ops opclass), intarray (gist__intbig_ops): optimize
GiST's penalty and picksplit interface functions, which now use Hamming
distance.

Teodor Sigaev
parent d2e028b1
# $Header: /cvsroot/pgsql/contrib/intarray/Makefile,v 1.8 2001/09/06 10:49:29 petere Exp $ # $Header: /cvsroot/pgsql/contrib/intarray/Makefile,v 1.9 2003/06/11 18:44:14 momjian Exp $
subdir = contrib/intarray subdir = contrib/intarray
top_builddir = ../.. top_builddir = ../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
MODULES = _int MODULE_big = _int
OBJS = _int_bool.o _int_gist.o _int_op.o _int_tool.o _intbig_gist.o
DATA_built = _int.sql DATA_built = _int.sql
DOCS = README.intarray DOCS = README.intarray
REGRESS = _int REGRESS = _int
......
This diff is collapsed.
...@@ -360,7 +360,24 @@ DEFAULT FOR TYPE _int4 USING gist AS ...@@ -360,7 +360,24 @@ DEFAULT FOR TYPE _int4 USING gist AS
-- intbig -- intbig
--------------------------------------------- ---------------------------------------------
-- define the GiST support methods -- define the GiST support methods
CREATE FUNCTION g_intbig_consistent(internal,_int4,int4)
CREATE FUNCTION _intbig_in(cstring)
RETURNS intbig_gkey
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION _intbig_out(intbig_gkey)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE TYPE intbig_gkey (
INTERNALLENGTH = -1,
INPUT = _intbig_in,
OUTPUT = _intbig_out
);
CREATE FUNCTION g_intbig_consistent(internal,internal,int4)
RETURNS bool RETURNS bool
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE 'C'; LANGUAGE 'C';
...@@ -390,7 +407,7 @@ RETURNS _int4 ...@@ -390,7 +407,7 @@ RETURNS _int4
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE 'C'; LANGUAGE 'C';
CREATE FUNCTION g_intbig_same(_int4, _int4, internal) CREATE FUNCTION g_intbig_same(internal, internal, internal)
RETURNS internal RETURNS internal
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE 'C'; LANGUAGE 'C';
...@@ -405,10 +422,11 @@ AS ...@@ -405,10 +422,11 @@ AS
OPERATOR 7 @ RECHECK, OPERATOR 7 @ RECHECK,
OPERATOR 8 ~ RECHECK, OPERATOR 8 ~ RECHECK,
OPERATOR 20 @@ (_int4, query_int) RECHECK, OPERATOR 20 @@ (_int4, query_int) RECHECK,
FUNCTION 1 g_intbig_consistent (internal, _int4, int4), FUNCTION 1 g_intbig_consistent (internal, internal, int4),
FUNCTION 2 g_intbig_union (bytea, internal), FUNCTION 2 g_intbig_union (bytea, internal),
FUNCTION 3 g_intbig_compress (internal), FUNCTION 3 g_intbig_compress (internal),
FUNCTION 4 g_intbig_decompress (internal), FUNCTION 4 g_intbig_decompress (internal),
FUNCTION 5 g_intbig_penalty (internal, internal, internal), FUNCTION 5 g_intbig_penalty (internal, internal, internal),
FUNCTION 6 g_intbig_picksplit (internal, internal), FUNCTION 6 g_intbig_picksplit (internal, internal),
FUNCTION 7 g_intbig_same (_int4, _int4, internal); FUNCTION 7 g_intbig_same (internal, internal, internal),
STORAGE intbig_gkey;
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
\set ECHO none \set ECHO none
psql:_int.sql:13: NOTICE: ProcedureCreate: type query_int is not yet defined psql:_int.sql:13: NOTICE: ProcedureCreate: type query_int is not yet defined
psql:_int.sql:18: NOTICE: Argument type "query_int" is only a shell psql:_int.sql:18: NOTICE: Argument type "query_int" is only a shell
psql:_int.sql:369: NOTICE: ProcedureCreate: type intbig_gkey is not yet defined
psql:_int.sql:374: NOTICE: Argument type "intbig_gkey" is only a shell
SELECT intset(1234); SELECT intset(1234);
intset intset
-------- --------
......
...@@ -211,35 +211,39 @@ sizebitvec(BITVECP sign) ...@@ -211,35 +211,39 @@ sizebitvec(BITVECP sign)
return size; return size;
} }
/*
 * hemdistsign
 *		Hamming distance between two signature bit vectors: the number of
 *		bit positions visited by ALOOPBIT at which a and b differ.
 *
 * NOTE(review): ALOOPBIT is defined outside this view; it presumably steps
 * the local variable "i" over every bit of an array signature -- confirm
 * against the macro definition.
 */
static int
hemdistsign(BITVECP a, BITVECP b)
{
	int			i;
	int			ndiff = 0;

	ALOOPBIT(
		ndiff += (GETBIT(a, i) != GETBIT(b, i)) ? 1 : 0;
	);

	return ndiff;
}
/*
 * hemdist
 *		Distance between two ltree_gist keys.
 *
 * When neither key is flagged all-true, the result is hemdistsign() applied
 * to the two signatures.  When both are flagged all-true, the distance is 0.
 * When exactly one is flagged all-true, the result is ASIGLENBIT minus
 * sizebitvec() of the other key's signature.
 *
 * NOTE(review): sizebitvec() presumably returns the number of set bits, which
 * would make the mixed case the count of bits missing from the explicit
 * signature -- confirm against its definition.
 */
static int
hemdist(ltree_gist *a, ltree_gist *b)
{
	if (!LTG_ISALLTRUE(a))
	{
		if (!LTG_ISALLTRUE(b))
			return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));

		/* only b is all-true: measure against a's explicit signature */
		return ASIGLENBIT-sizebitvec(LTG_SIGN(a));
	}

	/* a is all-true from here on */
	if (LTG_ISALLTRUE(b))
		return 0;

	return ASIGLENBIT-sizebitvec(LTG_SIGN(b));
}
Datum Datum
_ltree_penalty(PG_FUNCTION_ARGS) _ltree_penalty(PG_FUNCTION_ARGS)
{ {
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key); ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key); ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2); float *penalty = (float *) PG_GETARG_POINTER(2);
BITVECP orig = LTG_SIGN(origval);
if (LTG_ISALLTRUE(origval)) *penalty=hemdist(origval,newval);
{
*penalty = 0.1;
PG_RETURN_POINTER(penalty);
}
if (LTG_ISALLTRUE(newval))
*penalty = (float) (ASIGLENBIT - sizebitvec(orig));
else
{
unsigned char valtmp;
BITVECP nval = LTG_SIGN(newval);
int4 i,
unionsize = 0;
ALOOPBYTE(
valtmp = nval[i] | orig[i];
unionsize += SUMBIT(valtmp) - SUMBIT(orig[i]);
);
*penalty = (float) unionsize;
}
PG_RETURN_POINTER(penalty); PG_RETURN_POINTER(penalty);
} }
...@@ -264,28 +268,19 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -264,28 +268,19 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
j; j;
ltree_gist *datum_l, ltree_gist *datum_l,
*datum_r; *datum_r;
ABITVEC union_l, BITVECP union_l,
union_r; union_r;
bool firsttime = true; int4 size_alpha, size_beta;
int4 size_alpha,
size_beta,
sizeu,
sizei;
int4 size_waste, int4 size_waste,
waste = 0.0; waste = -1;
int4 size_l,
size_r;
int4 nbytes; int4 nbytes;
OffsetNumber seed_1 = 0, OffsetNumber seed_1 = 0,
seed_2 = 0; seed_2 = 0;
OffsetNumber *left, OffsetNumber *left,
*right; *right;
OffsetNumber maxoff; OffsetNumber maxoff;
BITVECP ptra, BITVECP ptr;
ptrb,
ptrc;
int i; int i;
unsigned char valtmp;
SPLITCOST *costvector; SPLITCOST *costvector;
ltree_gist *_k, ltree_gist *_k,
*_j; *_j;
...@@ -295,57 +290,14 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -295,57 +290,14 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
v->spl_left = (OffsetNumber *) palloc(nbytes); v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes); v->spl_right = (OffsetNumber *) palloc(nbytes);
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) {
{
_k = GETENTRY(entryvec, k); _k = GETENTRY(entryvec, k);
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
{ size_waste=hemdist(_k, GETENTRY(entryvec, j));
_j = GETENTRY(entryvec, j); if (size_waste > waste ) {
if (LTG_ISALLTRUE(_k) || LTG_ISALLTRUE(_j))
{
sizeu = ASIGLENBIT;
if (LTG_ISALLTRUE(_k) && LTG_ISALLTRUE(_j))
sizei = ASIGLENBIT;
else
sizei = (LTG_ISALLTRUE(_k)) ?
sizebitvec(LTG_SIGN(_j)) : sizebitvec(LTG_SIGN(_k));
}
else
{
sizeu = sizei = 0;
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(_k);
/* critical section for bench !!! */
#define COUNT(pos) do { \
if ( GETBITBYTE(*(char*)ptra,pos) ) { \
sizeu++; \
if ( GETBITBYTE(*(char*)ptrb, pos) ) \
sizei++; \
} else if ( GETBITBYTE(*(char*)ptrb, pos) ) \
sizeu++; \
} while(0)
ALOOPBYTE(
COUNT(0);
COUNT(1);
COUNT(2);
COUNT(3);
COUNT(4);
COUNT(5);
COUNT(6);
COUNT(7);
ptra = (BITVECP) (((char *) ptra) + 1);
ptrb = (BITVECP) (((char *) ptrb) + 1);
);
}
size_waste = sizeu - sizei;
if (size_waste > waste || firsttime)
{
waste = size_waste; waste = size_waste;
seed_1 = k; seed_1 = k;
seed_2 = j; seed_2 = j;
firsttime = false;
} }
} }
} }
...@@ -367,7 +319,6 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -367,7 +319,6 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
datum_l = (ltree_gist *) palloc(LTG_HDRSIZE); datum_l = (ltree_gist *) palloc(LTG_HDRSIZE);
datum_l->len = LTG_HDRSIZE; datum_l->len = LTG_HDRSIZE;
datum_l->flag = LTG_ALLTRUE; datum_l->flag = LTG_ALLTRUE;
size_l = ASIGLENBIT;
} }
else else
{ {
...@@ -375,14 +326,12 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -375,14 +326,12 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
datum_l->len = LTG_HDRSIZE + ASIGLEN; datum_l->len = LTG_HDRSIZE + ASIGLEN;
datum_l->flag = 0; datum_l->flag = 0;
memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC)); memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
size_l = sizebitvec(LTG_SIGN(datum_l));
} }
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2))) if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
{ {
datum_r = (ltree_gist *) palloc(LTG_HDRSIZE); datum_r = (ltree_gist *) palloc(LTG_HDRSIZE);
datum_r->len = LTG_HDRSIZE; datum_r->len = LTG_HDRSIZE;
datum_r->flag = LTG_ALLTRUE; datum_r->flag = LTG_ALLTRUE;
size_r = ASIGLENBIT;
} }
else else
{ {
...@@ -390,7 +339,6 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -390,7 +339,6 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
datum_r->len = LTG_HDRSIZE + ASIGLEN; datum_r->len = LTG_HDRSIZE + ASIGLEN;
datum_r->flag = 0; datum_r->flag = 0;
memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC)); memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
size_r = sizebitvec(LTG_SIGN(datum_r));
} }
maxoff = OffsetNumberNext(maxoff); maxoff = OffsetNumberNext(maxoff);
...@@ -400,55 +348,18 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -400,55 +348,18 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
{ {
costvector[j - 1].pos = j; costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j); _j = GETENTRY(entryvec, j);
if (LTG_ISALLTRUE(_j)) size_alpha = hemdist(datum_l,_j);
{ size_beta = hemdist(datum_r,_j);
size_alpha = ASIGLENBIT - size_l;
size_beta = ASIGLENBIT - size_r;
}
else
{
ptra = LTG_SIGN(datum_l);
ptrb = LTG_SIGN(datum_r);
ptrc = LTG_SIGN(_j);
size_beta = size_alpha = 0;
if (LTG_ISALLTRUE(datum_l))
{
if (!LTG_ISALLTRUE(datum_r))
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptrb, i))
size_beta++;
);
}
}
else if (LTG_ISALLTRUE(datum_r))
{
if (!LTG_ISALLTRUE(datum_l))
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptra, i))
size_alpha++;
);
}
}
else
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptra, i))
size_alpha++;
if (GETBIT(ptrc, i) && !GETBIT(ptrb, i))
size_beta++;
);
}
}
costvector[j - 1].cost = abs(size_alpha - size_beta); costvector[j - 1].cost = abs(size_alpha - size_beta);
} }
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
union_l=LTG_SIGN(datum_l);
union_r=LTG_SIGN(datum_r);
for (k = 0; k < maxoff; k++) for (k = 0; k < maxoff; k++)
{ {
j = costvector[k].pos; j = costvector[k].pos;
_j = GETENTRY(entryvec, j);
if (j == seed_1) if (j == seed_1)
{ {
*left++ = j; *left++ = j;
...@@ -461,62 +372,35 @@ _ltree_picksplit(PG_FUNCTION_ARGS) ...@@ -461,62 +372,35 @@ _ltree_picksplit(PG_FUNCTION_ARGS)
v->spl_nright++; v->spl_nright++;
continue; continue;
} }
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l,_j);
size_beta = hemdist(datum_r,_j);
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j)) if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
size_alpha = ASIGLENBIT;
else
{
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(datum_l);
size_alpha = 0;
ALOOPBYTE(
valtmp = union_l[i] = ptra[i] | ptrb[i];
size_alpha += SUMBIT(valtmp);
);
}
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
size_beta = ASIGLENBIT;
else
{
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(datum_r);
size_beta = 0;
ALOOPBYTE(
valtmp = union_r[i] = ptra[i] | ptrb[i];
size_beta += SUMBIT(valtmp);
);
}
if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
{ {
if (!LTG_ISALLTRUE(datum_l)) if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j) ) {
{ if (!LTG_ISALLTRUE(datum_l))
if (size_alpha == ASIGLENBIT) MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
{ } else {
if (size_alpha != size_l) ptr=LTG_SIGN(_j);
MemSet((void *) LTG_SIGN(datum_l), 0xff, sizeof(ABITVEC)); ALOOPBYTE(
} union_l[i] |= ptr[i];
else );
memcpy((void *) LTG_SIGN(datum_l), (void *) union_l, sizeof(ABITVEC));
} }
size_l = size_alpha;
*left++ = j; *left++ = j;
v->spl_nleft++; v->spl_nleft++;
} }
else else
{ {
if (!LTG_ISALLTRUE(datum_r)) if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j) ) {
{ if (!LTG_ISALLTRUE(datum_r))
if (size_beta == ASIGLENBIT) MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
{ } else {
if (size_beta != size_r) ptr=LTG_SIGN(_j);
MemSet((void *) LTG_SIGN(datum_r), 0xff, sizeof(ABITVEC)); ALOOPBYTE(
} union_r[i] |= ptr[i];
else );
memcpy((void *) LTG_SIGN(datum_r), (void *) union_r, sizeof(ABITVEC));
} }
size_r = size_beta;
*right++ = j; *right++ = j;
v->spl_nright++; v->spl_nright++;
} }
......
...@@ -239,7 +239,7 @@ typedef struct ...@@ -239,7 +239,7 @@ typedef struct
/* GiST support for ltree[] */ /* GiST support for ltree[] */
#define ASIGLENINT (2*SIGLENINT) #define ASIGLENINT (7)
#define ASIGLEN (sizeof(int4)*ASIGLENINT) #define ASIGLEN (sizeof(int4)*ASIGLENINT)
#define ASIGLENBIT (ASIGLEN*BITBYTE) #define ASIGLENBIT (ASIGLEN*BITBYTE)
typedef unsigned char ABITVEC[ASIGLEN]; typedef unsigned char ABITVEC[ASIGLEN];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment