Commit 4bcb80a3 authored by Bruce Momjian's avatar Bruce Momjian

Update contrib intarray to Jan 25 version.

parent 5a38af7f
# $Header: /cvsroot/pgsql/contrib/intarray/Makefile,v 1.3 2001/02/20 19:20:27 petere Exp $
subdir = contrib/intarray
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
......@@ -12,7 +10,7 @@ NAME= _int
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) -DPGSQL71
override CPPFLAGS += -I$(srcdir) -DPGSQL71
OBJS= _int.o
......@@ -23,31 +21,46 @@ include $(top_srcdir)/src/Makefile.shlib
$(NAME).sql: $(NAME).sql.in
sed 's,MODULE_PATHNAME,$(libdir)/$(shlib),g' $< >$@
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
.PHONY: submake
submake:
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
# against installed postmaster
installcheck: submake
$(top_builddir)/src/test/regress/pg_regress _int
# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
# --top-builddir=$(top_builddir) _int
check:
@echo "'$(MAKE) check' is not supported."
@echo "Do '$(MAKE) install', then '$(MAKE) installcheck' instead."
@echo "'make check' is not supported."
@echo "Do 'make install', then 'make installcheck' instead."
install: all installdirs install-lib
$(INSTALL_DATA) $(srcdir)/README.intarray $(docdir)/contrib
#$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
installdirs:
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
uninstall: uninstall-lib
rm -f $(docdir)/contrib/README.intarray $(datadir)/contrib/$(NAME).sql
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
clean distclean maintainer-clean: clean-lib
rm -f $(OBJS) $(NAME).sql
rm -f *.so y.tab.c y.tab.h $(OBJS) $(NAME).sql
# things created by various check targets
rm -rf results
rm -f regression.diffs regression.out
rm -rf results tmp_check log
rm -f regression.diffs regression.out regress.out run_check.out
ifeq ($(PORTNAME), win)
rm -f regress.def
endif
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
ifeq (depend,$(wildcard depend))
include depend
endif
This is an implementation of RD-tree data structure using GiST interface
of PostgreSQL. It has built-in lossy compression - must be declared
in index creation - with (islossy). Current implementation has index support
for one-dimensional array of int4's.
in index creation - with (islossy). Current implementation provides index
support for one-dimensional array of int4's - gist__int_ops, suitable for
small and medium size of arrays (used on default), and gist__intbig_ops for
indexing large arrays (we use superimposed signature with length of 4096
bits to represent sets).
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
for additional information.
......@@ -25,7 +29,7 @@ EXAMPLE USAGE:
-- create indices
CREATE unique index message_key on message ( mid );
CREATE unique index message_section_map_key2 on message_section_map (sid, mid );
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
CREATE INDEX message_rdtree_idx on message using gist ( sections gist__int_ops) with ( islossy );
-- select some messages with section in 1 OR 2 - OVERLAP operator
select message.mid from message where message.sections && '{1,2}';
......
......@@ -4,11 +4,11 @@
format for these routines is dictated by Postgres architecture.
******************************************************************************/
#include "postgres.h"
#include <stdio.h>
#include <float.h>
#include <string.h>
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
......@@ -32,6 +32,7 @@
#endif
#define NDIM 1
#define ARRISNULL(x) ( (x) ? ( ( ARR_NDIM(x) == NDIM ) ? ( ( ARRSIZE( x ) ) ? 0 : 1 ) : 1 ) : 1 )
#define SORT(x) if ( ARRSIZE( x ) > 1 ) isort( (void*)ARRPTR( x ), ARRSIZE( x ) );
#define PREPAREARR(x) \
......@@ -39,6 +40,40 @@
if ( isort( (void*)ARRPTR( x ), ARRSIZE( x ) ) )\
x = _int_unique( x );\
}
/* bigint defines */
#define BITBYTE 8
#define SIGLENINT 128
#define SIGLEN ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE)
typedef char BITVEC[SIGLEN];
typedef char* BITVECP;
#define SIGPTR(x) ( (BITVECP) ARR_DATA_PTR(x) )
#define NULLIFY(a) MemSet( a, 0, sizeof( BITVEC ) )
#define NEWSIG(a) \
a=(BITVECP) malloc( sizeof( BITVEC );\
NULLIFY(a);
#define LOOPBYTE(a) \
for(i=0;i<SIGLEN;i++) {\
a;\
}
#define LOOPBIT(a) \
for(i=0;i<SIGLENBIT;i++) {\
a;\
}
#define getbytebit(x,i) ( *( (char*)(x) + (int)( i / BITBYTE ) ) )
#define clrbit(x,i) getbytebit(x,i) &= ~( 0x01 << ( i % BITBYTE ) )
#define setbit(x,i) getbytebit(x,i) |= ( 0x01 << ( i % BITBYTE ) )
#define getbit(x,i) ( getbytebit(x,i) >> ( i % BITBYTE ) & 0x01 )
#define union_sig(a,b,r) LOOPBYTE(r[i] = a[i] | b[i])
#define inter_sig(a,b,r) LOOPBYTE(r[i] = a[i] & b[i])
/*
#define GIST_DEBUG
#define GIST_QUERY_DEBUG
......@@ -58,17 +93,46 @@ static void printarr ( ArrayType * a, int num ) {
}
elog(NOTICE, "\t\t%s", bbb);
}
static void printbitvec( BITVEC bv ) {
int i;
char str[ SIGLENBIT+1 ];
str[ SIGLENBIT ] ='\0';
LOOPBIT( str[i] = ( getbit(bv,i) ) ? '1' : '0' );
elog(NOTICE,"BV: %s", str);
}
#endif
/*
** types for functions
*/
typedef ArrayType * (*formarray) (ArrayType*, ArrayType*);
typedef void (*formfloat) (ArrayType*, float*);
/*
** usefull function
*/
bool isort( int *a, const int len );
ArrayType * new_intArrayType( int num );
ArrayType * copy_intArrayType( ArrayType * a );
ArrayType * resize_intArrayType( ArrayType * a, int num );
int internal_size( int *a, int len );
ArrayType * _int_unique( ArrayType * a );
static bool isort( int *a, const int len );
static ArrayType * new_intArrayType( int num );
static ArrayType * copy_intArrayType( ArrayType * a );
static ArrayType * resize_intArrayType( ArrayType * a, int num );
static int internal_size( int *a, int len );
static ArrayType * _int_unique( ArrayType * a );
/* common gist function*/
static GIST_SPLITVEC * _int_common_picksplit(bytea *entryvec,
GIST_SPLITVEC *v,
formarray unionf,
formarray interf,
formfloat sizef);
static float * _int_common_penalty(GISTENTRY *origentry,
GISTENTRY *newentry,
float *result,
formarray unionf,
formfloat sizef);
static ArrayType * _int_common_union(bytea *entryvec,
int *sizep,
formarray unionf);
/*
** GiST support methods
......@@ -78,7 +142,6 @@ GISTENTRY * g_int_compress(GISTENTRY *entry);
GISTENTRY * g_int_decompress(GISTENTRY *entry);
float * g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
GIST_SPLITVEC * g_int_picksplit(bytea *entryvec, GIST_SPLITVEC *v);
bool g_int_internal_consistent(ArrayType *key, ArrayType *query, StrategyNumber strategy);
ArrayType * g_int_union(bytea *entryvec, int *sizep);
bool * g_int_same(ArrayType *b1, ArrayType *b2, bool *result);
......@@ -100,6 +163,23 @@ ArrayType * _int_union(ArrayType *a, ArrayType *b);
ArrayType * _int_inter(ArrayType *a, ArrayType *b);
void rt__int_size(ArrayType *a, float* sz);
/*
** _intbig methods
*/
bool g_intbig_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy);
GISTENTRY * g_intbig_compress(GISTENTRY *entry);
GISTENTRY * g_intbig_decompress(GISTENTRY *entry);
float * g_intbig_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
GIST_SPLITVEC * g_intbig_picksplit(bytea *entryvec, GIST_SPLITVEC *v);
ArrayType * g_intbig_union(bytea *entryvec, int *sizep);
bool * g_intbig_same(ArrayType *a, ArrayType *b, bool *result);
static bool _intbig_contains(ArrayType *a, ArrayType *b);
static bool _intbig_overlap(ArrayType *a, ArrayType *b);
static ArrayType * _intbig_union(ArrayType *a, ArrayType *b);
/*static ArrayType * _intbig_inter(ArrayType *a, ArrayType *b);*/
static void rt__intbig_size(ArrayType *a, float* sz);
static void gensign(BITVEC sign, int * a, int len);
/*****************************************************************************
* GiST functions
......@@ -116,51 +196,33 @@ g_int_consistent(GISTENTRY *entry,
ArrayType *query,
StrategyNumber strategy)
{
bool retval;
/* sort query for fast search, key is already sorted */
if ( ARRISNULL( query ) ) return FALSE;
PREPAREARR( query );
/*
** if entry is not leaf, use g_int_internal_consistent,
** else use g_int_leaf_consistent
*/
return(g_int_internal_consistent((ArrayType *)(entry->pred), query, strategy));
switch(strategy) {
case RTOverlapStrategyNumber:
retval = (bool)inner_int_overlap((ArrayType *)(entry->pred), query);
break;
case RTSameStrategyNumber:
case RTContainsStrategyNumber:
retval = (bool)inner_int_contains((ArrayType *)(entry->pred), query);
break;
case RTContainedByStrategyNumber:
retval = (bool)inner_int_overlap((ArrayType *)(entry->pred), query);
break;
default:
retval = FALSE;
}
return(retval);
}
/*
** The GiST Union method for _intments
** returns the minimal set that encloses all the entries in entryvec
*/
ArrayType *
g_int_union(bytea *entryvec, int *sizep)
{
int numranges, i;
ArrayType *out = (ArrayType *)NULL;
ArrayType *tmp;
numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY);
tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred;
#ifdef GIST_DEBUG
elog(NOTICE, "union %d", numranges);
#endif
for (i = 1; i < numranges; i++) {
out = inner_int_union(tmp, (ArrayType *)
(((GISTENTRY *)(VARDATA(entryvec)))[i]).pred);
if (i > 1 && tmp) pfree(tmp);
tmp = out;
}
*sizep = VARSIZE( out );
#ifdef GIST_DEBUG
elog(NOTICE, "\t ENDunion %d %d", *sizep, ARRSIZE( out ) );
#endif
if ( *sizep == 0 ) {
pfree( out );
return NULL;
}
return(out);
return _int_common_union( entryvec, sizep, inner_int_union );
}
/*
......@@ -194,7 +256,6 @@ g_int_compress(GISTENTRY *entry)
#ifdef GIST_DEBUG
elog(NOTICE, "COMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, len);
/* printarr( r, len ); */
#endif
if ( len >= 2*MAXNUMRANGE ) { /*compress*/
......@@ -260,7 +321,6 @@ g_int_decompress(GISTENTRY *entry)
#ifdef GIST_DEBUG
elog(NOTICE, "DECOMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, lenin);
/* printarr( in, lenin ); */
#endif
lenr = internal_size(din, lenin);
......@@ -287,180 +347,19 @@ g_int_decompress(GISTENTRY *entry)
float *
g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
{
Datum ud;
float tmp1, tmp2;
#ifdef GIST_DEBUG
elog(NOTICE, "penalty");
#endif
ud = (Datum)inner_int_union((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred));
rt__int_size((ArrayType *)ud, &tmp1);
rt__int_size((ArrayType *)(origentry->pred), &tmp2);
*result = tmp1 - tmp2;
pfree((char *)ud);
#ifdef GIST_DEBUG
elog(NOTICE, "--penalty\t%g", *result);
#endif
return(result);
return _int_common_penalty( origentry, newentry, result, inner_int_union, rt__int_size);
}
/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
*/
GIST_SPLITVEC *
g_int_picksplit(bytea *entryvec,
GIST_SPLITVEC *v)
{
OffsetNumber i, j;
ArrayType *datum_alpha, *datum_beta;
ArrayType *datum_l, *datum_r;
ArrayType *union_d, *union_dl, *union_dr;
ArrayType *inter_d;
bool firsttime;
float size_alpha, size_beta, size_union, size_inter;
float size_waste, waste;
float size_l, size_r;
int nbytes;
OffsetNumber seed_1 = 0, seed_2 = 0;
OffsetNumber *left, *right;
OffsetNumber maxoff;
#ifdef GIST_DEBUG
elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY));
#endif
maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
firsttime = true;
waste = 0.0;
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred);
/* compute the wasted space by unioning these guys */
/* size_waste = size_union - size_inter; */
union_d = (ArrayType *)inner_int_union(datum_alpha, datum_beta);
rt__int_size(union_d, &size_union);
inter_d = (ArrayType *)inner_int_inter(datum_alpha, datum_beta);
rt__int_size(inter_d, &size_inter);
size_waste = size_union - size_inter;
pfree(union_d);
if (inter_d != (ArrayType *) NULL)
pfree(inter_d);
/*
* are these a more promising split that what we've
* already seen?
*/
if (size_waste > waste || firsttime) {
waste = size_waste;
seed_1 = i;
seed_2 = j;
firsttime = false;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred);
datum_l = copy_intArrayType( datum_alpha );
rt__int_size((ArrayType *)datum_l, &size_l);
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred);
datum_r = copy_intArrayType( datum_beta );
rt__int_size((ArrayType *)datum_r, &size_r);
/*
* Now split up the regions between the two seeds. An important
* property of this split algorithm is that the split vector v
* has the indices of items to be split in order in its left and
* right vectors. We exploit this property by doing a merge in
* the code that actually splits the page.
*
* For efficiency, we also place the new index tuple in this loop.
* This is handled at the very end, when we have placed all the
* existing tuples and i == maxoff + 1.
*/
maxoff = OffsetNumberNext(maxoff);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
/*
* If we've already decided where to place this item, just
* put it on the right list. Otherwise, we need to figure
* out which page needs the least enlargement in order to
* store the item.
*/
if (i == seed_1) {
*left++ = i;
v->spl_nleft++;
continue;
} else if (i == seed_2) {
*right++ = i;
v->spl_nright++;
continue;
}
/* okay, which page needs least enlargement? */
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
union_dl = (ArrayType *)inner_int_union(datum_l, datum_alpha);
union_dr = (ArrayType *)inner_int_union(datum_r, datum_alpha);
rt__int_size((ArrayType *)union_dl, &size_alpha);
rt__int_size((ArrayType *)union_dr, &size_beta);
/* pick which page to add it to */
if (size_alpha - size_l < size_beta - size_r) {
if ( datum_l ) pfree(datum_l);
if ( union_dr ) pfree(union_dr);
datum_l = union_dl;
size_l = size_alpha;
*left++ = i;
v->spl_nleft++;
} else {
if ( datum_r ) pfree(datum_r);
if ( union_dl ) pfree(union_dl);
datum_r = union_dr;
size_r = size_beta;
*right++ = i;
v->spl_nright++;
}
}
/**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */
if ( *(left-1) > *(right-1) ) {
*right = FirstOffsetNumber;
*(left-1) = InvalidOffsetNumber;
} else {
*left = FirstOffsetNumber;
*(right-1) = InvalidOffsetNumber;
}
v->spl_ldatum = (char *)datum_l;
v->spl_rdatum = (char *)datum_r;
#ifdef GIST_DEBUG
elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright);
#endif
return v;
return _int_common_picksplit( entryvec, v,
inner_int_union,
inner_int_inter,
rt__int_size);
}
/*
......@@ -478,34 +377,6 @@ g_int_same(ArrayType *b1, ArrayType *b2, bool *result)
return(result);
}
bool
g_int_internal_consistent(ArrayType *key,
ArrayType *query,
StrategyNumber strategy)
{
bool retval;
#ifdef GIST_QUERY_DEBUG
elog(NOTICE, "internal_consistent, %d", strategy);
#endif
switch(strategy) {
case RTOverlapStrategyNumber:
retval = (bool)inner_int_overlap(key, query);
break;
case RTSameStrategyNumber:
case RTContainsStrategyNumber:
retval = (bool)inner_int_contains(key, query);
break;
case RTContainedByStrategyNumber:
retval = (bool)inner_int_overlap(key, query);
break;
default:
retval = FALSE;
}
return(retval);
}
bool
_int_contained(ArrayType *a, ArrayType *b)
{
......@@ -653,7 +524,7 @@ inner_int_union ( ArrayType *a, ArrayType *b ) {
int i,j;
#ifdef GIST_DEBUG
/* elog(NOTICE, "inner_union %d %d", ARRISNULL( a ) , ARRISNULL( b ) ); */
elog(NOTICE, "inner_union %d %d", ARRISNULL( a ) , ARRISNULL( b ) );
#endif
if ( ARRISNULL( a ) && ARRISNULL( b ) ) return new_intArrayType(0);
......@@ -709,7 +580,7 @@ inner_int_inter ( ArrayType *a, ArrayType *b ) {
int i,j;
#ifdef GIST_DEBUG
/* elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) ); */
elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) );
#endif
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;
......@@ -756,7 +627,8 @@ rt__int_size(ArrayType *a, float *size)
*****************************************************************************/
/* len >= 2 */
bool isort ( int *a, int len ) {
static bool
isort ( int *a, int len ) {
int tmp, index;
int *cur, *end;
bool r = FALSE;
......@@ -776,7 +648,8 @@ bool isort ( int *a, int len ) {
return r;
}
ArrayType * new_intArrayType( int num ) {
static ArrayType *
new_intArrayType( int num ) {
ArrayType * r;
int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num;
......@@ -795,7 +668,8 @@ ArrayType * new_intArrayType( int num ) {
return r;
}
ArrayType * resize_intArrayType( ArrayType * a, int num ) {
static ArrayType *
resize_intArrayType( ArrayType * a, int num ) {
int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num;
if ( num == ARRSIZE(a) ) return a;
......@@ -809,7 +683,8 @@ ArrayType * resize_intArrayType( ArrayType * a, int num ) {
return a;
}
ArrayType * copy_intArrayType( ArrayType * a ) {
static ArrayType *
copy_intArrayType( ArrayType * a ) {
ArrayType * r;
if ( ! a ) return NULL;
r = new_intArrayType( ARRSIZE(a) );
......@@ -818,7 +693,8 @@ ArrayType * copy_intArrayType( ArrayType * a ) {
}
/* num for compressed key */
int internal_size (int *a, int len ) {
static int
internal_size (int *a, int len ) {
int i,size=0;
for(i=0;i<len;i+=2)
......@@ -829,7 +705,8 @@ int internal_size (int *a, int len ) {
}
/* r is sorted and size of r > 1 */
ArrayType * _int_unique( ArrayType * r ) {
static ArrayType *
_int_unique( ArrayType * r ) {
int *tmp, *dr, *data;
int num = ARRSIZE(r);
data = tmp = dr = ARRPTR( r );
......@@ -840,3 +717,509 @@ ArrayType * _int_unique( ArrayType * r ) {
tmp++;
return resize_intArrayType(r, dr + 1 - ARRPTR(r) );
}
/*********************************************************************
** intbig functions
*********************************************************************/
static void
gensign(BITVEC sign, int * a, int len) {
int i;
NULLIFY(sign);
for(i=0; i<len; i++) {
setbit( sign, *a%SIGLENBIT );
a++;
}
}
static bool
_intbig_overlap(ArrayType *a, ArrayType *b) {
int i;
BITVECP da, db;
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
da = SIGPTR(a);
db = SIGPTR(b);
LOOPBYTE( if ( da[i] & db[i] ) return TRUE );
return FALSE;
}
static bool
_intbig_contains(ArrayType *a, ArrayType *b) {
int i;
BITVECP da, db;
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;
da = SIGPTR(a);
db = SIGPTR(b);
LOOPBYTE( if ( db[i] & ~da[i] ) return FALSE );
return TRUE;
}
static void
rt__intbig_size(ArrayType *a, float* sz) {
int i, len=0;
BITVECP bv;
if ( ARRISNULL( a ) ) {
*sz=0.0;
return;
}
bv = SIGPTR(a);
LOOPBIT( len += getbit(bv, i) );
*sz = (float) len;
return;
}
static ArrayType *
_intbig_union(ArrayType *a, ArrayType *b) {
ArrayType * r = NULL;
BITVECP da, db, dr;
int i;
if ( ARRISNULL( a ) && ARRISNULL( b ) ) return new_intArrayType(0);
if ( ARRISNULL( a ) ) {
r = copy_intArrayType( b );
return r;
}
if ( ARRISNULL( b ) ) {
r = copy_intArrayType( a );
return r;
}
r = new_intArrayType( SIGLENINT );
da = SIGPTR(a);
db = SIGPTR(b);
dr = SIGPTR(r);
LOOPBYTE(dr[i] = da[i] | db[i]);
return r;
}
/*
static ArrayType *
_intbig_inter(ArrayType *a, ArrayType *b) {
ArrayType * r = NULL;
BITVECP da, db, dr;
int i;
if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;
r = new_intArrayType( SIGLENINT );
da = SIGPTR(a);
db = SIGPTR(b);
dr = SIGPTR(r);
LOOPBYTE(dr[i] = da[i] & db[i]);
return r;
}
*/
bool *
g_intbig_same(ArrayType *a, ArrayType *b, bool *result) {
BITVECP da, db;
int i;
if ( ARRISNULL( a ) || ARRISNULL( b ) ) {
*result = ( ARRISNULL( a ) && ARRISNULL( b ) ) ? TRUE : FALSE;
return result;
}
da = SIGPTR(a);
db = SIGPTR(b);
LOOPBYTE(
if ( da[i] != db[i] ) {
*result = FALSE;
return result;
}
);
*result = TRUE;
return result;
}
GISTENTRY *
g_intbig_compress(GISTENTRY *entry) {
GISTENTRY *retval;
ArrayType *r, *in;
#ifdef PGSQL71
if ( entry->pred )
in = (ArrayType *)PG_DETOAST_DATUM( entry->pred );
else
in = NULL;
#else
in = (ArrayType *) entry->pred;
#endif
if ( ! entry->leafkey ) return entry;
retval = palloc(sizeof(GISTENTRY));
if ( ! retval )
elog(ERROR,"Can't allocate memory for compression");
if ( ARRISNULL( in ) ) {
#ifdef PGSQL71
if ( in ) if ( (char*)in != (char*)entry->pred ) pfree(in);
#endif
gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset,0, FALSE);
return( retval );
}
r = new_intArrayType( SIGLENINT );
gensign( SIGPTR( r ),
ARRPTR ( in ),
ARRSIZE( in ) );
gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE);
#ifdef PGSQL71
if ( in ) if ( (char*)in != (char*)entry->pred ) pfree(in);
#endif
return(retval);
}
GISTENTRY *
g_intbig_decompress(GISTENTRY *entry) {
return entry;
}
GIST_SPLITVEC *
g_intbig_picksplit(bytea *entryvec, GIST_SPLITVEC *v) {
OffsetNumber k;
ArrayType *datum_l, *datum_r, *datum_alpha;
ArrayType *unionarr;
float size_l, size_r;
int nbytes;
OffsetNumber *left, *right;
OffsetNumber maxoff;
#ifdef GIST_DEBUG
elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY));
#endif
maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
maxoff = OffsetNumberNext(maxoff);
datum_l = datum_r = NULL;
for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) {
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[k].pred);
if ( k != FirstOffsetNumber ) {
unionarr = (ArrayType *)_intbig_union(datum_l, datum_alpha);
if ( datum_l ) pfree(datum_l);
datum_l = unionarr;
rt__intbig_size((ArrayType *)unionarr, &size_l);
*left++ = k;
v->spl_nleft++;
} else {
unionarr = (ArrayType *)_intbig_union(datum_r, datum_alpha);
if ( datum_r ) pfree(datum_r);
datum_r = unionarr;
rt__intbig_size((ArrayType *)unionarr, &size_r);
*right++ = k;
v->spl_nright++;
}
}
if ( *(left-1) > *(right-1) ) {
*right = FirstOffsetNumber;
*(left-1) = InvalidOffsetNumber;
} else {
*left = FirstOffsetNumber;
*(right-1) = InvalidOffsetNumber;
}
v->spl_ldatum = (char *)datum_l;
v->spl_rdatum = (char *)datum_r;
#ifdef GIST_DEBUG
elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright);
#endif
return v;
}
ArrayType *
g_intbig_union(bytea *entryvec, int *sizep) {
return _int_common_union( entryvec, sizep, _intbig_union );
}
float *
g_intbig_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result){
_int_common_penalty( origentry, newentry, result, _intbig_union, rt__intbig_size);
*result= SIGLENBIT - *result;
return result;
}
bool
g_intbig_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy) {
bool retval;
ArrayType * q = new_intArrayType( SIGLENINT );
if ( ARRISNULL( query ) ) return FALSE;
gensign( SIGPTR( q ),
ARRPTR( query ),
ARRSIZE( query ) );
switch(strategy) {
case RTOverlapStrategyNumber:
retval = (bool)_intbig_overlap((ArrayType *)(entry->pred), q);
break;
case RTSameStrategyNumber:
case RTContainsStrategyNumber:
retval = (bool)_intbig_contains((ArrayType *)(entry->pred), q);
break;
case RTContainedByStrategyNumber:
retval = (bool)_intbig_overlap((ArrayType *)(entry->pred), q);
break;
default:
retval = FALSE;
}
pfree( q );
return(retval);
}
/*****************************************************************
** Common GiST Method
*****************************************************************/
/*
** The GiST Union method for _intments
** returns the minimal set that encloses all the entries in entryvec
*/
ArrayType *
_int_common_union(bytea *entryvec, int *sizep, formarray unionf) {
int numranges, i;
ArrayType *out = (ArrayType *)NULL;
ArrayType *tmp;
#ifdef GIST_DEBUG
elog(NOTICE, "_int_common_union in");
#endif
numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY);
tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred;
for (i = 1; i < numranges; i++) {
out = (*unionf)(tmp, (ArrayType *)
(((GISTENTRY *)(VARDATA(entryvec)))[i]).pred);
if (i > 1 && tmp) pfree(tmp);
tmp = out;
}
*sizep = VARSIZE( out );
if ( *sizep == 0 ) {
pfree( out );
#ifdef GIST_DEBUG
elog(NOTICE, "_int_common_union out1");
#endif
return NULL;
}
#ifdef GIST_DEBUG
elog(NOTICE, "_int_common_union out");
#endif
return(out);
}
/*
** The GiST Penalty method for _intments
*/
float *
_int_common_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result,
formarray unionf,
formfloat sizef)
{
Datum ud;
float tmp1, tmp2;
#ifdef GIST_DEBUG
elog(NOTICE, "penalty");
#endif
ud = (Datum)(*unionf)((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred));
(*sizef)((ArrayType *)ud, &tmp1);
(*sizef)((ArrayType *)(origentry->pred), &tmp2);
*result = tmp1 - tmp2;
pfree((char *)ud);
#ifdef GIST_DEBUG
elog(NOTICE, "--penalty\t%g", *result);
#endif
return(result);
}
/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
*/
GIST_SPLITVEC *
_int_common_picksplit(bytea *entryvec,
GIST_SPLITVEC *v,
formarray unionf,
formarray interf,
formfloat sizef)
{
OffsetNumber i, j;
ArrayType *datum_alpha, *datum_beta;
ArrayType *datum_l, *datum_r;
ArrayType *union_d, *union_dl, *union_dr;
ArrayType *inter_d;
bool firsttime;
float size_alpha, size_beta, size_union, size_inter;
float size_waste, waste;
float size_l, size_r;
int nbytes;
OffsetNumber seed_1 = 0, seed_2 = 0;
OffsetNumber *left, *right;
OffsetNumber maxoff;
#ifdef GIST_DEBUG
elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY));
#endif
maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
firsttime = true;
waste = 0.0;
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred);
/* compute the wasted space by unioning these guys */
/* size_waste = size_union - size_inter; */
union_d = (*unionf)(datum_alpha, datum_beta);
(*sizef)(union_d, &size_union);
inter_d = (*interf)(datum_alpha, datum_beta);
(*sizef)(inter_d, &size_inter);
size_waste = size_union - size_inter;
pfree(union_d);
if (inter_d != (ArrayType *) NULL)
pfree(inter_d);
/*
* are these a more promising split that what we've
* already seen?
*/
if (size_waste > waste || firsttime) {
waste = size_waste;
seed_1 = i;
seed_2 = j;
firsttime = false;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred);
datum_l = copy_intArrayType( datum_alpha );
(*sizef)((ArrayType *)datum_l, &size_l);
datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred);
datum_r = copy_intArrayType( datum_beta );
(*sizef)((ArrayType *)datum_r, &size_r);
/*
* Now split up the regions between the two seeds. An important
* property of this split algorithm is that the split vector v
* has the indices of items to be split in order in its left and
* right vectors. We exploit this property by doing a merge in
* the code that actually splits the page.
*
* For efficiency, we also place the new index tuple in this loop.
* This is handled at the very end, when we have placed all the
* existing tuples and i == maxoff + 1.
*/
maxoff = OffsetNumberNext(maxoff);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
/*
* If we've already decided where to place this item, just
* put it on the right list. Otherwise, we need to figure
* out which page needs the least enlargement in order to
* store the item.
*/
if (i == seed_1) {
*left++ = i;
v->spl_nleft++;
continue;
} else if (i == seed_2) {
*right++ = i;
v->spl_nright++;
continue;
}
/* okay, which page needs least enlargement? */
datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
union_dl = (ArrayType *)(*unionf)(datum_l, datum_alpha);
union_dr = (ArrayType *)(*unionf)(datum_r, datum_alpha);
(*sizef)((ArrayType *)union_dl, &size_alpha);
(*sizef)((ArrayType *)union_dr, &size_beta);
/* pick which page to add it to */
if (size_alpha - size_l < size_beta - size_r) {
if ( datum_l ) pfree(datum_l);
if ( union_dr ) pfree(union_dr);
datum_l = union_dl;
size_l = size_alpha;
*left++ = i;
v->spl_nleft++;
} else {
if ( datum_r ) pfree(datum_r);
if ( union_dl ) pfree(union_dl);
datum_r = union_dr;
size_r = size_beta;
*right++ = i;
v->spl_nright++;
}
}
/**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */
if ( *(left-1) > *(right-1) ) {
*right = FirstOffsetNumber;
*(left-1) = InvalidOffsetNumber;
} else {
*left = FirstOffsetNumber;
*(right-1) = InvalidOffsetNumber;
}
v->spl_ldatum = (char *)datum_l;
v->spl_rdatum = (char *)datum_r;
#ifdef GIST_DEBUG
elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright);
#endif
return v;
}
-- Create the user-defined type for the 1-D frloating point indervals (_int4)
-- Create the user-defined type for the 1-D integer arrays (_int4)
--
BEGIN TRANSACTION;
......@@ -208,4 +208,116 @@ INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
WHERE amname = 'gist' and opcname = 'gist__int_ops'
and proname = 'g_int_same';
---------------------------------------------
-- intbig
---------------------------------------------
-- define the GiST support methods
CREATE FUNCTION g_intbig_consistent(opaque,_int4,int4) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_compress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_decompress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_penalty(opaque,opaque,opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_picksplit(opaque, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_union(bytea, opaque) RETURNS _int4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_intbig_same(_int4, _int4, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
-- register the default opclass for indexing
INSERT INTO pg_opclass (opcname, opcdeftype)
values ( 'gist__intbig_ops', 0 );
-- get the comparators for _intments and store them in a tmp table
SELECT o.oid AS opoid, o.oprname
INTO TABLE _int_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid and o.oprright = t.oid
and t.typname = '_int4';
-- make sure we have the right operators
-- SELECT * from _int_ops_tmp;
-- using the tmp table, generate the amop entries
-- _int_overlap
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 3
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and c.oprname = '&&';
-- _int_contains
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 7
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and c.oprname = '@';
-- _int_contained
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 8
FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and c.oprname = '~';
DROP TABLE _int_ops_tmp;
-- add the entries to amproc for the support methods
-- note the amprocnum numbers associated with each are specific!
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 1
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_consistent';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 2
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_union';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 3
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_compress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 4
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_decompress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 5
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_penalty';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 6
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_picksplit';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 7
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist__intbig_ops'
and proname = 'g_intbig_same';
END TRANSACTION;
......@@ -49,7 +49,7 @@ print <<EOT;
CREATE unique index message_key on message ( mid );
--CREATE unique index message_section_map_key1 on message_section_map ( mid, sid );
CREATE unique index message_section_map_key2 on message_section_map ( sid, mid );
CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
CREATE INDEX message_rdtree_idx on message using gist ( sections gist__int_ops ) with ( islossy );
VACUUM ANALYZE;
select count(*) from message;
......
......@@ -17,3 +17,30 @@ SELECT count(*) from test__int WHERE a @ '{23,50}';
12
(1 row)
CREATE INDEX text_idx on test__int using gist ( a gist__int_ops ) with ( islossy );
SELECT count(*) from test__int WHERE a && '{23,50}';
count
-------
345
(1 row)
SELECT count(*) from test__int WHERE a @ '{23,50}';
count
-------
12
(1 row)
drop index text_idx;
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops ) with ( islossy );
SELECT count(*) from test__int WHERE a && '{23,50}';
count
-------
345
(1 row)
SELECT count(*) from test__int WHERE a @ '{23,50}';
count
-------
12
(1 row)
......@@ -13,3 +13,14 @@ CREATE TABLE test__int( a int[] );
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @ '{23,50}';
CREATE INDEX text_idx on test__int using gist ( a gist__int_ops ) with ( islossy );
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @ '{23,50}';
drop index text_idx;
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops ) with ( islossy );
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @ '{23,50}';
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment