commit Oleg and Teodor's RD-tree implementation ... this provides the

regression tests for the GiST changes ... this should be integrated into the regular regression tests similar to Vadim's SPI contrib stuff ...

commit Oleg and Teodor's RD-tree implementation ... this provides the
regression tests for the GiST changes ... this should be integrated into the regular regression tests similar to Vadim's SPI contrib stuff ...
1db943b3 · Marc G. Fournier · 0ad7db4b · 1db943b3 · 1db943b3 · 1db943b3
Commit 1db943b3 authored Jan 12, 2001 by Marc G. Fournier
10 changed files
--- a/contrib/intarray/Makefile
+++ b/contrib/intarray/Makefile
+# Location of this directory within the tree, and the path back to the top.
+subdir = contrib/intarray
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+# override libdir to install shlib in contrib not main directory
+libdir := $(libdir)/contrib
+# shared library parameters
+NAME= _int
+SO_MAJOR_VERSION= 1
+SO_MINOR_VERSION= 0
+# Add this source directory to the include path and define PGSQL71 for the C
+# sources (presumably selects the 7.1-specific code in _int.c -- confirm).
+override CPPFLAGS += -I$(srcdir) -DPGSQL71
+OBJS= _int.o
+# Default goal: the shared library (all-lib is not defined in this file --
+# presumably supplied by Makefile.shlib below) plus the generated SQL script.
+all: all-lib $(NAME).sql
+# Shared library stuff
+include $(top_srcdir)/src/Makefile.shlib
+# Generate the loadable SQL script from its template by replacing every
+# MODULE_PATHNAME placeholder with $(libdir)/$(shlib).
+$(NAME).sql: $(NAME).sql.in
+	sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
+# Regression-test targets.  None of these names is a file, so all are phony.
+.PHONY: submake installcheck check
+# Build the pg_regress driver in the main regression-test directory.
+submake:
+	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
+# Run the _int regression test against an installed postmaster.
+# (This target used to be defined twice; make kept only this recipe and
+# warned about overriding the other, so the dead duplicate was dropped.)
+installcheck: submake
+	$(top_builddir)/src/test/regress/pg_regress _int
+# in-tree test doesn't work yet (no way to install my shared library)
+#check: all submake
+#	$(top_builddir)/src/test/regress/pg_regress --temp-install \
+#	  --top-builddir=$(top_builddir) _int
+check:
+	@echo "'make check' is not supported."
+	@echo "Do 'make install', then 'make installcheck' instead."
+# Install the shared library (install-lib) and the generated SQL script.
+# README installation is disabled.  It used to be kept as a tab-indented
+# '#' line inside the recipe, which make still expands, echoes and hands to
+# the shell; it is recorded here as a real comment instead.  Note that the
+# README in this directory is README.intarray, not README.$(NAME):
+#   $(INSTALL_DATA) $(srcdir)/README.$(NAME)  $(docdir)/contrib
+install: all installdirs install-lib
+	$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
+# Create the destination directories used by install.
+installdirs:
+	$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
+# Remove the installed SQL script (and a README, if one is present; rm -f
+# does not complain about missing files), then the library via uninstall-lib.
+uninstall: uninstall-lib
+	rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
+# All three cleaning levels do the same work here: clean-lib first, then
+# remove the objects, the generated SQL script and leftover test output.
+clean distclean maintainer-clean: clean-lib
+	rm -f *.so y.tab.c y.tab.h $(OBJS) $(NAME).sql
+# things created by various check targets
+	rm -rf results tmp_check log
+	rm -f regression.diffs regression.out regress.out run_check.out
+# regress.def is only removed when PORTNAME is "win".
+ifeq ($(PORTNAME), win)
+	rm -f regress.def
+endif
+# Write compiler-generated (-MM) dependencies for every .c file here into the
+# file "depend".
+depend dep:
+	$(CC) -MM $(CFLAGS) *.c >depend
+# Pull the dependency file in only if it exists in the current directory.
+ifeq (depend,$(wildcard depend))
+include depend
+endif
--- a/contrib/intarray/Makefile.703
+++ b/contrib/intarray/Makefile.703
+#-------------------------------------------------------------------------
+#
+# Makefile --
+#
+#    Makefile for the integer-array GiST index module -- _int
+#
+#-------------------------------------------------------------------------
+# Top of the PostgreSQL tree relative to this directory, and its src/ subtree.
+PGDIR = ../..
+SRCDIR = $(PGDIR)/src
+include $(SRCDIR)/Makefile.global
+# Header search path: this directory, src/, src/include and the port-specific
+# directory for $(PORTNAME).
+INCLUDE_OPT =	-I ./ \
+		-I $(SRCDIR)/ \
+		-I $(SRCDIR)/include \
+		-I $(SRCDIR)/port/$(PORTNAME)
+# CFLAGS_SL is not defined in this file (presumably the shared-library
+# compile flags from Makefile.global -- confirm).
+CFLAGS += $(INCLUDE_OPT) $(CFLAGS_SL)
+# Names derived from the module name: object file, SQL script, loadable module.
+MODNAME =	_int
+OBJFILES =	$(MODNAME).o
+SQLDEFS =	$(MODNAME).sql
+MODULE =	$(MODNAME)$(DLSUFFIX)
+# Install locations for the module and its SQL script, both below $(LIBDIR).
+MODDIR =	$(LIBDIR)/modules
+SQLDIR =	$(LIBDIR)/sql
+# Default goal: build the loadable module and the SQL script.
+all:		module sql
+module:		$(MODULE)
+sql:		$(SQLDEFS)
+# Link the object file(s) into the loadable module.
+$(MODULE):	$(OBJFILES)
+		$(CC) $(CFLAGS) -shared -o $@ $(OBJFILES)
+# Copy the module and SQL script into place (creating the directories through
+# the rules below) and strip the installed copy of the module.
+install:	$(MODULE) $(SQLDEFS) $(MODDIR) $(SQLDIR)
+		cp -p $(MODULE) $(MODDIR)/
+		strip $(MODDIR)/$(MODULE)
+		cp -p $(SQLDEFS) $(SQLDIR)/
+$(MODDIR):
+		mkdir -p $@
+$(SQLDIR):
+		mkdir -p $@
+# Generate a .sql file from its .sql.in template, replacing the first
+# MODULE_PATHNAME on each line (no "g" flag) with the installed module path.
+%.sql: %.sql.in
+		sed "s|MODULE_PATHNAME|$(MODDIR)/$(MODULE)|" < $< > $@
+# Write compiler-generated (-MM) dependencies for every .c file into "depend".
+depend dep:
+		$(CC) -MM $(INCLUDE_OPT) *.c >depend
+# Remove the module, the generated SQL script, and editor/backup and
+# generated parser/scanner leftovers matched by the patterns below.
+clean:
+		rm -f $(MODULE) $(SQLDEFS) *$(DLSUFFIX)
+		rm -f *~ *# *.b *.o *.output *.tab.h $(MODNAME)parse.h $(MODNAME)parse.c $(MODNAME)scan.c 
+# Pull the dependency file in only if it exists in the current directory.
+ifeq (depend,$(wildcard depend))
+include depend
+endif
--- a/contrib/intarray/README.intarray
+++ b/contrib/intarray/README.intarray
+This is an implementation of RD-tree data structure using GiST interface
+of PostgreSQL. It has built-in lossy compression, which must be declared
+at index creation time using "with ( islossy )". The current implementation
+provides index support for one-dimensional arrays of int4's.
+All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
+(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
+for additional information.
+INSTALLATION:
+  gmake
+  gmake install
+  -- load functions
+  psql <database> < _int.sql 
+REGRESSION TEST:
+   gmake installcheck
+EXAMPLE USAGE:
+  create table message (mid int not null,sections int[]);
+  create table message_section_map (mid int not null,sid int not null);
+  -- create indices
+CREATE unique index message_key on message ( mid );
+CREATE unique index message_section_map_key2 on message_section_map (sid, mid );
+CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
+  -- select some messages with section in 1 OR 2 - OVERLAP operator
+  select message.mid from message where message.sections && '{1,2}';  
+  -- select messages contained in both sections 1 AND 2 - CONTAINS operator
+  select message.mid from message where message.sections @ '{1,2}';
+  -- the same, CONTAINED operator
+  select message.mid from message where '{1,2}' ~ message.sections;
+BENCHMARK:
+  subdirectory bench contains benchmark suite.
+  cd ./bench
+  1. createdb TEST
+  2. psql TEST < ../_int.sql
+  3. ./create_test.pl | psql TEST
+  4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
+                  with/without RD-Tree. Run script without arguments to 
+                  see available options.
+     a)test without RD-Tree (OR)
+       ./bench.pl -d TEST -s 1,2 -v
+     b)test with RD-Tree 
+       ./bench.pl -d TEST -s 1,2 -v -r
+BENCHMARKS:
+Size of table <message>: 200000
+Size of table <message_section_map>: 268538 
+Distribution of messages by sections:
+section 0: 73899 messages
+section 1: 16298 messages
+section 50: 1241 messages
+section 99: 705 messages
+old - without RD-Tree support,
+new - with RD-Tree
++----------+---------------+----------------+
+|Search set|OR, time in sec|AND, time in sec|
+|          +-------+-------+--------+-------+
+|          |  old  |  new  |   old  |  new  |
++----------+-------+-------+--------+-------+
+|         1|  1.427|  0.215|       -|      -|
++----------+-------+-------+--------+-------+
+|        99|  1.029|  0.018|       -|      -|
++----------+-------+-------+--------+-------+
+|       1,2|  1.829|  0.334|   5.654|  0.042|
++----------+-------+-------+--------+-------+
+| 1,2,50,60|  2.057|  0.359|   5.044|  0.007|
++----------+-------+-------+--------+-------+
--- a/contrib/intarray/_int.c
+++ b/contrib/intarray/_int.c
--- a/contrib/intarray/_int.sql.in
+++ b/contrib/intarray/_int.sql.in
+-- Create the functions, operators and GiST operator class for 1-D integer arrays (_int4)
+-- 
+-- Everything up to END TRANSACTION at the bottom of this script runs in a
+-- single transaction.
+BEGIN TRANSACTION;
+--
+-- External C-functions for R-tree methods
+--
+-- Comparison methods
+-- Each boolean function below is followed by an INSERT that attaches a short
+-- description to it in pg_description; the pg_proc row is matched by
+-- function name only.
+CREATE FUNCTION _int_contains(_int4, _int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+INSERT INTO pg_description (objoid, description)
+   SELECT oid, 'contains'::text
+   FROM pg_proc
+   WHERE proname = '_int_contains'::name;
+CREATE FUNCTION _int_contained(_int4, _int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+INSERT INTO pg_description (objoid, description)
+   SELECT oid, 'contained in'::text
+   FROM pg_proc
+   WHERE proname = '_int_contained'::name;
+CREATE FUNCTION _int_overlap(_int4, _int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+INSERT INTO pg_description (objoid, description)
+   SELECT oid, 'overlaps'::text
+   FROM pg_proc
+   WHERE proname = '_int_overlap'::name;
+CREATE FUNCTION _int_same(_int4, _int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+INSERT INTO pg_description (objoid, description)
+   SELECT oid, 'same as'::text
+   FROM pg_proc
+   WHERE proname = '_int_same'::name;
+CREATE FUNCTION _int_different(_int4, _int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+INSERT INTO pg_description (objoid, description)
+   SELECT oid, 'different'::text
+   FROM pg_proc
+   WHERE proname = '_int_different'::name;
+-- support routines for indexing
+-- (both take two _int4 arrays and return an _int4 array)
+CREATE FUNCTION _int_union(_int4, _int4) RETURNS _int4
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION _int_inter(_int4, _int4) RETURNS _int4
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+--
+-- OPERATORS
+--
+-- && : overlap; declared as its own commutator.
+CREATE OPERATOR && (
+   LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_overlap,
+   COMMUTATOR = '&&',
+   RESTRICT = contsel, JOIN = contjoinsel
+);
+-- NOTE(review): creation of '=' is disabled here, yet '<>' below names '=' as
+-- its negator and the operator class later looks up an '=' operator on _int4.
+-- Presumably an existing '=' for _int4 is relied on -- confirm.
+--CREATE OPERATOR = (
+--   LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_same,
+--   COMMUTATOR = '=', NEGATOR = '<>',
+--   RESTRICT = eqsel, JOIN = eqjoinsel,
+--   SORT1 = '<', SORT2 = '<'
+--);
+-- <> : implemented by _int_different.
+CREATE OPERATOR <> (
+   LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_different,
+   COMMUTATOR = '<>', NEGATOR = '=',
+   RESTRICT = neqsel, JOIN = neqjoinsel
+);
+-- @ (implemented by _int_contains) and ~ (implemented by _int_contained) are
+-- declared as each other's commutator.
+CREATE OPERATOR @ (
+   LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_contains,
+   COMMUTATOR = '~', RESTRICT = contsel, JOIN = contjoinsel
+);
+CREATE OPERATOR ~ (
+   LEFTARG = _int4, RIGHTARG = _int4, PROCEDURE = _int_contained,
+   COMMUTATOR = '@', RESTRICT = contsel, JOIN = contjoinsel
+);
+-- define the GiST support methods
+-- These seven C functions are registered as support procedures 1-7 of the
+-- gist__int_ops operator class by the pg_amproc inserts later in this script.
+CREATE FUNCTION g_int_consistent(opaque,_int4,int4) RETURNS bool
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_compress(opaque) RETURNS opaque 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_decompress(opaque) RETURNS opaque 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_penalty(opaque,opaque,opaque) RETURNS opaque
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_picksplit(opaque, opaque) RETURNS opaque
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_union(bytea, opaque) RETURNS _int4 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+CREATE FUNCTION g_int_same(_int4, _int4, opaque) RETURNS opaque 
+	AS 'MODULE_PATHNAME' LANGUAGE 'c';
+-- register the default opclass for indexing
+INSERT INTO pg_opclass (opcname, opcdeftype)
+   SELECT 'gist__int_ops', oid
+   FROM pg_type
+   WHERE typname = '_int4';
+-- get the operators whose left and right arguments are both _int4 and store
+-- them in a tmp table
+SELECT o.oid AS opoid, o.oprname
+INTO TABLE _int_ops_tmp
+FROM pg_operator o, pg_type t
+WHERE o.oprleft = t.oid and o.oprright = t.oid
+   and t.typname = '_int4';
+-- make sure we have the right operators
+-- SELECT * from _int_ops_tmp;
+-- using the tmp table, generate the amop entries 
+-- Each insert ties one operator (looked up by name in the tmp table) to the
+-- 'gist' access method and the gist__int_ops class under a strategy number.
+-- _int_overlap
+INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
+   SELECT am.oid, opcl.oid, c.opoid, 3
+   FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
+   WHERE amname = 'gist' and opcname = 'gist__int_ops' 
+      and c.oprname = '&&';
+-- _int_same
+INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
+   SELECT am.oid, opcl.oid, c.opoid, 6
+   FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
+   WHERE amname = 'gist' and opcname = 'gist__int_ops' 
+      and c.oprname = '=';
+-- _int_contains
+INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
+   SELECT am.oid, opcl.oid, c.opoid, 7
+   FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
+   WHERE amname = 'gist' and opcname = 'gist__int_ops' 
+      and c.oprname = '@';
+-- _int_contained
+INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
+   SELECT am.oid, opcl.oid, c.opoid, 8
+   FROM pg_am am, pg_opclass opcl, _int_ops_tmp c
+   WHERE amname = 'gist' and opcname = 'gist__int_ops' 
+      and c.oprname = '~';
+-- the tmp table is no longer needed once the amop rows exist
+DROP TABLE _int_ops_tmp;
+-- add the entries to amproc for the support methods
+-- note the amprocnum numbers associated with each are specific!
+-- Functions are looked up in pg_proc by name only.
+-- 1: consistent
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 1
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_consistent';
+-- 2: union
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 2
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_union';
+-- 3: compress
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 3
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_compress';
+-- 4: decompress
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 4
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_decompress';
+-- 5: penalty
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 5
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_penalty';
+-- 6: picksplit
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 6
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_picksplit';
+-- 7: same
+INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
+   SELECT am.oid, opcl.oid, pro.oid, 7
+   FROM pg_am am, pg_opclass opcl, pg_proc pro
+   WHERE  amname = 'gist' and opcname = 'gist__int_ops'
+      and proname = 'g_int_same';
+-- closes the transaction opened at the top of this script
+END TRANSACTION;
--- a/contrib/intarray/bench/bench.pl
+++ b/contrib/intarray/bench/bench.pl
+#!/usr/bin/perl 
+use strict;
+# make sure we are in a sane environment.
+use DBI();
+use DBD::Pg();
+use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
+use Getopt::Std;
+# Parse the command-line switches; -d, -b and -s take a value, the rest are flags.
+my %opt;
+getopts('d:b:s:veorauc', \%opt);
+# -s is the only mandatory option (-d falls back to a default database name),
+# so print the help text and stop when it is missing.
+if ( !defined $opt{s} ) {
+	print <<EOT;
+Usage:
+$0 [-d DATABASE] -s SECTIONS [-b NUMBER] [-v] [-e] [-o] [-r] [-a] [-u] [-c]
+-d DATABASE   	-DATABASE (default: _int4)
+-b NUMBER   	-number of repeats
+-s SECTIONS 	-sections, format	sid1[,sid2[,sid3[...]]]]
+-v 		-verbose (show SQL)
+-e		-show explain
+-r		-use RD-tree index
+-a		-AND section
+-o		-show output
+-u		-unique
+-c 		-count
+EOT
+	exit;
+}
+# Default database name when -d is not given.
+$opt{d} ||= '_int4';
+# Stop right away with the driver's message if the connection fails, instead
+# of crashing later on a method call against an undefined handle.
+my $dbi=DBI->connect('DBI:Pg:dbname='.$opt{d})
+	or die "Cannot connect to database '$opt{d}': $DBI::errstr\n";
+# %table collects the tables for the FROM list, @where the AND-ed conditions.
+my %table;
+my @where;
+$table{message}=1;
+if ( $opt{a} ) {
+	# AND search: every listed section must be present.
+	if ( $opt{r} ) {
+		# array "contains" operator on message.sections
+		push @where, "message.sections @ '{$opt{s}}'";
+	} else {
+		# one EXISTS sub-select against the mapping table per section id
+		foreach my $sid ( split(/[,\s]+/, $opt{s} )) {
+			push @where, "EXISTS ( select  message_section_map.mid from message_section_map where message.mid=message_section_map.mid and message_section_map.sid = $sid )";
+		}
+	}
+} else {
+	# OR search: any listed section is enough.
+	if ( $opt{r} ) {
+		# array "overlap" operator on message.sections
+		push @where, "message.sections && '{$opt{s}}'";
+	} else {
+		# plain join with the mapping table
+		$table{message_section_map} = 1;
+		push @where, "message.mid = message_section_map.mid";
+		push @where, "message_section_map.sid in ($opt{s})";
+	}
+}
+# Select list: a count (-c) or the message ids, optionally distinct (-u).
+my $outf;
+if ( $opt{c} ) {
+	$outf = ( $opt{u} ) ? 'count( distinct message.mid )' : 'count( message.mid )';
+} else {
+	$outf = ( $opt{u} ) ? 'distinct( message.mid )' : 'message.mid';
+}
+my $sql = "select $outf from ".join(', ', keys %table)." where ".join(' AND ', @where).';';
+# -v: show the generated statement; -e: send an EXPLAIN for it to the server.
+print "$sql\n" if $opt{v};
+$dbi->do("explain $sql") if $opt{e};
+# Run the query -b times (once by default) and time the whole batch.  Only the
+# rows of the last run are kept.
+my $started = [gettimeofday];
+my $last_index = 0;
+my $repeats = $opt{b} || 1;
+my @rows;
+for my $pass ( 1..$repeats ) {
+	@rows = exec_sql($dbi, $sql);
+	$last_index = $#rows;
+}
+my $elapsed = tv_interval( $started, [gettimeofday] );
+# -o: dump the rows of the final run
+if ( $opt{o} ) {
+	print "$_->{mid}\t$_->{sections}\n" for @rows;
+}
+# Summary: total time, repeat count, time per run, rows in the last run.
+printf("total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n", $elapsed, $repeats, $elapsed/$repeats, $last_index+1 );
+$dbi->disconnect;
+# Prepare and execute $query on $handle with the optional bind values, and
+# return every result row as a list of hash references.  Dies if prepare or
+# execute returns false.
+sub exec_sql {
+	my ($handle, $query, @bind) = @_;
+	my $sth = $handle->prepare($query) or die;
+	$sth->execute(@bind) or die;
+	my @rows;
+	while ( my $record = $sth->fetchrow_hashref ) {
+		push @rows, $record;
+	}
+	$sth->finish;
+	return @rows;
+}
--- a/contrib/intarray/bench/create_test.pl
+++ b/contrib/intarray/bench/create_test.pl
+#!/usr/bin/perl
+use strict;
+# Emit the DDL for the two test tables on stdout.
+print <<EOT;
+create table message (
+	mid	int not null,
+	sections	int[]
+);
+create table message_section_map (
+	mid 	int not null,
+	sid	int not null
+);
+EOT
+# Rows are first written to scratch files in the current directory and later
+# replayed on stdout as COPY data by copytable().
+open(MSG,">message.tmp") || die;
+open(MAP,">message_section_map.tmp") || die;
+# Fixed seed, so every run starts the random generator from the same state.
+srand( 1 );
+# Alternative table sizes that were tried; 200000 messages is the active one.
+#foreach my $i ( 1..1778 ) {
+#foreach my $i ( 1..3443 ) {
+#foreach my $i ( 1..5000 ) {
+#foreach my $i ( 1..29362 ) {
+#foreach my $i ( 1..33331 ) {
+#foreach my $i ( 1..83268 ) {
+foreach my $i ( 1..200000 ) {
+	my @sect;
+	# Section ids are int(rand()**4 * 100), i.e. 0..99 skewed towards small values.
+	if ( rand() < 0.7 ) {
+		# 70% of the messages get exactly one section
+		$sect[0] = int( (rand()**4)*100 );
+	} else {
+		# the rest get one to five draws, with duplicate ids dropped by the grep
+		my %hash;
+		@sect = grep { $hash{$_}++; $hash{$_} <= 1 } map { int( (rand()**4)*100) } 0..( int(rand()*5) );
+	}
+	# With no sections, or in 10% of the cases, store a NULL array (\N in COPY
+	# format) and write no mapping rows for this message.
+	if ( $#sect < 0 || rand() < 0.1 ) {
+		print MSG "$i\t\\N\n";
+	} else {
+		print MSG "$i\t{".join(',',@sect)."}\n";
+		map { print MAP "$i\t$_\n" } @sect;
+	}
+}
+close MAP;
+close MSG;
+# Replay both scratch files as COPY blocks.
+copytable('message');
+copytable('message_section_map');
+# Index creation, statistics and row-count checks, emitted after the data.
+print <<EOT;
+CREATE unique index message_key on message ( mid );
+--CREATE unique index message_section_map_key1 on message_section_map ( mid, sid );
+CREATE unique index message_section_map_key2 on message_section_map ( sid, mid );
+CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy );
+VACUUM ANALYZE;
+select count(*) from message;
+select count(*) from message_section_map;
+EOT
+# Remove the scratch files.
+unlink 'message.tmp', 'message_section_map.tmp';
+# Print a "COPY <table> from stdin;" block on stdout: the header line, then
+# the contents of "<table>.tmp" from the current directory, then the "\."
+# terminator line.  Dies if the scratch file cannot be opened.
+sub copytable {
+	my ($table) = @_;
+	print "COPY $table from stdin;\n";
+	open( my $rows, "$table.tmp" ) or die;
+	print while <$rows>;
+	close $rows;
+	print "\\.\n";
+}
--- a/contrib/intarray/data/test__int.data
+++ b/contrib/intarray/data/test__int.data
--- a/contrib/intarray/expected/_int.out
+++ b/contrib/intarray/expected/_int.out
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+CREATE TABLE test__int( a int[] );
+\copy test__int from 'data/test__int.data'
+SELECT count(*) from test__int WHERE a && '{23,50}';
+ count 
+-------
+   345
+(1 row)
+SELECT count(*) from test__int WHERE a @ '{23,50}';
+ count 
+-------
+    12
+(1 row)
--- a/contrib/intarray/sql/_int.sql
+++ b/contrib/intarray/sql/_int.sql
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i _int.sql
+\set ECHO all
+CREATE TABLE test__int( a int[] );
+\copy test__int from 'data/test__int.data'
+SELECT count(*) from test__int WHERE a && '{23,50}';
+SELECT count(*) from test__int WHERE a @ '{23,50}';