Support for emulating RTREE indexing in GiST. Contributed by

Oleg Bartunov and Teodor Sigaev.

Support for emulating RTREE indexing in GiST. Contributed by
Oleg Bartunov and Teodor Sigaev.
16f85390 · Tom Lane · 3043810d · 16f85390 · 16f85390 · 16f85390
Commit 16f85390 authored May 31, 2001 by Tom Lane
11 changed files
--- a/contrib/Makefile
+++ b/contrib/Makefile
-# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.20 2001/05/10 15:51:05 momjian Exp $
+# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.21 2001/05/31 18:27:18 tgl Exp $
 subdir = contrib
 top_builddir = ..
@@ -27,6 +27,7 @@ WANTED_DIRS = \
 		pgbench		\
 		pgcrypto	\
 		rserv		\
+		rtree_gist	\
 		seg		\
 		soundex		\
 		spi		\

--- a/contrib/README
+++ b/contrib/README
@@ -133,6 +133,10 @@ rserv -
 	replication server
 	by Vadim B. Mikheev <vadim4o@email.com>
+rtree_gist -
+	Support for emulating RTREE indexing in GiST
+	by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@stack.net>
 seg -
 	Confidence-interval datatype (GiST indexing example)
 	by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>

--- a/contrib/rtree_gist/Makefile
+++ b/contrib/rtree_gist/Makefile
+#
+# $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/Makefile,v 1.1 2001/05/31 18:27:18 tgl Exp $
+#
+# Build, install and regression-test rules for the rtree_gist contrib module.
+subdir = contrib/rtree_gist
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+# override libdir to install shlib in contrib not main directory
+libdir := $(libdir)/contrib
+# shared library parameters
+NAME= rtree_gist
+SO_MAJOR_VERSION= 1
+SO_MINOR_VERSION= 0
+# search this module's source directory first for headers; "override" keeps
+# the -I flag even when CPPFLAGS is given on the make command line
+override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
+OBJS= rtree_gist.o
+# default target: the shared library plus the generated SQL script
+# (all-lib is presumably supplied by Makefile.shlib, included below)
+all: all-lib $(NAME).sql
+# Shared library stuff
+include $(top_srcdir)/src/Makefile.shlib
+# generate the SQL script from its template by replacing the
+# MODULE_PATHNAME placeholder with the installed library path
+$(NAME).sql: $(NAME).sql.in
+	sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
+.PHONY: submake
+# build the pg_regress driver before any test run
+submake:
+	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
+# against installed postmaster
+installcheck: submake
+	$(top_builddir)/src/test/regress/pg_regress rtree_gist
+# in-tree test doesn't work yet (no way to install my shared library)
+#check: all submake
+#	$(top_builddir)/src/test/regress/pg_regress --temp-install \
+#	  --top-builddir=$(top_builddir) rtree_gist
+check:
+	@echo "'make check' is not supported."
+	@echo "Do 'make install', then 'make installcheck' instead."
+# install the library (install-lib), the README and the generated SQL script
+install: all installdirs install-lib
+	$(INSTALL_DATA) $(srcdir)/README.$(NAME)  $(docdir)/contrib
+	$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
+# create the destination directories used by the install target
+installdirs:
+	$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
+# remove the README and SQL script installed above; the library itself is
+# handled by uninstall-lib
+uninstall: uninstall-lib
+	rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
+# all three cleaning levels remove the same set of files here
+clean distclean maintainer-clean: clean-lib
+	rm -f $(OBJS) $(NAME).sql
+# things created by various check targets
+	rm -rf results tmp_check log
+	rm -f regression.diffs regression.out regress.out run_check.out
+ifeq ($(PORTNAME), win)
+	rm -f regress.def
+endif
+# write compiler-generated dependencies for every .c file into "depend"
+depend dep:
+	$(CC) -MM $(CFLAGS) *.c >depend
+# include the generated dependency file only when it exists
+ifeq (depend,$(wildcard depend))
+include depend
+endif
--- a/contrib/rtree_gist/README.rtree_gist
+++ b/contrib/rtree_gist/README.rtree_gist
+This is an R-Tree implementation using GiST.
+The code (for PG95) was taken from http://s2k-ftp.cs.berkeley.edu:8000/gist/pggist/
+and adapted to the new version of GiST (7.1 and above).
+All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
+(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
+for additional information.
+CHANGES:
+     Tue May 29 17:04:16 MSD 2001
+     1. Small fixes in polygon code
+        Thanks to Dave Blasby <dblasby@refractions.net>
+     Mon May 28 19:42:14 MSD 2001
+     1. Full implementation of R-tree using GiST - gist_box_ops,gist_poly_ops
+     2. gist_poly_ops is lossy
+     3. NULLs support
+     4. works with multi-key GiST
+NOTICE:
+     This version will work only with PostgreSQL version 7.1 and above
+     because of changes in the function-calling interface.
+INSTALLATION:
+  gmake
+  gmake install
+  -- load functions
+  psql <database> < rtree_gist.sql
+REGRESSION TEST:
+   gmake installcheck
+EXAMPLE USAGE:
+   create table boxtmp (b box);
+   -- create index
+   create index bix on boxtmp using gist (b gist_box_ops);
+   -- query
+   select * from boxtmp where b && '(1000,1000,0,0)'::box;
+BENCHMARKS:
+ subdirectory bench contains benchmark suite.
+ Prerequisites: perl, DBI, DBD::Pg, Time::HiRes
+  cd ./bench
+  1. createdb TEST
+  2. psql TEST < ../box.sql
+  3. ./create_test.pl | psql TEST 
+     -- change $NUM - number of rows in test dataset
+  4. ./bench.pl - perl script to benchmark queries. 
+                  Run script without arguments to see available options.
+     a)test without GiST index, using built-in R-Tree
+       ./bench.pl -d TEST 
+     b)test R-Tree using GiST index
+       ./bench.pl -d TEST -g 
+RESULTS:
+1. One interesting thing is that insertion time for the built-in R-Tree is
+   about 8 times longer than for the GiST implementation of R-Tree !!!
+2. Postmaster requires much more memory for built-in R-Tree
+3. Search time depends on dataset. In our case we got:
+        +------------+-----------+--------------+
+        |Number boxes|R-tree, sec|R-tree using  |
+        |            |           |   GiST, sec  |
+        +------------+-----------+--------------+
+        |          10|      0.002|         0.002|
+        +------------+-----------+--------------+
+        |         100|      0.002|         0.002|
+        +------------+-----------+--------------+
+        |        1000|      0.002|         0.002|
+        +------------+-----------+--------------+
+        |       10000|      0.015|         0.025|
+        +------------+-----------+--------------+
+        |       20000|      0.029|         0.048|
+        +------------+-----------+--------------+
+        |       40000|      0.055|         0.092|
+        +------------+-----------+--------------+
+        |       80000|      0.113|         0.178|
+        +------------+-----------+--------------+
+        |      160000|      0.338|         0.337|
+        +------------+-----------+--------------+
+        |      320000|      0.674|         0.673|
+        +------------+-----------+--------------+
--- a/contrib/rtree_gist/bench/bench.pl
+++ b/contrib/rtree_gist/bench/bench.pl
+#!/usr/bin/perl -w
+use strict;
+# make sure we are in a sane environment.
+use DBI();
+use DBD::Pg();
+use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
+use Getopt::Std;
+# Command-line switches: -d and -b take a value, -g and -v are plain flags.
+my %opt;
+getopts('d:b:gv', \%opt);
+# No switches given at all: print the usage text and quit.
+if ( !( scalar %opt ) ) {
+	print <<EOT;
+Usage:
+$0 -d DATABASE -b N [-v] [-g]
+-d DATABASE  - DATABASE name
+-b N    -number of cycles
+-v      - print sql
+-g      -use GiST index( default built-in R-tree )
+EOT
+	exit;
+}
+# Default database name when some switch other than -d was supplied.
+$opt{d} ||= 'TEST';
+my $dbi=DBI->connect('DBI:Pg:dbname='.$opt{d}) || die "Couldn't connect DB: $opt{d} !\n";
+my $sql;
+my $notice;
+# Fixed search box used in the overlap (&&) query below.
+my $sss = '(3000,3000,2990,2990)';
+# -g queries table boxtmp, otherwise boxtmp2 is queried; the two queries
+# differ only in the table name.
+if ( $opt{g} ) {
+	$notice = "Testing GiST implementation of R-Tree";
+	$sql = "select count(*) from boxtmp where b && '$sss'::box;";
+} else {
+	$notice = "Testing built-in implementation of R-Tree";
+	$sql = "select count(*) from boxtmp2 where b && '$sss'::box;";
+}
+# Run the query -b times (default 1), timing the whole batch, then report
+# the total time, the per-query time and the number of matching boxes.
+my $t0 = [gettimeofday];
+my $count=0;
+my $b=$opt{b};
+$b ||=1;
+foreach ( 1..$b ) {
+	my @a=exec_sql($dbi,$sql);
+	# The query is "select count(*) ...", so it always yields exactly one
+	# row.  The number of matches is the value of that row's "count"
+	# column, not the number of rows fetched (which would always be 1).
+	$count = @a ? $a[0]->{count} : 0;
+}
+my $elapsed = tv_interval ( $t0, [gettimeofday]);
+print "$notice:\n";
+print "$sql\n" if ( $opt{v} );
+print "Done\n";
+print sprintf("total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n", $elapsed, $b, $elapsed/$b, $count );
+$dbi -> disconnect;
+# Prepare and execute a statement on the given handle, binding any extra
+# arguments as parameters, and return every result row as a hash reference.
+# Dies if either prepare or execute reports failure.
+sub exec_sql {
+        my ($dbi, $sql, @keys) = @_;
+        my $stmt = $dbi->prepare($sql) || die;
+        $stmt->execute( @keys ) || die;
+        my @fetched;
+        # fetchrow_hashref returns undef once the result set is exhausted
+        while ( defined ( my $rec = $stmt->fetchrow_hashref ) ) {
+                push @fetched, $rec;
+        }
+        $stmt->finish;
+        return @fetched;
+}
--- a/contrib/rtree_gist/bench/create_test.pl
+++ b/contrib/rtree_gist/bench/create_test.pl
+#!/usr/bin/perl
+use strict;
+# Number of random boxes to generate for the test dataset.
+my $NUM = 20000;
+# Start from scratch: drop and recreate both test tables.
+print "drop table boxtmp;\n"; 
+print "drop table boxtmp2;\n"; 
+print "create table boxtmp (b box);\n";
+print "create table boxtmp2 (b box);\n";
+# Fixed seed, so rand() is seeded identically on every run.
+srand(1);
+# Write the boxes to a scratch file first so that exactly the same data can
+# be replayed into both tables below.
+open(DAT,">bbb.dat") || die;
+foreach ( 1..$NUM ) {
+	#print DAT '(',int( 500+500*rand() ),',',int( 500+500*rand() ),',',int( 500*rand() ),',',int( 500*rand() ),")\n";
+	# two random points with coordinates in [0, 10000)
+	my ( $x1,$y1, $x2,$y2 ) = (
+		10000*rand(),
+		10000*rand(),
+		10000*rand(),
+		10000*rand()
+	);
+	# emit the larger x/y pair first, then the smaller one
+	print DAT '(',
+		max($x1,$x2),',',
+		max($y1,$y2),',',
+		min($x1,$x2),',',
+		min($y1,$y2),")\n";
+}
+close DAT;
+# Replay the scratch file as inline COPY data for boxtmp; the "\." line
+# terminates the data stream.
+print "copy boxtmp from stdin;\n";
+open(DAT,"bbb.dat") || die;
+while(<DAT>) { print; }
+close DAT;
+print "\\.\n";
+# Same data again for boxtmp2.
+print "copy boxtmp2 from stdin;\n";
+open(DAT,"bbb.dat") || die;
+while(<DAT>) { print; }
+close DAT;
+print "\\.\n";
+# boxtmp gets the GiST index, boxtmp2 the rtree index.
+print "create index bix on boxtmp using gist (b gist_box_ops);\n";
+print "create index bix2 on boxtmp2 using rtree (b box_ops);\n";
+# Return the smaller of two numbers; the second is returned when they are equal.
+sub min {
+	my ( $first, $second ) = @_;
+	return $first if $first < $second;
+	return $second;
+}
+# Return the larger of two numbers; the second is returned when they are equal.
+sub max {
+	my ( $first, $second ) = @_;
+	return $first if $first > $second;
+	return $second;
+}
--- a/contrib/rtree_gist/data/test_box.data
+++ b/contrib/rtree_gist/data/test_box.data
--- a/contrib/rtree_gist/expected/rtree_gist.out
+++ b/contrib/rtree_gist/expected/rtree_gist.out
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+create table boxtmp (b box);
+\copy boxtmp from 'data/test_box.data'
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+ count 
+-------
+     2
+(1 row)
+create index bix on boxtmp using rtree (b);
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+ count 
+-------
+     2
+(1 row)
+drop index bix;
+create index bix on boxtmp using gist (b gist_box_ops);
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+ count 
+-------
+     2
+(1 row)
+create table polytmp (p polygon);
+\copy polytmp from 'data/test_box.data'
+create index pix on polytmp using rtree (p);
+select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
+ count 
+-------
+     2
+(1 row)
+drop index pix;
+create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
+select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
+ count 
+-------
+     2
+(1 row)
--- a/contrib/rtree_gist/rtree_gist.c
+++ b/contrib/rtree_gist/rtree_gist.c
--- a/contrib/rtree_gist/rtree_gist.sql.in
+++ b/contrib/rtree_gist/rtree_gist.sql.in
--- a/contrib/rtree_gist/sql/rtree_gist.sql
+++ b/contrib/rtree_gist/sql/rtree_gist.sql
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of seg.sql.
+--
+\set ECHO none
+\i rtree_gist.sql
+\set ECHO all
+create table boxtmp (b box);
+\copy boxtmp from 'data/test_box.data'
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+create index bix on boxtmp using rtree (b);
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+drop index bix;
+create index bix on boxtmp using gist (b gist_box_ops);
+select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
+create table polytmp (p polygon);
+\copy polytmp from 'data/test_box.data'
+create index pix on polytmp using rtree (p);
+select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
+drop index pix;
+create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
+select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;