Commit 16f85390 authored by Tom Lane's avatar Tom Lane

Support for emulating RTREE indexing in GiST. Contributed by

Oleg Bartunov and Teodor Sigaev.
parent 3043810d
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.20 2001/05/10 15:51:05 momjian Exp $
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.21 2001/05/31 18:27:18 tgl Exp $
subdir = contrib
top_builddir = ..
......@@ -27,6 +27,7 @@ WANTED_DIRS = \
pgbench \
pgcrypto \
rserv \
rtree_gist \
seg \
soundex \
spi \
......
......@@ -133,6 +133,10 @@ rserv -
replication server
by Vadim B. Mikheev <vadim4o@email.com>
rtree_gist -
Support for emulating RTREE indexing in GiST
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@stack.net>
seg -
Confidence-interval datatype (GiST indexing example)
by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
......
#
# $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/Makefile,v 1.1 2001/05/31 18:27:18 tgl Exp $
#
# Build, install and regression-test rules for the rtree_gist contrib module.
# Location of this directory relative to the top of the build tree.
subdir = contrib/rtree_gist
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# override libdir to install shlib in contrib not main directory
libdir := $(libdir)/contrib
# shared library parameters
NAME= rtree_gist
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0
# Put this module's own source directory first on the include path.
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS= rtree_gist.o
# Default target: the shared library plus the generated SQL script.
all: all-lib $(NAME).sql
# Shared library stuff
# (presumably supplies the all-lib, install-lib, uninstall-lib and clean-lib
# targets used below -- confirm against src/Makefile.shlib)
include $(top_srcdir)/src/Makefile.shlib
# Generate the SQL script from its template by replacing every
# MODULE_PATHNAME placeholder with $(libdir)/$(shlib).
$(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
# Build the pg_regress driver in src/test/regress before running tests.
.PHONY: submake
submake:
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
# against installed postmaster
installcheck: submake
$(top_builddir)/src/test/regress/pg_regress rtree_gist
# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
# --top-builddir=$(top_builddir) rtree_gist
# Until then, 'make check' only prints instructions.
check:
@echo "'make check' is not supported."
@echo "Do 'make install', then 'make installcheck' instead."
# Install the library, the README (into the docs) and the SQL script (into the data dir).
install: all installdirs install-lib
$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
# Create the three contrib target directories used by 'install'.
installdirs:
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
# Remove the README and SQL script placed by 'install'.
uninstall: uninstall-lib
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
# All three clean levels remove the same set of generated files.
clean distclean maintainer-clean: clean-lib
rm -f $(OBJS) $(NAME).sql
# things created by various check targets
rm -rf results tmp_check log
rm -f regression.diffs regression.out regress.out run_check.out
ifeq ($(PORTNAME), win)
rm -f regress.def
endif
# Write compiler-generated dependencies for every .c file into 'depend'.
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
# Pull in the dependency file only when it exists.
ifeq (depend,$(wildcard depend))
include depend
endif
This is an R-Tree implementation using GiST.
The code (for PG95) was taken from http://s2k-ftp.cs.berkeley.edu:8000/gist/pggist/
and adapted to the new version of GiST (7.1 and above).
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
for additional information.
CHANGES:
Tue May 29 17:04:16 MSD 2001
1. Small fixes in polygon code
Thanks to Dave Blasby <dblasby@refractions.net>
Mon May 28 19:42:14 MSD 2001
1. Full implementation of R-tree using GiST - gist_box_ops,gist_poly_ops
2. gist_poly_ops is lossy
3. NULLs support
4. works with multi-key GiST
NOTICE:
This version works only with PostgreSQL version 7.1 and above
because of changes in the function-call interface.
INSTALLATION:
gmake
gmake install
-- load functions
psql <database> < rtree_gist.sql
REGRESSION TEST:
gmake installcheck
EXAMPLE USAGE:
create table boxtmp (b box);
-- create index
create index bix on boxtmp using gist (b gist_box_ops);
-- query
select * from boxtmp where b && '(1000,1000,0,0)'::box;
BENCHMARKS:
subdirectory bench contains benchmark suite.
Prerequisites: perl, DBI, DBD::Pg, Time::HiRes
cd ./bench
1. createdb TEST
2. psql TEST < ../box.sql
3. ./create_test.pl | psql TEST
-- change $NUM - number of rows in test dataset
4. ./bench.pl - perl script to benchmark queries.
Run script without arguments to see available options.
a)test without GiST index, using built-in R-Tree
./bench.pl -d TEST
b)test R-Tree using GiST index
./bench.pl -d TEST -g
RESULTS:
1. One interesting thing is that insertion time for the built-in R-Tree is
about 8 times longer than for the GiST implementation of R-Tree !!!
2. Postmaster requires much more memory for built-in R-Tree
3. Search time depends on dataset. In our case we got:
+------------+-----------+--------------+
|Number boxes|R-tree, sec|R-tree using |
| | | GiST, sec |
+------------+-----------+--------------+
| 10| 0.002| 0.002|
+------------+-----------+--------------+
| 100| 0.002| 0.002|
+------------+-----------+--------------+
| 1000| 0.002| 0.002|
+------------+-----------+--------------+
| 10000| 0.015| 0.025|
+------------+-----------+--------------+
| 20000| 0.029| 0.048|
+------------+-----------+--------------+
| 40000| 0.055| 0.092|
+------------+-----------+--------------+
| 80000| 0.113| 0.178|
+------------+-----------+--------------+
| 160000| 0.338| 0.337|
+------------+-----------+--------------+
| 320000| 0.674| 0.673|
+------------+-----------+--------------+
#!/usr/bin/perl -w
use strict;
# make sure we are in a sane environment.
use DBI();
use DBD::Pg();
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
use Getopt::Std;
# Benchmark driver: runs one overlap query repeatedly and reports the total
# and per-query wall-clock time.
#
# Options: -d database name, -b number of cycles, -g query the table used for
# the GiST test (default: the table used for the built-in R-tree test),
# -v also print the SQL that was executed.
my %opt;
getopts('d:b:gv', \%opt);

# Called without any option at all: show the usage text and stop.
if ( !( scalar %opt ) ) {
    print <<EOT;
Usage:
$0 -d DATABASE -b N [-v] [-g]
-d DATABASE - DATABASE name
-b N -number of cycles
-v - print sql
-g -use GiST index( default built-in R-tree )
EOT
    exit;
}

$opt{d} ||= 'TEST';
my $dbi = DBI->connect( 'DBI:Pg:dbname=' . $opt{d} )
    || die "Couldn't connect DB: $opt{d} !\n";

# Search box used by the overlap (&&) query.
my $sss = '(3000,3000,2990,2990)';

my $sql;
my $notice;
if ( $opt{g} ) {
    $notice = "Testing GiST implementation of R-Tree";
    $sql    = "select count(*) from boxtmp where b && '$sss'::box;";
}
else {
    $notice = "Testing built-in implementation of R-Tree";
    $sql    = "select count(*) from boxtmp2 where b && '$sss'::box;";
}

# Number of repetitions; a missing or zero -b means a single run.  Anything
# that is not a positive integer is rejected up front, because it would
# otherwise end in an "Illegal division by zero" when the average is computed.
my $cycles = $opt{b} || 1;
die "-b must be a positive integer\n"
    unless $cycles =~ /\A[1-9][0-9]*\z/;

my $t0    = [gettimeofday];
my $found = 0;
foreach ( 1 .. $cycles ) {
    my @rows = exec_sql( $dbi, $sql );

    # The statement is "select count(*)", so it returns a single row with a
    # single column: that value, not the number of rows fetched, is the
    # number of matching boxes.
    ($found) = values %{ $rows[0] } if @rows;
}
my $elapsed = tv_interval( $t0, [gettimeofday] );

print "$notice:\n";
print "$sql\n" if ( $opt{v} );
print "Done\n";
printf "total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n",
    $elapsed, $cycles, $elapsed / $cycles, $found;
$dbi->disconnect;
# Run a statement and collect its complete result set.
#
# Arguments:
#   $dbi  - database handle providing prepare()
#   $sql  - SQL text to run
#   @keys - optional bind values handed to execute()
# Returns: list of hash references, one per fetched row, in fetch order.
# Dies with the statement text and the handle's error string when the
# statement cannot be prepared or executed.
sub exec_sql {
    my ( $dbi, $sql, @keys ) = @_;

    my $sth = $dbi->prepare($sql)
        || die "Couldn't prepare '$sql': " . ( $dbi->errstr || 'unknown error' ) . "\n";
    $sth->execute(@keys)
        || die "Couldn't execute '$sql': " . ( $sth->errstr || 'unknown error' ) . "\n";

    my @rows;
    while ( defined( my $row = $sth->fetchrow_hashref ) ) {
        push @rows, $row;
    }
    $sth->finish;
    return @rows;
}
#!/usr/bin/perl
use strict;
# Emits, on standard output, a psql script that recreates the two benchmark
# tables, loads the same random boxes into both and indexes one with GiST and
# the other with the built-in R-tree.

# Number of rows in the test dataset.
my $NUM = 20000;

# Scratch file holding the generated boxes so that both COPY sections load
# exactly the same rows; it is written to the current directory.
my $data_file = 'bbb.dat';

print "drop table boxtmp;\n";
print "drop table boxtmp2;\n";
print "create table boxtmp (b box);\n";
print "create table boxtmp2 (b box);\n";

# Fixed seed: every run produces the same dataset.
srand(1);

open( my $out, '>', $data_file ) || die "Can't write $data_file: $!\n";
foreach ( 1 .. $NUM ) {
    #print DAT '(',int( 500+500*rand() ),',',int( 500+500*rand() ),',',int( 500*rand() ),',',int( 500*rand() ),")\n";
    my ( $x1, $y1, $x2, $y2 ) = (
        10000 * rand(),
        10000 * rand(),
        10000 * rand(),
        10000 * rand()
    );

    # Written as (larger x, larger y, smaller x, smaller y).
    print {$out} '(',
        max( $x1, $x2 ), ',',
        max( $y1, $y2 ), ',',
        min( $x1, $x2 ), ',',
        min( $y1, $y2 ), ")\n";
}

# Buffered write errors only surface at close, so check it.
close($out) || die "Can't close $data_file: $!\n";

# Replay the scratch file once per table as inline COPY data.
foreach my $table (qw(boxtmp boxtmp2)) {
    print "copy $table from stdin;\n";
    open( my $in, '<', $data_file ) || die "Can't read $data_file: $!\n";
    while ( my $line = <$in> ) {
        print $line;
    }
    close($in);
    print "\\.\n";
}

print "create index bix on boxtmp using gist (b gist_box_ops);\n";
print "create index bix2 on boxtmp2 using rtree (b box_ops);\n";
# Return the numerically smaller of the two arguments; when they compare
# equal the second one is returned.
sub min {
    my ( $first, $second ) = @_;
    return $first if $first < $second;
    return $second;
}
# Return the numerically larger of the two arguments; when they compare
# equal the second one is returned.
sub max {
    my ( $first, $second ) = @_;
    return $first if $first > $second;
    return $second;
}
This diff is collapsed.
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
create table boxtmp (b box);
\copy boxtmp from 'data/test_box.data'
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
create index bix on boxtmp using rtree (b);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
drop index bix;
create index bix on boxtmp using gist (b gist_box_ops);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
create table polytmp (p polygon);
\copy polytmp from 'data/test_box.data'
create index pix on polytmp using rtree (p);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
count
-------
2
(1 row)
drop index pix;
create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
count
-------
2
(1 row)
This diff is collapsed.
This diff is collapsed.
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
\i rtree_gist.sql
\set ECHO all
create table boxtmp (b box);
\copy boxtmp from 'data/test_box.data'
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
create index bix on boxtmp using rtree (b);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
drop index bix;
create index bix on boxtmp using gist (b gist_box_ops);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
create table polytmp (p polygon);
\copy polytmp from 'data/test_box.data'
create index pix on polytmp using rtree (p);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
drop index pix;
create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment