Commit 9892ddf5 authored by Tom Lane's avatar Tom Lane

Gene Selkov's CUBE datatype (GiST example code)

parent 5bb4f723
#
# $Header: /cvsroot/pgsql/contrib/cube/Makefile,v 1.1 2000/12/11 20:39:14 tgl Exp $
#
# Location of this directory relative to the top of the build tree.
subdir = contrib/cube
top_builddir = ../..
# Pulls in the tree-wide settings; variables used below but never assigned
# here (srcdir, top_srcdir, libdir, docdir, datadir, YACC, FLEX, ...) are
# presumably defined by this include -- TODO confirm.
include $(top_builddir)/src/Makefile.global
# override libdir to install shlib in contrib not main directory
libdir := $(libdir)/contrib
# shared library parameters
NAME= cube
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0
# "override" so the include path is appended even when CPPFLAGS is given on
# the make command line.
override CPPFLAGS += -I$(srcdir)
# Object files making up the module: the type implementation, the generated
# parser and scanner, and the shared parse buffer.
OBJS= cube.o cubeparse.o cubescan.o buffer.o
# Default goal: the shared library plus the SQL registration script.
# (all-lib is not defined in this file; presumably it comes from
# Makefile.shlib, included below.)
all: all-lib $(NAME).sql
# Shared library stuff
include $(top_srcdir)/src/Makefile.shlib
# Generate the parser from the grammar.
#
# A single yacc run writes BOTH y.tab.c and y.tab.h, which are then renamed to
# cubeparse.c and cubeparse.h.  Listing both outputs as targets of one rule
# ("a b: prereq") would declare two independent rules, so a parallel build
# could run yacc twice at once and race on the fixed y.tab.* temporary names.
# Instead only cubeparse.c carries the recipe, and cubeparse.h merely depends
# on it (the empty recipe ";" tells make the header is produced as a side
# effect of building cubeparse.c).
#
# NOTE: if cubeparse.h alone is deleted, remove cubeparse.c as well to force
# regeneration.
cubeparse.h: cubeparse.c ;

cubeparse.c: cubeparse.y
	$(YACC) -d $(YFLAGS) -p cube_yy $<
	mv -f y.tab.c cubeparse.c
	mv -f y.tab.h cubeparse.h
# Generate the scanner.  -Pcube_yy changes flex's symbol prefix so the
# generated names match the "-p cube_yy" prefix given to yacc for the parser.
# When FLEX is not set, the $(missing) helper is invoked instead
# (presumably it reports that flex is unavailable -- TODO confirm).
cubescan.c: cubescan.l
ifdef FLEX
$(FLEX) $(FLEXFLAGS) -Pcube_yy -o'$@' $<
else
@$(missing) flex $< $@
endif
# Produce the SQL registration script: every MODULE_PATHNAME placeholder in
# the .sql.in template is replaced by the installed path of the shared library.
$(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
# Regression-test targets.
# submake (re)builds the pg_regress driver in the main regression directory
# before it is used below.
.PHONY: submake
submake:
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
# against installed postmaster
installcheck: submake
$(top_builddir)/src/test/regress/pg_regress cube
# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
# --top-builddir=$(top_builddir) cube
# Until the in-tree test works, "make check" only prints instructions.
check:
@echo "'make check' is not supported."
@echo "Do 'make install', then 'make installcheck' instead."
# Install the shared library (install-lib is not defined in this file;
# presumably provided by Makefile.shlib), the README and the generated SQL
# script.
install: all installdirs install-lib
$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
# Create the destination directories used by "install".  Note that libdir has
# already been redirected to its contrib subdirectory near the top of this
# file.
installdirs:
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
# Remove what "install" copied; the library itself is handled by
# uninstall-lib.
uninstall: uninstall-lib
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
# All three cleaning levels do the same thing here: remove the generated
# parser/scanner sources, yacc leftovers, objects, the generated SQL script
# and regression-test output.
clean distclean maintainer-clean: clean-lib
rm -f cubeparse.c cubeparse.h cubescan.c
rm -f y.tab.c y.tab.h $(OBJS) $(NAME).sql
# things created by various check targets
rm -rf results tmp_check log
rm -f regression.diffs regression.out regress.out run_check.out
ifeq ($(PORTNAME), win)
rm -f regress.def
endif
# Write compiler-generated header dependencies for every .c file in this
# directory into the file "depend".
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
# Include the dependency file only if it exists, so a fresh checkout still
# builds.
ifeq (depend,$(wildcard depend))
include depend
endif
This directory contains the code for the user-defined type,
CUBE, representing multidimensional cubes.
FILES
-----
Makefile building instructions for the shared library
README.cube the file you are now reading
buffer.c globals and buffer access utilities shared between
the parser (cubeparse.y) and the scanner (cubescan.l)
buffer.h function prototypes for buffer.c
cube.c the implementation of this data type in c
cube.sql.in SQL code needed to register this type with postgres
(transformed to cube.sql by make)
cubedata.h the data structure used to store the cubes
cubeparse.y the grammar file for the parser (used by cube_in() in cube.c)
cubescan.l scanner rules (used by cube_yyparse() in cubeparse.y)
INSTALLATION
============
To install the type, run
make
make install
For this to work, make sure that:
. the cube source directory is in the postgres contrib directory
. the user running "make install" has postgres administrative authority
. this user's environment defines the PGLIB and PGDATA variables and has
postgres binaries in the PATH.
This only installs the type implementation and documentation. To make the
type available in any particular database, do
psql -d databasename < cube.sql
If you install the type in the template1 database, all subsequently created
databases will inherit it.
To test the new type, after "make install" do
make installcheck
If it fails, examine the file regression.diffs to find out the reason (the
test code is a direct adaptation of the regression tests from the main
source tree).
SYNTAX
======
The following are valid external representations for the CUBE type:
'x' A floating point value representing
a one-dimensional point or one-dimensional
zero-length interval
'(x)' Same as above
'x1,x2,x3,...,xn' A point in n-dimensional space,
represented internally as a zero volume box
'(x1,x2,x3,...,xn)' Same as above
'(x),(y)' 1-D interval starting at x and ending at y
or vice versa; the order does not matter
'(x1,...,xn),(y1,...,yn)' n-dimensional box represented by
a pair of its opposite corners, no matter which.
Functions take care of swapping to achieve
"lower left -- upper right" representation
before computing any values
Grammar
-------
rule 1 box -> O_BRACKET paren_list COMMA paren_list C_BRACKET
rule 2 box -> paren_list COMMA paren_list
rule 3 box -> paren_list
rule 4 box -> list
rule 5 paren_list -> O_PAREN list C_PAREN
rule 6 list -> FLOAT
rule 7 list -> list COMMA FLOAT
Tokens
------
n [0-9]+
integer [+-]?{n}
real [+-]?({n}\.{n}?)|(\.{n})
FLOAT ({integer}|{real})([eE]{integer})?
O_BRACKET \[
C_BRACKET \]
O_PAREN \(
C_PAREN \)
COMMA \,
Examples of valid CUBE representations:
--------------------------------------
'x' A floating point value representing
a one-dimensional point (or, zero-length
one-dimensional interval)
'(x)' Same as above
'x1,x2,x3,...,xn' A point in n-dimensional space,
represented internally as a zero volume cube
'(x1,x2,x3,...,xn)' Same as above
'(x),(y)' A 1-D interval starting at x and ending at y
or vice versa; the order does not matter
'[(x),(y)]' Same as above
'(x1,...,xn),(y1,...,yn)' An n-dimensional box represented by
a pair of its diagonally opposite corners,
regardless of order. Swapping is provided
by all comparison routines to ensure the
"lower left -- upper right" representation
before actual comparison takes place.
'[(x1,...,xn),(y1,...,yn)]' Same as above
White space is ignored, so '[(x),(y)]' can be: '[ ( x ), ( y ) ]'
DEFAULTS
========
I believe this union:
select cube_union('(0,5,2),(2,3,1)','0');
cube_union
-------------------
(0, 0, 0),(2, 5, 2)
(1 row)
does not contradict common sense, and neither does the intersection
select cube_inter('(0,-1),(1,1)','(-2),(2)');
cube_inter
-------------
(0, 0),(1, 0)
(1 row)
In all binary operations on differently sized boxes, I assume the smaller
one to be a cartesian projection, i. e., having zeroes in place of coordinates
omitted in the string representation. The above examples are equivalent to:
cube_union('(0,5,2),(2,3,1)','(0,0,0),(0,0,0)');
cube_inter('(0,-1),(1,1)','(-2,0),(2,0)');
The following containment predicate uses the point syntax,
while in fact the second argument is internally represented by a box.
This syntax makes it unnecessary to define the special Point type
and functions for (box,point) predicates.
select cube_contains('(0,0),(1,1)', '0.5,0.5');
cube_contains
--------------
t
(1 row)
PRECISION
=========
Values are stored internally as 32-bit floating point numbers. This means that
numbers with more than 7 significant digits will be truncated.
USAGE
=====
The access method for CUBE is a GiST (gist_cube_ops), which is a
generalization of R-tree. GiSTs allow the postgres implementation of
R-tree, originally encoded to support 2-D geometric types such as
boxes and polygons, to be used with any data type whose data domain
can be partitioned using the concepts of containment, intersection and
equality. In other words, everything that can intersect or contain
its own kind can be indexed with a GiST. That includes, among other
things, all geometric data types, regardless of their dimensionality
(see also contrib/seg).
The operators supported by the GiST access method include:
[a, b] << [c, d] Is left of
The left operand, [a, b], occurs entirely to the left of the
right operand, [c, d], on the axis (-inf, inf). It means,
[a, b] << [c, d] is true if b < c and false otherwise
[a, b] >> [c, d] Is right of
[a, b] occurs entirely to the right of [c, d].
[a, b] >> [c, d] is true if a > d and false otherwise
[a, b] &< [c, d] Over left
The interval [a, b] overlaps the interval [c, d] in such a way
that a <= c <= b and b <= d
[a, b] &> [c, d] Over right
The interval [a, b] overlaps the interval [c, d] in such a way
that c <= a <= d and b >= d
[a, b] = [c, d] Same as
The intervals [a, b] and [c, d] are identical, that is, a == c
and b == d
[a, b] @ [c, d] Contains
The interval [a, b] contains the interval [c, d], that is,
a <= c and b >= d
[a, b] ~ [c, d] Contained in
The interval [a, b] is contained in [c, d], that is,
a >= c and b <= d
Although the mnemonics of the following operators is questionable, I
preserved them to maintain visual consistency with other geometric
data types defined in Postgres.
Other operators:
[a, b] < [c, d] Less than
[a, b] > [c, d] Greater than
These operators do not make a lot of sense for any practical
purpose but sorting. These operators first compare (a) to (c),
and if these are equal, compare (b) to (d). That accounts for
reasonably good sorting in most cases, which is useful if
you want to use ORDER BY with this type
There are a few other potentially useful functions defined in cube.c
that vanished from the schema because I stopped using them. Some of
these were meant to support type casting. Let me know if I was wrong:
I will then add them back to the schema. I would also appreciate
other ideas that would enhance the type and make it more useful.
For examples of usage, see sql/cube.sql
CREDITS
=======
This code is essentially based on the example written for
Illustra, http://garcia.me.berkeley.edu/~adong/rtree
My thanks are primarily to Prof. Joe Hellerstein
(http://db.cs.berkeley.edu/~jmh/) for elucidating the gist of the GiST
(http://gist.cs.berkeley.edu/), and to his former student, Andy Dong
(http://best.me.berkeley.edu/~adong/), for his exemplar.
I am also grateful to all postgres developers, present and past, for enabling
me to create my own world and live undisturbed in it. And I would like to
acknowledge my gratitude to Argonne Lab and to the U.S. Department of Energy
for the years of faithful support of my database research.
------------------------------------------------------------------------
Gene Selkov, Jr.
Computational Scientist
Mathematics and Computer Science Division
Argonne National Laboratory
9700 S Cass Ave.
Building 221
Argonne, IL 60439-4844
selkovjr@mcs.anl.gov
/* This module defines the parse buffer and routines for setting/reading it */
#include "postgres.h"
#include "utils/elog.h"
/* Start of the string currently being parsed (set by set_parse_buffer). */
static char * PARSE_BUFFER;
/*
 * Pointer into the buffer; in this file it is only ever reset to the start
 * of the buffer (the code that advanced it is commented out in
 * read_parse_buffer).
 */
static char * PARSE_BUFFER_PTR;
/* strlen() of the buffer, not counting the terminating NUL. */
static unsigned int PARSE_BUFFER_SIZE;
/* Index of the next character read_parse_buffer() will hand out. */
static unsigned int SCANNER_POS;
/* Prototypes for the routines defined below. */
void set_parse_buffer( char* s );
void reset_parse_buffer( void );
int read_parse_buffer( void );
char * parse_buffer( void );
char * parse_buffer_ptr( void );
unsigned int parse_buffer_curr_char( void );
unsigned int parse_buffer_size( void );
unsigned int parse_buffer_pos( void );
extern void cube_flush_scanner_buffer(void); /* defined in cubescan.l */
/*
 * set_parse_buffer
 *		Make "s" the string the scanner will read from and rewind the read
 *		position to its beginning.  An empty string is reported through
 *		elog(ERROR).
 */
void set_parse_buffer( char* s )
{
    PARSE_BUFFER = s;
    PARSE_BUFFER_SIZE = strlen(PARSE_BUFFER);

    if ( !PARSE_BUFFER_SIZE )
        elog(ERROR, "cube_in: can't parse an empty string");

    /* start reading from the first character */
    SCANNER_POS = 0;
    PARSE_BUFFER_PTR = s;
}
/*
 * reset_parse_buffer
 *		Rewind the read position to the start of the current buffer and
 *		discard whatever the flex scanner has buffered.
 */
void reset_parse_buffer( void )
{
    SCANNER_POS = 0;
    PARSE_BUFFER_PTR = PARSE_BUFFER;

    /* the scanner keeps its own buffer; flush it as well */
    cube_flush_scanner_buffer();
}
/*
 * read_parse_buffer
 *		Return the character at the current read position and step past it.
 *		The position never moves beyond the terminating NUL, so repeated
 *		calls at end of input keep returning '\0'.
 */
int read_parse_buffer( void )
{
    int ch = PARSE_BUFFER[SCANNER_POS];

    if (SCANNER_POS < PARSE_BUFFER_SIZE)
        ++SCANNER_POS;

    return ch;
}
/*
 * Read-only accessors for the parse buffer state.
 */

/* parse_buffer: the start of the string being parsed. */
char * parse_buffer( void )
{
    return PARSE_BUFFER;
}

/*
 * parse_buffer_curr_char: the character at the current read position (the
 * next one read_parse_buffer() would return), or 0 at end of input.
 *
 * The byte is converted through unsigned char first: converting a plain
 * (possibly signed) char straight to unsigned int would sign-extend bytes
 * >= 0x80 into huge values, which callers then print with %o.
 */
unsigned int parse_buffer_curr_char( void )
{
    return (unsigned char) PARSE_BUFFER[SCANNER_POS];
}

/* parse_buffer_ptr: the saved buffer pointer (see PARSE_BUFFER_PTR). */
char * parse_buffer_ptr( void )
{
    return PARSE_BUFFER_PTR;
}

/* parse_buffer_pos: index of the next character to be read. */
unsigned int parse_buffer_pos( void )
{
    return SCANNER_POS;
}

/* parse_buffer_size: length of the buffer, excluding the terminating NUL. */
unsigned int parse_buffer_size( void )
{
    return PARSE_BUFFER_SIZE;
}
/*
 * Interface to the parse buffer shared by the parser (cubeparse.y) and the
 * scanner (cubescan.l).
 */
/* install a new input string and rewind to its start */
extern void set_parse_buffer( char* s );
/* rewind to the start of the current string and flush the scanner */
extern void reset_parse_buffer( void );
/* return the next character and advance; yields '\0' at end of input */
extern int read_parse_buffer( void );
/* start of the current input string */
extern char * parse_buffer( void );
/* saved pointer into the current input string */
extern char * parse_buffer_ptr( void );
/* character at the current read position, without advancing */
extern unsigned int parse_buffer_curr_char( void );
/* index of the next character to be read */
extern unsigned int parse_buffer_pos( void );
/* length of the current input string */
extern unsigned int parse_buffer_size( void );
This diff is collapsed.
-- Create the user-defined type for N-dimensional boxes
--
BEGIN TRANSACTION;
-- Input and output conversion functions for the type.  MODULE_PATHNAME is a
-- placeholder that the Makefile replaces with the path of the shared library
-- when it generates cube.sql from this template.
CREATE FUNCTION cube_in(opaque)
RETURNS opaque
AS 'MODULE_PATHNAME'
LANGUAGE 'c';
CREATE FUNCTION cube_out(opaque)
RETURNS opaque
AS 'MODULE_PATHNAME'
LANGUAGE 'c';
-- The type itself: variable length, converted by the two functions above.
CREATE TYPE cube (
internallength = variable,
input = cube_in,
output = cube_out
);
COMMENT ON TYPE cube IS
'multi-dimensional cube ''(FLOAT-1, FLOAT-2, ..., FLOAT-N), (FLOAT-1, FLOAT-2, ..., FLOAT-N)''';
--
-- External C-functions for R-tree methods
--
-- Left/Right methods
-- These four positional predicates are registered so that the <<, &<, &> and
-- >> operators can be created further down; their catalog comments mark them
-- as NOT IMPLEMENTED.
CREATE FUNCTION cube_over_left(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_over_left(cube, cube) IS
'is over and left of (NOT IMPLEMENTED)';
CREATE FUNCTION cube_over_right(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_over_right(cube, cube) IS
'is over and right of (NOT IMPLEMENTED)';
CREATE FUNCTION cube_left(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_left(cube, cube) IS
'is left of (NOT IMPLEMENTED)';
CREATE FUNCTION cube_right(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_right(cube, cube) IS
'is right of (NOT IMPLEMENTED)';
-- Comparison methods
-- Boolean predicates on two cubes.  Each one becomes the PROCEDURE of an
-- operator in the OPERATORS section: cube_lt/cube_gt back < and >,
-- cube_contains/cube_contained back @ and ~, cube_overlap backs &&,
-- cube_same/cube_different back = and <>.
CREATE FUNCTION cube_lt(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_lt(cube, cube) IS
'lower than';
CREATE FUNCTION cube_gt(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_gt(cube, cube) IS
'greater than';
CREATE FUNCTION cube_contains(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_contains(cube, cube) IS
'contains';
CREATE FUNCTION cube_contained(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_contained(cube, cube) IS
'contained in';
CREATE FUNCTION cube_overlap(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_overlap(cube, cube) IS
'overlaps';
CREATE FUNCTION cube_same(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_same(cube, cube) IS
'same as';
CREATE FUNCTION cube_different(cube, cube) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION cube_different(cube, cube) IS
'different';
-- support routines for indexing
-- cube_union and cube_inter take two cubes and return a cube; cube_size
-- returns a float4 measure of a single cube.
CREATE FUNCTION cube_union(cube, cube) RETURNS cube
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION cube_inter(cube, cube) RETURNS cube
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION cube_size(cube) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
-- Misc N-dimensional functions
-- proximity routines
-- cube_distance returns a float4 distance between two cubes.
CREATE FUNCTION cube_distance(cube, cube) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
--
-- OPERATORS
--
-- Every operator takes cube on both sides.  COMMUTATOR names the operator
-- that gives the same result with the arguments swapped, NEGATOR the one
-- that gives the opposite result; RESTRICT and JOIN name the selectivity
-- estimators.
-- Ordering operators (used for sorting).
CREATE OPERATOR < (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_lt,
COMMUTATOR = '>',
RESTRICT = scalarltsel, JOIN = scalarltjoinsel
);
CREATE OPERATOR > (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_gt,
COMMUTATOR = '<',
RESTRICT = scalargtsel, JOIN = scalargtjoinsel
);
-- Positional operators: left of, over left, overlaps, over right, right of.
CREATE OPERATOR << (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_left,
COMMUTATOR = '>>',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR &< (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_over_left,
COMMUTATOR = '&>',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR && (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_overlap,
COMMUTATOR = '&&',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR &> (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_over_right,
COMMUTATOR = '&<',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR >> (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_right,
COMMUTATOR = '<<',
RESTRICT = positionsel, JOIN = positionjoinsel
);
-- Equality and inequality; each is the other's negator.
CREATE OPERATOR = (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_same,
COMMUTATOR = '=', NEGATOR = '<>',
RESTRICT = eqsel, JOIN = eqjoinsel,
SORT1 = '<', SORT2 = '<'
);
CREATE OPERATOR <> (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_different,
COMMUTATOR = '<>', NEGATOR = '=',
RESTRICT = neqsel, JOIN = neqjoinsel
);
-- Containment: @ is "contains" (cube_contains), ~ is "contained in"
-- (cube_contained); each is the other's commutator.
CREATE OPERATOR @ (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_contains,
COMMUTATOR = '~',
RESTRICT = contsel, JOIN = contjoinsel
);
CREATE OPERATOR ~ (
LEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_contained,
COMMUTATOR = '@',
RESTRICT = contsel, JOIN = contjoinsel
);
-- define the GiST support methods
-- These seven functions are registered in pg_amproc at the end of this
-- script, where each is given its support-procedure number (1..7).
CREATE FUNCTION g_cube_consistent(opaque,cube,int4) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_compress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_decompress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_penalty(opaque,opaque,opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_picksplit(opaque, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_union(bytea, opaque) RETURNS cube
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION g_cube_same(cube, cube, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
-- register the default opclass for indexing
INSERT INTO pg_opclass (opcname, opcdeftype)
SELECT 'gist_cube_ops', oid
FROM pg_type
WHERE typname = 'cube';
-- get the comparators for boxes and store them in a tmp table
-- (all operators whose left and right argument types are both cube)
SELECT o.oid AS opoid, o.oprname
INTO TABLE gist_cube_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid and o.oprright = t.oid
and t.typname = 'cube';
-- make sure we have the right operators
-- SELECT * from gist_cube_ops_tmp;
-- using the tmp table, generate the amop entries
-- Each INSERT below picks the gist access method, the gist_cube_ops opclass
-- and one operator by name, and records it under a strategy number:
--   1 <<   2 &<   3 &&   4 &>   5 >>   6 =   7 @   8 ~
-- cube_left
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 1
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '<<';
-- cube_over_left
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 2
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '&<';
-- cube_overlap
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 3
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '&&';
-- cube_over_right
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 4
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '&>';
-- cube_right
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 5
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '>>';
-- cube_same
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 6
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '=';
-- cube_contains
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 7
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '@';
-- cube_contained
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 8
FROM pg_am am, pg_opclass opcl, gist_cube_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and c.oprname = '~';
-- the helper table is no longer needed once the amop rows exist
DROP TABLE gist_cube_ops_tmp;
-- add the entries to amproc for the support methods
-- note the amprocnum numbers associated with each are specific!
-- Mapping used below:
--   1 g_cube_consistent   2 g_cube_union      3 g_cube_compress
--   4 g_cube_decompress   5 g_cube_penalty    6 g_cube_picksplit
--   7 g_cube_same
-- Functions are looked up by name only (proname), so the names must be
-- unique in pg_proc for each INSERT to add exactly one row.
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 1
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_consistent';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 2
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_union';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 3
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_compress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 4
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_decompress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 5
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_penalty';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 6
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_picksplit';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 7
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_cube_ops'
and proname = 'g_cube_same';
END TRANSACTION;
/*#include "postgres.h"*/
/*
 * NDBOX: in-memory representation of an n-dimensional box.
 *
 * The parser (cubeparse.y) allocates offsetof(NDBOX, x[0]) +
 * 2 * dim * sizeof(float) bytes and stores the coordinates of the first
 * corner in x[0 .. dim-1] and those of the second corner in
 * x[dim .. 2*dim-1].
 */
typedef struct NDBOX {
unsigned int size; /* required to be a Postgres varlena type */
unsigned int dim; /* number of dimensions */
float x[1]; /* really 2*dim entries; declared [1] as a variable-length tail */
} NDBOX;
%{
/* NdBox = [(lowerleft),(upperright)] */
/* [(xLL(1)...xLL(N)),(xUR(1)...xUR(n))] */
#define YYERROR_VERBOSE
#define YYPARSE_PARAM result /* need this to pass a pointer (void *) to yyparse */
#define YYSTYPE char * /* every semantic value is a C string */
#define YYDEBUG 1
#include <string.h>
#include "cubedata.h"
#include "buffer.h"
#include "postgres.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#undef yylex /* failure to redefine yylex will result in a call to the */
#define yylex cube_yylex /* wrong scanner when running inside the postgres backend */
extern int yylex(); /* defined as cube_yylex in cubescan.c */
extern int errno;
/* error callback invoked by the generated parser; reports through elog */
int cube_yyerror( char *msg );
/* parser entry point; on success stores an NDBOX pointer through "result" */
int cube_yyparse(void *result);
/* helpers defined after the grammar */
static int delim_count(char *s, char delim);
static NDBOX * write_box(unsigned int dim, char *str1, char *str2);
static NDBOX * write_point_as_box(char *s);
%}
/* BISON Declarations */
%token FLOAT O_PAREN C_PAREN O_BRACKET C_BRACKET COMMA
%start box
/* Grammar follows */
%%
/*
 * box: the start symbol.  Four external forms are accepted:
 *   [(x1,..),(y1,..)]   two corners, bracketed
 *   (x1,..),(y1,..)     two corners
 *   (x1,..)             a point, parenthesized
 *   x1,..               a point, bare list
 * Each action first checks that the whole input has been consumed, then
 * builds the NDBOX and stores it through the "result" parse parameter.
 * Every failure path resets the parse buffer, reports through elog(ERROR)
 * and then uses YYABORT (the elog call is expected not to return in the
 * backend -- TODO confirm; YYABORT is the fallback and is used uniformly).
 */
box:
      O_BRACKET paren_list COMMA paren_list C_BRACKET {
        int dim;
        int c = parse_buffer_curr_char();
        int pos = parse_buffer_pos();

        /* We can't let the parser recognize more than one valid expression:
           the job is done and memory is allocated. */
        if ( c != '\0' ) {
          /* Not at EOF */
          reset_parse_buffer();
          elog(ERROR, "(0) bad cube representation; garbage at or before char %d, ('%c', \\%03o)\n", pos, c, c );
          YYABORT;
        }

        /* both corners must have the same number of coordinates */
        dim = delim_count($2, ',') + 1;
        if ( (delim_count($4, ',') + 1) != dim ) {
          reset_parse_buffer();
          elog(ERROR, "(1) bad cube representation; different point dimensions in (%s) and (%s)\n", $2, $4);
          YYABORT;
        }

        *((void **)result) = write_box( dim, $2, $4 );
      }
    |
      paren_list COMMA paren_list {
        int dim;
        int c = parse_buffer_curr_char();
        int pos = parse_buffer_pos();

        if ( c != '\0' ) { /* Not at EOF */
          reset_parse_buffer();
          elog(ERROR, "(2) bad cube representation; garbage at or before char %d, ('%c', \\%03o)\n", pos, c, c );
          YYABORT;
        }

        /* both corners must have the same number of coordinates */
        dim = delim_count($1, ',') + 1;
        if ( (delim_count($3, ',') + 1) != dim ) {
          reset_parse_buffer();
          elog(ERROR, "(3) bad cube representation; different point dimensions in (%s) and (%s)\n", $1, $3);
          YYABORT;
        }

        *((void **)result) = write_box( dim, $1, $3 );
      }
    |
      paren_list {
        int c = parse_buffer_curr_char();
        int pos = parse_buffer_pos();

        if ( c != '\0') { /* Not at EOF */
          reset_parse_buffer();
          elog(ERROR, "(4) bad cube representation; garbage at or before char %d, ('%c', \\%03o)\n", pos, c, c );
          YYABORT;
        }

        if ( yychar != YYEOF) {
          /* There's still a lookahead token to be parsed */
          reset_parse_buffer();
          elog(ERROR, "(5) bad cube representation; garbage at or before char %d, ('end of input', \\%03o)\n", pos, c);
          YYABORT;
        }

        /* a point is stored as a box whose two corners coincide */
        *((void **)result) = write_point_as_box($1);
      }
    |
      list {
        int c = parse_buffer_curr_char();
        int pos = parse_buffer_pos();

        if ( c != '\0') { /* Not at EOF */
          reset_parse_buffer();
          elog(ERROR, "(6) bad cube representation; garbage at or before char %d, ('%c', \\%03o)\n", pos, c, c);
          YYABORT;
        }

        if ( yychar != YYEOF) {
          /* There's still a lookahead token to be parsed */
          reset_parse_buffer();
          elog(ERROR, "(7) bad cube representation; garbage at or before char %d, ('end of input', \\%03o)\n", pos, c);
          YYABORT;
        }

        /* a point is stored as a box whose two corners coincide */
        *((void **)result) = write_point_as_box($1);
      }
    ;
/* paren_list: a list in parentheses; its value is the list's string. */
paren_list:
O_PAREN list C_PAREN {
$$ = $2;
}
;
/*
 * list: comma-separated FLOATs, accumulated as one "f1,f2,..." string.
 * The string is allocated once, sized to the length of the whole input, and
 * later items are appended to it in place with strcat.
 * NOTE(review): $1/$3 of FLOAT point at the scanner's yytext, so they are
 * presumably only valid until the scanner is called again -- confirm that
 * these rules are always reduced before the next token is read.
 */
list:
FLOAT {
$$ = palloc(strlen(parse_buffer()) + 1);
strcpy($$, $1);
}
|
list COMMA FLOAT {
$$ = $1;
strcat($$, ",");
strcat($$, $3);
}
;
%%
int cube_yyerror ( char *msg ) {
char *buf = (char *) palloc(256);
int position;
yyclearin;
if ( !strcmp(msg, "parse error, expecting `$'") ) {
msg = "expecting end of input";
}
position = parse_buffer_pos() > parse_buffer_size() ? parse_buffer_pos() - 1 : parse_buffer_pos();
sprintf(
buf,
"%s at or before position %d, character ('%c', \\%03o), input: '%s'\n",
msg,
position,
parse_buffer()[position - 1],
parse_buffer()[position - 1],
parse_buffer()
);
reset_parse_buffer();
elog(ERROR, buf);
return 0;
}
/*
 * delim_count
 *		Count how many times the character "delim" occurs in the
 *		NUL-terminated string "s".
 */
static int
delim_count(char *s, char delim)
{
    int   count = 0;
    char *hit;

    /* hop from one occurrence to the next until strchr finds no more */
    for (hit = strchr(s, delim); hit != NULL; hit = strchr(hit + 1, delim))
        count++;

    return count;
}
/*
 * write_box
 *		Build an NDBOX from two comma-separated coordinate strings.
 *
 * dim:  number of coordinates in each string
 * str1: coordinates of the first corner, stored in x[0 .. dim-1]
 * str2: coordinates of the second corner, stored in x[dim .. 2*dim-1]
 *
 * Returns a palloc'ed NDBOX with size and dim filled in.
 */
static NDBOX *
write_box(unsigned int dim, char *str1, char *str2)
{
    int     nbytes = offsetof(NDBOX, x[0]) + sizeof(float) * dim * 2;
    NDBOX  *box = palloc(nbytes);
    char   *cursor;
    int     slot;

    box->size = nbytes;
    box->dim = dim;

    /* first corner: one number before each comma, plus the last one */
    slot = 0;
    cursor = str1;
    for (;;) {
        box->x[slot] = strtod(cursor, NULL);
        cursor = strchr(cursor, ',');
        if (cursor == NULL)
            break;
        cursor++;
        slot++;
    }

    /* second corner goes into the upper half of the array */
    slot = dim;
    cursor = str2;
    for (;;) {
        box->x[slot] = strtod(cursor, NULL);
        cursor = strchr(cursor, ',');
        if (cursor == NULL)
            break;
        cursor++;
        slot++;
    }

    return box;
}
/*
 * write_point_as_box
 *		Build an NDBOX for a point given as a comma-separated coordinate
 *		string: each coordinate is stored twice, once in the lower half of
 *		x[] and once in the upper half, so both corners coincide.
 *
 * Returns a palloc'ed NDBOX whose dim is the number of coordinates in "str".
 */
static NDBOX * write_point_as_box(char *str)
{
    int     dim = delim_count(str, ',') + 1;
    int     nbytes = offsetof(NDBOX, x[0]) + sizeof(float) * dim * 2;
    NDBOX  *box = palloc(nbytes);
    char   *cursor = str;
    int     slot = 0;
    double  value;

    box->size = nbytes;
    box->dim = dim;

    for (;;) {
        value = strtod(cursor, NULL);
        box->x[slot] = value;
        box->x[slot + dim] = value;

        /* move to the text following the next comma, if there is one */
        cursor = strchr(cursor, ',');
        if (cursor == NULL)
            break;
        cursor++;
        slot++;
    }

    return box;
}
%{
/*
** A scanner for EMP-style numeric ranges
*/
/* token values are C strings, matching YYSTYPE in the grammar */
#define YYSTYPE char *
#define yylval cube_yylval
#include <stdio.h>
#include "cubeparse.h"
#include "buffer.h"
#define YY_NO_UNPUT 1
#undef yywrap
/* flex screws a couple symbols when used with the -P option; fix those */
#define YY_DECL int cube_yylex YY_PROTO(( void )); \
int cube_yylex YY_PROTO(( void ))
/* redefined YY_INPUT reads byte-wise from the memory area defined in buffer.c */
/* (a '\0' from read_parse_buffer() is reported to flex as end of input) */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
{ \
int c = read_parse_buffer(); \
result = (c == '\0') ? YY_NULL : (buf[0] = c, 1); \
}
/* discards flex's buffered input; called from reset_parse_buffer() in buffer.c */
void cube_flush_scanner_buffer(void);
%}
/*
 * Token definitions.
 *
 * In "real" the optional sign is grouped with BOTH alternatives
 * ("1.", "1.5" and ".5"); without the extra parentheses the sign would only
 * apply to the first alternative and input such as "-.1" would be rejected.
 *
 * Every token carries its own text as semantic value; the grammar only uses
 * the value of FLOAT.  All white space between tokens is skipped; any other
 * character is returned as itself so that the parser reports it as garbage.
 */
n            [0-9]+
integer      [+-]?{n}
real         [+-]?(({n}\.{n}?)|(\.{n}))
float        ({integer}|{real})([eE]{integer})?

%%

{float}      yylval = yytext; return FLOAT;
\[           yylval = "["; return O_BRACKET;
\]           yylval = "]"; return C_BRACKET;
\(           yylval = "("; return O_PAREN;
\)           yylval = ")"; return C_PAREN;
\,           yylval = ","; return COMMA;
[ \t\r\n\f]+ /* discard white space */
.            return yytext[0]; /* alert parser of the garbage */

%%
int cube_yylex();

/*
 * cube_flush_scanner_buffer
 *		Throw away any input flex has already buffered, so that the next
 *		scan starts reading from the parse buffer again.  Called from
 *		reset_parse_buffer() in buffer.c.
 *
 * (A leftover debugging message that was printed to stderr on every call
 * has been removed.)
 */
void cube_flush_scanner_buffer(void) {
    YY_FLUSH_BUFFER;
}
This diff is collapsed.
This diff is collapsed.
--
-- Test cube datatype
--
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of cube.sql.
--
\set ECHO none
\i cube.sql
\set ECHO all
--
-- testing the input and output functions
--
-- Any number (a one-dimensional point)
SELECT '1'::cube AS cube;
SELECT '-1'::cube AS cube;
SELECT '1.'::cube AS cube;
SELECT '-1.'::cube AS cube;
SELECT '.1'::cube AS cube;
SELECT '-.1'::cube AS cube;
SELECT '1.0'::cube AS cube;
SELECT '-1.0'::cube AS cube;
SELECT '1e7'::cube AS cube;
SELECT '-1e7'::cube AS cube;
SELECT '1.0e7'::cube AS cube;
SELECT '-1.0e7'::cube AS cube;
SELECT '1e+7'::cube AS cube;
SELECT '-1e+7'::cube AS cube;
SELECT '1.0e+7'::cube AS cube;
SELECT '-1.0e+7'::cube AS cube;
SELECT '1e-7'::cube AS cube;
SELECT '-1e-7'::cube AS cube;
SELECT '1.0e-7'::cube AS cube;
SELECT '-1.0e-7'::cube AS cube;
SELECT '1e700'::cube AS cube;
SELECT '-1e700'::cube AS cube;
SELECT '1e-700'::cube AS cube;
SELECT '-1e-700'::cube AS cube;
-- simple lists (points)
SELECT '1,2'::cube AS cube;
SELECT '(1,2)'::cube AS cube;
SELECT '1,2,3,4,5'::cube AS cube;
SELECT '(1,2,3,4,5)'::cube AS cube;
-- double lists (cubes)
SELECT '(0),(0)'::cube AS cube;
SELECT '(0),(1)'::cube AS cube;
SELECT '[(0),(0)]'::cube AS cube;
SELECT '[(0),(1)]'::cube AS cube;
SELECT '(0,0,0,0),(0,0,0,0)'::cube AS cube;
SELECT '(0,0,0,0),(1,0,0,0)'::cube AS cube;
SELECT '[(0,0,0,0),(0,0,0,0)]'::cube AS cube;
SELECT '[(0,0,0,0),(1,0,0,0)]'::cube AS cube;
-- invalid input: parse errors
SELECT ''::cube AS cube;
SELECT 'ABC'::cube AS cube;
SELECT '()'::cube AS cube;
SELECT '[]'::cube AS cube;
SELECT '[()]'::cube AS cube;
SELECT '[(1)]'::cube AS cube;
SELECT '[(1),]'::cube AS cube;
SELECT '[(1),2]'::cube AS cube;
SELECT '[(1),(2),(3)]'::cube AS cube;
SELECT '1,'::cube AS cube;
SELECT '1,2,'::cube AS cube;
SELECT '1,,2'::cube AS cube;
SELECT '(1,)'::cube AS cube;
SELECT '(1,2,)'::cube AS cube;
SELECT '(1,,2)'::cube AS cube;
-- invalid input: semantic errors and trailing garbage
SELECT '[(1),(2)],'::cube AS cube; -- 0
SELECT '[(1,2,3),(2,3)]'::cube AS cube; -- 1
SELECT '[(1,2),(1,2,3)]'::cube AS cube; -- 1
SELECT '(1),(2),'::cube AS cube; -- 2
SELECT '(1,2,3),(2,3)'::cube AS cube; -- 3
SELECT '(1,2),(1,2,3)'::cube AS cube; -- 3
SELECT '(1,2,3)ab'::cube AS cube; -- 4
SELECT '(1,2,3)a'::cube AS cube; -- 5
SELECT '(1,2)('::cube AS cube; -- 5
SELECT '1,2ab'::cube AS cube; -- 6
SELECT '1 e7'::cube AS cube; -- 6
SELECT '1,2a'::cube AS cube; -- 7
SELECT '1..2'::cube AS cube; -- 7
--
-- testing the operators
--
-- equality/inequality:
--
SELECT '24, 33.20'::cube = '24, 33.20'::cube AS bool;
SELECT '24, 33.20'::cube != '24, 33.20'::cube AS bool;
SELECT '24, 33.20'::cube = '24, 33.21'::cube AS bool;
SELECT '24, 33.20'::cube != '24, 33.21'::cube AS bool;
SELECT '(2,0),(3,1)'::cube = '(2,0,0,0,0),(3,1,0,0,0)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube = '(2,0,0,0,0),(3,1,0,0,1)'::cube AS bool;
-- "lower than" / "greater than"
-- (these operators are not useful for anything but ordering)
--
SELECT '1'::cube > '2'::cube AS bool;
SELECT '1'::cube < '2'::cube AS bool;
SELECT '1,1'::cube > '1,2'::cube AS bool;
SELECT '1,1'::cube < '1,2'::cube AS bool;
SELECT '(2,0),(3,1)'::cube > '(2,0,0,0,0),(3,1,0,0,1)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube < '(2,0,0,0,0),(3,1,0,0,1)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube > '(2,0,0,0,1),(3,1,0,0,0)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube < '(2,0,0,0,1),(3,1,0,0,0)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube > '(2,0,0,0,0),(3,1,0,0,0)'::cube AS bool;
SELECT '(2,0),(3,1)'::cube < '(2,0,0,0,0),(3,1,0,0,0)'::cube AS bool;
SELECT '(2,0,0,0,0),(3,1,0,0,1)'::cube > '(2,0),(3,1)'::cube AS bool;
SELECT '(2,0,0,0,0),(3,1,0,0,1)'::cube < '(2,0),(3,1)'::cube AS bool;
SELECT '(2,0,0,0,1),(3,1,0,0,0)'::cube > '(2,0),(3,1)'::cube AS bool;
SELECT '(2,0,0,0,1),(3,1,0,0,0)'::cube < '(2,0),(3,1)'::cube AS bool;
SELECT '(2,0,0,0,0),(3,1,0,0,0)'::cube > '(2,0),(3,1)'::cube AS bool;
SELECT '(2,0,0,0,0),(3,1,0,0,0)'::cube < '(2,0),(3,1)'::cube AS bool;
-- "overlap"
--
SELECT '1'::cube && '1'::cube AS bool;
SELECT '1'::cube && '2'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '0'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '1'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '1,1,1'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '[(1,1,1),(2,2,2)]'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '[(1,1),(2,2)]'::cube AS bool;
SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '[(2,1,1),(2,2,2)]'::cube AS bool;
-- "overlap on the left" / "overlap on the right"
-- (these operators are not useful at all but R-tree seems to be
-- sensitive to their presence)
--
SELECT '1'::cube &< '0'::cube AS bool;
SELECT '1'::cube &< '1'::cube AS bool;
SELECT '1'::cube &< '2'::cube AS bool;
SELECT '(0),(1)'::cube &< '0'::cube AS bool;
SELECT '(0),(1)'::cube &< '1'::cube AS bool;
SELECT '(0),(1)'::cube &< '(0),(0.5)'::cube AS bool;
SELECT '(0),(1)'::cube &< '(0),(1)'::cube AS bool;
SELECT '(0),(1)'::cube &< '(0),(2)'::cube AS bool;
SELECT '(0),(1)'::cube &< '(1),(2)'::cube AS bool;
SELECT '(0),(1)'::cube &< '(2),(3)'::cube AS bool;
SELECT '0'::cube &> '1'::cube AS bool;
SELECT '1'::cube &> '1'::cube AS bool;
SELECT '2'::cube &> '1'::cube AS bool;
SELECT '0'::cube &> '(0),(1)'::cube AS bool;
SELECT '1'::cube &> '(0),(1)'::cube AS bool;
SELECT '(0),(0.5)' &> '(0),(1)'::cube AS bool;
SELECT '(0),(1)'::cube &> '(0),(1)'::cube AS bool;
SELECT '(0),(2)'::cube &> '(0),(1)'::cube AS bool;
SELECT '(1),(2)'::cube &> '(0),(1)'::cube AS bool;
SELECT '(2),(3)'::cube &> '(0),(1)'::cube AS bool;
-- "left" / "right"
-- (these operators are not useful but for 1-D or 2-D cubes, but R-tree
-- seems to want them defined)
--
SELECT '1'::cube << '0'::cube AS bool;
SELECT '1'::cube << '1'::cube AS bool;
SELECT '1'::cube << '2'::cube AS bool;
SELECT '(0),(1)'::cube << '0'::cube AS bool;
SELECT '(0),(1)'::cube << '1'::cube AS bool;
SELECT '(0),(1)'::cube << '(0),(0.5)'::cube AS bool;
SELECT '(0),(1)'::cube << '(0),(1)'::cube AS bool;
SELECT '(0),(1)'::cube << '(0),(2)'::cube AS bool;
SELECT '(0),(1)'::cube << '(1),(2)'::cube AS bool;
SELECT '(0),(1)'::cube << '(2),(3)'::cube AS bool;
SELECT '0'::cube >> '1'::cube AS bool;
SELECT '1'::cube >> '1'::cube AS bool;
SELECT '2'::cube >> '1'::cube AS bool;
SELECT '0'::cube >> '(0),(1)'::cube AS bool;
SELECT '1'::cube >> '(0),(1)'::cube AS bool;
SELECT '(0),(0.5)' >> '(0),(1)'::cube AS bool;
SELECT '(0),(1)'::cube >> '(0),(1)'::cube AS bool;
SELECT '(0),(2)'::cube >> '(0),(1)'::cube AS bool;
SELECT '(1),(2)'::cube >> '(0),(1)'::cube AS bool;
SELECT '(2),(3)'::cube >> '(0),(1)'::cube AS bool;
-- "contained in" (the left operand is the cube entirely enclosed by
-- the right operand):
--
SELECT '0'::cube ~ '0'::cube AS bool;
SELECT '0,0,0'::cube ~ '0,0,0'::cube AS bool;
SELECT '0,0'::cube ~ '0,0,1'::cube AS bool;
SELECT '0,0,0'::cube ~ '0,0,1'::cube AS bool;
SELECT '1,0,0'::cube ~ '0,0,1'::cube AS bool;
SELECT '(1,0,0),(0,0,1)'::cube ~ '(1,0,0),(0,0,1)'::cube AS bool;
SELECT '(1,0,0),(0,0,1)'::cube ~ '(-1,-1,-1),(1,1,1)'::cube AS bool;
SELECT '(1,0,0),(0,0,1)'::cube ~ '(-1,-1,-1,-1),(1,1,1,1)'::cube AS bool;
SELECT '0'::cube ~ '(-1),(1)'::cube AS bool;
SELECT '1'::cube ~ '(-1),(1)'::cube AS bool;
SELECT '-1'::cube ~ '(-1),(1)'::cube AS bool;
SELECT '(-1),(1)'::cube ~ '(-1),(1)'::cube AS bool;
SELECT '(-1),(1)'::cube ~ '(-1,-1),(1,1)'::cube AS bool;
SELECT '(-2),(1)'::cube ~ '(-1),(1)'::cube AS bool;
SELECT '(-2),(1)'::cube ~ '(-1,-1),(1,1)'::cube AS bool;
-- "contains" (the left operand is the cube that entirely encloses the
-- right operand)
--
SELECT '0'::cube @ '0'::cube AS bool;
SELECT '0,0,0'::cube @ '0,0,0'::cube AS bool;
SELECT '0,0,1'::cube @ '0,0'::cube AS bool;
SELECT '0,0,1'::cube @ '0,0,0'::cube AS bool;
SELECT '0,0,1'::cube @ '1,0,0'::cube AS bool;
SELECT '(1,0,0),(0,0,1)'::cube @ '(1,0,0),(0,0,1)'::cube AS bool;
SELECT '(-1,-1,-1),(1,1,1)'::cube @ '(1,0,0),(0,0,1)'::cube AS bool;
SELECT '(-1,-1,-1,-1),(1,1,1,1)'::cube @ '(1,0,0),(0,0,1)'::cube AS bool;
SELECT '(-1),(1)'::cube @ '0'::cube AS bool;
SELECT '(-1),(1)'::cube @ '1'::cube AS bool;
SELECT '(-1),(1)'::cube @ '-1'::cube AS bool;
SELECT '(-1),(1)'::cube @ '(-1),(1)'::cube AS bool;
SELECT '(-1,-1),(1,1)'::cube @ '(-1),(1)'::cube AS bool;
SELECT '(-1),(1)'::cube @ '(-2),(1)'::cube AS bool;
SELECT '(-1,-1),(1,1)'::cube @ '(-2),(1)'::cube AS bool;
-- Load some example data and build the index
--
CREATE TABLE test_cube (c cube);
\copy test_cube from 'data/test_cube.data'
CREATE INDEX test_cube_ix ON test_cube USING gist (c);
-- ORDER BY pins the row order; without it rows come back in scan order
SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' ORDER BY c;
-- Test sorting
SELECT * FROM test_cube WHERE c && '(3000,1000),(0,0)' GROUP BY c ORDER BY c;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment