Commit 911e7020 authored by Alexander Korotkov

Implement operator class parameters

PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing.  These index AMs are GiST, GIN,
SP-GiST and BRIN.  There, opclasses define the representation of keys, the
operations on them and the supported search strategies.  So, it's natural that
opclasses may face some tradeoffs, which require a user-side decision.  This
commit implements opclass parameters allowing users to set some values, which
tell the opclass how to index the particular dataset.

This commit doesn't introduce new storage in the system catalog.  Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
is unused for index attributes.

In order to avoid changing the signature of each opclass support function, we
implement a unified way to pass options to opclass support functions.  Options
are set to fn_expr as the constant bytea expression.  It's possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.

This commit comes with some examples of opclass options usage.  We parametrize
signature length in GiST.  That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops.  Also we parametrize maximum number of integer ranges for
gist__int_ops.  However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.

Catversion is bumped.

Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
parent 1d53432f
......@@ -22,7 +22,8 @@
/* Support procedures numbers */
#define BLOOM_HASH_PROC 1
#define BLOOM_NPROC 1
#define BLOOM_OPTIONS_PROC 2
#define BLOOM_NPROC 2
/* Scan strategies */
#define BLOOM_EQUAL_STRATEGY 1
......
......@@ -109,6 +109,7 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = BLOOM_NSTRATEGIES;
amroutine->amsupport = BLOOM_NPROC;
amroutine->amoptsprocnum = BLOOM_OPTIONS_PROC;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = false;
......
......@@ -108,6 +108,9 @@ blvalidate(Oid opclassoid)
ok = check_amproc_signature(procform->amproc, INT4OID, false,
1, 1, opckeytype);
break;
case BLOOM_OPTIONS_PROC:
ok = check_amoptsproc_signature(procform->amproc);
break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
......@@ -204,6 +207,8 @@ blvalidate(Oid opclassoid)
if (opclassgroup &&
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
continue; /* got it */
if (i == BLOOM_OPTIONS_PROC)
continue; /* optional method */
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("bloom opclass %s is missing support function %d",
......
......@@ -11,6 +11,7 @@ OBJS = \
EXTENSION = hstore
DATA = hstore--1.4.sql \
hstore--1.6--1.7.sql \
hstore--1.5--1.6.sql \
hstore--1.4--1.5.sql \
hstore--1.3--1.4.sql hstore--1.2--1.3.sql \
......
......@@ -1344,6 +1344,51 @@ select count(*) from testhstore where h ?& ARRAY['public','disabled'];
42
(1 row)
drop index hidx;
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=2025));
ERROR: value 2025 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=2024));
set enable_seqscan=off;
select count(*) from testhstore where h @> 'wait=>NULL';
count
-------
1
(1 row)
select count(*) from testhstore where h @> 'wait=>CC';
count
-------
15
(1 row)
select count(*) from testhstore where h @> 'wait=>CC, public=>t';
count
-------
2
(1 row)
select count(*) from testhstore where h ? 'public';
count
-------
194
(1 row)
select count(*) from testhstore where h ?| ARRAY['public','disabled'];
count
-------
337
(1 row)
select count(*) from testhstore where h ?& ARRAY['public','disabled'];
count
-------
42
(1 row)
drop index hidx;
create index hidx on testhstore using gin (h);
set enable_seqscan=off;
......
/* contrib/hstore/hstore--1.6--1.7.sql */
-- Upgrade script taking the hstore extension from version 1.6 to 1.7.
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION hstore UPDATE TO '1.7'" to load this file. \quit
-- Declare the C-language function ghstore_options: one "internal" argument,
-- no result (void).  The symbol of the same name is looked up in the
-- extension's shared library (MODULE_PATHNAME).
CREATE FUNCTION ghstore_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', 'ghstore_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Register ghstore_options in the gist_hstore_ops GiST operator family as
-- support function number 10 for the hstore type.
-- NOTE(review): 10 is presumably GiST's opclass-options support slot, which
-- is what would let an index be declared as gist_hstore_ops(siglen=...) --
-- confirm against the GiST extensibility documentation.
ALTER OPERATOR FAMILY gist_hstore_ops USING gist
ADD FUNCTION 10 (hstore) ghstore_options (internal);
# hstore extension
comment = 'data type for storing sets of (key, value) pairs'
default_version = '1.6'
default_version = '1.7'
module_pathname = '$libdir/hstore'
relocatable = true
trusted = true
This diff is collapsed.
......@@ -304,6 +304,19 @@ select count(*) from testhstore where h ? 'public';
select count(*) from testhstore where h ?| ARRAY['public','disabled'];
select count(*) from testhstore where h ?& ARRAY['public','disabled'];
drop index hidx;
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=0));
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=2025));
create index hidx on testhstore using gist(h gist_hstore_ops(siglen=2024));
set enable_seqscan=off;
select count(*) from testhstore where h @> 'wait=>NULL';
select count(*) from testhstore where h @> 'wait=>CC';
select count(*) from testhstore where h @> 'wait=>CC, public=>t';
select count(*) from testhstore where h ? 'public';
select count(*) from testhstore where h ?| ARRAY['public','disabled'];
select count(*) from testhstore where h ?& ARRAY['public','disabled'];
drop index hidx;
create index hidx on testhstore using gin (h);
set enable_seqscan=off;
......
......@@ -12,7 +12,8 @@ OBJS = \
_intbig_gist.o
EXTENSION = intarray
DATA = intarray--1.2.sql intarray--1.1--1.2.sql intarray--1.0--1.1.sql
DATA = intarray--1.2--1.3.sql intarray--1.2.sql intarray--1.1--1.2.sql \
intarray--1.0--1.1.sql
PGFILEDESC = "intarray - functions and operators for arrays of integers"
REGRESS = _int
......
......@@ -8,7 +8,19 @@
#include "utils/memutils.h"
/* number ranges for compression */
#define MAXNUMRANGE 100
#define G_INT_NUMRANGES_DEFAULT 100
#define G_INT_NUMRANGES_MAX ((GISTMaxIndexKeySize - VARHDRSZ) / \
(2 * sizeof(int32)))
#define G_INT_GET_NUMRANGES() (PG_HAS_OPCLASS_OPTIONS() ? \
((GISTIntArrayOptions *) PG_GET_OPCLASS_OPTIONS())->num_ranges : \
G_INT_NUMRANGES_DEFAULT)
/*
 * Parsed opclass options for gist__int_ops.
 *
 * G_INT_GET_NUMRANGES() casts PG_GET_OPCLASS_OPTIONS() to this struct and
 * reads num_ranges, falling back to G_INT_NUMRANGES_DEFAULT when no opclass
 * options are present.  g_int_options() registers the "numranges" option at
 * offsetof(GISTIntArrayOptions, num_ranges).
 */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int num_ranges; /* "numranges" option: number of ranges for compression */
} GISTIntArrayOptions;
/* useful macros for accessing int4 arrays */
#define ARRPTR(x) ( (int32 *) ARR_DATA_PTR(x) )
......@@ -47,15 +59,17 @@
/* bigint defines */
#define SIGLENINT 63 /* >122 => key will toast, so very slow!!! */
#define SIGLEN ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITS_PER_BYTE)
#define SIGLEN_DEFAULT (63 * 4)
#define SIGLEN_MAX GISTMaxIndexKeySize
#define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE)
#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
((GISTIntArrayBigOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
SIGLEN_DEFAULT)
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
#define LOOPBYTE \
for(i=0;i<SIGLEN;i++)
#define LOOPBYTE(siglen) \
for (i = 0; i < siglen; i++)
/* beware of multiple evaluation of arguments to these macros! */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
......@@ -63,8 +77,15 @@ typedef char *BITVECP;
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
/*
 * Parsed opclass options for gist__intbig_ops.
 *
 * GET_SIGLEN() casts PG_GET_OPCLASS_OPTIONS() to this struct and reads
 * siglen, falling back to SIGLEN_DEFAULT when no opclass options are present.
 */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int siglen; /* signature length in bytes */
} GISTIntArrayBigOptions;
/*
* type of index key
......@@ -81,7 +102,7 @@ typedef struct
#define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE )
#define GTHDRSIZE (VARHDRSZ + sizeof(int32))
#define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
#define CALCGTSIZE(flag, siglen) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : (siglen)) )
#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
......@@ -103,7 +124,7 @@ bool inner_int_contains(ArrayType *a, ArrayType *b);
ArrayType *inner_int_union(ArrayType *a, ArrayType *b);
ArrayType *inner_int_inter(ArrayType *a, ArrayType *b);
void rt__int_size(ArrayType *a, float *size);
void gensign(BITVEC sign, int *a, int len);
void gensign(BITVECP sign, int *a, int len, int siglen);
/*****************************************************************************
......@@ -149,7 +170,7 @@ typedef struct QUERYTYPE
#define PG_GETARG_QUERYTYPE_P(n) DatumGetQueryTypeP(PG_GETARG_DATUM(n))
#define PG_GETARG_QUERYTYPE_P_COPY(n) DatumGetQueryTypePCopy(PG_GETARG_DATUM(n))
bool signconsistent(QUERYTYPE *query, BITVEC sign, bool calcnot);
bool signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot);
bool execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot);
bool gin_bool_consistent(QUERYTYPE *query, bool *check);
......
......@@ -232,7 +232,7 @@ typedef struct
* is there value 'val' in (sorted) array or not ?
*/
static bool
checkcondition_arr(void *checkval, ITEM *item)
checkcondition_arr(void *checkval, ITEM *item, void *options)
{
int32 *StopLow = ((CHKVAL *) checkval)->arrb;
int32 *StopHigh = ((CHKVAL *) checkval)->arre;
......@@ -254,42 +254,42 @@ checkcondition_arr(void *checkval, ITEM *item)
}
static bool
checkcondition_bit(void *checkval, ITEM *item)
checkcondition_bit(void *checkval, ITEM *item, void *siglen)
{
return GETBIT(checkval, HASHVAL(item->val));
return GETBIT(checkval, HASHVAL(item->val, (int)(intptr_t) siglen));
}
/*
* evaluate boolean expression, using chkcond() to test the primitive cases
*/
static bool
execute(ITEM *curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, ITEM *item))
execute(ITEM *curitem, void *checkval, void *options, bool calcnot,
bool (*chkcond) (void *checkval, ITEM *item, void *options))
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == VAL)
return (*chkcond) (checkval, curitem);
return (*chkcond) (checkval, curitem, options);
else if (curitem->val == (int32) '!')
{
return calcnot ?
((execute(curitem - 1, checkval, calcnot, chkcond)) ? false : true)
((execute(curitem - 1, checkval, options, calcnot, chkcond)) ? false : true)
: true;
}
else if (curitem->val == (int32) '&')
{
if (execute(curitem + curitem->left, checkval, calcnot, chkcond))
return execute(curitem - 1, checkval, calcnot, chkcond);
if (execute(curitem + curitem->left, checkval, options, calcnot, chkcond))
return execute(curitem - 1, checkval, options, calcnot, chkcond);
else
return false;
}
else
{ /* |-operator */
if (execute(curitem + curitem->left, checkval, calcnot, chkcond))
if (execute(curitem + curitem->left, checkval, options, calcnot, chkcond))
return true;
else
return execute(curitem - 1, checkval, calcnot, chkcond);
return execute(curitem - 1, checkval, options, calcnot, chkcond);
}
}
......@@ -297,10 +297,10 @@ execute(ITEM *curitem, void *checkval, bool calcnot,
* signconsistent & execconsistent called by *_consistent
*/
bool
signconsistent(QUERYTYPE *query, BITVEC sign, bool calcnot)
signconsistent(QUERYTYPE *query, BITVECP sign, int siglen, bool calcnot)
{
return execute(GETQUERY(query) + query->size - 1,
(void *) sign, calcnot,
(void *) sign, (void *)(intptr_t) siglen, calcnot,
checkcondition_bit);
}
......@@ -314,7 +314,7 @@ execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot)
chkval.arrb = ARRPTR(array);
chkval.arre = chkval.arrb + ARRNELEMS(array);
return execute(GETQUERY(query) + query->size - 1,
(void *) &chkval, calcnot,
(void *) &chkval, NULL, calcnot,
checkcondition_arr);
}
......@@ -325,7 +325,7 @@ typedef struct
} GinChkVal;
static bool
checkcondition_gin(void *checkval, ITEM *item)
checkcondition_gin(void *checkval, ITEM *item, void *options)
{
GinChkVal *gcv = (GinChkVal *) checkval;
......@@ -356,7 +356,7 @@ gin_bool_consistent(QUERYTYPE *query, bool *check)
}
return execute(GETQUERY(query) + query->size - 1,
(void *) &gcv, true,
(void *) &gcv, NULL, true,
checkcondition_gin);
}
......@@ -428,7 +428,7 @@ boolop(PG_FUNCTION_ARGS)
chkval.arrb = ARRPTR(val);
chkval.arre = chkval.arrb + ARRNELEMS(val);
result = execute(GETQUERY(query) + query->size - 1,
&chkval, true,
&chkval, NULL, true,
checkcondition_arr);
pfree(val);
......
......@@ -7,6 +7,7 @@
#include "_int.h"
#include "access/gist.h"
#include "access/reloptions.h"
#include "access/stratnum.h"
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
......@@ -32,6 +33,7 @@ PG_FUNCTION_INFO_V1(g_int_penalty);
PG_FUNCTION_INFO_V1(g_int_picksplit);
PG_FUNCTION_INFO_V1(g_int_union);
PG_FUNCTION_INFO_V1(g_int_same);
PG_FUNCTION_INFO_V1(g_int_options);
/*
......@@ -156,6 +158,7 @@ g_int_compress(PG_FUNCTION_ARGS)
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval;
ArrayType *r;
int num_ranges = G_INT_GET_NUMRANGES();
int len,
lenr;
int *dr;
......@@ -170,9 +173,9 @@ g_int_compress(PG_FUNCTION_ARGS)
CHECKARRVALID(r);
PREPAREARR(r);
if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
if (ARRNELEMS(r) >= 2 * num_ranges)
elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
2 * MAXNUMRANGE - 1, ARRNELEMS(r));
2 * num_ranges - 1, ARRNELEMS(r));
retval = palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(r),
......@@ -195,7 +198,7 @@ g_int_compress(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(entry);
}
if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
if ((len = ARRNELEMS(r)) >= 2 * num_ranges)
{ /* compress */
if (r == (ArrayType *) DatumGetPointer(entry->key))
r = DatumGetArrayTypePCopy(entry->key);
......@@ -208,7 +211,7 @@ g_int_compress(PG_FUNCTION_ARGS)
* "lenr" is the number of ranges we must eventually remove by
* merging, we must be careful to remove no more than this number.
*/
lenr = len - MAXNUMRANGE;
lenr = len - num_ranges;
/*
* Initially assume we can merge consecutive ints into a range. but we
......@@ -241,7 +244,7 @@ g_int_compress(PG_FUNCTION_ARGS)
*/
len = 2 * (len - j);
cand = 1;
while (len > MAXNUMRANGE * 2)
while (len > num_ranges * 2)
{
min = PG_INT64_MAX;
for (i = 2; i < len; i += 2)
......@@ -278,6 +281,7 @@ g_int_decompress(PG_FUNCTION_ARGS)
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval;
ArrayType *r;
int num_ranges = G_INT_GET_NUMRANGES();
int *dr,
lenr;
ArrayType *in;
......@@ -304,7 +308,7 @@ g_int_decompress(PG_FUNCTION_ARGS)
lenin = ARRNELEMS(in);
if (lenin < 2 * MAXNUMRANGE)
if (lenin < 2 * num_ranges)
{ /* not compressed value */
if (in != (ArrayType *) DatumGetPointer(entry->key))
{
......@@ -604,3 +608,17 @@ g_int_picksplit(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(v);
}
/*
 * g_int_options
 *		Opclass options support function for gist__int_ops.
 *
 * Argument 0 is a pointer to the local_relopts being built.  The function
 * sizes the parsed-options struct as GISTIntArrayOptions and declares one
 * integer option, "numranges", stored in its num_ranges field.  Accepted
 * values run from 1 to G_INT_NUMRANGES_MAX, with G_INT_NUMRANGES_DEFAULT
 * used when the option is not given.
 */
Datum
g_int_options(PG_FUNCTION_ARGS)
{
	local_relopts *opts = (local_relopts *) PG_GETARG_POINTER(0);

	/* parsed options are laid out as a GISTIntArrayOptions */
	init_local_reloptions(opts, sizeof(GISTIntArrayOptions));

	/* numranges: default, lower bound, upper bound, then target field */
	add_local_int_reloption(opts,
							"numranges",
							"number of ranges for compression",
							G_INT_NUMRANGES_DEFAULT,
							1,
							G_INT_NUMRANGES_MAX,
							offsetof(GISTIntArrayOptions, num_ranges));

	PG_RETURN_VOID();
}
......@@ -319,14 +319,14 @@ _int_unique(ArrayType *r)
}
void
gensign(BITVEC sign, int *a, int len)
gensign(BITVECP sign, int *a, int len, int siglen)
{
int i;
/* we assume that the sign vector is previously zeroed */
for (i = 0; i < len; i++)
{
HASH(sign, *a);
HASH(sign, *a, siglen);
a++;
}
}
......
This diff is collapsed.
......@@ -547,6 +547,166 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
6343
(1 row)
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 0));
ERROR: value 0 out of bounds for option "numranges"
DETAIL: Valid values are between "1" and "252".
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 253));
ERROR: value 253 out of bounds for option "numranges"
DETAIL: Valid values are between "1" and "252".
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 252));
SELECT count(*) from test__int WHERE a && '{23,50}';
count
-------
403
(1 row)
SELECT count(*) from test__int WHERE a @@ '23|50';
count
-------
403
(1 row)
SELECT count(*) from test__int WHERE a @> '{23,50}';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a @@ '23&50';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a @> '{20,23}';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a <@ '{73,23,20}';
count
-------
10
(1 row)
SELECT count(*) from test__int WHERE a = '{73,23,20}';
count
-------
1
(1 row)
SELECT count(*) from test__int WHERE a @@ '50&68';
count
-------
9
(1 row)
SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
count
-------
21
(1 row)
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
count
-------
21
(1 row)
SELECT count(*) from test__int WHERE a @@ '20 | !21';
count
-------
6566
(1 row)
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
count
-------
6343
(1 row)
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 0));
ERROR: value 0 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 2025));
ERROR: value 2025 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 2024));
SELECT count(*) from test__int WHERE a && '{23,50}';
count
-------
403
(1 row)
SELECT count(*) from test__int WHERE a @@ '23|50';
count
-------
403
(1 row)
SELECT count(*) from test__int WHERE a @> '{23,50}';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a @@ '23&50';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a @> '{20,23}';
count
-------
12
(1 row)
SELECT count(*) from test__int WHERE a <@ '{73,23,20}';
count
-------
10
(1 row)
SELECT count(*) from test__int WHERE a = '{73,23,20}';
count
-------
1
(1 row)
SELECT count(*) from test__int WHERE a @@ '50&68';
count
-------
9
(1 row)
SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
count
-------
21
(1 row)
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
count
-------
21
(1 row)
SELECT count(*) from test__int WHERE a @@ '20 | !21';
count
-------
6566
(1 row)
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
count
-------
6343
(1 row)
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops );
SELECT count(*) from test__int WHERE a && '{23,50}';
......
/* contrib/intarray/intarray--1.2--1.3.sql */
-- Upgrade script taking the intarray extension from version 1.2 to 1.3.
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION intarray UPDATE TO '1.3'" to load this file. \quit
-- Declare the C-language function g_int_options: one "internal" argument,
-- no result (void); implemented by the symbol of the same name in the
-- extension's shared library (MODULE_PATHNAME).
CREATE FUNCTION g_int_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', 'g_int_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Same shape of declaration for g_intbig_options.
CREATE FUNCTION g_intbig_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', 'g_intbig_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Register each function as support function number 10 for _int4 in its
-- GiST operator family.
-- NOTE(review): 10 is presumably GiST's opclass-options support slot, which
-- is what would let indexes be declared as gist__int_ops(numranges = ...)
-- and gist__intbig_ops(siglen = ...) -- confirm against the GiST
-- extensibility documentation.
ALTER OPERATOR FAMILY gist__int_ops USING gist
ADD FUNCTION 10 (_int4) g_int_options (internal);
ALTER OPERATOR FAMILY gist__intbig_ops USING gist
ADD FUNCTION 10 (_int4) g_intbig_options (internal);
# intarray extension
comment = 'functions, operators, and index support for 1-D arrays of integers'
default_version = '1.2'
default_version = '1.3'
module_pathname = '$libdir/_int'
relocatable = true
trusted = true
......@@ -110,6 +110,42 @@ SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
SELECT count(*) from test__int WHERE a @@ '20 | !21';
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 0));
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 253));
CREATE INDEX text_idx on test__int using gist (a gist__int_ops(numranges = 252));
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @@ '23|50';
SELECT count(*) from test__int WHERE a @> '{23,50}';
SELECT count(*) from test__int WHERE a @@ '23&50';
SELECT count(*) from test__int WHERE a @> '{20,23}';
SELECT count(*) from test__int WHERE a <@ '{73,23,20}';
SELECT count(*) from test__int WHERE a = '{73,23,20}';
SELECT count(*) from test__int WHERE a @@ '50&68';
SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
SELECT count(*) from test__int WHERE a @@ '20 | !21';
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 0));
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 2025));
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 2024));
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @@ '23|50';
SELECT count(*) from test__int WHERE a @> '{23,50}';
SELECT count(*) from test__int WHERE a @@ '23&50';
SELECT count(*) from test__int WHERE a @> '{20,23}';
SELECT count(*) from test__int WHERE a <@ '{73,23,20}';
SELECT count(*) from test__int WHERE a = '{73,23,20}';
SELECT count(*) from test__int WHERE a @@ '50&68';
SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
SELECT count(*) from test__int WHERE a @@ '20 | !21';
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
DROP INDEX text_idx;
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops );
......
......@@ -15,7 +15,7 @@ OBJS = \
PG_CPPFLAGS = -DLOWER_NODE
EXTENSION = ltree
DATA = ltree--1.1.sql ltree--1.0--1.1.sql
DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
......
This diff is collapsed.
......@@ -7637,6 +7637,98 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
ERROR: value 2025 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2024));
SELECT count(*) FROM ltreetest WHERE t < '12.3';
count
-------
123
(1 row)
SELECT count(*) FROM ltreetest WHERE t <= '12.3';
count
-------
124
(1 row)
SELECT count(*) FROM ltreetest WHERE t = '12.3';
count
-------
1
(1 row)
SELECT count(*) FROM ltreetest WHERE t >= '12.3';
count
-------
883
(1 row)
SELECT count(*) FROM ltreetest WHERE t > '12.3';
count
-------
882
(1 row)
SELECT count(*) FROM ltreetest WHERE t @> '1.1.1';
count
-------
4
(1 row)
SELECT count(*) FROM ltreetest WHERE t <@ '1.1.1';
count
-------
4
(1 row)
SELECT count(*) FROM ltreetest WHERE t @ '23 & 1';
count
-------
39
(1 row)
SELECT count(*) FROM ltreetest WHERE t ~ '1.1.1.*';
count
-------
4
(1 row)
SELECT count(*) FROM ltreetest WHERE t ~ '*.1';
count
-------
34
(1 row)
SELECT count(*) FROM ltreetest WHERE t ~ '23.*{1}.1';
count
-------
1
(1 row)
SELECT count(*) FROM ltreetest WHERE t ~ '23.*.1';
count
-------
3
(1 row)
SELECT count(*) FROM ltreetest WHERE t ~ '23.*.2';
count
-------
1
(1 row)
SELECT count(*) FROM ltreetest WHERE t ? '{23.*.1,23.*.2}';
count
-------
4
(1 row)
create table _ltreetest (t ltree[]);
\copy _ltreetest FROM 'data/_ltree.data'
SELECT count(*) FROM _ltreetest WHERE t @> '1.1.1' ;
......@@ -7749,3 +7841,65 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
15
(1 row)
drop index _tstidx;
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=2025));
ERROR: value 2025 out of bounds for option "siglen"
DETAIL: Valid values are between "1" and "2024".
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=2024));
SELECT count(*) FROM _ltreetest WHERE t @> '1.1.1' ;
count
-------
15
(1 row)
SELECT count(*) FROM _ltreetest WHERE t <@ '1.1.1' ;
count
-------
19
(1 row)
SELECT count(*) FROM _ltreetest WHERE t @ '23 & 1' ;
count
-------
147
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ~ '1.1.1.*' ;
count
-------
19
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ~ '*.1' ;
count
-------
109
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*{1}.1' ;
count
-------
5
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.1' ;
count
-------
11
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.2' ;
count
-------
5
(1 row)
SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
count
-------
15
(1 row)
/* contrib/ltree/ltree--1.1--1.2.sql */
-- Upgrade script taking the ltree extension from version 1.1 to 1.2.
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION ltree UPDATE TO '1.2'" to load this file. \quit
-- Declare the C-language function ltree_gist_options: one "internal"
-- argument, no result (void); implemented by the symbol of the same name in
-- the extension's shared library (MODULE_PATHNAME).
CREATE FUNCTION ltree_gist_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', 'ltree_gist_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Same shape of declaration for _ltree_gist_options (the ltree[] variant).
CREATE FUNCTION _ltree_gist_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', '_ltree_gist_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Register each function as support function number 10 in its GiST operator
-- family: ltree for gist_ltree_ops, _ltree for gist__ltree_ops.
-- NOTE(review): 10 is presumably GiST's opclass-options support slot, which
-- is what would let indexes be declared with (siglen=...) -- confirm against
-- the GiST extensibility documentation.
ALTER OPERATOR FAMILY gist_ltree_ops USING gist
ADD FUNCTION 10 (ltree) ltree_gist_options (internal);
ALTER OPERATOR FAMILY gist__ltree_ops USING gist
ADD FUNCTION 10 (_ltree) _ltree_gist_options (internal);
# ltree extension
comment = 'data type for hierarchical tree-like structures'
default_version = '1.1'
default_version = '1.2'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
......@@ -209,15 +209,16 @@ int ltree_strncasecmp(const char *a, const char *b, size_t s);
/* GiST support for ltree */
#define SIGLEN_MAX GISTMaxIndexKeySize
#define SIGLEN_DEFAULT (2 * sizeof(int32))
#define BITBYTE 8
#define SIGLENINT 2
#define SIGLEN ( sizeof(int32)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE)
typedef unsigned char BITVEC[SIGLEN];
#define SIGLEN (sizeof(int32) * SIGLENINT)
#define SIGLENBIT(siglen) ((siglen) * BITBYTE)
typedef unsigned char *BITVECP;
#define LOOPBYTE \
for(i=0;i<SIGLEN;i++)
#define LOOPBYTE(siglen) \
for(i = 0; i < (siglen); i++)
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( ((unsigned char)(x)) >> i & 0x01 )
......@@ -225,8 +226,8 @@ typedef unsigned char *BITVECP;
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
/*
* type of index key for ltree. Tree are combined B-Tree and R-Tree
......@@ -256,26 +257,37 @@ typedef struct
#define LTG_ISONENODE(x) ( ((ltree_gist*)(x))->flag & LTG_ONENODE )
#define LTG_ISALLTRUE(x) ( ((ltree_gist*)(x))->flag & LTG_ALLTRUE )
#define LTG_ISNORIGHT(x) ( ((ltree_gist*)(x))->flag & LTG_NORIGHT )
#define LTG_LNODE(x) ( (ltree*)( ( ((char*)(x))+LTG_HDRSIZE ) + ( LTG_ISALLTRUE(x) ? 0 : SIGLEN ) ) )
#define LTG_RENODE(x) ( (ltree*)( ((char*)LTG_LNODE(x)) + VARSIZE(LTG_LNODE(x))) )
#define LTG_RNODE(x) ( LTG_ISNORIGHT(x) ? LTG_LNODE(x) : LTG_RENODE(x) )
#define LTG_LNODE(x, siglen) ( (ltree*)( ( ((char*)(x))+LTG_HDRSIZE ) + ( LTG_ISALLTRUE(x) ? 0 : (siglen) ) ) )
#define LTG_RENODE(x, siglen) ( (ltree*)( ((char*)LTG_LNODE(x, siglen)) + VARSIZE(LTG_LNODE(x, siglen))) )
#define LTG_RNODE(x, siglen) ( LTG_ISNORIGHT(x) ? LTG_LNODE(x, siglen) : LTG_RENODE(x, siglen) )
#define LTG_GETLNODE(x) ( LTG_ISONENODE(x) ? LTG_NODE(x) : LTG_LNODE(x) )
#define LTG_GETRNODE(x) ( LTG_ISONENODE(x) ? LTG_NODE(x) : LTG_RNODE(x) )
#define LTG_GETLNODE(x, siglen) ( LTG_ISONENODE(x) ? LTG_NODE(x) : LTG_LNODE(x, siglen) )
#define LTG_GETRNODE(x, siglen) ( LTG_ISONENODE(x) ? LTG_NODE(x) : LTG_RNODE(x, siglen) )
extern ltree_gist *ltree_gist_alloc(bool isalltrue, BITVECP sign, int siglen,
ltree *left, ltree *right);
/* GiST support for ltree[] */
#define ASIGLENINT (7)
#define ASIGLEN (sizeof(int32)*ASIGLENINT)
#define ASIGLENBIT (ASIGLEN*BITBYTE)
typedef unsigned char ABITVEC[ASIGLEN];
#define LTREE_ASIGLEN_DEFAULT (7 * sizeof(int32))
#define LTREE_ASIGLEN_MAX GISTMaxIndexKeySize
#define LTREE_GET_ASIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
((LtreeGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
LTREE_ASIGLEN_DEFAULT)
#define ASIGLENBIT(siglen) ((siglen) * BITBYTE)
#define ALOOPBYTE(siglen) \
for (i = 0; i < (siglen); i++)
#define ALOOPBYTE \
for(i=0;i<ASIGLEN;i++)
#define AHASHVAL(val, siglen) (((unsigned int)(val)) % ASIGLENBIT(siglen))
#define AHASH(sign, val, siglen) SETBIT((sign), AHASHVAL(val, siglen))
#define AHASHVAL(val) (((unsigned int)(val)) % ASIGLENBIT)
#define AHASH(sign, val) SETBIT((sign), AHASHVAL(val))
/*
 * Parsed opclass options shared by gist_ltree_ops and gist__ltree_ops.
 *
 * LTREE_GET_ASIGLEN() casts PG_GET_OPCLASS_OPTIONS() to this struct and
 * reads siglen, falling back to LTREE_ASIGLEN_DEFAULT when no opclass
 * options are present.
 */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int siglen; /* signature length in bytes */
} LtreeGistOptions;
/* type of key is the same to ltree_gist */
......
This diff is collapsed.
......@@ -280,6 +280,26 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2024));
SELECT count(*) FROM ltreetest WHERE t < '12.3';
SELECT count(*) FROM ltreetest WHERE t <= '12.3';
SELECT count(*) FROM ltreetest WHERE t = '12.3';
SELECT count(*) FROM ltreetest WHERE t >= '12.3';
SELECT count(*) FROM ltreetest WHERE t > '12.3';
SELECT count(*) FROM ltreetest WHERE t @> '1.1.1';
SELECT count(*) FROM ltreetest WHERE t <@ '1.1.1';
SELECT count(*) FROM ltreetest WHERE t @ '23 & 1';
SELECT count(*) FROM ltreetest WHERE t ~ '1.1.1.*';
SELECT count(*) FROM ltreetest WHERE t ~ '*.1';
SELECT count(*) FROM ltreetest WHERE t ~ '23.*{1}.1';
SELECT count(*) FROM ltreetest WHERE t ~ '23.*.1';
SELECT count(*) FROM ltreetest WHERE t ~ '23.*.2';
SELECT count(*) FROM ltreetest WHERE t ? '{23.*.1,23.*.2}';
create table _ltreetest (t ltree[]);
\copy _ltreetest FROM 'data/_ltree.data'
......@@ -305,3 +325,18 @@ SELECT count(*) FROM _ltreetest WHERE t ~ '23.*{1}.1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.2' ;
SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
drop index _tstidx;
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=0));
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=2025));
create index _tstidx on _ltreetest using gist (t gist__ltree_ops(siglen=2024));
SELECT count(*) FROM _ltreetest WHERE t @> '1.1.1' ;
SELECT count(*) FROM _ltreetest WHERE t <@ '1.1.1' ;
SELECT count(*) FROM _ltreetest WHERE t @ '23 & 1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '1.1.1.*' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '*.1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*{1}.1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.1' ;
SELECT count(*) FROM _ltreetest WHERE t ~ '23.*.2' ;
SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
......@@ -9,7 +9,7 @@ OBJS = \
trgm_regexp.o
EXTENSION = pg_trgm
DATA = pg_trgm--1.3--1.4.sql \
DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
pg_trgm--1.0--1.1.sql
PGFILEDESC = "pg_trgm - trigram matching"
......
This diff is collapsed.
/* contrib/pg_trgm/pg_trgm--1.4--1.5.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.5'" to load this file. \quit
-- C-language support function taking a single internal argument and
-- returning void; presumably it declares the gist_trgm_ops opclass
-- parameters (such as siglen) -- confirm against the C implementation.
CREATE FUNCTION gtrgm_options(internal)
RETURNS void
AS 'MODULE_PATHNAME', 'gtrgm_options'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Register gtrgm_options as support function 10 (for type text) of the
-- gist_trgm_ops GiST operator family.
ALTER OPERATOR FAMILY gist_trgm_ops USING gist
ADD FUNCTION 10 (text) gtrgm_options (internal);
# pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams'
default_version = '1.4'
default_version = '1.5'
module_pathname = '$libdir/pg_trgm'
relocatable = true
trusted = true
......@@ -46,6 +46,20 @@ select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
drop index trgm_idx;
create index trgm_idx on test_trgm using gist (t gist_trgm_ops(siglen=0));
create index trgm_idx on test_trgm using gist (t gist_trgm_ops(siglen=2025));
create index trgm_idx on test_trgm using gist (t gist_trgm_ops(siglen=2024));
set enable_seqscan=off;
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
explain (costs off)
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
drop index trgm_idx;
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
set enable_seqscan=off;
......
......@@ -73,17 +73,16 @@ typedef struct
#define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8))
/* gist */
#define SIGLEN_DEFAULT (sizeof(int) * 3)
#define SIGLEN_MAX GISTMaxIndexKeySize
#define BITBYTE 8
#define SIGLENINT 3 /* >122 => key will toast, so very slow!!! */
#define SIGLEN ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */
#define SIGLENBIT(siglen) ((siglen) * BITBYTE - 1) /* see makesign */
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
#define LOOPBYTE \
for(i=0;i<SIGLEN;i++)
#define LOOPBYTE(siglen) \
for (i = 0; i < (siglen); i++)
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 )
......@@ -91,8 +90,8 @@ typedef char *BITVECP;
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
#define ARRKEY 0x01
#define SIGNKEY 0x02
......@@ -102,7 +101,7 @@ typedef char *BITVECP;
#define ISSIGNKEY(x) ( ((TRGM*)x)->flag & SIGNKEY )
#define ISALLTRUE(x) ( ((TRGM*)x)->flag & ALLISTRUE )
#define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
#define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
#define GETSIGN(x) ( (BITVECP)( (char*)x+TRGMHDRSIZE ) )
#define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) )
#define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
......
This diff is collapsed.
......@@ -467,6 +467,23 @@ CREATE INDEX hidx ON testhstore USING GIST (h);
CREATE INDEX hidx ON testhstore USING GIN (h);
</programlisting>
<para>
The <literal>gist_hstore_ops</literal> GiST opclass approximates a set of
key/value pairs as a bitmap signature. The optional integer parameter
<literal>siglen</literal> of <literal>gist_hstore_ops</literal> determines
the signature length in bytes. The default signature length is 16 bytes.
Valid values of signature length are between 1 and 2024 bytes. Longer
signatures lead to a more precise search (scanning a smaller fraction of the
index and fewer heap pages), at the cost of a larger index.
</para>
<para>
Example of creating such an index with a signature length of 32 bytes:
</para>
<programlisting>
CREATE INDEX hidx ON testhstore USING GIST (h gist_hstore_ops(siglen=32));
</programlisting>
<para>
<type>hstore</type> also supports <type>btree</type> or <type>hash</type> indexes for
the <literal>=</literal> operator. This allows <type>hstore</type> columns to be
......
......@@ -1316,7 +1316,7 @@ SELECT target FROM tests WHERE subject = 'some-subject' AND success;
An index definition can specify an <firstterm>operator
class</firstterm> for each column of an index.
<synopsis>
CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> (<replaceable>column</replaceable> <replaceable>opclass</replaceable> <optional><replaceable>sort options</replaceable></optional> <optional>, ...</optional>);
CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> (<replaceable>column</replaceable> <replaceable>opclass</replaceable> [ ( <replaceable>opclass_options</replaceable> ) ] <optional><replaceable>sort options</replaceable></optional> <optional>, ...</optional>);
</synopsis>
The operator class identifies the operators to be used by the index
for that column. For example, a B-tree index on the type <type>int4</type>
......
......@@ -265,7 +265,7 @@
</para>
<para>
Two GiST index operator classes are provided:
Two parametrized GiST index operator classes are provided:
<literal>gist__int_ops</literal> (used by default) is suitable for
small- to medium-size data sets, while
<literal>gist__intbig_ops</literal> uses a larger signature and is more
......@@ -274,6 +274,25 @@
The implementation uses an RD-tree data structure with
built-in lossy compression.
</para>
<para>
<literal>gist__int_ops</literal> approximates an integer set as an array of
integer ranges. The optional integer parameter <literal>numranges</literal> of
<literal>gist__int_ops</literal> determines the maximum number of ranges in
one index key. The default value of <literal>numranges</literal> is 100.
Valid values are between 1 and 253. Using larger arrays as GiST index
keys leads to a more precise search (scanning a smaller fraction of the index
and fewer heap pages), at the cost of a larger index.
</para>
<para>
<literal>gist__intbig_ops</literal> approximates an integer set as a bitmap
signature. The optional integer parameter <literal>siglen</literal> of
<literal>gist__intbig_ops</literal> determines the signature length in bytes.
The default signature length is 16 bytes. Valid values of signature length
are between 1 and 2024 bytes. Longer signatures lead to a more precise
search (scanning a smaller fraction of the index and fewer heap pages), at the cost of a larger index.
</para>
<para>
There is also a non-default GIN operator class
......@@ -293,8 +312,8 @@
-- a message can be in one or more <quote>sections</quote>
CREATE TABLE message (mid INT PRIMARY KEY, sections INT[], ...);
-- create specialized index
CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__int_ops);
-- create specialized index with signature length of 32 bytes
CREATE INDEX message_rdtree_idx ON message USING GIST (sections gist__intbig_ops(siglen=32));
-- select messages in section 1 OR 2 - OVERLAP operator
SELECT message.mid FROM message WHERE message.sections &amp;&amp; '{1,2}';
......
......@@ -498,30 +498,59 @@ Europe &amp; Russia*@ &amp; !Transportation
</listitem>
<listitem>
<para>
GiST index over <type>ltree</type>:
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
opclass):
<literal>&lt;</literal>, <literal>&lt;=</literal>, <literal>=</literal>,
<literal>&gt;=</literal>, <literal>&gt;</literal>,
<literal>@&gt;</literal>, <literal>&lt;@</literal>,
<literal>@</literal>, <literal>~</literal>, <literal>?</literal>
</para>
<para>
Example of creating such an index:
The <literal>gist_ltree_ops</literal> GiST opclass approximates a set of
path labels as a bitmap signature. The optional integer parameter
<literal>siglen</literal> of <literal>gist_ltree_ops</literal> determines
the signature length in bytes. The default signature length is 8 bytes.
Valid values of signature length are between 1 and 2024 bytes. Longer
signatures lead to a more precise search (scanning a smaller fraction of the
index and fewer heap pages), at the cost of a larger index.
</para>
<para>
Example of creating such an index with the default signature length of 8 bytes:
</para>
<programlisting>
CREATE INDEX path_gist_idx ON test USING GIST (path);
</programlisting>
<para>
Example of creating such an index with a signature length of 100 bytes:
</para>
<programlisting>
CREATE INDEX path_gist_idx ON test USING GIST (path gist_ltree_ops(siglen=100));
</programlisting>
</listitem>
<listitem>
<para>
GiST index over <type>ltree[]</type>:
GiST index over <type>ltree[]</type> (<literal>gist__ltree_ops</literal>
opclass):
<literal>ltree[] &lt;@ ltree</literal>, <literal>ltree @&gt; ltree[]</literal>,
<literal>@</literal>, <literal>~</literal>, <literal>?</literal>
</para>
<para>
Example of creating such an index:
The <literal>gist__ltree_ops</literal> GiST opclass works similarly to
<literal>gist_ltree_ops</literal> and also takes the signature length as
a parameter. The default value of <literal>siglen</literal> in
<literal>gist__ltree_ops</literal> is 28 bytes.
</para>
<para>
Example of creating such an index with the default signature length of 28 bytes:
</para>
<programlisting>
CREATE INDEX path_gist_idx ON test USING GIST (array_path);
</programlisting>
<para>
Example of creating such an index with a signature length of 100 bytes:
</para>
<programlisting>
CREATE INDEX path_gist_idx ON test USING GIST (array_path gist__ltree_ops(siglen=100));
</programlisting>
<para>
Note: This index type is lossy.
......
......@@ -390,6 +390,23 @@ CREATE INDEX trgm_idx ON test_trgm USING GIN (t gin_trgm_ops);
</programlisting>
</para>
<para>
The <literal>gist_trgm_ops</literal> GiST opclass approximates a set of
trigrams as a bitmap signature. The optional integer parameter
<literal>siglen</literal> of <literal>gist_trgm_ops</literal> determines
the signature length in bytes. The default signature length is 12 bytes.
Valid values of signature length are between 1 and 2024 bytes. Longer
signatures lead to a more precise search (scanning a smaller fraction of the
index and fewer heap pages), at the cost of a larger index.
</para>
<para>
Example of creating such an index with a signature length of 32 bytes:
</para>
<programlisting>
CREATE INDEX trgm_idx ON test_trgm USING GIST (t gist_trgm_ops(siglen=32));
</programlisting>
<para>
At this point, you will have an index on the <structfield>t</structfield> column that
you can use for similarity searching. A typical query is
......
......@@ -22,7 +22,7 @@ PostgreSQL documentation
<refsynopsisdiv>
<synopsis>
CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class="parameter">name</replaceable> ] ON [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ USING <replaceable class="parameter">method</replaceable> ]
( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] )
( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] { <replaceable class="parameter">opclass</replaceable> | DEFAULT } [ ( <replaceable class="parameter">opclass_parameter</replaceable> = <replaceable class="parameter">value</replaceable> [, ... ] ) ] [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] )
[ INCLUDE ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
[ WITH ( <replaceable class="parameter">storage_parameter</replaceable> = <replaceable class="parameter">value</replaceable> [, ... ] ) ]
[ TABLESPACE <replaceable class="parameter">tablespace_name</replaceable> ]
......@@ -285,6 +285,15 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">opclass_parameter</replaceable></term>
<listitem>
<para>
The name of an operator class parameter. See below for details.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>ASC</literal></term>
<listitem>
......@@ -679,8 +688,9 @@ Indexes:
</para>
<para>
An <firstterm>operator class</firstterm> can be specified for each
column of an index. The operator class identifies the operators to be
An <firstterm>operator class</firstterm> with its optional parameters
can be specified for each column of an index.
The operator class identifies the operators to be
used by the index for that column. For example, a B-tree index on
four-byte integers would use the <literal>int4_ops</literal> class;
this operator class includes comparison functions for four-byte
......
......@@ -3637,7 +3637,7 @@ SELECT plainto_tsquery('supernovae stars');
<tertiary>text search</tertiary>
</indexterm>
<literal>CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING GIST (<replaceable>column</replaceable>);</literal>
<literal>CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING GIST (<replaceable>column</replaceable> [ { DEFAULT | tsvector_ops } (siglen = <replaceable>number</replaceable>) ] );</literal>
</term>
<listitem>
......@@ -3645,6 +3645,8 @@ SELECT plainto_tsquery('supernovae stars');
Creates a GiST (Generalized Search Tree)-based index.
The <replaceable>column</replaceable> can be of <type>tsvector</type> or
<type>tsquery</type> type.
Optional integer parameter <literal>siglen</literal> determines
signature length in bytes (see below for details).
</para>
</listitem>
</varlistentry>
......@@ -3668,12 +3670,17 @@ SELECT plainto_tsquery('supernovae stars');
to check the actual table row to eliminate such false matches.
(<productname>PostgreSQL</productname> does this automatically when needed.)
GiST indexes are lossy because each document is represented in the
index by a fixed-length signature. The signature is generated by hashing
index by a fixed-length signature. The signature length in bytes is determined
by the value of the optional integer parameter <literal>siglen</literal>.
The default signature length (when <literal>siglen</literal> is not specified) is
124 bytes; the maximum length is 2024 bytes. The signature is generated by hashing
each word into a single bit in an n-bit string, with all these bits OR-ed
together to produce an n-bit document signature. When two words hash to
the same bit position there will be a false match. If all words in
the query have matches (real or false) then the table row must be
retrieved to see if the match is correct.
retrieved to see if the match is correct. Longer signatures lead to a more
precise search (scanning a smaller fraction of the index and fewer heap pages),
at the cost of a larger index.
</para>
<para>
......
......@@ -90,6 +90,7 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = 0;
amroutine->amsupport = BRIN_LAST_OPTIONAL_PROCNUM;
amroutine->amoptsprocnum = BRIN_PROCNUM_OPTIONS;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = false;
......
......@@ -105,6 +105,9 @@ brinvalidate(Oid opclassoid)
3, 3, INTERNALOID, INTERNALOID,
INTERNALOID);
break;
case BRIN_PROCNUM_OPTIONS:
ok = check_amoptsproc_signature(procform->amproc);
break;
default:
/* Complain if it's not a valid optional proc number */
if (procform->amprocnum < BRIN_FIRST_OPTIONAL_PROCNUM ||
......
This diff is collapsed.
......@@ -41,6 +41,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = 0;
amroutine->amsupport = GINNProcs;
amroutine->amoptsprocnum = GIN_OPTIONS_PROC;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = false;
......
......@@ -142,6 +142,9 @@ ginvalidate(Oid opclassoid)
INTERNALOID, INTERNALOID,
INTERNALOID);
break;
case GIN_OPTIONS_PROC:
ok = check_amoptsproc_signature(procform->amproc);
break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
......@@ -237,7 +240,8 @@ ginvalidate(Oid opclassoid)
if (opclassgroup &&
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
continue; /* got it */
if (i == GIN_COMPARE_PROC || i == GIN_COMPARE_PARTIAL_PROC)
if (i == GIN_COMPARE_PROC || i == GIN_COMPARE_PARTIAL_PROC ||
i == GIN_OPTIONS_PROC)
continue; /* optional method */
if (i == GIN_CONSISTENT_PROC || i == GIN_TRICONSISTENT_PROC)
continue; /* don't need both, see check below loop */
......
......@@ -62,6 +62,7 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = 0;
amroutine->amsupport = GISTNProcs;
amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = true;
amroutine->amcanbackward = false;
......
......@@ -140,6 +140,9 @@ gistvalidate(Oid opclassoid)
5, 5, INTERNALOID, opcintype,
INT2OID, OIDOID, INTERNALOID);
break;
case GIST_OPTIONS_PROC:
ok = check_amoptsproc_signature(procform->amproc);
break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
......@@ -259,7 +262,8 @@ gistvalidate(Oid opclassoid)
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
continue; /* got it */
if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC)
i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
i == GIST_OPTIONS_PROC)
continue; /* optional methods */
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
......
......@@ -59,6 +59,7 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = HTMaxStrategyNumber;
amroutine->amsupport = HASHNProcs;
amroutine->amoptsprocnum = HASHOPTIONS_PROC;
amroutine->amcanorder = false;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = true;
......
......@@ -126,6 +126,10 @@ hashvalidate(Oid opclassoid)
procform->amproclefttype);
}
break;
case HASHOPTIONS_PROC:
if (!check_amoptsproc_signature(procform->amproc))
result = false;
break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
......
This diff is collapsed.
This diff is collapsed.
......@@ -112,6 +112,7 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = BTMaxStrategyNumber;
amroutine->amsupport = BTNProcs;
amroutine->amoptsprocnum = BTOPTIONS_PROC;
amroutine->amcanorder = true;
amroutine->amcanorderbyop = false;
amroutine->amcanbackward = true;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment