Commit 8a3631f8 authored by Teodor Sigaev's avatar Teodor Sigaev

GIN: Generalized Inverted iNdex.

text[], int4[], Tsearch2 support for GIN.
parent 427c6b5b
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $ # $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
MODULE_big = tsearch2 MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
...@@ -7,7 +7,7 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ ...@@ -7,7 +7,7 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \ ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o \ tsvector_op.o rank.o ts_stat.o \
query_util.o query_support.o query_rewrite.o query_gist.o \ query_util.o query_support.o query_rewrite.o query_gist.o \
ts_locale.o ts_locale.o ginidx.o
SUBDIRS := snowball ispell wordparser SUBDIRS := snowball ispell wordparser
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
......
...@@ -3001,3 +3001,42 @@ select a is null, a from test_tsvector order by a; ...@@ -3001,3 +3001,42 @@ select a is null, a from test_tsvector order by a;
t | t |
(514 rows) (514 rows)
drop index wowidx;
create index wowidx on test_tsvector using gin (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
count
-------
17
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
count
-------
6
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
count
-------
98
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
count
-------
23
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
count
-------
39
(1 row)
#include "postgres.h"
#include <float.h>
#include "access/gist.h"
#include "access/itup.h"
#include "access/tuptoaster.h"
#include "storage/bufpage.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "tsvector.h"
#include "query.h"
#include "query_cleanup.h"
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
Datum
gin_extract_tsvector(PG_FUNCTION_ARGS) {
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
Datum *entries = NULL;
*nentries = 0;
if ( vector->size > 0 ) {
int i;
WordEntry *we = ARRPTR( vector );
*nentries = (uint32)vector->size;
entries = (Datum*)palloc( sizeof(Datum) * vector->size );
for(i=0;i<vector->size;i++) {
text *txt = (text*)palloc( VARHDRSZ + we->len );
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
entries[i] = PointerGetDatum( txt );
we++;
}
}
PG_FREE_IF_COPY(vector, 0);
PG_RETURN_POINTER(entries);
}
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
Datum
gin_extract_tsquery(PG_FUNCTION_ARGS) {
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
Datum *entries = NULL;
*nentries = 0;
if ( query->size > 0 ) {
int4 i, j=0, len;
ITEM *item;
item = clean_NOT_v2(GETQUERY(query), &len);
if ( !item )
elog(ERROR,"Query requires full scan, GIN doesn't support it");
item = GETQUERY(query);
for(i=0; i<query->size; i++)
if ( item[i].type == VAL )
(*nentries)++;
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) );
for(i=0; i<query->size; i++)
if ( item[i].type == VAL ) {
text *txt;
txt = (text*)palloc( VARHDRSZ + item[i].length );
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length );
entries[j++] = PointerGetDatum( txt );
if ( strategy == 1 && item[i].weight != 0 )
elog(ERROR,"With class of lexeme restrictions use @@@ operation");
}
}
PG_FREE_IF_COPY(query, 0);
PG_RETURN_POINTER(entries);
}
/*
 * State passed to TS_execute() as its opaque argument by gin_ts_consistent()
 * and read back in checkcondition_gin().
 */
typedef struct {
/* first ITEM of the query; subtracted from an ITEM pointer to get its index */
ITEM *frst;
/* one flag per query ITEM, indexed by the ITEM's position in the query */
bool *mapped_check;
} GinChkVal;
/*
 * TS_execute() callback: reports the stored flag for the query item 'val'.
 * The item's position is derived from its offset from the first query item.
 */
static bool
checkcondition_gin(void *checkval, ITEM * val)
{
	GinChkVal  *state = (GinChkVal *) checkval;
	bool	   *flag = state->mapped_check + (val - state->frst);

	return *flag;
}
PG_FUNCTION_INFO_V1(gin_ts_consistent);
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
Datum
gin_ts_consistent(PG_FUNCTION_ARGS) {
bool *check = (bool*)PG_GETARG_POINTER(0);
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
bool res = FALSE;
if ( query->size > 0 ) {
int4 i, j=0;
ITEM *item;
GinChkVal gcv;
gcv.frst = item = GETQUERY(query);
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size );
for(i=0; i<query->size; i++)
if ( item[i].type == VAL )
gcv.mapped_check[ i ] = check[ j++ ];
res = TS_execute(
GETQUERY(query),
&gcv,
true,
checkcondition_gin
);
}
PG_FREE_IF_COPY(query, 2);
PG_RETURN_BOOL(res);
}
...@@ -363,3 +363,14 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3'); ...@@ -363,3 +363,14 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
insert into test_tsvector values (null, null); insert into test_tsvector values (null, null);
select a is null, a from test_tsvector order by a; select a is null, a from test_tsvector order by a;
drop index wowidx;
create index wowidx on test_tsvector using gin (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
...@@ -1146,8 +1146,54 @@ AS ...@@ -1146,8 +1146,54 @@ AS
FUNCTION 7 gtsq_same (gtsq, gtsq, internal), FUNCTION 7 gtsq_same (gtsq, gtsq, internal),
STORAGE gtsq; STORAGE gtsq;
--GIN support functions
-- extractValue: splits a tsvector into its entries
CREATE FUNCTION gin_extract_tsvector(tsvector,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C RETURNS NULL ON NULL INPUT;
-- extractQuery: splits a tsquery into its entries (third argument is the strategy number)
CREATE FUNCTION gin_extract_tsquery(tsquery,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C RETURNS NULL ON NULL INPUT;
-- consistent: decides whether an indexed value matches the tsquery
CREATE FUNCTION gin_ts_consistent(internal,internal,tsquery)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C RETURNS NULL ON NULL INPUT;
-- @@@ uses the same procedures as declared here (exectsq / rexectsq); it is
-- registered in the opclass below as strategy 2 with RECHECK
CREATE OPERATOR @@@ (
LEFTARG = tsvector,
RIGHTARG = tsquery,
PROCEDURE = exectsq,
COMMUTATOR = '@@@',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR @@@ (
LEFTARG = tsquery,
RIGHTARG = tsvector,
PROCEDURE = rexectsq,
COMMUTATOR = '@@@',
RESTRICT = contsel,
JOIN = contjoinsel
);
-- Default GIN operator class for tsvector; entries are stored as text and
-- compared with bttextcmp
CREATE OPERATOR CLASS gin_tsvector_ops
DEFAULT FOR TYPE tsvector USING gin
AS
OPERATOR 1 @@ (tsvector, tsquery),
OPERATOR 2 @@@ (tsvector, tsquery) RECHECK,
FUNCTION 1 bttextcmp(text, text),
FUNCTION 2 gin_extract_tsvector(tsvector,internal),
FUNCTION 3 gin_extract_tsquery(tsquery,internal,internal),
FUNCTION 4 gin_ts_consistent(internal,internal,tsquery),
STORAGE text;
--example of ISpell dictionary --example of ISpell dictionary
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template'; --update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
--example of synonym dict --example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5; --update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
END; END;
# #
# Makefile for the access methods module # Makefile for the access methods module
# #
# $PostgreSQL: pgsql/src/backend/access/Makefile,v 1.10 2005/11/07 17:36:44 tgl Exp $ # $PostgreSQL: pgsql/src/backend/access/Makefile,v 1.11 2006/05/02 11:28:54 teodor Exp $
# #
subdir = src/backend/access subdir = src/backend/access
top_builddir = ../../.. top_builddir = ../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
SUBDIRS := common gist hash heap index nbtree transam SUBDIRS := common gist hash heap index nbtree transam gin
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
all: SUBSYS.o all: SUBSYS.o
......
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for access/gin
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.1 2006/05/02 11:28:54 teodor Exp $
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the build tree
subdir = src/backend/access/gin
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
# Object files that are linked together into SUBSYS.o
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
ginbulk.o
all: SUBSYS.o
SUBSYS.o: $(OBJS)
$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
# Regenerates the 'depend' file from the C sources in this directory
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
clean:
rm -f SUBSYS.o $(OBJS)
# Pull in generated dependencies only when the 'depend' file exists
ifeq (depend,$(wildcard depend))
include depend
endif
Gin for PostgreSQL
==================
Gin was sponsored by jfg://networks (http://www.jfg-networks.com/)
Gin stands for Generalized Inverted Index and should be considered as a genie,
not a drink.
Generalized means that the index does not know which operation it accelerates.
It instead works with custom strategies, defined for specific data types (read
"Index Method Strategies" in the PostgreSQL documentation). In that sense, Gin
is similar to GiST and differs from btree indices, which have predefined,
comparison-based operations.
An inverted index is an index structure storing a set of (key, posting list)
pairs, where 'posting list' is a set of documents in which the key occurs.
(A text document would usually contain many keys.) The primary goal of
Gin indices is support for highly scalable, full-text search in PostgreSQL.
Gin consists of a B-tree index constructed over entries (ET, entries tree),
where each entry is an element of the indexed value (element of array, lexeme
for tsvector) and where each tuple in a leaf page is either a pointer to a
B-tree over item pointers (PT, posting tree), or a list of item pointers
(PL, posting list) if the tuple is small enough.
Note: There is no delete operation for ET. The reason for this is that from
our experience, the set of unique words over a large collection changes very
rarely. This greatly simplifies the code and concurrency algorithms.
Gin comes with built-in support for one-dimensional arrays (eg. integer[],
text[]), but no support for NULL elements. The following operations are
available:
* contains: value_array @ query_array
* overlap: value_array && query_array
* contained: value_array ~ query_array
Synopsis
--------
=# create index txt_idx on aa using gin(a);
Features
--------
* Concurrency
* Write-Ahead Logging (WAL). (Recoverability from crashes.)
* User-defined opclasses. (The scheme is similar to GiST.)
* Optimized index creation (Makes use of maintenance_work_mem to accumulate
postings in memory.)
* Tsearch2 support via an opclass
* Soft upper limit on the returned results set using a GUC variable:
gin_fuzzy_search_limit
Gin Fuzzy Limit
---------------
There are often situations when a full-text search returns a very large set of
results. Since reading tuples from the disk and sorting them could take a
lot of time, this is unacceptable for production. (Note that the search
itself is very fast.)
Such queries usually contain very frequent lexemes, so the results are not
very helpful. To facilitate execution of such queries Gin has a configurable
soft upper limit of the size of the returned set, determined by the
'gin_fuzzy_search_limit' GUC variable. This is set to 0 by default (no
limit).
If a non-zero search limit is set, then the returned set is a subset of the
whole result set, chosen at random.
"Soft" means that the actual number of returned results could slightly differ
from the specified limit, depending on the query and the quality of the
system's random number generator.
From experience, a value of 'gin_fuzzy_search_limit' in the thousands
(eg. 5000-20000) works well. This means that 'gin_fuzzy_search_limit' will
have no effect for queries returning a result set with fewer tuples than this
number.
Limitations
-----------
* No support for multicolumn indices
* Gin doesn't use scan->kill_prior_tuple & scan->ignore_killed_tuples
* Gin searches entries only by equality matching. This may be improved in
future.
* Gin doesn't support full scans of indices.
* Gin doesn't index NULL values.
Gin Interface
-------------
Opclass interface pseudocode. An example for a Gin opclass can be found in
ginarrayproc.c.
Datum* extractValue(Datum inputValue, uint32* nentries)
Returns an array of Datum of entries of the value to be indexed. nentries
should contain the number of returned entries.
int compareEntry(Datum a, Datum b)
Compares two entries (not the indexing values)
Datum* extractQuery(Datum query, uint32* nentries, StrategyNumber n)
Returns an array of Datum of entries of the query to be executed.
n contains the strategy number of the operation.
bool consistent(bool[] check, StrategyNumber n, Datum query)
The size of the check array is the same as the size of the array returned by
extractQuery. Each element of the check array is true if the indexed value
has a corresponding entry in the query. i.e. if (check[i] == TRUE) then
the i-th entry of the query is present in the indexed value. The Function
should return true if the indexed value matches by StrategyNumber and
the query.
Open Items
----------
We appreciate any comments, help and suggestions.
* Teach optimizer/executor that GIN is intrinsically clustered. i.e., it
always returns ItemPointer in ascending order.
* Tweak gincostestimate.
* GIN stores several ItemPointer to heap tuple, so VACUUM FULL produces
this warning message:
WARNING: index "idx" contains 88395 row versions, but table contains
51812 row versions
HINT: Rebuild the index with REINDEX.
**** Workaround added
TODO
----
Nearest future:
* Opclasses for all types (no programming, just many catalog changes).
Distant future:
* Replace B-tree of entries with something like GiST
* Add multicolumn support
* Optimize insert operations (background index insertion)
Authors
-------
All work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov
(oleg@sai.msu.su).
/*-------------------------------------------------------------------------
*
* ginarrayproc.c
* support function for GIN's indexing of any array
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "utils/builtins.h"
#include "access/gin.h"
#define GinOverlapStrategy 1
#define GinContainsStrategy 2
#define GinContainedStrategy 3
/*
 * Validates an array before it is used by the GIN array support code.
 * Reports an error if the array contains NULL elements, or if its number
 * of dimensions is anything other than 0 or 1.
 * Note that the argument x is evaluated more than once.
 */
#define ARRAYCHECK(x) do { \
if ( ARR_HASNULL(x) ) \
ereport(ERROR, \
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
errmsg("array must not contain nulls"))); \
\
if ( ARR_NDIM(x) != 1 && ARR_NDIM(x) != 0 ) \
ereport(ERROR, \
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), \
errmsg("array must be one-dimensional"))); \
} while(0)
/*
 * ginarrayextract
 *
 * Serves as both the extractValue and the extractQuery function: it breaks
 * the array in argument 0 into its element Datums and stores their count
 * through the pointer in argument 1.
 */
Datum
ginarrayextract(PG_FUNCTION_ARGS)
{
	uint32	   *nentries = (uint32 *) PG_GETARG_POINTER(1);
	ArrayType  *arr;
	Datum	   *elems = NULL;
	int16		typlen;
	bool		typbyval;
	char		typalign;

	/*
	 * Work on a private copy: the returned elements may point into the
	 * array, so it has to stay valid for the whole operation.
	 */
	arr = PG_GETARG_ARRAYTYPE_P_COPY(0);

	ARRAYCHECK(arr);

	get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign);

	deconstruct_array(arr, ARR_ELEMTYPE(arr),
					  typlen, typbyval, typalign,
					  &elems, NULL, (int *) nentries);

	/* deliberately not freeing arr: elems[i] points into it */
	PG_RETURN_POINTER(elems);
}
/*
 * ginarrayconsistent
 *
 * Consistent function for arrays.  Argument 0 is the check[] array,
 * argument 1 the strategy number, argument 2 the query array.
 *
 * Overlap and Contained always answer true; Contains answers true only if
 * every element of check[] is set.  Any other strategy raises an error.
 */
Datum
ginarrayconsistent(PG_FUNCTION_ARGS)
{
	bool	   *check = (bool *) PG_GETARG_POINTER(0);
	StrategyNumber strategy = PG_GETARG_UINT16(1);
	ArrayType  *query = PG_GETARG_ARRAYTYPE_P(2);
	int			nelems = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
	int			matched = FALSE;
	int			pos;

	/* the array was already validated by the preceding ginarrayextract call */

	if (strategy == GinOverlapStrategy || strategy == GinContainedStrategy)
	{
		/* at least one element of check[] is true, so the answer is true */
		matched = TRUE;
	}
	else if (strategy == GinContainsStrategy)
	{
		/* stop at the first query element that is missing */
		pos = 0;
		while (pos < nelems && check[pos])
			pos++;
		matched = (pos == nelems) ? TRUE : FALSE;
	}
	else
	{
		elog(ERROR, "ginarrayconsistent: unknown strategy number: %d", strategy);
	}

	PG_RETURN_BOOL(matched);
}
/*
 * Returns a type cache entry for element_type that carries equality-operator
 * function info.  The passed-in entry is reused when it already describes
 * element_type; otherwise a lookup is done, and an error is raised if the
 * type has no usable equality operator.
 */
static TypeCacheEntry *
fillTypeCacheEntry(TypeCacheEntry *typentry, Oid element_type)
{
	bool		reusable = (typentry != NULL && typentry->type_id == element_type);

	if (!reusable)
	{
		typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO);

		if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("could not identify an equality operator for type %s", format_type_be(element_type))));
	}

	return typentry;
}
/*
 * Invokes the equality function prepared in locfcinfo on the pair (a, b),
 * both passed as non-null arguments, and returns its boolean result.
 */
static bool
typeEQ(FunctionCallInfoData *locfcinfo, Datum a, Datum b)
{
	Datum		result;

	locfcinfo->arg[0] = a;
	locfcinfo->argnull[0] = false;
	locfcinfo->arg[1] = b;
	locfcinfo->argnull[1] = false;
	locfcinfo->isnull = false;

	result = FunctionCallInvoke(locfcinfo);

	return DatumGetBool(result);
}
/*
 * Returns true if arrays a and b have at least one element in common,
 * according to the equality function stored in typentry.
 * Raises an error if the element types differ or either array fails
 * ARRAYCHECK.
 */
static bool
ginArrayOverlap(TypeCacheEntry *typentry, ArrayType *a, ArrayType *b)
{
	Datum	   *elemsA,
			   *elemsB;
	int			countA,
				countB,
				ia,
				ib;
	bool		found = FALSE;
	FunctionCallInfoData locfcinfo;

	if (ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b))
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("cannot compare arrays of different element types")));

	ARRAYCHECK(a);
	ARRAYCHECK(b);

	deconstruct_array(a, ARR_ELEMTYPE(a),
					  typentry->typlen, typentry->typbyval, typentry->typalign,
					  &elemsA, NULL, &countA);
	deconstruct_array(b, ARR_ELEMTYPE(b),
					  typentry->typlen, typentry->typbyval, typentry->typalign,
					  &elemsB, NULL, &countB);

	InitFunctionCallInfoData(locfcinfo, &typentry->eq_opr_finfo, 2,
							 NULL, NULL);

	/* pairwise comparison; stop as soon as any pair is equal */
	for (ia = 0; ia < countA && !found; ia++)
	{
		for (ib = 0; ib < countB; ib++)
		{
			if (typeEQ(&locfcinfo, elemsA[ia], elemsB[ib]))
			{
				found = TRUE;
				break;
			}
		}
	}

	pfree(elemsA);
	pfree(elemsB);

	return found;
}
/*
 * Returns true if every element of b is equal to some element of a,
 * according to the equality function stored in typentry.
 * Raises an error if the element types differ or either array fails
 * ARRAYCHECK.
 */
static bool
ginArrayContains(TypeCacheEntry *typentry, ArrayType *a, ArrayType *b)
{
	Datum	   *elemsA,
			   *elemsB;
	int			countA,
				countB,
				ia,
				ib,
				matched = 0;
	FunctionCallInfoData locfcinfo;

	if (ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b))
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("cannot compare arrays of different element types")));

	ARRAYCHECK(a);
	ARRAYCHECK(b);

	deconstruct_array(a, ARR_ELEMTYPE(a),
					  typentry->typlen, typentry->typbyval, typentry->typalign,
					  &elemsA, NULL, &countA);
	deconstruct_array(b, ARR_ELEMTYPE(b),
					  typentry->typlen, typentry->typbyval, typentry->typalign,
					  &elemsB, NULL, &countB);

	InitFunctionCallInfoData(locfcinfo, &typentry->eq_opr_finfo, 2,
							 NULL, NULL);

	/* count the elements of b that can be found somewhere in a */
	for (ib = 0; ib < countB; ib++)
	{
		bool		present = FALSE;

		for (ia = 0; ia < countA; ia++)
		{
			if (typeEQ(&locfcinfo, elemsB[ib], elemsA[ia]))
			{
				present = TRUE;
				break;
			}
		}

		if (present)
			matched++;
	}

	pfree(elemsA);
	pfree(elemsB);

	if (matched == countB)
		return TRUE;
	return FALSE;
}
/*
 * SQL-callable: true if the two argument arrays share at least one element.
 * The type cache entry is kept in fn_extra so later calls can reuse it.
 */
Datum
arrayoverlap(PG_FUNCTION_ARGS)
{
	ArrayType  *left = PG_GETARG_ARRAYTYPE_P(0);
	ArrayType  *right = PG_GETARG_ARRAYTYPE_P(1);
	TypeCacheEntry *typentry;
	bool		overlaps;

	typentry = fillTypeCacheEntry(fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(left));
	fcinfo->flinfo->fn_extra = (void *) typentry;

	overlaps = ginArrayOverlap(typentry, left, right);

	PG_FREE_IF_COPY(left, 0);
	PG_FREE_IF_COPY(right, 1);

	PG_RETURN_BOOL(overlaps);
}
/*
 * SQL-callable: true if the first array contains every element of the
 * second.  The type cache entry is kept in fn_extra so later calls can
 * reuse it.
 */
Datum
arraycontains(PG_FUNCTION_ARGS)
{
	ArrayType  *left = PG_GETARG_ARRAYTYPE_P(0);
	ArrayType  *right = PG_GETARG_ARRAYTYPE_P(1);
	TypeCacheEntry *typentry;
	bool		contains;

	typentry = fillTypeCacheEntry(fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(left));
	fcinfo->flinfo->fn_extra = (void *) typentry;

	contains = ginArrayContains(typentry, left, right);

	PG_FREE_IF_COPY(left, 0);
	PG_FREE_IF_COPY(right, 1);

	PG_RETURN_BOOL(contains);
}
/*
 * SQL-callable: true if every element of the first array is present in the
 * second, i.e. arraycontains with the operands swapped.  The type cache
 * entry is kept in fn_extra so later calls can reuse it.
 */
Datum
arraycontained(PG_FUNCTION_ARGS)
{
	ArrayType  *left = PG_GETARG_ARRAYTYPE_P(0);
	ArrayType  *right = PG_GETARG_ARRAYTYPE_P(1);
	TypeCacheEntry *typentry;
	bool		contained;

	typentry = fillTypeCacheEntry(fcinfo->flinfo->fn_extra, ARR_ELEMTYPE(left));
	fcinfo->flinfo->fn_extra = (void *) typentry;

	/* note the swapped operands */
	contained = ginArrayContains(typentry, right, left);

	PG_FREE_IF_COPY(left, 0);
	PG_FREE_IF_COPY(right, 1);

	PG_RETURN_BOOL(contained);
}
This diff is collapsed.
/*-------------------------------------------------------------------------
*
* ginbulk.c
* routines for fast build of inverted index
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/gin.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "utils/memutils.h"
#include "access/tuptoaster.h"
#define DEF_NENTRY 128
#define DEF_NPTR 4
/*
 * Puts a build accumulator into its initial empty state: no entries stored,
 * read cursor at the start, and a zeroed entry array with room for
 * DEF_NENTRY entries whose size is recorded in allocatedMemory.
 * The ginstate field is not touched.
 */
void
ginInitBA(BuildAccumulator *accum)
{
	accum->entries = (EntryAccumulator *) palloc0(sizeof(EntryAccumulator) * DEF_NENTRY);
	accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY;
	accum->length = DEF_NENTRY;
	accum->number = 0;
	accum->curget = 0;
}
/*
 * Appends one heap item pointer to an entry's list, doubling the list when
 * it is full.  For robustness it also checks whether the item pointers are
 * still arriving in ascending order; if not, the entry is flagged so that
 * the list gets sorted before it is handed out.
 */
static void
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr)
{
	if (entry->number >= entry->length)
	{
		/* doubling adds exactly the old length's worth of memory */
		accum->allocatedMemory += sizeof(ItemPointerData) * entry->length;
		entry->length *= 2;
		entry->list = (ItemPointerData *) repalloc(entry->list,
									sizeof(ItemPointerData) * entry->length);
	}

	if (!entry->shouldSort)
	{
		ItemPointer last = entry->list + entry->number - 1;
		int			cmp = compareItemPointers(last, heapptr);

		Assert(cmp != 0);

		if (cmp > 0)
			entry->shouldSort = TRUE;
	}

	entry->list[entry->number] = *heapptr;
	entry->number += 1;
}
/*
 * Finds or stores one entry of an indexed value.
 *
 * The entry is assumed to belong somewhere between position 'low' and the
 * end of the accumulator's sorted entry array.  If an equal entry exists,
 * heapptr is appended to it; otherwise a new entry holding heapptr is
 * inserted at the proper place.  Returns the position of the found or
 * inserted entry.
 */
static uint32
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry, uint32 low)
{
	uint32		high = accum->number;
	EntryAccumulator *slot;

	/* binary search in [low, high) */
	while (low < high)
	{
		uint32		probe = low + ((high - low) / 2);
		int			cmp = compareEntries(accum->ginstate, entry,
										 accum->entries[probe].value);

		if (cmp == 0)
		{
			ginInsertData(accum, accum->entries + probe, heapptr);
			return probe;
		}

		if (cmp > 0)
			low = probe + 1;
		else
			high = probe;
	}

	/* not found: 'high' is now the insertion position */
	if (accum->number >= accum->length)
	{
		accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length;
		accum->length *= 2;
		accum->entries = (EntryAccumulator *) repalloc(accum->entries,
								sizeof(EntryAccumulator) * accum->length);
	}

	/* take the slot address only after the array may have moved */
	slot = accum->entries + high;

	/* shift the tail up by one to open a gap, unless appending at the end */
	if (high != accum->number)
		memmove(slot + 1, slot, sizeof(EntryAccumulator) * (accum->number - high));

	slot->value = entry;
	slot->length = DEF_NPTR;
	slot->number = 1;
	slot->shouldSort = FALSE;
	slot->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
	slot->list[0] = *heapptr;

	accum->allocatedMemory += sizeof(ItemPointerData) * DEF_NPTR;
	accum->number++;

	return high;
}
/*
 * Records one heap pointer under each of the given entries.
 * The entries array must be sorted: each search starts at the position
 * where the previous entry was found or inserted.
 */
void
ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry)
{
	uint32		searchFrom = 0;
	uint32		k = 0;

	while (k < nentry)
	{
		searchFrom = ginInsertEntry(accum, heapptr, entries[k], searchFrom);
		k++;
	}
}
static int
qsortCompareItemPointers( const void *a, const void *b ) {
int res = compareItemPointers( (ItemPointer)a, (ItemPointer)b );
Assert( res!=0 );
return res;
}
/*
 * Hands out the accumulated entries one at a time, in array order.
 *
 * On each call the next entry's key is stored in *value, the number of its
 * item pointers in *n, and its item pointer list is returned (sorted first
 * if the entry was flagged as out of order).  The list returned by the
 * previous call is freed.  Returns NULL when no entries are left.
 */
ItemPointerData *
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n)
{
	EntryAccumulator *cur;

	if (accum->curget >= accum->number)
		return NULL;

	/* the list handed out last time is no longer needed */
	if (accum->curget > 0)
		pfree(accum->entries[accum->curget - 1].list);

	cur = &accum->entries[accum->curget];
	accum->curget++;

	*n = cur->number;
	*value = cur->value;

	if (cur->shouldSort && cur->number > 1)
		qsort(cur->list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);

	return cur->list;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*-------------------------------------------------------------------------
*
* gininsert.c
* insert routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/gin.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "utils/memutils.h"
#include "access/tuptoaster.h"
/*
 * Working state of an index build; passed to ginBuildCallback() as the
 * opaque state pointer.
 */
typedef struct {
/* opclass state, filled by initGinState() */
GinState ginstate;
/* running sum of the entry counts returned by ginHeapTupleBulkInsert() */
double indtuples;
/* context in which entries are extracted and accumulated */
MemoryContext tmpCtx;
/* in-memory accumulator of (entry, item pointers) collected so far */
BuildAccumulator accum;
} GinBuildState;
/*
 * Creates a posting tree consisting of a single leaf data page holding
 * items[0..nitems-1] and returns the block number of that page.
 * The function assumes that items[] fits on one page; it does not check.
 * Unless the index is temporary, the page creation is WAL-logged.
 */
static BlockNumber
createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
BlockNumber blkno;
Buffer buffer = GinNewBuffer(index);
Page page;
/* page initialization and its WAL record happen in one critical section */
START_CRIT_SECTION();
GinInitBuffer( buffer, GIN_DATA|GIN_LEAF );
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems );
GinPageGetOpaque(page)->maxoff = nitems;
if (!index->rd_istemp) {
XLogRecPtr recptr;
XLogRecData rdata[2];
ginxlogCreatePostingTree data;
data.node = index->rd_node;
data.blkno = blkno;
data.nitem = nitems;
/* record part 1: fixed-size header describing the new page */
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogCreatePostingTree);
rdata[0].next = &rdata[1];
/* record part 2: the item pointers themselves */
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) items;
rdata[1].len = sizeof(ItemPointerData) * nitems;
rdata[1].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
return blkno;
}
/*
 * Builds a replacement for the entry tuple 'old' that also covers the
 * item pointers in items[0..nitem-1].
 *
 * If GinFormTuple() can build a tuple for the combined number of postings,
 * the result holds the merged posting list.  If it returns NULL (the tuple
 * would be too big; the maximum size is defined in GinFormTuple()), the old
 * posting list is moved into a new posting tree, the new items are inserted
 * into that tree, and the result is a tuple pointing to the tree.
 *
 * The 'stack' argument is not used in this function body.
 * The returned tuple is newly formed; 'old' is not modified here.
 */
static IndexTuple
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) {
bool isnull;
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull);
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old));
if ( res ) {
/* good, small enough */
MergeItemPointers( GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem
);
GinSetNPosting(res, nitem + GinGetNPosting(old));
} else {
BlockNumber postingRoot;
GinPostingTreeScan *gdi;
/* the posting list has become too big, so convert it to a posting tree */
res = GinFormTuple(ginstate, key, NULL, 0);
/* the tree's first page starts out with the old postings only */
postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old));
GinSetPostingTree(res, postingRoot);
/* then the new item pointers are added through the regular tree insert */
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
gdi->btree.isBuild = isBuild;
insertItemPointer(gdi, items, nitem);
pfree(gdi);
}
return res;
}
/*
 * Inserts a single entry (key) into the index; the entry may carry more
 * than one ItemPointer.
 *
 *	index		index relation
 *	ginstate	opclass state for the index
 *	value		the entry (key) to insert
 *	items		item pointers to attach to the entry
 *	nitem		number of elements in items[]
 *	isBuild		passed on to the btree code as its isBuild flag
 *
 * Three cases are handled:
 *	- the entry exists and already points to a posting tree: the items are
 *	  inserted into that tree and the entry tuple is left alone;
 *	- the entry exists with an inline posting list: a replacement tuple is
 *	  built by addItemPointersToTuple() and inserted with isDelete set;
 *	- the entry does not exist: a new tuple is formed and inserted.
 *
 * Raises "huge tuple" if a tuple holding the key plus one item pointer
 * cannot be formed or reaches GinMaxItemSize.
 */
static void
ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild)
{
	GinBtreeData btree;
	GinBtreeStack *stack;
	IndexTuple	itup;
	Page		page;

	prepareEntryScan(&btree, index, value, ginstate);

	stack = ginFindLeafPage(&btree, NULL);
	page = BufferGetPage(stack->buffer);

	if (btree.findItem(&btree, stack))
	{
		/* found entry */
		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));

		if (GinIsPostingTree(itup))
		{
			GinPostingTreeScan *gdi;
			BlockNumber rootPostingTree = GinGetPostingTree(itup);

			/* the entry tuple stays as it is: release the whole stack */
			LockBuffer(stack->buffer, GIN_UNLOCK);
			freeGinBtreeStack(stack);

			/* insert into posting tree */
			gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
			gdi->btree.isBuild = isBuild;
			insertItemPointer(gdi, items, nitem);

			/*
			 * Free the scan descriptor, as addItemPointersToTuple() does
			 * after the same call sequence; otherwise it lingers until the
			 * surrounding memory context is reset.
			 */
			pfree(gdi);

			return;
		}

		itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);

		/* the new tuple replaces the old one */
		btree.isDelete = TRUE;
	}
	else
	{
		/* We assume that a tuple can store at least one item pointer */
		itup = GinFormTuple(ginstate, value, items, 1);
		if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize)
			elog(ERROR, "huge tuple");

		if (nitem > 1)
		{
			IndexTuple	previtup = itup;

			itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
			pfree(previtup);
		}
	}

	btree.entry = itup;
	ginInsertValue(&btree, stack);
	pfree(itup);
}
/*
 * Stores the entries of one indexed value in the in-memory accumulator.
 * Used during index creation only, not during normal inserts.
 * Returns the number of entries extracted from the value.
 */
static uint32
ginHeapTupleBulkInsert(BuildAccumulator *accum, Datum value, ItemPointer heapptr)
{
	uint32		nentries;
	Datum	   *entries = extractEntriesSU(accum->ginstate, value, &nentries);

	if (nentries > 0)
	{
		ginInsertRecordBA(accum, heapptr, entries, nentries);
		pfree(entries);
	}

	/* zero when there was nothing to insert */
	return nentries;
}
/*
 * Per-heap-tuple callback for the index build scan.
 *
 * Accumulates the entries of the tuple's indexed value in memory (NULL
 * values are skipped).  Once the accumulator has grown to half of
 * maintenance_work_mem, all accumulated entries are written to the index,
 * the temporary context is reset and the accumulator is re-initialized.
 */
static void
ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
				 bool *isnull, bool tupleIsAlive, void *state)
{
	GinBuildState *buildstate = (GinBuildState *) state;
	MemoryContext callerCtx;

	if (isnull[0])
		return;

	callerCtx = MemoryContextSwitchTo(buildstate->tmpCtx);

	buildstate->indtuples += ginHeapTupleBulkInsert(&buildstate->accum, values[0], &htup->t_self);

	/*
	 * Only half of maintenance_work_mem is used as the limit, because there
	 * are some leaks during insertion and value extraction.
	 */
	if (buildstate->accum.allocatedMemory >= maintenance_work_mem*1024L/2L)
	{
		ItemPointerData *list;
		Datum		entry;
		uint32		nlist;

		for (;;)
		{
			list = ginGetEntry(&buildstate->accum, &entry, &nlist);
			if (list == NULL)
				break;
			ginEntryInsert(index, &buildstate->ginstate, entry, list, nlist, TRUE);
		}

		MemoryContextReset(buildstate->tmpCtx);
		ginInitBA(&buildstate->accum);
	}

	MemoryContextSwitchTo(callerCtx);
}
/*
 * ginbuild: builds a new GIN index.
 *
 * Arguments are the heap relation, the index relation and the IndexInfo.
 * Raises an error if the index already contains blocks.  The root page is
 * created first (WAL-logged unless the index is temporary), then the heap is
 * scanned with ginBuildCallback() collecting entries, and finally whatever
 * is still in the accumulator is written to the index.
 */
Datum
ginbuild(PG_FUNCTION_ARGS) {
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
double reltuples;
GinBuildState buildstate;
Buffer buffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
MemoryContext oldCtx;
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
initGinState(&buildstate.ginstate, index);
/* initialize the root page */
buffer = GinNewBuffer(index);
START_CRIT_SECTION();
GinInitBuffer(buffer, GIN_LEAF);
if (!index->rd_istemp) {
XLogRecPtr recptr;
XLogRecData rdata;
Page page;
/* the WAL record carries only the relation's RelFileNode */
rdata.buffer = InvalidBuffer;
rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
page = BufferGetPage(buffer);
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
/* build the index */
buildstate.indtuples = 0;
/*
 * create a temporary memory context that holds the accumulated entries;
 * ginBuildCallback() resets it each time the accumulator is flushed to
 * the index (not once per tuple)
 */
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.accum.ginstate = &buildstate.ginstate;
ginInitBA( &buildstate.accum );
/* do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
ginBuildCallback, (void *) &buildstate);
/* flush the entries still sitting in the accumulator after the scan */
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
while( (list=ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL )
ginEntryInsert(index, &buildstate.ginstate, entry, list, nlist, TRUE);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(buildstate.tmpCtx);
/* since we just counted the # of tuples, may as well update stats */
IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples);
PG_RETURN_VOID();
}
/*
 * Inserts one indexed value during a normal (non-build) insertion: every
 * entry extracted from the value is inserted into the index together with
 * the single heap item pointer.  Returns the number of entries extracted.
 */
static uint32
ginHeapTupleInsert(Relation index, GinState *ginstate, Datum value, ItemPointer item)
{
	uint32		nentries;
	Datum	   *entries = extractEntriesSU(ginstate, value, &nentries);
	uint32		k;

	/* with no entries the loop does not run and zero is returned */
	for (k = 0; k < nentries; k++)
		ginEntryInsert(index, ginstate, entries[k], item, 1, FALSE);

	return nentries;
}
/*
 * gininsert -- insert one heap tuple's value into the index.
 *
 * Arguments (fmgr): 0 = index relation, 1 = values array, 2 = isnull array,
 * 3 = heap tuple pointer.  Arguments 4 and 5 are fetched only when NOT_USED
 * is defined.
 *
 * Only the first column is looked at.  A NULL value is not indexed and the
 * function returns false.  Otherwise the result is true iff at least one
 * entry was inserted.  All work is done in a private memory context that is
 * deleted before returning.
 */
Datum
gininsert(PG_FUNCTION_ARGS) {
	Relation	index = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *isnull = (bool *) PG_GETARG_POINTER(2);
	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
	bool		checkUnique = PG_GETARG_BOOL(5);
#endif
	GinState	ginstate;
	MemoryContext workCtx;
	MemoryContext callerCtx;
	uint32		ninserted;

	/* NULLs are never stored in the index */
	if ( isnull[0] )
		PG_RETURN_BOOL(false);

	/* everything allocated below lives in a throw-away context */
	workCtx = AllocSetContextCreate(CurrentMemoryContext,
									"Gin insert temporary context",
									ALLOCSET_DEFAULT_MINSIZE,
									ALLOCSET_DEFAULT_INITSIZE,
									ALLOCSET_DEFAULT_MAXSIZE);
	callerCtx = MemoryContextSwitchTo(workCtx);

	initGinState(&ginstate, index);
	ninserted = ginHeapTupleInsert(index, &ginstate, values[0], ht_ctid);

	/* back to the caller's context, then drop all temporary allocations */
	MemoryContextSwitchTo(callerCtx);
	MemoryContextDelete(workCtx);

	PG_RETURN_BOOL(ninserted != 0);
}
/*-------------------------------------------------------------------------
*
* ginscan.c
* routines to manage scans of inverted index relations
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/gin.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "utils/memutils.h"
/*
 * ginbeginscan -- start a scan of a GIN index.
 *
 * Arguments (fmgr): 0 = index relation, 1 = number of scan keys,
 * 2 = scan key array.  Simply returns the scan descriptor produced by
 * RelationGetIndexScan(); no GIN-private state is set up here.
 */
Datum
ginbeginscan(PG_FUNCTION_ARGS) {
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	int			keysz = PG_GETARG_INT32(1);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);

	PG_RETURN_POINTER(RelationGetIndexScan(rel, keysz, scankey));
}
/*
 * Initialise one GinScanKey from the entries extracted from a query.
 *
 * Allocates the per-entry result flags (zeroed) and the scan-entry array,
 * records the strategy and query, and puts the key and each of its entries
 * into the "not started" state (invalid current item, no buffer, no list).
 *
 * If an entry compares equal to an earlier entry of the same key, its
 * "master" pointer is set to the first such earlier entry; otherwise it is
 * NULL.
 */
static void
fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
			 Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy ) {
	uint32		cur;

	key->nentries = nEntryValues;
	key->entryRes = (bool*)palloc0( sizeof(bool) * nEntryValues );
	key->scanEntry = (GinScanEntry) palloc( sizeof(GinScanEntryData) * nEntryValues );
	key->strategy = strategy;
	key->query = query;
	key->firstCall = TRUE;
	ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );

	for (cur = 0; cur < nEntryValues; cur++) {
		GinScanEntry scanEntry = key->scanEntry + cur;
		uint32		prev;

		/* each entry reports its match result through its own flag slot */
		scanEntry->pval = key->entryRes + cur;
		scanEntry->entry = entryValues[cur];
		ItemPointerSet( &(scanEntry->curItem), InvalidBlockNumber, InvalidOffsetNumber );
		scanEntry->offset = InvalidOffsetNumber;
		scanEntry->buffer = InvalidBuffer;
		scanEntry->list = NULL;
		scanEntry->nlist = 0;

		/* link to the first equal entry earlier in this scan key, if any */
		scanEntry->master = NULL;
		for (prev = 0; prev < cur && scanEntry->master == NULL; prev++) {
			if ( compareEntries( ginstate, entryValues[cur], entryValues[prev] ) == 0 )
				scanEntry->master = key->scanEntry + prev;
		}
	}
}
/*
 * Return every scan key in keys[0..nkeys-1] to its initial, not-yet-started
 * state.
 *
 * For each key the firstCall flag is set again and the current item is
 * invalidated.  For each entry of the key, a buffer still held by the entry
 * is released and the entry's position, buffer, list pointer and list
 * length are cleared.  The list memory itself is not freed here.
 *
 * A NULL keys pointer is accepted and does nothing.
 */
static void
resetScanKeys(GinScanKey keys, uint32 nkeys) {
	uint32		i, j;

	if ( keys == NULL )
		return;

	for(i=0;i<nkeys;i++) {
		GinScanKey key = keys + i;

		key->firstCall = TRUE;
		ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );

		for(j=0;j<key->nentries;j++) {
			/*
			 * Release the buffer of the entry being reset.  This must be
			 * indexed by j (the entry), not i (the key): using the key
			 * index would unpin some other entry's buffer, possibly read
			 * past the entry array, and leak this entry's pin.
			 */
			if ( key->scanEntry[j].buffer != InvalidBuffer )
				ReleaseBuffer( key->scanEntry[j].buffer );

			ItemPointerSet( &(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber );
			key->scanEntry[j].offset = InvalidOffsetNumber;
			key->scanEntry[j].buffer = InvalidBuffer;
			key->scanEntry[j].list = NULL;
			key->scanEntry[j].nlist = 0;
		}
	}
}
/*
 * Release everything held by an array of scan keys and free the array.
 *
 * For every entry a still-held buffer is released.  The scan-entry array
 * of each key and the key array itself are always freed.  The per-entry
 * lists and the entryRes array are freed only when removeRes is true
 * (presumably because copies made by copyScanKeys() share them with the
 * original -- verify against callers).
 *
 * A NULL keys pointer is accepted and does nothing.
 */
static void
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) {
	uint32		keyno;

	if ( keys == NULL )
		return;

	for (keyno = 0; keyno < nkeys; keyno++) {
		GinScanKey	key = &keys[keyno];
		uint32		entno;

		for (entno = 0; entno < key->nentries; entno++) {
			/* drop the buffer pin, if this entry still holds one */
			if ( key->scanEntry[entno].buffer != InvalidBuffer )
				ReleaseBuffer( key->scanEntry[entno].buffer );

			if ( key->scanEntry[entno].list && removeRes )
				pfree(key->scanEntry[entno].list);
		}

		if ( removeRes )
			pfree(key->entryRes);

		pfree(key->scanEntry);
	}

	pfree(keys);
}
/*
 * Build the GIN scan keys (so->keys, so->nkeys) from scan->keyData.
 *
 * Each index scan key is passed to the extractQuery support function to
 * obtain its entries.  Keys that yield no entries are skipped; the rest
 * are initialised with fillScanKey().
 *
 * Raises an error if a scan key is NULL, or if no key yields any entry
 * (that would require a full index scan, which is not supported).
 */
void
newScanKey( IndexScanDesc scan ) {
	GinScanOpaque so = (GinScanOpaque) scan->opaque;
	ScanKey		scankey = scan->keyData;
	uint32		nkeys = 0;
	int			i;

	/* room for the worst case: every scan key produces a GIN key */
	so->keys = (GinScanKey) palloc( scan->numberOfKeys * sizeof(GinScanKeyData) );

	for (i = 0; i < scan->numberOfKeys; i++) {
		ScanKey		skey = scankey + i;
		Datum	   *entryValues;
		uint32		nEntryValues;

		if ( skey->sk_flags & SK_ISNULL )
			elog(ERROR, "Gin doesn't support NULL as scan key");
		Assert( skey->sk_attno == 1 );

		/* the support function reports the entry count via nEntryValues */
		entryValues = (Datum*)DatumGetPointer(
						FunctionCall3(
							&so->ginstate.extractQueryFn,
							skey->sk_argument,
							PointerGetDatum( &nEntryValues ),
							UInt16GetDatum(skey->sk_strategy)
						)
					);

		/* a key without entries would mean a full scan: leave it out */
		if ( entryValues != NULL && nEntryValues != 0 ) {
			fillScanKey( &so->ginstate, so->keys + nkeys, skey->sk_argument,
						 entryValues, nEntryValues, skey->sk_strategy );
			nkeys++;
		}
	}

	so->nkeys = nkeys;

	if ( nkeys == 0 )
		elog(ERROR, "Gin doesn't support full scan due to it's awful inefficiency");
}
Datum
ginrescan(PG_FUNCTION_ARGS) {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
GinScanOpaque so;
so = (GinScanOpaque) scan->opaque;
if ( so == NULL ) {
/* if called from ginbeginscan */
so = (GinScanOpaque)palloc( sizeof(GinScanOpaqueData) );
so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin scan temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
initGinState(&so->ginstate, scan->indexRelation);
scan->opaque = so;
} else {
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
}
so->markPos=so->keys=NULL;
if ( scankey && scan->numberOfKeys > 0 ) {
memmove(scan->keyData, scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
}
PG_RETURN_VOID();
}
/*
 * ginendscan -- release all GIN-private resources of a scan.
 *
 * Frees the current keys and the marked position, deletes the scan's
 * temporary memory context and frees the private state itself.  Does
 * nothing if the scan has no private state.
 */
Datum
ginendscan(PG_FUNCTION_ARGS) {
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	GinScanOpaque so = (GinScanOpaque) scan->opaque;

	/* nothing was ever set up for this scan */
	if ( so == NULL )
		PG_RETURN_VOID();

	freeScanKeys(so->keys, so->nkeys, TRUE);
	freeScanKeys(so->markPos, so->nkeys, FALSE);

	MemoryContextDelete(so->tempCtx);
	pfree(so);

	PG_RETURN_VOID();
}
/*
 * Make a copy of an array of scan keys.
 *
 * The key array and each key's scan-entry array are freshly allocated and
 * filled by memcpy, so every other pointer member (e.g. entryRes, the
 * entries' lists) still refers to the same memory as in the original.
 * For each entry holding a buffer, the buffer's reference count is
 * incremented so that original and copy each own a reference.  "master"
 * links are re-pointed into the copied entry array.
 */
static GinScanKey
copyScanKeys( GinScanKey keys, uint32 nkeys ) {
	GinScanKey	newkeys = (GinScanKey)palloc( sizeof(GinScanKeyData) * nkeys );
	uint32		keyno;

	memcpy( newkeys, keys, sizeof(GinScanKeyData) * nkeys );

	for (keyno = 0; keyno < nkeys; keyno++) {
		GinScanKey	src = keys + keyno;
		GinScanKey	dst = newkeys + keyno;
		uint32		entno;

		dst->scanEntry = (GinScanEntry)palloc(sizeof(GinScanEntryData) * src->nentries );
		memcpy( dst->scanEntry, src->scanEntry, sizeof(GinScanEntryData) * src->nentries );

		for (entno = 0; entno < src->nentries; entno++) {
			GinScanEntry srcEntry = src->scanEntry + entno;

			/* the copy holds its own reference to the buffer */
			if ( srcEntry->buffer != InvalidBuffer )
				IncrBufferRefCount( srcEntry->buffer );

			/* translate the master link into the new entry array */
			if ( srcEntry->master ) {
				int		masterIdx = srcEntry->master - src->scanEntry;

				dst->scanEntry[entno].master = dst->scanEntry + masterIdx;
			}
		}
	}

	return newkeys;
}
/*
 * ginmarkpos -- remember the current scan position.
 *
 * Any previously marked position is freed, then the current keys are
 * copied into so->markPos.
 */
Datum
ginmarkpos(PG_FUNCTION_ARGS) {
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	GinScanOpaque opaque = (GinScanOpaque) scan->opaque;

	/* discard the old mark before taking a new snapshot of the keys */
	freeScanKeys(opaque->markPos, opaque->nkeys, FALSE);
	opaque->markPos = copyScanKeys( opaque->keys, opaque->nkeys );

	PG_RETURN_VOID();
}
/*
 * ginrestrpos -- go back to the previously marked scan position.
 *
 * The current keys are freed (without freeing the result arrays and
 * lists), then replaced by a copy of so->markPos.
 */
Datum
ginrestrpos(PG_FUNCTION_ARGS) {
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	GinScanOpaque opaque = (GinScanOpaque) scan->opaque;

	/* drop the current position, then reinstate a copy of the mark */
	freeScanKeys(opaque->keys, opaque->nkeys, FALSE);
	opaque->keys = copyScanKeys( opaque->markPos, opaque->nkeys );

	PG_RETURN_VOID();
}
/*-------------------------------------------------------------------------
*
* ginutil.c
* utility routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/gin.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
/*
 * Fill a GinState for the given index.
 *
 * Only single-column indexes are accepted; anything else raises an error.
 * The index's tuple descriptor is remembered (not copied), and the lookup
 * info of the four support functions of column 1 -- compare, extractValue,
 * extractQuery and consistent -- is copied into the state, using the
 * current memory context.
 */
void
initGinState( GinState *state, Relation index ) {
	int			natts = index->rd_att->natts;

	if ( natts != 1 )
		elog(ERROR, "numberOfAttributes %d != 1",
			 natts);

	state->tupdesc = index->rd_att;

	/* entry comparison */
	fmgr_info_copy(&(state->compareFn),
				   index_getprocinfo(index, 1, GIN_COMPARE_PROC),
				   CurrentMemoryContext);

	/* indexed value -> entries */
	fmgr_info_copy(&(state->extractValueFn),
				   index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC),
				   CurrentMemoryContext);

	/* query -> entries */
	fmgr_info_copy(&(state->extractQueryFn),
				   index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC),
				   CurrentMemoryContext);

	/* consistency check */
	fmgr_info_copy(&(state->consistentFn),
				   index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
				   CurrentMemoryContext);
}
/*
 * Allocate a new page, either by recycling a page reported by the free
 * space map or by extending the index file.
 *
 * The returned buffer is already pinned and exclusive-locked.  The caller
 * is responsible for initializing the page by calling GinInitBuffer.
 */
Buffer
GinNewBuffer(Relation index) {
	Buffer		buffer;
	bool		needLock;
	BlockNumber	blkno;

	/* First, keep asking the FSM for candidates until it runs dry */
	while ( (blkno = GetFreeIndexPage(&index->rd_node)) != InvalidBlockNumber ) {
		buffer = ReadBuffer(index, blkno);

		/*
		 * Someone else may already have recycled this page, in which case
		 * the buffer may be locked; so only try to lock, never wait.
		 */
		if ( ConditionalLockBuffer(buffer) ) {
			Page		page = BufferGetPage(buffer);

			/* usable if never initialized, or if marked deleted */
			if ( PageIsNew(page) || GinPageIsDeleted(page) )
				return buffer;

			LockBuffer(buffer, GIN_UNLOCK);
		}

		/* Can't use it, so release buffer and try the next candidate */
		ReleaseBuffer(buffer);
	}

	/* Must extend the file; non-local relations need the extension lock */
	needLock = !RELATION_IS_LOCAL(index);

	if ( needLock )
		LockRelationForExtension(index, ExclusiveLock);

	buffer = ReadBuffer(index, P_NEW);
	LockBuffer(buffer, GIN_EXCLUSIVE);

	if ( needLock )
		UnlockRelationForExtension(index, ExclusiveLock);

	return buffer;
}
/*
 * Initialise "page" (of size pageSize) as an empty GIN page.
 *
 * The page is set up with PageInit(), passing sizeof(GinPageOpaqueData) as
 * the special-space size.  The GIN opaque data is then zeroed, given the
 * flags "f", and its right link is set to InvalidBlockNumber.
 */
void
GinInitPage(Page page, uint32 f, Size pageSize) {
	GinPageOpaque opaque;

	PageInit(page, pageSize, sizeof(GinPageOpaqueData));

	/* the opaque area can only be located once the page is initialised */
	opaque = GinPageGetOpaque(page);
	memset( opaque, 0, sizeof(GinPageOpaqueData) );

	opaque->rightlink = InvalidBlockNumber;
	opaque->flags = f;
}
/*
 * Initialise the page held in buffer "b" as an empty GIN page with flags
 * "f"; a convenience wrapper around GinInitPage().
 */
void
GinInitBuffer(Buffer b, uint32 f) {
	GinInitPage(BufferGetPage(b),
				f,
				BufferGetPageSize(b));
}
/*
 * Compare two entries with the index's compare support function and
 * return its result converted to int.  The scan-key code in this file
 * treats a result of zero as "equal".
 */
int
compareEntries(GinState *ginstate, Datum a, Datum b) {
	Datum		cmp = FunctionCall2(&ginstate->compareFn, a, b);

	return DatumGetInt32(cmp);
}
static FmgrInfo* cmpDatumPtr=NULL;
static bool needUnique = FALSE;
static int
cmpEntries(const void * a, const void * b) {
int res = DatumGetInt32(
FunctionCall2(
cmpDatumPtr,
*(Datum*)a,
*(Datum*)b
)
);
if ( res == 0 )
needUnique = TRUE;
return res;
}
/*
 * Extract the entries of an indexed value and return them sorted.
 *
 * The extractValue support function produces the entry array and stores
 * the entry count in *nentries.  A NULL array is reported as zero
 * entries.  Arrays with more than one entry are sorted in place with the
 * index's compare function; as a side effect needUnique is reset before
 * the sort and set during it if any two entries compared equal.
 */
Datum*
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries) {
	Datum		extracted = FunctionCall2(&ginstate->extractValueFn,
										  value,
										  PointerGetDatum( nentries ));
	Datum	   *entries = (Datum*)DatumGetPointer(extracted);

	/* no array at all means no entries; nothing to sort either */
	if ( entries == NULL ) {
		*nentries = 0;
		return entries;
	}

	if ( *nentries > 1 ) {
		/* hand the comparator its context through the file-level statics */
		cmpDatumPtr = &ginstate->compareFn;
		needUnique = FALSE;
		qsort(entries, *nentries, sizeof(Datum), cmpEntries);
	}

	return entries;
}
/*
 * Extract the entries of an indexed value, sorted and without duplicates.
 *
 * Sorting is done by extractEntriesS().  If that sort noticed equal
 * entries (needUnique), adjacent duplicates are squeezed out in place and
 * *nentries is reduced accordingly.
 */
Datum*
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) {
	Datum	   *entries = extractEntriesS(ginstate, value, nentries);

	if ( *nentries>1 && needUnique ) {
		uint32		total = *nentries;
		uint32		last = 0;		/* index of the last entry kept so far */
		uint32		scan;

		/*
		 * Walk the sorted array; an entry is kept only if it differs from
		 * the last kept one.  The walk starts at index 0, so the first
		 * entry is compared against itself.
		 */
		for (scan = 0; scan < total; scan++) {
			if ( compareEntries(ginstate, entries[scan], entries[last]) != 0 )
				entries[++last] = entries[scan];
		}

		*nentries = last + 1;
	}

	return entries;
}
/*
 * Analogue of PageGetTempPage() that copies the whole page: returns a
 * freshly palloc'd byte-for-byte duplicate of "page", sized according to
 * the page's own recorded page size.
 */
Page
GinPageGetCopyPage( Page page ) {
	Size		nbytes = PageGetPageSize( page );
	Page		copy = (Page)palloc( nbytes );

	memcpy( copy, page, nbytes );

	return copy;
}
This diff is collapsed.
This diff is collapsed.
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Resource managers definition * Resource managers definition
* *
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.21 2005/11/07 17:36:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.22 2006/05/02 11:28:54 teodor Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "access/multixact.h" #include "access/multixact.h"
#include "access/nbtree.h" #include "access/nbtree.h"
#include "access/gin.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
...@@ -35,7 +36,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = { ...@@ -35,7 +36,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"Heap", heap_redo, heap_desc, NULL, NULL}, {"Heap", heap_redo, heap_desc, NULL, NULL},
{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup}, {"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup},
{"Hash", hash_redo, hash_desc, NULL, NULL}, {"Hash", hash_redo, hash_desc, NULL, NULL},
{"Reserved 13", NULL, NULL, NULL, NULL}, {"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup},
{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup}, {"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup},
{"Sequence", seq_redo, seq_desc, NULL, NULL} {"Sequence", seq_redo, seq_desc, NULL, NULL}
}; };
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.144 2006/03/05 15:58:23 momjian Exp $ * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.145 2006/05/02 11:28:54 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -376,6 +376,13 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) ...@@ -376,6 +376,13 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck)
RelationGetRelationName(OldIndex)))); RelationGetRelationName(OldIndex))));
} }
if (!OldIndex->rd_am->amclusterable)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster on index \"%s\" because access method does not clusterable",
RelationGetRelationName(OldIndex))));
/* /*
* Disallow clustering system relations. This will definitely NOT work * Disallow clustering system relations. This will definitely NOT work
* for shared relations (we have no way to update pg_class rows in other * for shared relations (we have no way to update pg_class rows in other
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/opclasscmds.c,v 1.43 2006/03/14 22:48:18 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/opclasscmds.c,v 1.44 2006/05/02 11:28:54 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -273,11 +273,11 @@ DefineOpClass(CreateOpClassStmt *stmt) ...@@ -273,11 +273,11 @@ DefineOpClass(CreateOpClassStmt *stmt)
else else
{ {
/* /*
* Currently, only GiST allows storagetype different from * Currently, only GiST and GIN allows storagetype different from
* datatype. This hardcoded test should be eliminated in favor of * datatype. This hardcoded test should be eliminated in favor of
* adding another boolean column to pg_am ... * adding another boolean column to pg_am ...
*/ */
if (amoid != GIST_AM_OID) if (!(amoid == GIST_AM_OID || amoid == GIN_AM_OID))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("storage type may not be different from data type for access method \"%s\"", errmsg("storage type may not be different from data type for access method \"%s\"",
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.326 2006/03/31 23:32:06 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.327 2006/05/02 11:28:54 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -2982,7 +2982,16 @@ scan_index(Relation indrel, double num_tuples) ...@@ -2982,7 +2982,16 @@ scan_index(Relation indrel, double num_tuples)
/* /*
* Check for tuple count mismatch. If the index is partial, then it's OK * Check for tuple count mismatch. If the index is partial, then it's OK
* for it to have fewer tuples than the heap; else we got trouble. * for it to have fewer tuples than the heap; else we got trouble.
*
* XXX Hack. Since GIN stores every pointer to heap several times and
* counting num_index_tuples during vacuum is very complex and slow
* we just copy num_tuples to num_index_tuples as upper limit to avoid
* WARNING and optimizer mistakes.
*/ */
if ( indrel->rd_rel->relam == GIN_AM_OID )
{
stats->num_index_tuples = num_tuples;
} else
if (stats->num_index_tuples != num_tuples) if (stats->num_index_tuples != num_tuples)
{ {
if (stats->num_index_tuples > num_tuples || if (stats->num_index_tuples > num_tuples ||
...@@ -3052,7 +3061,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, ...@@ -3052,7 +3061,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
/* /*
* Check for tuple count mismatch. If the index is partial, then it's OK * Check for tuple count mismatch. If the index is partial, then it's OK
* for it to have fewer tuples than the heap; else we got trouble. * for it to have fewer tuples than the heap; else we got trouble.
*
* XXX Hack. Since GIN stores every pointer to heap several times and
* counting num_index_tuples during vacuum is very complex and slow
* we just copy num_tuples to num_index_tuples as upper limit to avoid
* WARNING and optimizer mistakes.
*/ */
if ( indrel->rd_rel->relam == GIN_AM_OID )
{
stats->num_index_tuples = num_tuples;
} else
if (stats->num_index_tuples != num_tuples + keep_tuples) if (stats->num_index_tuples != num_tuples + keep_tuples)
{ {
if (stats->num_index_tuples > num_tuples + keep_tuples || if (stats->num_index_tuples > num_tuples + keep_tuples ||
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.204 2006/05/02 04:34:18 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.205 2006/05/02 11:28:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -4829,3 +4829,21 @@ gistcostestimate(PG_FUNCTION_ARGS) ...@@ -4829,3 +4829,21 @@ gistcostestimate(PG_FUNCTION_ARGS)
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
Datum
gincostestimate(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
List *indexQuals = (List *) PG_GETARG_POINTER(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
genericcostestimate(root, index, indexQuals, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
PG_RETURN_VOID();
}
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/init/globals.c,v 1.97 2006/03/05 15:58:46 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/init/globals.c,v 1.98 2006/05/02 11:28:55 teodor Exp $
* *
* NOTES * NOTES
* Globals used all over the place should be declared here and not * Globals used all over the place should be declared here and not
...@@ -107,3 +107,5 @@ int VacuumCostDelay = 0; ...@@ -107,3 +107,5 @@ int VacuumCostDelay = 0;
int VacuumCostBalance = 0; /* working state for vacuum */ int VacuumCostBalance = 0; /* working state for vacuum */
bool VacuumCostActive = false; bool VacuumCostActive = false;
int GinFuzzySearchLimit = 0;
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>. * Written by Peter Eisentraut <peter_e@gmx.net>.
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.317 2006/04/25 14:11:56 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.318 2006/05/02 11:28:55 teodor Exp $
* *
*-------------------------------------------------------------------- *--------------------------------------------------------------------
*/ */
...@@ -64,7 +64,7 @@ ...@@ -64,7 +64,7 @@
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/pg_locale.h" #include "utils/pg_locale.h"
#include "pgstat.h" #include "pgstat.h"
#include "access/gin.h"
#ifndef PG_KRB_SRVTAB #ifndef PG_KRB_SRVTAB
#define PG_KRB_SRVTAB "" #define PG_KRB_SRVTAB ""
...@@ -1572,6 +1572,16 @@ static struct config_int ConfigureNamesInt[] = ...@@ -1572,6 +1572,16 @@ static struct config_int ConfigureNamesInt[] =
0, 0, INT_MAX, assign_tcp_keepalives_count, show_tcp_keepalives_count 0, 0, INT_MAX, assign_tcp_keepalives_count, show_tcp_keepalives_count
}, },
{
{"gin_fuzzy_search_limit", PGC_USERSET, UNGROUPED,
gettext_noop("Sets the maximum allowed result for exact search by gin."),
NULL,
0
},
&GinFuzzySearchLimit,
0, 0, INT_MAX, NULL, NULL
},
/* End-of-list marker */ /* End-of-list marker */
{ {
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment