Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
b525bf77
Commit
b525bf77
authored
Dec 04, 2010
by
Tom Lane
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add KNNGIST support to contrib/pg_trgm.
Teodor Sigaev, with some revision by Tom
parent
b576757d
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
213 additions
and
42 deletions
+213
-42
contrib/pg_trgm/expected/pg_trgm.out
contrib/pg_trgm/expected/pg_trgm.out
+23
-0
contrib/pg_trgm/pg_trgm.sql.in
contrib/pg_trgm/pg_trgm.sql.in
+20
-1
contrib/pg_trgm/sql/pg_trgm.sql
contrib/pg_trgm/sql/pg_trgm.sql
+4
-0
contrib/pg_trgm/trgm.h
contrib/pg_trgm/trgm.h
+6
-4
contrib/pg_trgm/trgm_gin.c
contrib/pg_trgm/trgm_gin.c
+3
-0
contrib/pg_trgm/trgm_gist.c
contrib/pg_trgm/trgm_gist.c
+110
-30
contrib/pg_trgm/trgm_op.c
contrib/pg_trgm/trgm_op.c
+20
-6
contrib/pg_trgm/uninstall_pg_trgm.sql
contrib/pg_trgm/uninstall_pg_trgm.sql
+6
-0
doc/src/sgml/pgtrgm.sgml
doc/src/sgml/pgtrgm.sgml
+21
-1
No files found.
contrib/pg_trgm/expected/pg_trgm.out
View file @
b525bf77
...
...
@@ -1187,6 +1187,13 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
qwertyu0988 | 0.333333
(1 row)
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
?column? | t
----------+-------------
0.411765 | qwertyu0988
0.5 | qwertyu0987
(2 rows)
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
set enable_seqscan=off;
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
...
...
@@ -2315,6 +2322,22 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
qwertyu0988 | 0.333333
(1 row)
explain (costs off)
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
QUERY PLAN
---------------------------------------------------
Limit
-> Index Scan using trgm_idx on test_trgm
Order By: (t <-> 'q0987wertyu0988'::text)
(3 rows)
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
?column? | t
----------+-------------
0.411765 | qwertyu0988
0.5 | qwertyu0987
(2 rows)
drop index trgm_idx;
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
set enable_seqscan=off;
...
...
contrib/pg_trgm/pg_trgm.sql.in
View file @
b525bf77
...
...
@@ -26,7 +26,7 @@ LANGUAGE C STRICT IMMUTABLE;
-- Boolean form of similarity(): true when the similarity of the two
-- arguments reaches the current trgm_limit threshold.
CREATE OR REPLACE FUNCTION similarity_op(text,text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT STABLE;  -- stable because depends on trgm_limit
CREATE OPERATOR % (
LEFTARG = text,
...
...
@@ -37,6 +37,18 @@ CREATE OPERATOR % (
JOIN = contjoinsel
);
-- Distance between two strings: one minus their similarity() value.
CREATE OR REPLACE FUNCTION similarity_dist(text, text)
    RETURNS float4
    AS 'MODULE_PATHNAME'
    LANGUAGE C IMMUTABLE STRICT;
-- Distance operator on text, implemented by similarity_dist and declared
-- as its own commutator.
CREATE OPERATOR <-> (
    LEFTARG    = text,
    RIGHTARG   = text,
    PROCEDURE  = similarity_dist,
    COMMUTATOR = '<->'
);
-- gist key
CREATE OR REPLACE FUNCTION gtrgm_in(cstring)
RETURNS gtrgm
...
...
@@ -60,6 +72,11 @@ RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
-- GiST distance support function (registered as FUNCTION 8 of gist_trgm_ops).
CREATE OR REPLACE FUNCTION gtrgm_distance(internal, text, int, oid)
    RETURNS float8
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION gtrgm_compress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
...
...
@@ -95,6 +112,7 @@ CREATE OPERATOR CLASS gist_trgm_ops
FOR TYPE text USING gist
AS
OPERATOR 1 % (text, text),
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
FUNCTION 2 gtrgm_union (bytea, internal),
FUNCTION 3 gtrgm_compress (internal),
...
...
@@ -102,6 +120,7 @@ AS
FUNCTION 5 gtrgm_penalty (internal, internal, internal),
FUNCTION 6 gtrgm_picksplit (internal, internal),
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
FUNCTION 8 gtrgm_distance (internal, text, int, oid),
STORAGE gtrgm;
-- support functions for gin
...
...
contrib/pg_trgm/sql/pg_trgm.sql
View file @
b525bf77
...
...
@@ -26,6 +26,7 @@ CREATE TABLE test_trgm(t text);
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
set enable_seqscan=off;
...
...
@@ -33,6 +34,9 @@ set enable_seqscan=off;
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
explain (costs off)
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
drop index trgm_idx;
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
...
...
contrib/pg_trgm/trgm.h
View file @
b525bf77
...
...
@@ -4,12 +4,10 @@
#ifndef __TRGM_H__
#define __TRGM_H__
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "utils/builtins.h"
/* options */
#define LPADDING 2
...
...
@@ -18,6 +16,10 @@
#define IGNORECASE
#define DIVUNION
/* operator strategy numbers */
#define SimilarityStrategyNumber 1
#define DistanceStrategyNumber 2
typedef
char
trgm
[
3
];
...
...
@@ -89,4 +91,4 @@ extern float4 trgm_limit;
TRGM
*
generate_trgm
(
char
*
str
,
int
slen
);
float4
cnt_sml
(
TRGM
*
trg1
,
TRGM
*
trg2
);
#endif
#endif
/* __TRGM_H__ */
contrib/pg_trgm/trgm_gin.c
View file @
b525bf77
/*
* contrib/pg_trgm/trgm_gin.c
*/
#include "postgres.h"
#include "trgm.h"
#include "access/gin.h"
...
...
@@ -10,6 +12,7 @@
#include "utils/array.h"
#include "utils/builtins.h"
PG_FUNCTION_INFO_V1
(
gin_extract_trgm
);
Datum
gin_extract_trgm
(
PG_FUNCTION_ARGS
);
...
...
contrib/pg_trgm/trgm_gist.c
View file @
b525bf77
/*
* contrib/pg_trgm/trgm_gist.c
*/
#include "postgres.h"
#include "trgm.h"
#include "access/gist.h"
#include "access/itup.h"
#include "access/skey.h"
#include "access/tuptoaster.h"
#include "storage/bufpage.h"
#include "utils/array.h"
#include "utils/builtins.h"
PG_FUNCTION_INFO_V1
(
gtrgm_in
);
Datum
gtrgm_in
(
PG_FUNCTION_ARGS
);
...
...
@@ -25,6 +29,9 @@ Datum gtrgm_decompress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1
(
gtrgm_consistent
);
Datum
gtrgm_consistent
(
PG_FUNCTION_ARGS
);
PG_FUNCTION_INFO_V1
(
gtrgm_distance
);
Datum
gtrgm_distance
(
PG_FUNCTION_ARGS
);
PG_FUNCTION_INFO_V1
(
gtrgm_union
);
Datum
gtrgm_union
(
PG_FUNCTION_ARGS
);
...
...
@@ -159,18 +166,35 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
}
}
/*
 * Count how many trigrams of the query array have their hash bit set in
 * the given signature.
 */
static int4
cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
{
	trgm	   *cur = GETARR(qtrg);
	trgm	   *end = cur + ARRNELEM(qtrg);
	int4		hits = 0;
	int4		packed = 0;		/* bytes CPTRGM does not write stay zero */

	for (; cur < end; cur++)
	{
		/* copy the trigram into an int so it can be hashed */
		CPTRGM(((char *) &packed), cur);
		hits += GETBIT(sign, HASHVAL(packed));
	}

	return hits;
}
Datum
gtrgm_consistent
(
PG_FUNCTION_ARGS
)
{
GISTENTRY
*
entry
=
(
GISTENTRY
*
)
PG_GETARG_POINTER
(
0
);
text
*
query
=
PG_GETARG_TEXT_P
(
1
);
/* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
StrategyNumber
strategy
=
(
StrategyNumber
)
PG_GETARG_UINT16
(
2
);
/* Oid subtype = PG_GETARG_OID(3); */
bool
*
recheck
=
(
bool
*
)
PG_GETARG_POINTER
(
4
);
TRGM
*
key
=
(
TRGM
*
)
DatumGetPointer
(
entry
->
key
);
TRGM
*
qtrg
;
bool
res
=
false
;
bool
res
;
char
*
cache
=
(
char
*
)
fcinfo
->
flinfo
->
fn_extra
;
/* All cases served by this function are exact */
...
...
@@ -193,39 +217,95 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
qtrg
=
(
TRGM
*
)
(
cache
+
MAXALIGN
(
VARSIZE
(
query
)));
if
(
GIST_LEAF
(
entry
))
{
/* all leafs contains orig trgm */
float4
tmpsml
=
cnt_sml
(
key
,
qtrg
);
switch
(
strategy
)
{
case
SimilarityStrategyNumber
:
if
(
GIST_LEAF
(
entry
))
{
/* all leafs contains orig trgm */
float4
tmpsml
=
cnt_sml
(
key
,
qtrg
);
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
res
=
(
*
(
int
*
)
&
tmpsml
==
*
(
int
*
)
&
trgm_limit
||
tmpsml
>
trgm_limit
)
?
true
:
false
;
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
res
=
(
*
(
int
*
)
&
tmpsml
==
*
(
int
*
)
&
trgm_limit
||
tmpsml
>
trgm_limit
)
?
true
:
false
;
}
else
if
(
ISALLTRUE
(
key
))
{
/* non-leaf contains signature */
res
=
true
;
}
else
{
/* non-leaf contains signature */
int4
count
=
cnt_sml_sign_common
(
qtrg
,
GETSIGN
(
key
));
int4
len
=
ARRNELEM
(
qtrg
);
if
(
len
==
0
)
res
=
false
;
else
res
=
(((((
float8
)
count
)
/
((
float8
)
len
)))
>=
trgm_limit
)
?
true
:
false
;
}
break
;
default:
elog
(
ERROR
,
"unrecognized strategy number: %d"
,
strategy
);
res
=
false
;
/* keep compiler quiet */
break
;
}
else
if
(
ISALLTRUE
(
key
))
{
/* non-leaf contains signature */
res
=
true
;
PG_RETURN_BOOL
(
res
);
}
Datum
gtrgm_distance
(
PG_FUNCTION_ARGS
)
{
GISTENTRY
*
entry
=
(
GISTENTRY
*
)
PG_GETARG_POINTER
(
0
);
text
*
query
=
PG_GETARG_TEXT_P
(
1
);
StrategyNumber
strategy
=
(
StrategyNumber
)
PG_GETARG_UINT16
(
2
);
/* Oid subtype = PG_GETARG_OID(3); */
TRGM
*
key
=
(
TRGM
*
)
DatumGetPointer
(
entry
->
key
);
TRGM
*
qtrg
;
float8
res
;
char
*
cache
=
(
char
*
)
fcinfo
->
flinfo
->
fn_extra
;
if
(
cache
==
NULL
||
VARSIZE
(
cache
)
!=
VARSIZE
(
query
)
||
memcmp
(
cache
,
query
,
VARSIZE
(
query
))
!=
0
)
{
qtrg
=
generate_trgm
(
VARDATA
(
query
),
VARSIZE
(
query
)
-
VARHDRSZ
);
if
(
cache
)
pfree
(
cache
);
fcinfo
->
flinfo
->
fn_extra
=
MemoryContextAlloc
(
fcinfo
->
flinfo
->
fn_mcxt
,
MAXALIGN
(
VARSIZE
(
query
))
+
VARSIZE
(
qtrg
));
cache
=
(
char
*
)
fcinfo
->
flinfo
->
fn_extra
;
memcpy
(
cache
,
query
,
VARSIZE
(
query
));
memcpy
(
cache
+
MAXALIGN
(
VARSIZE
(
query
)),
qtrg
,
VARSIZE
(
qtrg
));
}
else
{
/* non-leaf contains signature */
int4
count
=
0
;
int4
k
,
len
=
ARRNELEM
(
qtrg
);
trgm
*
ptr
=
GETARR
(
qtrg
);
BITVECP
sign
=
GETSIGN
(
key
);
int4
tmp
=
0
;
for
(
k
=
0
;
k
<
len
;
k
++
)
{
CPTRGM
(((
char
*
)
&
tmp
),
ptr
+
k
);
count
+=
GETBIT
(
sign
,
HASHVAL
(
tmp
));
}
#ifdef DIVUNION
res
=
(
len
==
count
)
?
true
:
((((((
float4
)
count
)
/
((
float4
)
(
len
-
count
))))
>=
trgm_limit
)
?
true
:
false
);
#else
res
=
(
len
==
0
)
?
false
:
((((((
float4
)
count
)
/
((
float4
)
len
)))
>=
trgm_limit
)
?
true
:
false
);
#endif
qtrg
=
(
TRGM
*
)
(
cache
+
MAXALIGN
(
VARSIZE
(
query
)));
switch
(
strategy
)
{
case
DistanceStrategyNumber
:
if
(
GIST_LEAF
(
entry
))
{
/* all leafs contains orig trgm */
res
=
1
.
0
-
cnt_sml
(
key
,
qtrg
);
}
else
if
(
ISALLTRUE
(
key
))
{
/* all leafs contains orig trgm */
res
=
0
.
0
;
}
else
{
/* non-leaf contains signature */
int4
count
=
cnt_sml_sign_common
(
qtrg
,
GETSIGN
(
key
));
int4
len
=
ARRNELEM
(
qtrg
);
res
=
(
len
==
0
)
?
-
1
.
0
:
1
.
0
-
((
float8
)
count
)
/
((
float8
)
len
);
}
break
;
default:
elog
(
ERROR
,
"unrecognized strategy number: %d"
,
strategy
);
res
=
0
;
/* keep compiler quiet */
break
;
}
PG_RETURN_
BOOL
(
res
);
PG_RETURN_
FLOAT8
(
res
);
}
static
int4
...
...
contrib/pg_trgm/trgm_op.c
View file @
b525bf77
/*
* contrib/pg_trgm/trgm_op.c
*/
#include "trgm.h"
#include "postgres.h"
#include <ctype.h>
#include "utils/array.h"
#include "trgm.h"
#include "catalog/pg_type.h"
#include "tsearch/ts_locale.h"
#include "utils/array.h"
PG_MODULE_MAGIC
;
...
...
@@ -359,16 +364,25 @@ similarity(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT4
(
res
);
}
PG_FUNCTION_INFO_V1
(
similarity_dist
);
Datum
similarity_dist
(
PG_FUNCTION_ARGS
);
/*
 * similarity_dist
 *		Distance between the two arguments: 1.0 minus the value returned by
 *		similarity() for the same pair.
 */
Datum
similarity_dist(PG_FUNCTION_ARGS)
{
	Datum		lhs = PG_GETARG_DATUM(0);
	Datum		rhs = PG_GETARG_DATUM(1);
	float4		sml = DatumGetFloat4(DirectFunctionCall2(similarity, lhs, rhs));

	PG_RETURN_FLOAT4(1.0 - sml);
}
PG_FUNCTION_INFO_V1
(
similarity_op
);
Datum
similarity_op
(
PG_FUNCTION_ARGS
);
/*
 * similarity_op
 *		Boolean form of similarity(): true when the similarity of the two
 *		arguments is at least trgm_limit.
 */
Datum
similarity_op(PG_FUNCTION_ARGS)
{
	float4		res = DatumGetFloat4(DirectFunctionCall2(similarity,
														 PG_GETARG_DATUM(0),
														 PG_GETARG_DATUM(1)));

	PG_RETURN_BOOL(res >= trgm_limit);
}
contrib/pg_trgm/uninstall_pg_trgm.sql
View file @
b525bf77
...
...
@@ -19,6 +19,8 @@ DROP FUNCTION gtrgm_compress(internal);
-- GiST support functions
DROP FUNCTION gtrgm_consistent(internal, text, int, oid, internal);
DROP FUNCTION gtrgm_distance(internal, text, int, oid);

-- GiST key type
DROP TYPE gtrgm CASCADE;

-- GIN operator class
DROP OPERATOR CLASS gin_trgm_ops USING gin;
...
...
@@ -33,6 +35,10 @@ DROP OPERATOR % (text, text);
DROP FUNCTION similarity_op(text, text);

-- distance operator and the function implementing it
DROP OPERATOR <-> (text, text);
DROP FUNCTION similarity_dist(text, text);

DROP FUNCTION similarity(text, text);
DROP FUNCTION show_trgm(text);
...
...
doc/src/sgml/pgtrgm.sgml
View file @
b525bf77
...
...
@@ -117,6 +117,14 @@
<function>set_limit</>.
</entry>
</row>
<row>
<entry><type>text</> <literal>&lt;-&gt;</literal> <type>text</></entry>
<entry><type>real</type></entry>
<entry>
Returns the <quote>distance</> between the arguments, that is
one minus the <function>similarity()</> value.
</entry>
</row>
</tbody>
</tgroup>
</table>
...
...
@@ -129,7 +137,7 @@
The <filename>pg_trgm</filename> module provides GiST and GIN index
operator classes that allow you to create an index over a text column for
the purpose of very fast similarity searches. These index types support
the above-described similarity operators (and no other operators, so you may
want a regular B-tree index too).
</para>
...
...
@@ -161,6 +169,18 @@ SELECT t, similarity(t, '<replaceable>word</>') AS sml
sets.
</para>
<para>
A variant of the above query is
<programlisting>
SELECT t, t &lt;-&gt; '<replaceable>word</>' AS dist
FROM test_trgm
ORDER BY dist LIMIT 10;
</programlisting>
This can be implemented quite efficiently by GiST indexes, but not
by GIN indexes. It will usually beat the first formulation when only
a small number of the closest matches is wanted.
</para>
<para>
The choice between GiST and GIN indexing depends on the relative
performance characteristics of GiST and GIN, which are discussed elsewhere.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment