Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
abd8c94f
Commit
abd8c94f
authored
Aug 14, 2009
by
Teodor Sigaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add prefix support for synonym dictionary
parent
0c738084
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
111 additions
and
8 deletions
+111
-8
doc/src/sgml/textsearch.sgml
doc/src/sgml/textsearch.sgml
+58
-1
src/backend/tsearch/dict_synonym.c
src/backend/tsearch/dict_synonym.c
+31
-7
src/backend/tsearch/synonym_sample.syn
src/backend/tsearch/synonym_sample.syn
+1
-0
src/test/regress/expected/tsdicts.out
src/test/regress/expected/tsdicts.out
+18
-0
src/test/regress/sql/tsdicts.sql
src/test/regress/sql/tsdicts.sql
+3
-0
No files found.
doc/src/sgml/textsearch.sgml
View file @
abd8c94f
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.5
2 2009/06/17 21:58:49 tgl
Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.5
3 2009/08/14 14:53:20 teodor
Exp $ -->
<chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title>
...
...
@@ -2288,6 +2288,63 @@ SELECT * FROM ts_debug('english', 'Paris');
asciiword | Word, all ASCII | Paris | {my_synonym,english_stem} | my_synonym | {paris}
</programlisting>
</para>
<para>
An asterisk (<literal>*</literal>) at the end of a definition word indicates
that the definition word is a prefix, and the <function>to_tsquery()</function>
function will transform that definition to the prefix search format (see
<xref linkend="textsearch-parsing-queries">).
Note that the asterisk is ignored in <function>to_tsvector()</function>.
</para>
<para>
Contents of <filename>$SHAREDIR/tsearch_data/synonym_sample.syn</>:
</para>
<programlisting>
postgres pgsql
postgresql pgsql
postgre pgsql
gogle googl
indices index*
</programlisting>
<para>
Results:
</para>
<programlisting>
=# create text search dictionary syn( template=synonym,synonyms='synonym_sample');
=# select ts_lexize('syn','indices');
ts_lexize
-----------
{index}
(1 row)
=# create text search configuration tst ( copy=simple);
=# alter text search configuration tst alter mapping for asciiword with syn;
=# select to_tsquery('tst','indices');
to_tsquery
------------
'index':*
(1 row)
=# select 'indexes are very useful'::tsvector;
tsvector
---------------------------------
'are' 'indexes' 'useful' 'very'
(1 row)
=# select 'indexes are very useful'::tsvector @@ to_tsquery('tst','indices');
?column?
----------
t
(1 row)
=# select to_tsvector('tst','indices');
to_tsvector
-------------
'index':1
(1 row)
</programlisting>
<para>
The only parameter required by the <literal>synonym</> template is
...
...
src/backend/tsearch/dict_synonym.c
View file @
abd8c94f
...
...
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.1
0 2009/01/01 17:23:48 momjian
Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.1
1 2009/08/14 14:53:20 teodor
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -23,6 +23,8 @@ typedef struct
{
char
*
in
;
char
*
out
;
int
outlen
;
uint16
flags
;
}
Syn
;
typedef
struct
...
...
@@ -36,11 +38,14 @@ typedef struct
* Finds the next whitespace-delimited word within the 'in' string.
* Returns a pointer to the first character of the word, and a pointer
* to the next byte after the last character in the word (in *end).
* A '*' character at the end of the word will not be treated as a word
* character if flags is not null.
*/
static
char
*
findwrd
(
char
*
in
,
char
**
end
)
findwrd
(
char
*
in
,
char
**
end
,
uint16
*
flags
)
{
char
*
start
;
char
*
lastchar
;
/* Skip leading spaces */
while
(
*
in
&&
t_isspace
(
in
))
...
...
@@ -53,13 +58,27 @@ findwrd(char *in, char **end)
return
NULL
;
}
start
=
in
;
lastchar
=
start
=
in
;
/* Find end of word */
while
(
*
in
&&
!
t_isspace
(
in
))
{
lastchar
=
in
;
in
+=
pg_mblen
(
in
);
}
if
(
in
-
lastchar
==
1
&&
t_iseq
(
lastchar
,
'*'
)
&&
flags
)
{
*
flags
=
TSL_PREFIX
;
*
end
=
lastchar
;
}
else
{
if
(
flags
)
*
flags
=
0
;
*
end
=
in
;
}
*
end
=
in
;
return
start
;
}
...
...
@@ -84,6 +103,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
*
end
=
NULL
;
int
cur
=
0
;
char
*
line
=
NULL
;
uint16
flags
=
0
;
foreach
(
l
,
dictoptions
)
{
...
...
@@ -117,7 +137,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
while
((
line
=
tsearch_readline
(
&
trst
))
!=
NULL
)
{
starti
=
findwrd
(
line
,
&
end
);
starti
=
findwrd
(
line
,
&
end
,
NULL
);
if
(
!
starti
)
{
/* Empty line */
...
...
@@ -130,7 +150,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
}
*
end
=
'\0'
;
starto
=
findwrd
(
end
+
1
,
&
end
);
starto
=
findwrd
(
end
+
1
,
&
end
,
&
flags
);
if
(
!
starto
)
{
/* A line with only one word (+whitespace). Ignore silently. */
...
...
@@ -168,6 +188,9 @@ dsynonym_init(PG_FUNCTION_ARGS)
d
->
syn
[
cur
].
out
=
lowerstr
(
starto
);
}
d
->
syn
[
cur
].
outlen
=
strlen
(
starto
);
d
->
syn
[
cur
].
flags
=
flags
;
cur
++
;
skipline:
...
...
@@ -212,7 +235,8 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
PG_RETURN_POINTER
(
NULL
);
res
=
palloc0
(
sizeof
(
TSLexeme
)
*
2
);
res
[
0
].
lexeme
=
pstrdup
(
found
->
out
);
res
[
0
].
lexeme
=
pnstrdup
(
found
->
out
,
found
->
outlen
);
res
[
0
].
flags
=
found
->
flags
;
PG_RETURN_POINTER
(
res
);
}
src/backend/tsearch/synonym_sample.syn
View file @
abd8c94f
...
...
@@ -2,3 +2,4 @@ postgres pgsql
postgresql pgsql
postgre pgsql
gogle googl
indices index*
src/test/regress/expected/tsdicts.out
View file @
abd8c94f
...
...
@@ -208,6 +208,12 @@ SELECT ts_lexize('synonym', 'Gogle');
{googl}
(1 row)
SELECT ts_lexize('synonym', 'indices');
ts_lexize
-----------
{index}
(1 row)
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.
...
...
@@ -290,6 +296,18 @@ SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
(1 row)
SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
to_tsvector
----------------------------------------------
'form':8 'index':1,3,10 'plural':7 'right':6
(1 row)
SELECT to_tsquery('synonym_tst', 'Index & indices');
to_tsquery
---------------------
'index' & 'index':*
(1 row)
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
...
...
src/test/regress/sql/tsdicts.sql
View file @
abd8c94f
...
...
@@ -56,6 +56,7 @@ CREATE TEXT SEARCH DICTIONARY synonym (
SELECT
ts_lexize
(
'synonym'
,
'PoStGrEs'
);
SELECT
ts_lexize
(
'synonym'
,
'Gogle'
);
SELECT
ts_lexize
(
'synonym'
,
'indices'
);
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
...
...
@@ -104,6 +105,8 @@ ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
SELECT
to_tsvector
(
'synonym_tst'
,
'Postgresql is often called as postgres or pgsql and pronounced as postgre'
);
SELECT
to_tsvector
(
'synonym_tst'
,
'Most common mistake is to write Gogle instead of Google'
);
SELECT
to_tsvector
(
'synonym_tst'
,
'Indexes or indices - Which is right plural form of index?'
);
SELECT
to_tsquery
(
'synonym_tst'
,
'Index & indices'
);
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment