Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
25bd9ce3
Commit
25bd9ce3
authored
Aug 05, 2009
by
Tom Lane
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add matchorig, matchsynonyms, and keepsynonyms options to contrib/dict_xsyn.
Sergey Karpov
parent
23dc89d2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
282 additions
and
52 deletions
+282
-52
contrib/dict_xsyn/dict_xsyn.c
contrib/dict_xsyn/dict_xsyn.c
+69
-43
contrib/dict_xsyn/expected/dict_xsyn.out
contrib/dict_xsyn/expected/dict_xsyn.out
+128
-2
contrib/dict_xsyn/sql/dict_xsyn.sql
contrib/dict_xsyn/sql/dict_xsyn.sql
+39
-2
doc/src/sgml/dict-xsyn.sgml
doc/src/sgml/dict-xsyn.sgml
+46
-5
No files found.
contrib/dict_xsyn/dict_xsyn.c
View file @
25bd9ce3
...
...
@@ -6,7 +6,7 @@
* Copyright (c) 2007-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.6 2009/01/01 17:23:32 momjian Exp $
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.7 2009/08/05 18:06:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -33,7 +33,10 @@ typedef struct
int
len
;
Syn
*
syn
;
bool
matchorig
;
bool
keeporig
;
bool
matchsynonyms
;
bool
keepsynonyms
;
}
DictSyn
;
...
...
@@ -88,7 +91,8 @@ read_dictionary(DictSyn *d, char *filename)
{
char
*
value
;
char
*
key
;
char
*
end
=
NULL
;
char
*
pos
;
char
*
end
;
if
(
*
line
==
'\0'
)
continue
;
...
...
@@ -96,26 +100,36 @@ read_dictionary(DictSyn *d, char *filename)
value
=
lowerstr
(
line
);
pfree
(
line
);
key
=
find_word
(
value
,
&
end
)
;
if
(
!
key
)
pos
=
value
;
while
((
key
=
find_word
(
pos
,
&
end
))
!=
NULL
)
{
pfree
(
value
);
continue
;
}
/* Enlarge syn structure if full */
if
(
cur
==
d
->
len
)
{
d
->
len
=
(
d
->
len
>
0
)
?
2
*
d
->
len
:
16
;
if
(
d
->
syn
)
d
->
syn
=
(
Syn
*
)
repalloc
(
d
->
syn
,
sizeof
(
Syn
)
*
d
->
len
);
else
d
->
syn
=
(
Syn
*
)
palloc
(
sizeof
(
Syn
)
*
d
->
len
);
}
if
(
cur
==
d
->
len
)
{
d
->
len
=
(
d
->
len
>
0
)
?
2
*
d
->
len
:
16
;
if
(
d
->
syn
)
d
->
syn
=
(
Syn
*
)
repalloc
(
d
->
syn
,
sizeof
(
Syn
)
*
d
->
len
);
else
d
->
syn
=
(
Syn
*
)
palloc
(
sizeof
(
Syn
)
*
d
->
len
);
}
/* Save first word only if we will match it */
if
(
pos
!=
value
||
d
->
matchorig
)
{
d
->
syn
[
cur
].
key
=
pnstrdup
(
key
,
end
-
key
);
d
->
syn
[
cur
].
value
=
pstrdup
(
value
);
d
->
syn
[
cur
].
key
=
pnstrdup
(
key
,
end
-
key
);
d
->
syn
[
cur
].
value
=
value
;
cur
++
;
}
pos
=
end
;
cur
++
;
/* Don't bother scanning synonyms if we will not match them */
if
(
!
d
->
matchsynonyms
)
break
;
}
pfree
(
value
);
}
tsearch_readline_end
(
&
trst
);
...
...
@@ -133,23 +147,40 @@ dxsyn_init(PG_FUNCTION_ARGS)
List
*
dictoptions
=
(
List
*
)
PG_GETARG_POINTER
(
0
);
DictSyn
*
d
;
ListCell
*
l
;
char
*
filename
=
NULL
;
d
=
(
DictSyn
*
)
palloc0
(
sizeof
(
DictSyn
));
d
->
len
=
0
;
d
->
syn
=
NULL
;
d
->
matchorig
=
true
;
d
->
keeporig
=
true
;
d
->
matchsynonyms
=
false
;
d
->
keepsynonyms
=
true
;
foreach
(
l
,
dictoptions
)
{
DefElem
*
defel
=
(
DefElem
*
)
lfirst
(
l
);
if
(
pg_strcasecmp
(
defel
->
defname
,
"KEEPORIG"
)
==
0
)
if
(
pg_strcasecmp
(
defel
->
defname
,
"MATCHORIG"
)
==
0
)
{
d
->
matchorig
=
defGetBoolean
(
defel
);
}
else
if
(
pg_strcasecmp
(
defel
->
defname
,
"KEEPORIG"
)
==
0
)
{
d
->
keeporig
=
defGetBoolean
(
defel
);
}
else
if
(
pg_strcasecmp
(
defel
->
defname
,
"MATCHSYNONYMS"
)
==
0
)
{
d
->
matchsynonyms
=
defGetBoolean
(
defel
);
}
else
if
(
pg_strcasecmp
(
defel
->
defname
,
"KEEPSYNONYMS"
)
==
0
)
{
d
->
keepsynonyms
=
defGetBoolean
(
defel
);
}
else
if
(
pg_strcasecmp
(
defel
->
defname
,
"RULES"
)
==
0
)
{
read_dictionary
(
d
,
defGetString
(
defel
));
/* we can't read the rules before parsing all options! */
filename
=
defGetString
(
defel
);
}
else
{
...
...
@@ -160,6 +191,9 @@ dxsyn_init(PG_FUNCTION_ARGS)
}
}
if
(
filename
)
read_dictionary
(
d
,
filename
);
PG_RETURN_POINTER
(
d
);
}
...
...
@@ -194,41 +228,33 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
/* Parse string of synonyms and return array of words */
{
char
*
value
=
pstrdup
(
found
->
value
);
int
value_length
=
strlen
(
value
);
char
*
pos
=
value
;
char
*
value
=
found
->
value
;
char
*
syn
;
char
*
pos
;
char
*
end
;
int
nsyns
=
0
;
bool
is_first
=
true
;
res
=
palloc
(
0
);
res
=
palloc
(
sizeof
(
TSLexeme
)
);
while
(
pos
<
value
+
value_length
)
pos
=
value
;
while
((
syn
=
find_word
(
pos
,
&
end
))
!=
NULL
)
{
char
*
end
;
char
*
syn
=
find_word
(
pos
,
&
end
);
if
(
!
syn
)
break
;
*
end
=
'\0'
;
res
=
repalloc
(
res
,
sizeof
(
TSLexeme
)
*
(
nsyns
+
2
));
res
[
nsyns
].
lexeme
=
NULL
;
/*
first word is added to result only if KEEPORIG flag is set
*/
if
(
d
->
keeporig
||
!
is_first
)
/*
The first word is output only if keeporig=true
*/
if
(
pos
!=
value
||
d
->
keeporig
)
{
res
[
nsyns
].
lexeme
=
pstrdup
(
syn
);
res
[
nsyns
+
1
].
lexeme
=
NULL
;
res
[
nsyns
].
lexeme
=
pnstrdup
(
syn
,
end
-
syn
);
nsyns
++
;
}
is_first
=
false
;
pos
=
end
;
pos
=
end
+
1
;
/* Stop if we are not to output the synonyms */
if
(
!
d
->
keepsynonyms
)
break
;
}
pfree
(
value
);
res
[
nsyns
].
lexeme
=
NULL
;
}
PG_RETURN_POINTER
(
res
);
...
...
contrib/dict_xsyn/expected/dict_xsyn.out
View file @
25bd9ce3
...
...
@@ -5,10 +5,76 @@
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
--
configuration
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
--
default configuration - match first word and return it among with all synonyms
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=
true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=
false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
--------------------------
{supernova,sn,sne,1987a}
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
-- the same, but return only synonyms
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
----------------
{sn,sne,1987a}
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
-- match any word and return all words
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
--------------------------
{supernova,sn,sne,1987a}
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
--------------------------
{supernova,sn,sne,1987a}
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
-- match any word and return all words except first one
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
----------------
{sn,sne,1987a}
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
----------------
{sn,sne,1987a}
...
...
@@ -20,3 +86,63 @@ SELECT ts_lexize('xsyn', 'grb');
(1 row)
-- match any synonym but not first word, and return first word instead
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
-------------
{supernova}
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
-- do not match or return anything
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
-- match any word but return nothing
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
-----------
{}
(1 row)
SELECT ts_lexize('xsyn', 'sn');
ts_lexize
-----------
{}
(1 row)
SELECT ts_lexize('xsyn', 'grb');
ts_lexize
-----------
(1 row)
contrib/dict_xsyn/sql/dict_xsyn.sql
View file @
25bd9ce3
...
...
@@ -8,9 +8,46 @@ SET client_min_messages = warning;
\
set
ECHO
all
RESET
client_min_messages
;
--
configuration
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
false
);
--
default configuration - match first word and return it among with all synonyms
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
true
,
MATCHORIG
=
true
,
KEEPSYNONYMS
=
true
,
MATCHSYNONYMS
=
false
);
--lexize
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- the same, but return only synonyms
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
false
,
MATCHORIG
=
true
,
KEEPSYNONYMS
=
true
,
MATCHSYNONYMS
=
false
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- match any word and return all words
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
true
,
MATCHORIG
=
true
,
KEEPSYNONYMS
=
true
,
MATCHSYNONYMS
=
true
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- match any word and return all words except first one
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
false
,
MATCHORIG
=
true
,
KEEPSYNONYMS
=
true
,
MATCHSYNONYMS
=
true
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- match any synonym but not first word, and return first word instead
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
true
,
MATCHORIG
=
false
,
KEEPSYNONYMS
=
false
,
MATCHSYNONYMS
=
true
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- do not match or return anything
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
false
,
MATCHORIG
=
false
,
KEEPSYNONYMS
=
false
,
MATCHSYNONYMS
=
false
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
-- match any word but return nothing
ALTER
TEXT
SEARCH
DICTIONARY
xsyn
(
RULES
=
'xsyn_sample'
,
KEEPORIG
=
false
,
MATCHORIG
=
true
,
KEEPSYNONYMS
=
false
,
MATCHSYNONYMS
=
true
);
SELECT
ts_lexize
(
'xsyn'
,
'supernova'
);
SELECT
ts_lexize
(
'xsyn'
,
'sn'
);
SELECT
ts_lexize
(
'xsyn'
,
'grb'
);
doc/src/sgml/dict-xsyn.sgml
View file @
25bd9ce3
<!-- $PostgreSQL: pgsql/doc/src/sgml/dict-xsyn.sgml,v 1.2 2007/12/06 04:12:10 tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/dict-xsyn.sgml,v 1.3 2009/08/05 18:06:49 tgl Exp $ -->
<sect1 id="dict-xsyn">
<title>dict_xsyn</title>
...
...
@@ -23,9 +23,26 @@
<itemizedlist>
<listitem>
<para>
<literal>keeporig</> controls whether the original word is included (if
<literal>true</>), or only its synonyms (if <literal>false</>). Default
is <literal>true</>.
<literal>matchorig</> controls whether the original word is accepted by
the dictionary. Default is <literal>true</>.
</para>
</listitem>
<listitem>
<para>
<literal>matchsynonyms</> controls whether the synonyms are
accepted by the dictionary. Default is <literal>false</>.
</para>
</listitem>
<listitem>
<para>
<literal>keeporig</> controls whether the original word is included in
the dictionary's output. Default is <literal>true</>.
</para>
</listitem>
<listitem>
<para>
<literal>keepsynonyms</> controls whether the synonyms are included in
the dictionary's output. Default is <literal>true</>.
</para>
</listitem>
<listitem>
...
...
@@ -87,13 +104,37 @@ ALTER TEXT SEARCH DICTIONARY
To test the dictionary, you can try
<programlisting>
mydb=# SELECT ts_lexize('xsyn', 'word');
ts_lexize
-----------------------
{syn1,syn2,syn3}
mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true);
ALTER TEXT SEARCH DICTIONARY
mydb=# SELECT ts_lexize('xsyn', 'word');
ts_lexize
-----------------------
{word,syn1,syn2,syn3}
mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=false, MATCHSYNONYMS=true);
ALTER TEXT SEARCH DICTIONARY
mydb=# SELECT ts_lexize('xsyn', 'syn1');
ts_lexize
-----------------------
{syn1,syn2,syn3}
mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false);
ALTER TEXT SEARCH DICTIONARY
mydb=# SELECT ts_lexize('xsyn', 'syn1');
ts_lexize
-----------------------
{word}
</programlisting>
but real-world usage will involve including it in a text search
Real-world usage will involve including it in a text search
configuration as described in <xref linkend="textsearch">.
That might look like this:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment