Commit 028350f6 authored by Teodor Sigaev's avatar Teodor Sigaev

Make exact distance match for FTS phrase operator

Phrase operator now requires exact distance between lexemes instead of
less-or-equal.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
parent f1993038
...@@ -346,10 +346,10 @@ SELECT to_tsvector('error is not fatal') @@ to_tsquery('fatal <-> error'); ...@@ -346,10 +346,10 @@ SELECT to_tsvector('error is not fatal') @@ to_tsquery('fatal <-> error');
There is a more general version of the FOLLOWED BY operator having the There is a more general version of the FOLLOWED BY operator having the
form <literal>&lt;<replaceable>N</>&gt;</literal>, form <literal>&lt;<replaceable>N</>&gt;</literal>,
where <replaceable>N</> is an integer standing for the greatest distance where <replaceable>N</> is an integer standing for the exact distance
allowed between the matching lexemes. <literal>&lt;1&gt;</literal> is allowed between the matching lexemes. <literal>&lt;1&gt;</literal> is
the same as <literal>&lt;-&gt;</>, while <literal>&lt;2&gt;</literal> the same as <literal>&lt;-&gt;</>, while <literal>&lt;2&gt;</literal>
allows one other lexeme to optionally appear between the matches, and so allows exactly one other lexeme to appear between the matches, and so
on. The <literal>phraseto_tsquery</> function makes use of this on. The <literal>phraseto_tsquery</> function makes use of this
operator to construct a <literal>tsquery</> that can match a multi-word operator to construct a <literal>tsquery</> that can match a multi-word
phrase when some of the words are stop words. For example: phrase when some of the words are stop words. For example:
...@@ -1529,7 +1529,7 @@ SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat'); ...@@ -1529,7 +1529,7 @@ SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
<para> <para>
Returns a query that searches for a match to the first given query Returns a query that searches for a match to the first given query
followed by a match to the second given query at a distance of at followed by a match to the second given query at a distance of exactly
most <replaceable>distance</replaceable> lexemes, using <replaceable>distance</replaceable> lexemes, using
the <literal>&lt;<replaceable>N</>&gt;</literal> the <literal>&lt;<replaceable>N</>&gt;</literal>
<type>tsquery</> operator. For example: <type>tsquery</> operator. For example:
......
...@@ -1375,6 +1375,7 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1375,6 +1375,7 @@ TS_phrase_execute(QueryItem *curitem,
ExecPhraseData Ldata = {0, false, NULL}, ExecPhraseData Ldata = {0, false, NULL},
Rdata = {0, false, NULL}; Rdata = {0, false, NULL};
WordEntryPos *Lpos, WordEntryPos *Lpos,
*LposStart,
*Rpos, *Rpos,
*pos_iter = NULL; *pos_iter = NULL;
...@@ -1416,34 +1417,39 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1416,34 +1417,39 @@ TS_phrase_execute(QueryItem *curitem,
pos_iter = data->pos; pos_iter = data->pos;
} }
Lpos = Ldata.pos;
Rpos = Rdata.pos;
/* /*
* Find matches by distance, WEP_GETPOS() is needed because * Find matches by distance, WEP_GETPOS() is needed because
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector * ExecPhraseData->data can point to the tsvector's WordEntryPosVector
*/ */
Rpos = Rdata.pos;
LposStart = Ldata.pos;
while (Rpos < Rdata.pos + Rdata.npos) while (Rpos < Rdata.pos + Rdata.npos)
{
while (Lpos < Ldata.pos + Ldata.npos)
{
if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
{ {
/* /*
* Lpos is behind the Rpos, so we have to check the * We need to check all possible distances, so reset Lpos
* distance condition * to a guaranteed not-yet-satisfied position.
*/ */
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance) Lpos = LposStart;
while (Lpos < Ldata.pos + Ldata.npos)
{
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
curitem->qoperator.distance)
{ {
/* MATCH! */ /* MATCH! */
if (data) if (data)
{ {
/* Store position for upper phrase operator */
*pos_iter = WEP_GETPOS(*Rpos); *pos_iter = WEP_GETPOS(*Rpos);
pos_iter++; pos_iter++;
break; /* We need to build a unique result /*
* array, so go to the next Rpos */ * Set left start position to next, because current one
* could not satisfy distance for any other right
* position
*/
LposStart = Lpos + 1;
break;
} }
else else
{ {
...@@ -1453,15 +1459,18 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1453,15 +1459,18 @@ TS_phrase_execute(QueryItem *curitem,
*/ */
return true; return true;
} }
} }
} else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
else WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
curitem->qoperator.distance)
{ {
/* /*
* Go to the next Rpos, because Lpos is ahead of the * Go to the next Rpos, because Lpos is ahead or on less
* current Rpos * distance than required by current operator
*/ */
break; break;
} }
Lpos++; Lpos++;
......
...@@ -665,10 +665,10 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true"; ...@@ -665,10 +665,10 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
t t
(1 row) (1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
true false
------ -------
t f
(1 row) (1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
...@@ -683,6 +683,12 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true"; ...@@ -683,6 +683,12 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
t t
(1 row) (1 row)
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false"; SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
false false
------- -------
...@@ -897,7 +903,7 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*'); ...@@ -897,7 +903,7 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A'); SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
ts_rank_cd ts_rank_cd
------------ ------------
0.0714286 0
(1 row) (1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B'); SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
...@@ -924,10 +930,10 @@ SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true"; ...@@ -924,10 +930,10 @@ SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
t t
(1 row) (1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true"; SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
true false
------ -------
t f
(1 row) (1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
...@@ -954,10 +960,10 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true"; ...@@ -954,10 +960,10 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
t t
(1 row) (1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true"; SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
true false
------ -------
t f
(1 row) (1 row)
-- tsvector editing operations -- tsvector editing operations
......
...@@ -130,9 +130,10 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; ...@@ -130,9 +130,10 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
--phrase search --phrase search
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true"; SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false"; SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true"; SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
...@@ -180,12 +181,12 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B'); ...@@ -180,12 +181,12 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true"; SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false"; SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true"; SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true"; SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false"; SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true"; SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true"; SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
-- tsvector editing operations -- tsvector editing operations
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment