Commit bb140506 authored by Teodor Sigaev

Phrase full text search.

This patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery.
The on-disk and binary in/out formats of tsquery remain backward compatible.
It has two side effects:
- it changes the sort order of tsquery values, so users who have a btree index
  over tsquery should reindex it
- tsquery output contains fewer parentheses, which makes it more readable

Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov
Reviewers: Alexander Korotkov, Artur Zakirov
parent 015e8894
...@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery; ...@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
(1 row) (1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery; SELECT '1|(2|(4|(5|6)))'::tsquery;
tsquery tsquery
----------------------------------------- -----------------------------
'1' | ( '2' | ( '4' | ( '5' | '6' ) ) ) '1' | '2' | '4' | '5' | '6'
(1 row) (1 row)
SELECT '1|2|4|5|6'::tsquery; SELECT '1|2|4|5|6'::tsquery;
tsquery tsquery
----------------------------------------- -----------------------------
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6' '1' | '2' | '4' | '5' | '6'
(1 row) (1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery; SELECT '1&(2&(4&(5&6)))'::tsquery;
...@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery; ...@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
select 'a | f' < 'b & c'::tsquery; select 'a | f' < 'b & c'::tsquery;
?column? ?column?
---------- ----------
t f
(1 row) (1 row)
select 'a | ff' < 'b & c'::tsquery; select 'a | ff' < 'b & c'::tsquery;
...@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york'; ...@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york';
set enable_seqscan=on; set enable_seqscan=on;
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
rewrite rewrite
---------------------------------------------------------------------------------- ------------------------------------------------------------------------------
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) ) 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
(1 row) (1 row)
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
...@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex ...@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
(1 row) (1 row)
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
...@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; ...@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
(1 row) (1 row)
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
select keyword from test_tsquery where keyword @> 'new'; select keyword from test_tsquery where keyword @> 'new';
...@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e ...@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
...@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e ...@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
...@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e ...@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
...@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e ...@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
(1 row) (1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword; select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
rewrite rewrite
------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) ) ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
(1 row) (1 row)
set enable_seqscan='on'; set enable_seqscan='on';
......
...@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats'); ...@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
<para> <para>
A <type>tsquery</type> value stores lexemes that are to be A <type>tsquery</type> value stores lexemes that are to be
searched for, and combines them honoring the Boolean operators searched for, and combines them honoring the Boolean operators
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and <literal>&amp;</literal> (AND), <literal>|</literal> (OR),
<literal>!</> (NOT). Parentheses can be used to enforce grouping <literal>!</> (NOT) and <literal>&lt;-&gt;</> (FOLLOWED BY) phrase search
operator. Parentheses can be used to enforce grouping
of the operators: of the operators:
<programlisting> <programlisting>
...@@ -3946,8 +3947,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery; ...@@ -3946,8 +3947,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
</programlisting> </programlisting>
In the absence of parentheses, <literal>!</> (NOT) binds most tightly, In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
and <literal>&amp;</literal> (AND) binds more tightly than and <literal>&amp;</literal> (AND) and <literal>&lt;-&gt;</literal> (FOLLOWED BY)
<literal>|</literal> (OR). both bind more tightly than <literal>|</literal> (OR).
</para> </para>
<para> <para>
......
...@@ -9127,6 +9127,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ...@@ -9127,6 +9127,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>!! 'cat'::tsquery</literal></entry> <entry><literal>!! 'cat'::tsquery</literal></entry>
<entry><literal>!'cat'</literal></entry> <entry><literal>!'cat'</literal></entry>
</row> </row>
<row>
<entry> <literal>&lt;-&gt;</literal> </entry>
<entry><type>tsquery</> followed by <type>tsquery</></entry>
<entry><literal>to_tsquery('fat') &lt;-&gt; to_tsquery('rat')</literal></entry>
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
</row>
<row> <row>
<entry> <literal>@&gt;</literal> </entry> <entry> <literal>@&gt;</literal> </entry>
<entry><type>tsquery</> contains another ?</entry> <entry><type>tsquery</> contains another ?</entry>
...@@ -9219,6 +9225,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ...@@ -9219,6 +9225,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry> <entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry>
<entry><literal>'fat' &amp; 'rat'</literal></entry> <entry><literal>'fat' &amp; 'rat'</literal></entry>
</row> </row>
<row>
<entry>
<indexterm>
<primary>phraseto_tsquery</primary>
</indexterm>
<literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
</entry>
<entry><type>tsquery</type></entry>
<entry>produce <type>tsquery</> that searches for a phrase, ignoring punctuation</entry>
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
</row>
<row> <row>
<entry> <entry>
<indexterm> <indexterm>
...@@ -9421,6 +9439,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ...@@ -9421,6 +9439,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry> <entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry>
<entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry> <entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry>
</row> </row>
<row>
<entry>
<indexterm>
<primary>tsquery_phrase</primary>
</indexterm>
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
</entry>
<entry><type>tsquery</type></entry>
<entry>implementation of the <literal>&lt;-&gt;</> (FOLLOWED BY) operator</entry>
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
<entry><literal>'fat' &lt;-&gt; 'cat'</literal></entry>
</row>
<row>
<entry>
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
</entry>
<entry><type>tsquery</type></entry>
<entry>phrase-concatenate with distance</entry>
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
<entry><literal>'fat' &lt;10&gt; 'cat'</literal></entry>
</row>
<row> <row>
<entry> <entry>
<indexterm> <indexterm>
......
...@@ -263,9 +263,10 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t ...@@ -263,9 +263,10 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
As the above example suggests, a <type>tsquery</type> is not just raw As the above example suggests, a <type>tsquery</type> is not just raw
text, any more than a <type>tsvector</type> is. A <type>tsquery</type> text, any more than a <type>tsvector</type> is. A <type>tsquery</type>
contains search terms, which must be already-normalized lexemes, and contains search terms, which must be already-normalized lexemes, and
may combine multiple terms using AND, OR, and NOT operators. may combine multiple terms using AND, OR, NOT and FOLLOWED BY operators.
(For details see <xref linkend="datatype-textsearch">.) There are (For details see <xref linkend="datatype-textsearch">.) There are
functions <function>to_tsquery</> and <function>plainto_tsquery</> functions <function>to_tsquery</>, <function>plainto_tsquery</>
and <function>phraseto_tsquery</>
that are helpful in converting user-written text into a proper that are helpful in converting user-written text into a proper
<type>tsquery</type>, for example by normalizing words appearing in <type>tsquery</type>, for example by normalizing words appearing in
the text. Similarly, <function>to_tsvector</> is used to parse and the text. Similarly, <function>to_tsvector</> is used to parse and
...@@ -293,6 +294,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat &amp; rat'); ...@@ -293,6 +294,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat &amp; rat');
already normalized, so <literal>rats</> does not match <literal>rat</>. already normalized, so <literal>rats</> does not match <literal>rat</>.
</para> </para>
<para>
Phrase search is made possible with the help of the <literal>&lt;-&gt;</>
(FOLLOWED BY) operator, which enforces lexeme order. This allows you
to discard strings not containing the desired phrase, for example:
<programlisting>
SELECT q @@ to_tsquery('fatal &lt;-&gt; error')
FROM unnest(array[to_tsvector('fatal error'),
to_tsvector('error is not fatal')]) AS q;
?column?
----------
t
f
</programlisting>
A more generic version of the FOLLOWED BY operator takes the form of
<literal>&lt;N&gt;</>, where N stands for the greatest allowed distance
between the specified lexemes. The <literal>phraseto_tsquery</>
function makes use of this behavior in order to construct a
<literal>tsquery</> capable of matching the provided phrase:
<programlisting>
SELECT phraseto_tsquery('cat ate some rats');
phraseto_tsquery
-------------------------------
( 'cat' &lt;-&gt; 'ate' ) &lt;2&gt; 'rat'
</programlisting>
</para>
<para> <para>
The <literal>@@</literal> operator also The <literal>@@</literal> operator also
supports <type>text</type> input, allowing explicit conversion of a text supports <type>text</type> input, allowing explicit conversion of a text
...@@ -709,11 +739,14 @@ UPDATE tt SET ti = ...@@ -709,11 +739,14 @@ UPDATE tt SET ti =
<para> <para>
<productname>PostgreSQL</productname> provides the <productname>PostgreSQL</productname> provides the
functions <function>to_tsquery</function> and functions <function>to_tsquery</function>,
<function>plainto_tsquery</function> for converting a query to <function>plainto_tsquery</function> and
the <type>tsquery</type> data type. <function>to_tsquery</function> <function>phraseto_tsquery</function>
offers access to more features than <function>plainto_tsquery</function>, for converting a query to the <type>tsquery</type> data type.
but is less forgiving about its input. <function>to_tsquery</function> offers access to more features
than both <function>plainto_tsquery</function> and
<function>phraseto_tsquery</function>, but is less forgiving
about its input.
</para> </para>
<indexterm> <indexterm>
...@@ -728,7 +761,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type> ...@@ -728,7 +761,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>
<function>to_tsquery</function> creates a <type>tsquery</> value from <function>to_tsquery</function> creates a <type>tsquery</> value from
<replaceable>querytext</replaceable>, which must consist of single tokens <replaceable>querytext</replaceable>, which must consist of single tokens
separated by the Boolean operators <literal>&amp;</literal> (AND), separated by the Boolean operators <literal>&amp;</literal> (AND),
<literal>|</literal> (OR) and <literal>!</literal> (NOT). These operators <literal>|</literal> (OR), <literal>!</literal> (NOT), and also the
<literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search operator. These operators
can be grouped using parentheses. In other words, the input to can be grouped using parentheses. In other words, the input to
<function>to_tsquery</function> must already follow the general rules for <function>to_tsquery</function> must already follow the general rules for
<type>tsquery</> input, as described in <xref <type>tsquery</> input, as described in <xref
...@@ -814,8 +848,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats'); ...@@ -814,8 +848,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats');
</screen> </screen>
Note that <function>plainto_tsquery</> cannot Note that <function>plainto_tsquery</> cannot
recognize Boolean operators, weight labels, or prefix-match labels recognize Boolean and phrase search operators, weight labels,
in its input: or prefix-match labels in its input:
<screen> <screen>
SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C'); SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
...@@ -827,6 +861,57 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C'); ...@@ -827,6 +861,57 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
Here, all the input punctuation was discarded as being space symbols. Here, all the input punctuation was discarded as being space symbols.
</para> </para>
<indexterm>
<primary>phraseto_tsquery</primary>
</indexterm>
<synopsis>
phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</>
</synopsis>
<para>
<function>phraseto_tsquery</> behaves much like
<function>plainto_tsquery</>, with the exception
that it utilizes the <literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search
operator instead of the <literal>&amp;</literal> (AND) Boolean operator.
This is particularly useful when searching for exact lexeme sequences,
since the phrase search operator helps to maintain lexeme order.
</para>
<para>
Example:
<screen>
SELECT phraseto_tsquery('english', 'The Fat Rats');
phraseto_tsquery
------------------
'fat' &lt;-&gt; 'rat'
</screen>
Just like <function>plainto_tsquery</>, the
<function>phraseto_tsquery</> function cannot
recognize Boolean and phrase search operators, weight labels,
or prefix-match labels in its input:
<screen>
SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
phraseto_tsquery
-----------------------------
( 'fat' &lt;-&gt; 'rat' ) &lt;-&gt; 'c'
</screen>
It is possible to specify the configuration to be used to parse the query text;
for example, we could create a new one using the hunspell dictionary
(namely 'eng_hunspell') in order to match phrases with different word forms:
<screen>
SELECT phraseto_tsquery('eng_hunspell', 'developer of the building which collapsed');
phraseto_tsquery
--------------------------------------------------------------------------------------------
( 'developer' &lt;3&gt; 'building' ) &lt;2&gt; 'collapse' | ( 'developer' &lt;3&gt; 'build' ) &lt;2&gt; 'collapse'
</screen>
</para>
</sect2> </sect2>
<sect2 id="textsearch-ranking"> <sect2 id="textsearch-ranking">
...@@ -1387,6 +1472,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank ...@@ -1387,6 +1472,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
</varlistentry> </varlistentry>
<varlistentry>
<term>
<literal><type>tsquery</> &lt;-&gt; <type>tsquery</></literal>
</term>
<listitem>
<para>
Returns the phrase-concatenation of the two given queries.
<screen>
SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
?column?
-----------------------------------
'fat' &lt;-&gt; 'cat' | 'fat' &lt;-&gt; 'rat'
</screen>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<indexterm>
<primary>tsquery_phrase</primary>
</indexterm>
<literal>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</> [, <replaceable class="PARAMETER">distance</replaceable> <type>integer</> ]) returns <type>tsquery</></literal>
</term>
<listitem>
<para>
Returns the distanced phrase-concatenation of the two given queries.
This function underlies the implementation of the <literal>&lt;-&gt;</> operator.
<screen>
SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
tsquery_phrase
------------------
'fat' &lt;10&gt; 'cat'
</screen>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<indexterm>
<primary>setweight</primary>
</indexterm>
<literal>setweight(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>) returns <type>tsquery</></literal>
</term>
<listitem>
<para>
<function>setweight</> returns a copy of the input query in which every
position has been labeled with the given <replaceable>weight</>(s), either
<literal>A</literal>, <literal>B</literal>, <literal>C</literal>,
<literal>D</literal> or their combination. These labels are retained when
queries are concatenated, allowing words from different parts of a document
to be weighted differently by ranking functions.
</para>
<para>
Note that weight labels apply to <emphasis>positions</>, not
<emphasis>lexemes</>. If the input query has been stripped of
positions then <function>setweight</> does nothing.
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term> <term>
...@@ -2428,7 +2588,7 @@ more sample word(s) : more indexed word(s) ...@@ -2428,7 +2588,7 @@ more sample word(s) : more indexed word(s)
<para> <para>
Specific stop words recognized by the subdictionary cannot be Specific stop words recognized by the subdictionary cannot be
specified; instead use <literal>?</> to mark the location where any specified; instead use <literal>&lt;-&gt;</> to mark the location where any
stop word can appear. For example, assuming that <literal>a</> and stop word can appear. For example, assuming that <literal>a</> and
<literal>the</> are stop words according to the subdictionary: <literal>the</> are stop words according to the subdictionary:
......
...@@ -18,6 +18,13 @@ ...@@ -18,6 +18,13 @@
#include "utils/builtins.h" #include "utils/builtins.h"
typedef struct MorphOpaque
{
Oid cfg_id;
int qoperator; /* query operator */
} MorphOpaque;
Datum Datum
get_current_ts_config(PG_FUNCTION_ARGS) get_current_ts_config(PG_FUNCTION_ARGS)
{ {
...@@ -262,60 +269,81 @@ to_tsvector(PG_FUNCTION_ARGS) ...@@ -262,60 +269,81 @@ to_tsvector(PG_FUNCTION_ARGS)
* to the stack. * to the stack.
* *
* All words belonging to the same variant are pushed as an ANDed list, * All words belonging to the same variant are pushed as an ANDed list,
* and different variants are ORred together. * and different variants are ORed together.
*/ */
static void static void
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
{ {
int32 count = 0; int32 count = 0;
ParsedText prs; ParsedText prs;
uint32 variant, uint32 variant,
pos, pos = 0,
cntvar = 0, cntvar = 0,
cntpos = 0, cntpos = 0,
cnt = 0; cnt = 0;
Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually MorphOpaque *data = (MorphOpaque *) DatumGetPointer(opaque);
* an Oid, not a pointer */
prs.lenwords = 4; prs.lenwords = 4;
prs.curwords = 0; prs.curwords = 0;
prs.pos = 0; prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(cfg_id, &prs, strval, lenval); parsetext(data->cfg_id, &prs, strval, lenval);
if (prs.curwords > 0) if (prs.curwords > 0)
{ {
while (count < prs.curwords) while (count < prs.curwords)
{ {
pos = prs.words[count].pos.pos; /*
* Were any stop words removed? If so, fill empty positions
* with placeholders linked by an appropriate operator.
*/
if (pos > 0 && pos + 1 < prs.words[count].pos.pos)
{
while (pos + 1 < prs.words[count].pos.pos)
{
/* put placeholders for each missing stop word */
pushStop(state);
if (cntpos)
pushOperator(state, data->qoperator, 1);
cntpos++;
pos++;
}
}
pos = prs.words[count].pos.pos; /* save current word's position */
/* Go through all variants obtained from this token */
cntvar = 0; cntvar = 0;
while (count < prs.curwords && pos == prs.words[count].pos.pos) while (count < prs.curwords && pos == prs.words[count].pos.pos)
{ {
variant = prs.words[count].nvariant; variant = prs.words[count].nvariant;
/* Push all words belonging to the same variant */
cnt = 0; cnt = 0;
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) while (count < prs.curwords &&
pos == prs.words[count].pos.pos &&
variant == prs.words[count].nvariant)
{ {
pushValue(state,
pushValue(state, prs.words[count].word, prs.words[count].len, weight, prs.words[count].word,
((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false); prs.words[count].len,
weight,
((prs.words[count].flags & TSL_PREFIX) || prefix));
pfree(prs.words[count].word); pfree(prs.words[count].word);
if (cnt) if (cnt)
pushOperator(state, OP_AND); pushOperator(state, OP_AND, 0);
cnt++; cnt++;
count++; count++;
} }
if (cntvar) if (cntvar)
pushOperator(state, OP_OR); pushOperator(state, OP_OR, 0);
cntvar++; cntvar++;
} }
if (cntpos) if (cntpos)
pushOperator(state, OP_AND); pushOperator(state, data->qoperator, 1); /* distance may be useful */
cntpos++; cntpos++;
} }
...@@ -329,44 +357,18 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, ...@@ -329,44 +357,18 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
Datum Datum
to_tsquery_byid(PG_FUNCTION_ARGS) to_tsquery_byid(PG_FUNCTION_ARGS)
{ {
Oid cfgid = PG_GETARG_OID(0); text *in = PG_GETARG_TEXT_P(1);
text *in = PG_GETARG_TEXT_P(1); TSQuery query;
TSQuery query; MorphOpaque data;
QueryItem *res;
int32 len;
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
/* clean out any stopword placeholders from the tree */
res = clean_fakeval(GETQUERY(query), &len);
if (!res)
{
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
PG_RETURN_POINTER(query);
}
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
/* data.cfg_id = PG_GETARG_OID(0);
* Removing the stopword placeholders might've resulted in fewer data.qoperator = OP_AND;
* QueryItems. If so, move the operands up accordingly.
*/
if (len != query->size)
{
char *oldoperand = GETOPERAND(query);
int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
Assert(len < query->size); query = parse_tsquery(text_to_cstring(in),
pushval_morph,
query->size = len; PointerGetDatum(&data),
memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query)); false);
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
}
pfree(res);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }
...@@ -385,55 +387,60 @@ to_tsquery(PG_FUNCTION_ARGS) ...@@ -385,55 +387,60 @@ to_tsquery(PG_FUNCTION_ARGS)
Datum Datum
plainto_tsquery_byid(PG_FUNCTION_ARGS) plainto_tsquery_byid(PG_FUNCTION_ARGS)
{ {
Oid cfgid = PG_GETARG_OID(0); text *in = PG_GETARG_TEXT_P(1);
text *in = PG_GETARG_TEXT_P(1); TSQuery query;
TSQuery query; MorphOpaque data;
QueryItem *res;
int32 len;
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true); data.cfg_id = PG_GETARG_OID(0);
data.qoperator = OP_AND;
if (query->size == 0) query = parse_tsquery(text_to_cstring(in),
PG_RETURN_TSQUERY(query); pushval_morph,
PointerGetDatum(&data),
true);
/* clean out any stopword placeholders from the tree */ PG_RETURN_POINTER(query);
res = clean_fakeval(GETQUERY(query), &len); }
if (!res)
{
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
PG_RETURN_POINTER(query);
}
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
/* Datum
* Removing the stopword placeholders might've resulted in fewer plainto_tsquery(PG_FUNCTION_ARGS)
* QueryItems. If so, move the operands up accordingly. {
*/ text *in = PG_GETARG_TEXT_P(0);
if (len != query->size) Oid cfgId;
{
char *oldoperand = GETOPERAND(query); cfgId = getTSCurrentConfig(true);
int32 lenoperand = VARSIZE(query) - (oldoperand - (char *) query); PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
ObjectIdGetDatum(cfgId),
PointerGetDatum(in)));
}
Assert(len < query->size);
query->size = len; Datum
memmove((void *) GETOPERAND(query), oldoperand, lenoperand); phraseto_tsquery_byid(PG_FUNCTION_ARGS)
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand)); {
} text *in = PG_GETARG_TEXT_P(1);
TSQuery query;
MorphOpaque data;
pfree(res); data.cfg_id = PG_GETARG_OID(0);
PG_RETURN_POINTER(query); data.qoperator = OP_PHRASE;
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
true);
PG_RETURN_TSQUERY(query);
} }
Datum Datum
plainto_tsquery(PG_FUNCTION_ARGS) phraseto_tsquery(PG_FUNCTION_ARGS)
{ {
text *in = PG_GETARG_TEXT_P(0); text *in = PG_GETARG_TEXT_P(0);
Oid cfgId; Oid cfgId;
cfgId = getTSCurrentConfig(true); cfgId = getTSCurrentConfig(true);
PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid, PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid,
ObjectIdGetDatum(cfgId), ObjectIdGetDatum(cfgId),
PointerGetDatum(in))); PointerGetDatum(in)));
} }
...@@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type) ...@@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
} }
static void static void
hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
{ {
int i; int i;
QueryItem *item = GETQUERY(query); QueryItem *item = GETQUERY(query);
...@@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) ...@@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
} }
word = &(prs->words[prs->curwords - 1]); word = &(prs->words[prs->curwords - 1]);
word->pos = LIMITPOS(pos);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == QI_VAL && if (item->type == QI_VAL &&
...@@ -492,17 +493,20 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme ...@@ -492,17 +493,20 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
{ {
ParsedLex *tmplexs; ParsedLex *tmplexs;
TSLexeme *ptr; TSLexeme *ptr;
int32 savedpos;
while (lexs) while (lexs)
{ {
if (lexs->type > 0) if (lexs->type > 0)
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type); hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
ptr = norms; ptr = norms;
savedpos = prs->vectorpos;
while (ptr && ptr->lexeme) while (ptr && ptr->lexeme)
{ {
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme)); if (ptr->flags & TSL_ADDPOS)
savedpos++;
hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
ptr++; ptr++;
} }
...@@ -516,6 +520,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme ...@@ -516,6 +520,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
ptr = norms; ptr = norms;
while (ptr->lexeme) while (ptr->lexeme)
{ {
if (ptr->flags & TSL_ADDPOS)
prs->vectorpos++;
pfree(ptr->lexeme); pfree(ptr->lexeme);
ptr++; ptr++;
} }
...@@ -575,7 +581,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu ...@@ -575,7 +581,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
do do
{ {
if ((norms = LexizeExec(&ldata, &lexs)) != NULL) if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
{
prs->vectorpos++;
addHLParsedLex(prs, query, lexs, norms); addHLParsedLex(prs, query, lexs, norms);
}
else else
addHLParsedLex(prs, query, lexs, NULL); addHLParsedLex(prs, query, lexs, NULL);
} while (norms); } while (norms);
......
...@@ -261,7 +261,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, ...@@ -261,7 +261,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
/* /*
* Traverse the tsquery in preorder, calculating selectivity as: * Traverse the tsquery in preorder, calculating selectivity as:
* *
* selec(left_oper) * selec(right_oper) in AND nodes, * selec(left_oper) * selec(right_oper) in AND & PHRASE nodes,
* *
* selec(left_oper) + selec(right_oper) - * selec(left_oper) + selec(right_oper) -
* selec(left_oper) * selec(right_oper) in OR nodes, * selec(left_oper) * selec(right_oper) in OR nodes,
...@@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
lookup, length, minfreq); lookup, length, minfreq);
break; break;
case OP_PHRASE:
case OP_AND: case OP_AND:
s1 = tsquery_opr_selec(item + 1, operand, s1 = tsquery_opr_selec(item + 1, operand,
lookup, length, minfreq); lookup, length, minfreq);
......
...@@ -2030,15 +2030,36 @@ typedef struct ...@@ -2030,15 +2030,36 @@ typedef struct
} hlCheck; } hlCheck;
static bool static bool
checkcondition_HL(void *checkval, QueryOperand *val) checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
{ {
int i; int i;
hlCheck *checkval = (hlCheck *) opaque;
for (i = 0; i < ((hlCheck *) checkval)->len; i++) for (i = 0; i < checkval->len; i++)
{ {
if (((hlCheck *) checkval)->words[i].item == val) if (checkval->words[i].item == val)
return true; {
/* don't need to find all positions */
if (!data)
return true;
if (!data->pos)
{
data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
data->allocated = true;
data->npos = 1;
data->pos[0] = checkval->words[i].pos;
}
else if (data->pos[data->npos - 1] < checkval->words[i].pos)
{
data->pos[data->npos++] = checkval->words[i].pos;
}
}
} }
if (data && data->npos > 0)
return true;
return false; return false;
} }
...@@ -2400,7 +2421,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight, ...@@ -2400,7 +2421,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
{ {
/* best already finded, so try one more cover */ /* best already found, so try one more cover */
p++; p++;
continue; continue;
} }
......
...@@ -179,14 +179,16 @@ typedef struct ...@@ -179,14 +179,16 @@ typedef struct
} GinChkVal; } GinChkVal;
static GinTernaryValue static GinTernaryValue
checkcondition_gin(void *checkval, QueryOperand *val) checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
{ {
GinChkVal *gcv = (GinChkVal *) checkval;
int j; int j;
/* if any val requiring a weight is used, set recheck flag */ /*
if (val->weight != 0) * if any val requiring a weight is used or caller
*(gcv->need_recheck) = true; * needs position information then set recheck flag
*/
if (val->weight != 0 || data != NULL)
*gcv->need_recheck = true;
/* convert item's number to corresponding entry's (operand's) number */ /* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
...@@ -195,16 +197,22 @@ checkcondition_gin(void *checkval, QueryOperand *val) ...@@ -195,16 +197,22 @@ checkcondition_gin(void *checkval, QueryOperand *val)
return gcv->check[j]; return gcv->check[j];
} }
/*
 * Boolean adapter around checkcondition_gin_internal(), matching the
 * callback signature that TS_execute expects.
 *
 * The internal routine answers with a GinTernaryValue; everything except
 * GIN_FALSE (i.e. both GIN_TRUE and GIN_MAYBE) is reported as true here.
 */
static bool
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
	GinChkVal  *gcv = (GinChkVal *) checkval;
	GinTernaryValue answer;

	answer = checkcondition_gin_internal(gcv, val, data);

	return answer != GIN_FALSE;
}
/* /*
* Evaluate tsquery boolean expression using ternary logic. * Evaluate tsquery boolean expression using ternary logic.
*
* chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
*/ */
static GinTernaryValue static GinTernaryValue
TS_execute_ternary(QueryItem *curitem, void *checkval, TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
GinTernaryValue (*chkcond) (void *checkval, QueryOperand *val))
{ {
GinTernaryValue val1, GinTernaryValue val1,
val2, val2,
...@@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval, ...@@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
check_stack_depth(); check_stack_depth();
if (curitem->type == QI_VAL) if (curitem->type == QI_VAL)
return chkcond(checkval, (QueryOperand *) curitem); return checkcondition_gin_internal(gcv,
(QueryOperand *) curitem,
NULL /* don't have any position info */);
switch (curitem->qoperator.oper) switch (curitem->qoperator.oper)
{ {
case OP_NOT: case OP_NOT:
result = TS_execute_ternary(curitem + 1, checkval, chkcond); result = TS_execute_ternary(gcv, curitem + 1);
if (result == GIN_MAYBE) if (result == GIN_MAYBE)
return result; return result;
return !result; return !result;
case OP_PHRASE:
/*
* GIN doesn't contain any information about positions,
* treat OP_PHRASE as OP_AND with recheck requirement
*/
*gcv->need_recheck = true;
case OP_AND: case OP_AND:
val1 = TS_execute_ternary(curitem + curitem->qoperator.left, val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
checkval, chkcond);
if (val1 == GIN_FALSE) if (val1 == GIN_FALSE)
return GIN_FALSE; return GIN_FALSE;
val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); val2 = TS_execute_ternary(gcv, curitem + 1);
if (val2 == GIN_FALSE) if (val2 == GIN_FALSE)
return GIN_FALSE; return GIN_FALSE;
if (val1 == GIN_TRUE && val2 == GIN_TRUE) if (val1 == GIN_TRUE && val2 == GIN_TRUE)
...@@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval, ...@@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
return GIN_MAYBE; return GIN_MAYBE;
case OP_OR: case OP_OR:
val1 = TS_execute_ternary(curitem + curitem->qoperator.left, val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
checkval, chkcond);
if (val1 == GIN_TRUE) if (val1 == GIN_TRUE)
return GIN_TRUE; return GIN_TRUE;
val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); val2 = TS_execute_ternary(gcv, curitem + 1);
if (val2 == GIN_TRUE) if (val2 == GIN_TRUE)
return GIN_TRUE; return GIN_TRUE;
if (val1 == GIN_FALSE && val2 == GIN_FALSE) if (val1 == GIN_FALSE && val2 == GIN_FALSE)
...@@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) ...@@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]); gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck; gcv.need_recheck = &recheck;
res = TS_execute_ternary(GETQUERY(query), res = TS_execute_ternary(&gcv, GETQUERY(query));
&gcv,
checkcondition_gin);
if (res == GIN_TRUE && recheck) if (res == GIN_TRUE && recheck)
res = GIN_MAYBE; res = GIN_MAYBE;
......
...@@ -298,7 +298,7 @@ typedef struct ...@@ -298,7 +298,7 @@ typedef struct
* is there value 'val' in array or not ? * is there value 'val' in array or not ?
*/ */
static bool static bool
checkcondition_arr(void *checkval, QueryOperand *val) checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
int32 *StopLow = ((CHKVAL *) checkval)->arrb; int32 *StopLow = ((CHKVAL *) checkval)->arrb;
int32 *StopHigh = ((CHKVAL *) checkval)->arre; int32 *StopHigh = ((CHKVAL *) checkval)->arre;
...@@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val) ...@@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val)
} }
static bool static bool
checkcondition_bit(void *checkval, QueryOperand *val) checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
/* /*
* we are not able to find a prefix in signature tree * we are not able to find a prefix in signature tree
......
This diff is collapsed.
This diff is collapsed.
...@@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS) ...@@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS)
} }
static QTNode * static QTNode *
join_tsqueries(TSQuery a, TSQuery b, int8 operator) join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
{ {
QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
...@@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator) ...@@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator)
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = QI_OPR; res->valnode->type = QI_OPR;
res->valnode->qoperator.oper = operator; res->valnode->qoperator.oper = operator;
if (operator == OP_PHRASE)
res->valnode->qoperator.distance = distance;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
...@@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS) ...@@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b, OP_AND); res = join_tsqueries(a, b, OP_AND, 0);
query = QTN2QT(res); query = QTN2QT(res);
...@@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS) ...@@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b, OP_OR); res = join_tsqueries(a, b, OP_OR, 0);
query = QTN2QT(res); query = QTN2QT(res);
...@@ -105,6 +107,52 @@ tsquery_or(PG_FUNCTION_ARGS) ...@@ -105,6 +107,52 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(query); PG_RETURN_POINTER(query);
} }
/*
 * tsquery_phrase_distance(a tsquery, b tsquery, distance int4)
 *
 * Joins two tsqueries with the phrase operator (OP_PHRASE), storing the
 * requested distance in the new operator node.
 *
 * If either input is an empty query, the other input is returned as is.
 * Raises ERRCODE_INVALID_PARAMETER_VALUE when distance lies outside
 * 0 .. MAXENTRYPOS; both bounds themselves are accepted.
 */
Datum
tsquery_phrase_distance(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
	QTNode	   *res;
	TSQuery		query;
	int32		distance = PG_GETARG_INT32(2);

	/*
	 * The message must agree with the check: MAXENTRYPOS itself is a legal
	 * distance, so describe the range as inclusive.
	 */
	if (distance < 0 || distance > MAXENTRYPOS)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
						MAXENTRYPOS)));

	if (a->size == 0)
	{
		/* "a" was fetched from argument 0, so compare against that slot */
		PG_FREE_IF_COPY(a, 0);
		PG_RETURN_POINTER(b);
	}
	else if (b->size == 0)
	{
		PG_FREE_IF_COPY(b, 1);
		PG_RETURN_POINTER(a);
	}

	/* range was validated above, so the narrowing cast cannot truncate */
	res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance);

	query = QTN2QT(res);

	QTNFree(res);
	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);

	PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
}
/*
 * tsquery_phrase(a tsquery, b tsquery)
 *
 * Two-argument form of the phrase join: forwards both arguments to
 * tsquery_phrase_distance() with a fixed distance of 1.
 */
Datum
tsquery_phrase(PG_FUNCTION_ARGS)
{
	/*
	 * DirectFunctionCall3() already yields a Datum, so hand it back with
	 * PG_RETURN_DATUM rather than re-wrapping it as a pointer.
	 */
	PG_RETURN_DATUM(DirectFunctionCall3(tsquery_phrase_distance,
										PG_GETARG_DATUM(0),
										PG_GETARG_DATUM(1),
										Int32GetDatum(1)));
}
Datum Datum
tsquery_not(PG_FUNCTION_ARGS) tsquery_not(PG_FUNCTION_ARGS)
{ {
......
...@@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn) ...@@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res; return res;
} }
if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
return (ao->distance > bo->distance) ? -1 : 1;
return 0; return 0;
} }
else if (an->valnode->type == QI_VAL) else if (an->valnode->type == QI_VAL)
...@@ -150,7 +154,7 @@ QTNSort(QTNode *in) ...@@ -150,7 +154,7 @@ QTNSort(QTNode *in)
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
QTNSort(in->child[i]); QTNSort(in->child[i]);
if (in->nchild > 1) if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN); qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
} }
...@@ -190,7 +194,10 @@ QTNTernary(QTNode *in) ...@@ -190,7 +194,10 @@ QTNTernary(QTNode *in)
{ {
QTNode *cc = in->child[i]; QTNode *cc = in->child[i];
if (cc->valnode->type == QI_OPR && in->valnode->qoperator.oper == cc->valnode->qoperator.oper) /* OP_Phrase isn't associative */
if (cc->valnode->type == QI_OPR &&
in->valnode->qoperator.oper == cc->valnode->qoperator.oper &&
in->valnode->qoperator.oper != OP_PHRASE)
{ {
int oldnchild = in->nchild; int oldnchild = in->nchild;
......
This diff is collapsed.
...@@ -28,7 +28,7 @@ typedef struct ...@@ -28,7 +28,7 @@ typedef struct
/* Compare two WordEntryPos values for qsort */ /* Compare two WordEntryPos values for qsort */
static int int
comparePos(const void *a, const void *b) comparePos(const void *a, const void *b)
{ {
int apos = WEP_GETPOS(*(const WordEntryPos *) a); int apos = WEP_GETPOS(*(const WordEntryPos *) a);
......
This diff is collapsed.
...@@ -89,7 +89,15 @@ do { \ ...@@ -89,7 +89,15 @@ do { \
} \ } \
} while (0) } while (0)
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) /* phrase operator begins with '<' */
#define ISOPERATOR(x) \
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
/* Fills gettoken_tsvector's output parameters, and returns true */ /* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \ #define RETURN_TOKEN \
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201604062 #define CATALOG_VERSION_NO 201604071
#endif #endif
...@@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque ...@@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque
DESCR("AND-concatenate"); DESCR("AND-concatenate");
DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - )); DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - ));
DESCR("OR-concatenate"); DESCR("OR-concatenate");
/* The <-> operator is implemented by tsquery_phrase, but that name is overloaded (two- and three-argument forms), so reference the two-argument function by its OID */
DATA(insert OID = 5005 ( "<->" PGNSP PGUID b f f 3615 3615 3615 0 0 5003 - - ));
DESCR("phrase-concatenate");
DATA(insert OID = 3682 ( "!!" PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - )); DATA(insert OID = 3682 ( "!!" PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - ));
DESCR("NOT tsquery"); DESCR("NOT tsquery");
DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel )); DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel ));
......
...@@ -4607,6 +4607,9 @@ DESCR("less-equal-greater"); ...@@ -4607,6 +4607,9 @@ DESCR("less-equal-greater");
DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ )); DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ ));
DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ )); DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ ));
DATA(insert OID = 5003 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ ));
DATA(insert OID = 5004 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ ));
DESCR("phrase-concatenate with distance");
DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ )); DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ ));
DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ )); DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ ));
...@@ -4726,12 +4729,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 ...@@ -4726,12 +4729,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2
DESCR("make tsquery"); DESCR("make tsquery");
DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ )); DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector"); DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ )); DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
DESCR("make tsquery"); DESCR("make tsquery");
DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ )); DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column"); DESCR("trigger for automatic update of tsvector column");
......
...@@ -34,16 +34,17 @@ typedef struct ...@@ -34,16 +34,17 @@ typedef struct
*/ */
typedef struct typedef struct
{ {
uint32 selected:1, uint32 selected: 1,
in:1, in: 1,
replace:1, replace: 1,
repeated:1, repeated: 1,
skip:1, skip: 1,
unused:3, unused: 3,
type:8, type: 8,
len:16; len: 16;
char *word; WordEntryPos pos;
QueryOperand *item; char *word;
QueryOperand *item;
} HeadlineWordEntry; } HeadlineWordEntry;
typedef struct typedef struct
...@@ -51,6 +52,7 @@ typedef struct ...@@ -51,6 +52,7 @@ typedef struct
HeadlineWordEntry *words; HeadlineWordEntry *words;
int32 lenwords; int32 lenwords;
int32 curwords; int32 curwords;
int32 vectorpos; /* positions a-la tsvector */
char *startsel; char *startsel;
char *stopsel; char *stopsel;
char *fragdelim; char *fragdelim;
......
...@@ -49,6 +49,8 @@ typedef struct ...@@ -49,6 +49,8 @@ typedef struct
#define MAXSTRLEN ( (1<<11) - 1) #define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1) #define MAXSTRPOS ( (1<<20) - 1)
extern int comparePos(const void *a, const void *b);
/* /*
* Equivalent to * Equivalent to
* typedef struct { * typedef struct {
...@@ -213,15 +215,33 @@ typedef struct ...@@ -213,15 +215,33 @@ typedef struct
} QueryOperand; } QueryOperand;
/* Legal values for QueryOperator.operator */ /*
#define OP_NOT 1 * Legal values for QueryOperator.operator.
#define OP_AND 2 * They should be ordered by priority! We assume that phrase
#define OP_OR 3 * has highest priority, but this agreement is only
* for query transformation! That is needed to simplify
* the query transformation algorithm.
*/
#define OP_OR 1
#define OP_AND 2
#define OP_NOT 3
#define OP_PHRASE 4
#define OP_NOT_PHRASE 5 /*
* OP_PHRASE negation operations must have greater
* priority in order to force infix() to surround
* the whole OP_PHRASE expression with parentheses.
*/
#define TOP_PRIORITY 6 /* highest priority for val nodes */
#define OP_PRIORITY(x) (x)
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
typedef struct typedef struct
{ {
QueryItemType type; QueryItemType type;
int8 oper; /* see above */ int8 oper; /* see above */
int16 distance; /* distance between args for OP_PHRASE */
uint32 left; /* pointer to left operand. Right operand is uint32 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed * item + 1, left operand is placed
* item+item->left */ * item+item->left */
...@@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS); ...@@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
extern Datum tsquery_and(PG_FUNCTION_ARGS); extern Datum tsquery_and(PG_FUNCTION_ARGS);
extern Datum tsquery_or(PG_FUNCTION_ARGS); extern Datum tsquery_or(PG_FUNCTION_ARGS);
extern Datum tsquery_phrase(PG_FUNCTION_ARGS);
extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS);
extern Datum tsquery_not(PG_FUNCTION_ARGS); extern Datum tsquery_not(PG_FUNCTION_ARGS);
extern Datum tsquery_rewrite(PG_FUNCTION_ARGS); extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
......
...@@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf, ...@@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf,
extern void pushValue(TSQueryParserState state, extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int16 weight, bool prefix); char *strval, int lenval, int16 weight, bool prefix);
extern void pushStop(TSQueryParserState state); extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 oper); extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
/* /*
* parse plain text and lexize words * parse plain text and lexize words
...@@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs); ...@@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs);
/* /*
* Common check function for tsvector @@ tsquery * Common check function for tsvector @@ tsquery
*/ */
/*
 * Position information passed to the check callback used by TS_execute.
 *
 * npos      - number of entries currently stored in pos
 * allocated - set to true by a callback that palloc'd pos itself
 *             (presumably tells the caller the array may be freed --
 *             TODO confirm against TS_execute)
 * pos       - array of word positions; NULL until a callback fills it
 */
typedef struct ExecPhraseData
{
int npos;
bool allocated;
WordEntryPos *pos;
} ExecPhraseData;
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryOperand *val)); bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
extern bool tsquery_requires_match(QueryItem *curitem); extern bool tsquery_requires_match(QueryItem *curitem);
/* /*
...@@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS); ...@@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum to_tsquery(PG_FUNCTION_ARGS); extern Datum to_tsquery(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS); extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery(PG_FUNCTION_ARGS); extern Datum plainto_tsquery(PG_FUNCTION_ARGS);
extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum phraseto_tsquery(PG_FUNCTION_ARGS);
/* /*
* GiST support function * GiST support function
...@@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS); ...@@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
* TSQuery Utilities * TSQuery Utilities
*/ */
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len); extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
typedef struct QTNode typedef struct QTNode
{ {
......
...@@ -434,9 +434,9 @@ SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footbal ...@@ -434,9 +434,9 @@ SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footbal
(1 row) (1 row)
SELECT to_tsquery('ispell_tst', 'footballklubber'); SELECT to_tsquery('ispell_tst', 'footballklubber');
to_tsquery to_tsquery
------------------------------------------------------------------------------ --------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row) (1 row)
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
...@@ -458,9 +458,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb ...@@ -458,9 +458,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber'); SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery to_tsquery
------------------------------------------------------------------------------ --------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
...@@ -469,6 +469,18 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); ...@@ -469,6 +469,18 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
to_tsquery
-----------------------------------------------------------------------------
( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
(1 row)
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
phraseto_tsquery
-----------------------------------------------------------------------
( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
(1 row)
-- Test ispell dictionary with hunspell affix with FLAG long in configuration -- Test ispell dictionary with hunspell affix with FLAG long in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell WITH hunspell_long; REPLACE hunspell WITH hunspell_long;
...@@ -479,9 +491,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb ...@@ -479,9 +491,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber'); SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery to_tsquery
------------------------------------------------------------------------------ --------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
...@@ -500,9 +512,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb ...@@ -500,9 +512,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber'); SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery to_tsquery
------------------------------------------------------------------------------ --------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row) (1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
......
This diff is collapsed.
...@@ -277,15 +277,15 @@ SELECT '(!1|2)&3'::tsquery; ...@@ -277,15 +277,15 @@ SELECT '(!1|2)&3'::tsquery;
(1 row) (1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery; SELECT '1|(2|(4|(5|6)))'::tsquery;
tsquery tsquery
----------------------------------------- -----------------------------
'1' | ( '2' | ( '4' | ( '5' | '6' ) ) ) '1' | '2' | '4' | '5' | '6'
(1 row) (1 row)
SELECT '1|2|4|5|6'::tsquery; SELECT '1|2|4|5|6'::tsquery;
tsquery tsquery
----------------------------------------- -----------------------------
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6' '1' | '2' | '4' | '5' | '6'
(1 row) (1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery; SELECT '1&(2&(4&(5&6)))'::tsquery;
...@@ -325,11 +325,139 @@ SELECT $$'\\as'$$::tsquery; ...@@ -325,11 +325,139 @@ SELECT $$'\\as'$$::tsquery;
(1 row) (1 row)
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
tsquery
--------------------------------------
'a':* & 'nbb':*AC | 'doo':*A | 'goo'
(1 row)
-- phrase transformation
SELECT 'a <-> (b|c)'::tsquery;
tsquery
---------------------------
'a' <-> 'b' | 'a' <-> 'c'
(1 row)
SELECT '(a|b) <-> c'::tsquery;
tsquery
---------------------------
'a' <-> 'c' | 'b' <-> 'c'
(1 row)
SELECT '(a|b) <-> (d|c)'::tsquery;
tsquery
-------------------------------------------------------
'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c'
(1 row)
SELECT 'a <-> (b&c)'::tsquery;
tsquery
-----------------------------------
( 'a' <-> 'b' ) & ( 'a' <-> 'c' )
(1 row)
SELECT '(a&b) <-> c'::tsquery;
tsquery
-----------------------------------
( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
(1 row)
SELECT '(a&b) <-> (d&c)'::tsquery;
tsquery
-----------------------------------------------------------------------
( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
(1 row)
SELECT 'a <-> !b'::tsquery;
tsquery
------------------------
'a' & !( 'a' <-> 'b' )
(1 row)
SELECT '!a <-> b'::tsquery;
tsquery
------------------------
!( 'a' <-> 'b' ) & 'b'
(1 row)
SELECT '!a <-> !b'::tsquery;
tsquery
------------------------------------
!'a' & !( !( 'a' <-> 'b' ) & 'b' )
(1 row)
SELECT 'a <-> !(b&c)'::tsquery;
tsquery
----------------------------------------------
'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) )
(1 row)
SELECT 'a <-> !(b|c)'::tsquery;
tsquery
--------------------------------------
'a' & !( 'a' <-> 'b' | 'a' <-> 'c' )
(1 row)
SELECT '!(a&b) <-> c'::tsquery;
tsquery
----------------------------------------------
!( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c'
(1 row)
SELECT '!(a|b) <-> c'::tsquery;
tsquery
--------------------------------------
!( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c'
(1 row)
SELECT '(!a|b) <-> c'::tsquery;
tsquery
--------------------------------------
!( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c'
(1 row)
SELECT '(!a&b) <-> c'::tsquery;
tsquery tsquery
------------------------------------------ ------------------------------------------
( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo' !( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' )
(1 row)
SELECT 'c <-> (!a|b)'::tsquery;
tsquery
--------------------------------------
'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b'
(1 row)
SELECT 'c <-> (!a&b)'::tsquery;
tsquery
------------------------------------------
'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' )
(1 row)
SELECT '(a|b) <-> !c'::tsquery;
tsquery
------------------------------------------------
( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' )
(1 row)
SELECT '(a&b) <-> !c'::tsquery;
tsquery
----------------------------------------------------
'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) )
(1 row)
SELECT '!c <-> (a|b)'::tsquery;
tsquery
-------------------------------------------------
!( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b'
(1 row)
SELECT '!c <-> (a&b)'::tsquery;
tsquery
-------------------------------------------------
!( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b'
(1 row) (1 row)
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' < 'b & c'::tsquery as "true";
true true
------ ------
...@@ -342,10 +470,10 @@ SELECT 'a' > 'b & c'::tsquery as "false"; ...@@ -342,10 +470,10 @@ SELECT 'a' > 'b & c'::tsquery as "false";
f f
(1 row) (1 row)
SELECT 'a | f' < 'b & c'::tsquery as "true"; SELECT 'a | f' < 'b & c'::tsquery as "false";
true false
------ -------
t f
(1 row) (1 row)
SELECT 'a | ff' < 'b & c'::tsquery as "false"; SELECT 'a | ff' < 'b & c'::tsquery as "false";
...@@ -360,6 +488,7 @@ SELECT 'a | f | g' < 'b & c'::tsquery as "false"; ...@@ -360,6 +488,7 @@ SELECT 'a | f | g' < 'b & c'::tsquery as "false";
f f
(1 row) (1 row)
--concatenation
SELECT numnode( 'new'::tsquery ); SELECT numnode( 'new'::tsquery );
numnode numnode
--------- ---------
...@@ -402,6 +531,36 @@ SELECT 'foo & bar'::tsquery && 'asd | fg'; ...@@ -402,6 +531,36 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
'foo' & 'bar' & ( 'asd' | 'fg' ) 'foo' & 'bar' & ( 'asd' | 'fg' )
(1 row) (1 row)
SELECT 'a' <-> 'b & d'::tsquery;
?column?
-----------------------------------
( 'a' <-> 'b' ) & ( 'a' <-> 'd' )
(1 row)
SELECT 'a & g' <-> 'b & d'::tsquery;
?column?
-----------------------------------------------------------------------
( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
(1 row)
SELECT 'a & g' <-> 'b | d'::tsquery;
?column?
-----------------------------------------------------------------------
( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
(1 row)
SELECT 'a & g' <-> 'b <-> d'::tsquery;
?column?
-----------------------------------------------------------
( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) )
(1 row)
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
tsquery_phrase
-------------------------------------------------------------
( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' )
(1 row)
-- tsvector-tsquery operations -- tsvector-tsquery operations
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true"; SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true";
true true
...@@ -499,6 +658,80 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; ...@@ -499,6 +658,80 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
t t
(1 row) (1 row)
--phrase search
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank ts_rank
----------- -----------
...@@ -613,6 +846,120 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s'); ...@@ -613,6 +846,120 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
0.1 0.1
(1 row) (1 row)
SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.181818
(1 row)
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.133333
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
ts_rank_cd
------------
0.0909091
(1 row)
SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
ts_rank_cd
------------
0.0909091
(1 row)
SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
ts_rank_cd
------------
0.1
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
ts_rank_cd
------------
0.0714286
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
ts_rank_cd
------------
0
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
true
------
t
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
true
------
t
(1 row)
-- tsvector editing operations -- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip strip
......
...@@ -142,6 +142,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb ...@@ -142,6 +142,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
SELECT to_tsquery('hunspell_tst', 'footballklubber'); SELECT to_tsquery('hunspell_tst', 'footballklubber');
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
-- Test ispell dictionary with hunspell affix with FLAG long in configuration -- Test ispell dictionary with hunspell affix with FLAG long in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell WITH hunspell_long; REPLACE hunspell WITH hunspell_long;
......
...@@ -129,6 +129,52 @@ SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd ...@@ -129,6 +129,52 @@ SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd
SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg'); SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg'; SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
-- Check stop word deletion, a and s are stop-words
SELECT to_tsquery('english', '(1 <-> 2) <-> a');
SELECT to_tsquery('english', '(1 <-> a) <-> 2');
SELECT to_tsquery('english', '(a <-> 1) <-> 2');
SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
SELECT to_tsquery('english', '1 <-> (a <-> 2)');
SELECT to_tsquery('english', '1 <-> (2 <-> a)');
SELECT to_tsquery('english', '(1 <-> 2) <3> a');
SELECT to_tsquery('english', '(1 <-> a) <3> 2');
SELECT to_tsquery('english', '(a <-> 1) <3> 2');
SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
SELECT to_tsquery('english', '1 <3> (a <-> 2)');
SELECT to_tsquery('english', '1 <3> (2 <-> a)');
SELECT to_tsquery('english', '(1 <3> 2) <-> a');
SELECT to_tsquery('english', '(1 <3> a) <-> 2');
SELECT to_tsquery('english', '(a <3> 1) <-> 2');
SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
SELECT to_tsquery('english', '1 <-> (a <3> 2)');
SELECT to_tsquery('english', '1 <-> (2 <3> a)');
SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
SELECT to_tsquery('foo <-> (a <-> (the <-> bar))');
SELECT to_tsquery('((foo <-> a) <-> the) <-> bar');
SELECT to_tsquery('foo <-> a <-> the <-> bar');
SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways');
SELECT ts_rank_cd(to_tsvector('english', ' SELECT ts_rank_cd(to_tsvector('english', '
Day after day, day after day, Day after day, day after day,
We stuck, nor breath nor motion, We stuck, nor breath nor motion,
...@@ -165,6 +211,18 @@ Water, water, every where, ...@@ -165,6 +211,18 @@ Water, water, every where,
S. T. Coleridge (1772-1834) S. T. Coleridge (1772-1834)
'), to_tsquery('english', 'ocean')); '), to_tsquery('english', 'ocean'));
SELECT ts_rank_cd(to_tsvector('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
'), to_tsquery('english', 'painted <-> Ship'));
SELECT ts_rank_cd(strip(to_tsvector('both stripped')), SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
to_tsquery('both & stripped')); to_tsquery('both & stripped'));
...@@ -208,6 +266,30 @@ Water, water, every where, ...@@ -208,6 +266,30 @@ Water, water, every where,
S. T. Coleridge (1772-1834) S. T. Coleridge (1772-1834)
', to_tsquery('english', 'ocean')); ', to_tsquery('english', 'ocean'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', phraseto_tsquery('english', 'painted Ocean'));
SELECT ts_headline('english', '
Day after day, day after day,
We stuck, nor breath nor motion,
As idle as a painted Ship
Upon a painted Ocean.
Water, water, every where
And all the boards did shrink;
Water, water, every where,
Nor any drop to drink.
S. T. Coleridge (1772-1834)
', phraseto_tsquery('english', 'idle as a painted Ship'));
SELECT ts_headline('english', ' SELECT ts_headline('english', '
<html> <html>
<!-- some comment --> <!-- some comment -->
...@@ -222,6 +304,10 @@ ff-bg ...@@ -222,6 +304,10 @@ ff-bg
</html>', </html>',
to_tsquery('english', 'sea&foo'), 'HighlightAll=true'); to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
--Check if headline fragments work --Check if headline fragments work
SELECT ts_headline('english', ' SELECT ts_headline('english', '
Day after day, day after day, Day after day, day after day,
...@@ -283,6 +369,8 @@ CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT); ...@@ -283,6 +369,8 @@ CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
Moscow moskva | moscow Moscow moskva | moscow
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg' 'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
'foo bar qq' foo & (bar | qq) & city 'foo bar qq' foo & (bar | qq) & city
1 & (2 <-> 3) 2 <-> 4
5 <-> 6 5 <-> 7
\. \.
\set ECHO all \set ECHO all
...@@ -320,6 +408,11 @@ SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery'); ...@@ -320,6 +408,11 @@ SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'); SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
SELECT keyword FROM test_tsquery WHERE keyword @> 'new'; SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow'; SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
...@@ -386,3 +479,11 @@ select * from pendtest where 'ipsa:*'::tsquery @@ ts; ...@@ -386,3 +479,11 @@ select * from pendtest where 'ipsa:*'::tsquery @@ ts;
select * from pendtest where 'ips:*'::tsquery @@ ts; select * from pendtest where 'ips:*'::tsquery @@ ts;
select * from pendtest where 'ipt:*'::tsquery @@ ts; select * from pendtest where 'ipt:*'::tsquery @@ ts;
select * from pendtest where 'ipi:*'::tsquery @@ ts; select * from pendtest where 'ipi:*'::tsquery @@ ts;
--check OP_PHRASE on index
create temp table phrase_index_test(fts tsvector);
insert into phrase_index_test values('A fat cat has just eaten a rat.');
create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat');
set enable_seqscan = on;
...@@ -58,12 +58,42 @@ SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery; ...@@ -58,12 +58,42 @@ SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
SELECT $$'\\as'$$::tsquery; SELECT $$'\\as'$$::tsquery;
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
-- phrase transformation
SELECT 'a <-> (b|c)'::tsquery;
SELECT '(a|b) <-> c'::tsquery;
SELECT '(a|b) <-> (d|c)'::tsquery;
SELECT 'a <-> (b&c)'::tsquery;
SELECT '(a&b) <-> c'::tsquery;
SELECT '(a&b) <-> (d&c)'::tsquery;
SELECT 'a <-> !b'::tsquery;
SELECT '!a <-> b'::tsquery;
SELECT '!a <-> !b'::tsquery;
SELECT 'a <-> !(b&c)'::tsquery;
SELECT 'a <-> !(b|c)'::tsquery;
SELECT '!(a&b) <-> c'::tsquery;
SELECT '!(a|b) <-> c'::tsquery;
SELECT '(!a|b) <-> c'::tsquery;
SELECT '(!a&b) <-> c'::tsquery;
SELECT 'c <-> (!a|b)'::tsquery;
SELECT 'c <-> (!a&b)'::tsquery;
SELECT '(a|b) <-> !c'::tsquery;
SELECT '(a&b) <-> !c'::tsquery;
SELECT '!c <-> (a|b)'::tsquery;
SELECT '!c <-> (a&b)'::tsquery;
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false"; SELECT 'a' > 'b & c'::tsquery as "false";
SELECT 'a | f' < 'b & c'::tsquery as "true"; SELECT 'a | f' < 'b & c'::tsquery as "false";
SELECT 'a | ff' < 'b & c'::tsquery as "false"; SELECT 'a | ff' < 'b & c'::tsquery as "false";
SELECT 'a | f | g' < 'b & c'::tsquery as "false"; SELECT 'a | f | g' < 'b & c'::tsquery as "false";
--concatenation
SELECT numnode( 'new'::tsquery ); SELECT numnode( 'new'::tsquery );
SELECT numnode( 'new & york'::tsquery ); SELECT numnode( 'new & york'::tsquery );
SELECT numnode( 'new & york | qwery'::tsquery ); SELECT numnode( 'new & york | qwery'::tsquery );
...@@ -72,6 +102,11 @@ SELECT 'foo & bar'::tsquery && 'asd'; ...@@ -72,6 +102,11 @@ SELECT 'foo & bar'::tsquery && 'asd';
SELECT 'foo & bar'::tsquery || 'asd & fg'; SELECT 'foo & bar'::tsquery || 'asd & fg';
SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery; SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery;
SELECT 'foo & bar'::tsquery && 'asd | fg'; SELECT 'foo & bar'::tsquery && 'asd | fg';
SELECT 'a' <-> 'b & d'::tsquery;
SELECT 'a & g' <-> 'b & d'::tsquery;
SELECT 'a & g' <-> 'b | d'::tsquery;
SELECT 'a & g' <-> 'b <-> d'::tsquery;
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
-- tsvector-tsquery operations -- tsvector-tsquery operations
...@@ -93,6 +128,23 @@ SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true"; ...@@ -93,6 +128,23 @@ SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
--phrase search
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*'); SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
...@@ -114,6 +166,27 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s'); ...@@ -114,6 +166,27 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
-- tsvector editing operations -- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment