Commit 40c1d7c1 authored by Tom Lane's avatar Tom Lane

Text search doc updates --- first cut at

syncing the existing docs with the final syntax decisions.
parent b77c6c73
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.140 2007/08/21 15:13:16 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.141 2007/08/22 04:45:20 tgl Exp $ -->
<chapter Id="runtime-config"> <chapter Id="runtime-config">
<title>Server Configuration</title> <title>Server Configuration</title>
...@@ -4106,6 +4106,26 @@ SET XML OPTION { DOCUMENT | CONTENT }; ...@@ -4106,6 +4106,26 @@ SET XML OPTION { DOCUMENT | CONTENT };
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry id="guc-default-text-search-config" xreflabel="default_text_search_config">
<term><varname>default_text_search_config</varname> (<type>string</type>)</term>
<indexterm>
<primary><varname>default_text_search_config</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Selects the text search configuration that is used by those variants
of the text search functions that do not have an explicit argument
specifying the configuration.
See <xref linkend="textsearch"> for further information.
The built-in default is <literal>pg_catalog.simple</>, but
<application>initdb</application> will initialize the
configuration file with a setting that corresponds to the
chosen <varname>lc_ctype</varname> locale, if a configuration
matching that locale can be identified.
</para>
</listitem>
</varlistentry>
</variablelist> </variablelist>
</sect2> </sect2>
......
<!-- <!--
$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.193 2007/07/10 00:21:31 tgl Exp $ $PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.194 2007/08/22 04:45:20 tgl Exp $
PostgreSQL documentation PostgreSQL documentation
--> -->
...@@ -997,6 +997,66 @@ testdb=&gt; ...@@ -997,6 +997,66 @@ testdb=&gt;
</varlistentry> </varlistentry>
<varlistentry>
<term><literal>\dF [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<term><literal>\dF+ [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<listitem>
<para>
Lists available text search configurations.
If <replaceable class="parameter">pattern</replaceable> is specified,
only configurations whose names match the pattern are shown.
If the form <literal>\dF+</literal> is used, a full description of
each configuration is shown, including the underlying text search
parser and the dictionary list for each parser token type.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>\dFd [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<term><literal>\dFd+ [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<listitem>
<para>
Lists available text search dictionaries.
If <replaceable class="parameter">pattern</replaceable> is specified,
only dictionaries whose names match the pattern are shown.
If the form <literal>\dFd+</literal> is used, additional information
is shown about each selected dictionary, including the underlying
text search template and the option values.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>\dFp [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<term><literal>\dFp+ [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<listitem>
<para>
Lists available text search parsers.
If <replaceable class="parameter">pattern</replaceable> is specified,
only parsers whose names match the pattern are shown.
If the form <literal>\dFp+</literal> is used, a full description of
each parser is shown, including the underlying functions and the
list of recognized token types.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>\dFt [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<term><literal>\dFt+ [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<listitem>
<para>
Lists available text search templates.
If <replaceable class="parameter">pattern</replaceable> is specified,
only templates whose names match the pattern are shown.
If the form <literal>\dFt+</literal> is used, additional information
is shown about each template, including the underlying function names.
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><literal>\dg [ <replaceable class="parameter">pattern</replaceable> ]</literal></term> <term><literal>\dg [ <replaceable class="parameter">pattern</replaceable> ]</literal></term>
<listitem> <listitem>
......
...@@ -6,11 +6,11 @@ ...@@ -6,11 +6,11 @@
<title>Introduction</title> <title>Introduction</title>
<para> <para>
Full Text Searching (<firstterm>text search</firstterm>) allows the Full Text Searching (or just <firstterm>text search</firstterm>) allows
searching of documents that satisfy a <varname>query</varname>, and identifying documents that satisfy a <firstterm>query</firstterm>, and
optionally returns them in some order. The most common search is to find optionally sorting them by relevance to the query. The most common search
all documents containing <varname>query terms</varname> and return them is to find all documents containing given <firstterm>query terms</firstterm>
in order of their <varname>similarity</varname> to the and return them in order of their <firstterm>similarity</firstterm> to the
<varname>query</varname>. Notions of <varname>query</varname> and <varname>query</varname>. Notions of <varname>query</varname> and
<varname>similarity</varname> are very flexible and depend on the specific <varname>similarity</varname> are very flexible and depend on the specific
application. The simplest search considers <varname>query</varname> as a application. The simplest search considers <varname>query</varname> as a
...@@ -250,9 +250,9 @@ SELECT 'fat:1 rat:2'::tsvector || 'fat:1 cat:2'::tsvector; ...@@ -250,9 +250,9 @@ SELECT 'fat:1 rat:2'::tsvector || 'fat:1 cat:2'::tsvector;
<listitem> <listitem>
<para> <para>
<type>Tsquery</type> is a data type for textual queries which supports <type>tsquery</type> is a data type for textual queries which supports
the boolean operators <literal>&amp;</literal> (AND), <literal>|</literal> (OR), the boolean operators <literal>&amp;</literal> (AND), <literal>|</literal> (OR),
and parentheses. A <type>Tsquery</type> consists of lexemes and parentheses. A <type>tsquery</type> consists of lexemes
(optionally labeled by letters) with boolean operators in between: (optionally labeled by letters) with boolean operators in between:
<programlisting> <programlisting>
...@@ -273,7 +273,7 @@ development of different search engines using the same full text index. ...@@ -273,7 +273,7 @@ development of different search engines using the same full text index.
<type>tsqueries</type> can be concatenated using <literal>&amp;&amp;</literal> (AND) <type>tsqueries</type> can be concatenated using <literal>&amp;&amp;</literal> (AND)
and <literal>||</literal> (OR) operators: and <literal>||</literal> (OR) operators:
<programlisting> <programlisting>
SELECT 'a &amp; b'::tsquery &amp;&amp; 'c|d'::tsquery; SELECT 'a &amp; b'::tsquery &amp;&amp; 'c | d'::tsquery;
?column? ?column?
--------------------------- ---------------------------
'a' &amp; 'b' &amp; ( 'c' | 'd' ) 'a' &amp; 'b' &amp; ( 'c' | 'd' )
...@@ -294,22 +294,24 @@ SELECT 'a &amp; b'::tsquery || 'c|d'::tsquery; ...@@ -294,22 +294,24 @@ SELECT 'a &amp; b'::tsquery || 'c|d'::tsquery;
<title>Performing Searches</title> <title>Performing Searches</title>
<para> <para>
Full text searching in <productname>PostgreSQL</productname> provides the Full text searching in <productname>PostgreSQL</productname> is based on
operator <type>@@</type> for two data types: <type>tsvector</type> the operator <literal>@@</literal>, which tests whether a <type>tsvector</type>
(document) and <type>tsquery</type> (query). Also, this operator (document) matches a <type>tsquery</type> (query). Also, this operator
supports <type>TEXT</type>, <type>VARCHAR</type>, and <type>CHAR</type> supports <type>text</type> input, allowing explicit conversion of a text
data types so simple full text searches can be done, but without ranking string to <type>tsvector</type> to be skipped. The variants available
support: are:
<programlisting> <programlisting>
tsvector @@ tsquery tsvector @@ tsquery
tsquery @@ tsvector tsquery @@ tsvector
TEXT | VARCHAR | CHAR @@ TEXT | tsquery text @@ tsquery
text @@ text
</programlisting> </programlisting>
</para> </para>
<para> <para>
The full text operator <type>@@</type> returns <literal>true</literal> if The match operator <literal>@@</literal> returns <literal>true</literal> if
<type>tsvector</type> contains <type>tsquery</type>: the <type>tsvector</type> matches the <type>tsquery</type>. It doesn't
matter which data type is written first:
<programlisting> <programlisting>
SELECT 'cat &amp; rat'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector; SELECT 'cat &amp; rat'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector;
?column? ?column?
...@@ -320,12 +322,18 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t ...@@ -320,12 +322,18 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
---------- ----------
f f
</programlisting> </programlisting>
</para> </para>
<para>
The form <type>text</type> <literal>@@</literal> <type>tsquery</type>
is equivalent to <literal>to_tsvector(x) @@ y</literal>.
The form <type>text</type> <literal>@@</literal> <type>text</type>
is equivalent to <literal>to_tsvector(x) @@ plainto_tsquery(y)</literal>.
Note that the results of these forms will depend on the setting of <xref
linkend="guc-default-text-search-config">.
</para>
</sect2> </sect2>
</sect1> </sect1>
<sect1 id="textsearch-tables"> <sect1 id="textsearch-tables">
...@@ -358,11 +366,11 @@ or <literal>body</>: ...@@ -358,11 +366,11 @@ or <literal>body</>:
<programlisting> <programlisting>
SELECT title SELECT title
FROM pgweb FROM pgweb
WHERE to_tsvector('english', textcat(title, body)) @@ to_tsquery('create &amp; table') WHERE to_tsvector('english', title || body) @@ to_tsquery('create &amp; table')
ORDER BY dlm DESC LIMIT 10; ORDER BY dlm DESC LIMIT 10;
</programlisting> </programlisting>
<literal>dlm</> is the last-modified date in seconds since 1970 so we <literal>dlm</> is the last-modified date so we
used <command>ORDER BY dlm DESC LIMIT 10</> to get the most recent used <command>ORDER BY dlm DESC LIMIT 10</> to get the ten most recent
matches. For clarity we omitted the <function>coalesce</function> function matches. For clarity we omitted the <function>coalesce</function> function
which prevents the unwanted effect of <literal>NULL</literal> which prevents the unwanted effect of <literal>NULL</literal>
concatenation. concatenation.
...@@ -382,13 +390,13 @@ CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', body)); ...@@ -382,13 +390,13 @@ CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', body));
Notice that the 2-argument version of <function>to_tsvector</function> is Notice that the 2-argument version of <function>to_tsvector</function> is
used. Only text search functions which specify a configuration name can used. Only text search functions which specify a configuration name can
be used in expression indexes (<xref linkend="indexes-expressional">). be used in expression indexes (<xref linkend="indexes-expressional">).
Casting to a text search data type (<literal>::</>) is also unsupported. This is because the index contents must be unaffected by
This is because the index contents should be unaffected by <xref linkend="guc-default-text-search-config">.
<varname>default_text_search_config</>. If they were affected, the index If they were affected, the index
contents might be inconsistent because they could contain contents might be inconsistent because different entries could contain
<type>tsvector</>s that were created with different default text search <type>tsvector</>s that were created with different text search
configurations. Recovering a table from a <application>pg_dump</> would configurations, and there would be no way to guess which was which.
also not recreate index <type>tsvector</>s properly. It would be impossible to dump and restore such an index correctly.
</para> </para>
<para> <para>
...@@ -406,9 +414,9 @@ only with the same configuration used to create the index rows. ...@@ -406,9 +414,9 @@ only with the same configuration used to create the index rows.
It is possible to set up more complex expression indexes where the It is possible to set up more complex expression indexes where the
configuration name is specified by another column, e.g.: configuration name is specified by another column, e.g.:
<programlisting> <programlisting>
CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector(conf_name, body)); CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector(config_name, body));
</programlisting> </programlisting>
where <literal>conf_name</> is a column in the <literal>pgweb</> where <literal>config_name</> is a column in the <literal>pgweb</>
table. This allows mixed configurations in the same index while table. This allows mixed configurations in the same index while
recording which configuration was used for each index row. recording which configuration was used for each index row.
</para> </para>
...@@ -416,7 +424,7 @@ recording which configuration was used for each index row. ...@@ -416,7 +424,7 @@ recording which configuration was used for each index row.
<para> <para>
Indexes can even concatenate columns: Indexes can even concatenate columns:
<programlisting> <programlisting>
CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', textcat(title, body))); CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', title || body));
</programlisting> </programlisting>
</para> </para>
...@@ -438,7 +446,7 @@ CREATE INDEX textsearch_idx ON pgweb USING gin(textsearch_index); ...@@ -438,7 +446,7 @@ CREATE INDEX textsearch_idx ON pgweb USING gin(textsearch_index);
</programlisting> </programlisting>
After vacuuming, we are ready to perform a fast full text search: After vacuuming, we are ready to perform a fast full text search:
<programlisting> <programlisting>
SELECT rank_cd(textsearch_index, q) AS rank, title SELECT ts_rank_cd(textsearch_index, q) AS rank, title
FROM pgweb, to_tsquery('create &amp; table') q FROM pgweb, to_tsquery('create &amp; table') q
WHERE q @@ textsearch_index WHERE q @@ textsearch_index
ORDER BY rank DESC LIMIT 10; ORDER BY rank DESC LIMIT 10;
...@@ -527,16 +535,14 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector @@ 'fat &amp; cow':: ...@@ -527,16 +535,14 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector @@ 'fat &amp; cow'::
<term> <term>
<synopsis> <synopsis>
TEXT @@ TSQUERY text @@ tsquery
VARCHAR @@ TSQUERY
CHAR @@ TSQUERY
</synopsis> </synopsis>
</term> </term>
<listitem> <listitem>
<para> <para>
Returns <literal>true</literal> if <literal>TSQUERY</literal> is contained Returns <literal>true</literal> if <literal>tsquery</literal> is contained
in <literal>TEXT/VARCHAR</literal>, and <literal>false</literal> if not: in <literal>text</literal>, and <literal>false</literal> if not:
<programlisting> <programlisting>
SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; rat'::tsquery; SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; rat'::tsquery;
?column? ?column?
...@@ -562,9 +568,7 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; cow'::tsqu ...@@ -562,9 +568,7 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; cow'::tsqu
<synopsis> <synopsis>
<!-- this is very confusing because there is no rule suggesting which is <!-- this is very confusing because there is no rule suggesting which is
first. --> first. -->
TEXT @@ TEXT text @@ text
VARCHAR @@ TEXT
CHAR @@ TEXT
</synopsis> </synopsis>
</term> </term>
...@@ -612,7 +616,7 @@ For index support of full text operators consult <xref linkend="textsearch-index ...@@ -612,7 +616,7 @@ For index support of full text operators consult <xref linkend="textsearch-index
<term> <term>
<synopsis> <synopsis>
to_tsvector(<optional><replaceable class="PARAMETER">conf_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns TSVECTOR to_tsvector(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns TSVECTOR
</synopsis> </synopsis>
</term> </term>
...@@ -685,7 +689,7 @@ document to be weighted differently by ranking functions. ...@@ -685,7 +689,7 @@ document to be weighted differently by ranking functions.
<term> <term>
<synopsis> <synopsis>
<replaceable class="PARAMETER">vector1</replaceable> || <replaceable class="PARAMETER">vector2</replaceable> <replaceable class="PARAMETER">vector1</replaceable> || <replaceable class="PARAMETER">vector2</replaceable>
concat(<replaceable class="PARAMETER">vector1</replaceable> TSVECTOR, <replaceable class="PARAMETER">vector2</replaceable> TSVECTOR) returns TSVECTOR tsvector_concat(<replaceable class="PARAMETER">vector1</replaceable> TSVECTOR, <replaceable class="PARAMETER">vector2</replaceable> TSVECTOR) returns TSVECTOR
</synopsis> </synopsis>
</term> </term>
...@@ -701,7 +705,7 @@ weigh words from one section of your document differently than the others ...@@ -701,7 +705,7 @@ weigh words from one section of your document differently than the others
by parsing the sections into separate vectors and assigning each vector by parsing the sections into separate vectors and assigning each vector
a different position label with the <function>setweight()</function> a different position label with the <function>setweight()</function>
function. You can then concatenate them into a single vector and provide function. You can then concatenate them into a single vector and provide
a weights argument to the <function>rank()</function> function that assigns a weights argument to the <function>ts_rank()</function> function that assigns
different weights to positions with different labels. different weights to positions with different labels.
</para> </para>
</listitem> </listitem>
...@@ -751,42 +755,51 @@ it yet) --> ...@@ -751,42 +755,51 @@ it yet) -->
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<indexterm zone="textsearch-tsvector"> <indexterm zone="textsearch-tsvector">
<primary>trigger</primary> <primary>trigger</primary>
<secondary>for updating a derived tsvector column</secondary>
</indexterm> </indexterm>
<term> <term>
<synopsis> <synopsis>
tsvector_update_trigger(<optional><replaceable class="PARAMETER">vector_column_name</replaceable></optional>, <optional><replaceable class="PARAMETER">filter_name</replaceable></optional>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>) tsvector_update_trigger(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
tsvector_update_trigger_column(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_column_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
</synopsis> </synopsis>
</term> </term>
<listitem> <listitem>
<para> <para>
The <function>tsvector_update_trigger()</function> trigger is used to Two built-in trigger functions are available to automatically update a
automatically update vector_column_name. <type>tsvector</> column from one or more textual columns. An example
<replaceable>filter_name</replaceable> is the function name to preprocess of their use is:
<replaceable>text_column_name</replaceable>. There can be many functions
and text columns specified in a
<function>tsvector_update_trigger()</function> trigger. If multiple
functions are specified, they apply to the following columns until the
next function appears. As an example of using a filter, function
<function>dropatsymbol</function> replaces all entries of the
<literal>@</literal> sign with a space:
<programlisting> <programlisting>
CREATE FUNCTION dropatsymbol(text) CREATE TABLE tblMessages (
RETURNS text strMessage text,
AS 'SELECT replace($1, ''@'', '' '');' tsv tsvector
LANGUAGE SQL; );
CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE
ON tblMessages FOR EACH ROW EXECUTE PROCEDURE ON tblMessages FOR EACH ROW EXECUTE PROCEDURE
tsvector_update_trigger(tsvector_column, dropatsymbol, strMessage); tsvector_update_trigger(tsv, 'pg_catalog.english', strMessage);
</programlisting> </programlisting>
Having created this trigger, any change in <structfield>strMessage</>
will be automatically reflected into <structfield>tsv</>.
</para>
<para>
Both triggers require you to specify the text search configuration to
be used to perform the conversion. For
<function>tsvector_update_trigger</>, the configuration name is simply
given as the second trigger argument. It must be schema-qualified as
shown above, so that the trigger behavior will not change with changes
in <varname>search_path</>. For
<function>tsvector_update_trigger_column</>, the second trigger argument
is the name of another table column, which must be of type
<type>regconfig</>. This allows a per-row selection of configuration
to be made.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -882,7 +895,7 @@ All btree operations are defined for the <type>tsvector</type> type. ...@@ -882,7 +895,7 @@ All btree operations are defined for the <type>tsvector</type> type.
<term> <term>
<synopsis> <synopsis>
to_tsquery(<optional><replaceable class="PARAMETER">conf_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY to_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
</synopsis> </synopsis>
</term> </term>
...@@ -925,7 +938,7 @@ Without quotes <function>to_tsquery</function> will generate a syntax error. ...@@ -925,7 +938,7 @@ Without quotes <function>to_tsquery</function> will generate a syntax error.
<term> <term>
<synopsis> <synopsis>
plainto_tsquery(<optional><replaceable class="PARAMETER">conf_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY plainto_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
</synopsis> </synopsis>
</term> </term>
...@@ -1418,32 +1431,32 @@ function ( <xref linkend="textsearch-debugging"> ), which shows all details ...@@ -1418,32 +1431,32 @@ function ( <xref linkend="textsearch-debugging"> ), which shows all details
of the full text machinery: of the full text machinery:
<programlisting> <programlisting>
SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats'); SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats');
Alias | Description | Token | Dicts list | Lexized token Alias | Description | Token | Dictionaries | Lexized token
-------+---------------+-------+----------------------+--------------------------- -------+---------------+-------+--------------+----------------
lword | Latin word | a | {pg_catalog.en_stem} | pg_catalog.en_stem: {} lword | Latin word | a | {english} | english: {}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | fat | {pg_catalog.en_stem} | pg_catalog.en_stem: {fat} lword | Latin word | fat | {english} | english: {fat}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | cat | {pg_catalog.en_stem} | pg_catalog.en_stem: {cat} lword | Latin word | cat | {english} | english: {cat}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | sat | {pg_catalog.en_stem} | pg_catalog.en_stem: {sat} lword | Latin word | sat | {english} | english: {sat}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | on | {pg_catalog.en_stem} | pg_catalog.en_stem: {} lword | Latin word | on | {english} | english: {}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | a | {pg_catalog.en_stem} | pg_catalog.en_stem: {} lword | Latin word | a | {english} | english: {}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | mat | {pg_catalog.en_stem} | pg_catalog.en_stem: {mat} lword | Latin word | mat | {english} | english: {mat}
blank | Space symbols | | | blank | Space symbols | | |
blank | Space symbols | - | | blank | Space symbols | - | |
lword | Latin word | it | {pg_catalog.en_stem} | pg_catalog.en_stem: {} lword | Latin word | it | {english} | english: {}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | ate | {pg_catalog.en_stem} | pg_catalog.en_stem: {ate} lword | Latin word | ate | {english} | english: {ate}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | a | {pg_catalog.en_stem} | pg_catalog.en_stem: {} lword | Latin word | a | {english} | english: {}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | fat | {pg_catalog.en_stem} | pg_catalog.en_stem: {fat} lword | Latin word | fat | {english} | english: {fat}
blank | Space symbols | | | blank | Space symbols | | |
lword | Latin word | rats | {pg_catalog.en_stem} | pg_catalog.en_stem: {rat} lword | Latin word | rats | {english} | english: {rat}
(24 rows) (24 rows)
</programlisting> </programlisting>
</para> </para>
...@@ -1485,7 +1498,7 @@ The following functions allow manual parsing control: ...@@ -1485,7 +1498,7 @@ The following functions allow manual parsing control:
<term> <term>
<synopsis> <synopsis>
parse(<replaceable class="PARAMETER">parser</replaceable>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF <type>tokenout</type> ts_parse(<replaceable class="PARAMETER">parser</replaceable>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF <type>tokenout</type>
</synopsis> </synopsis>
</term> </term>
...@@ -1496,7 +1509,7 @@ of records, one for each token produced by parsing. Each record includes ...@@ -1496,7 +1509,7 @@ of records, one for each token produced by parsing. Each record includes
a <varname>tokid</varname> giving its type and a <varname>token</varname> a <varname>tokid</varname> giving its type and a <varname>token</varname>
which gives its content: which gives its content:
<programlisting> <programlisting>
SELECT * FROM parse('default','123 - a number'); SELECT * FROM ts_parse('default','123 - a number');
tokid | token tokid | token
-------+-------- -------+--------
22 | 123 22 | 123
...@@ -1517,7 +1530,7 @@ SELECT * FROM parse('default','123 - a number'); ...@@ -1517,7 +1530,7 @@ SELECT * FROM parse('default','123 - a number');
<term> <term>
<synopsis> <synopsis>
token_type(<replaceable class="PARAMETER">parser</replaceable> ) returns SETOF <type>tokentype</type> ts_token_type(<replaceable class="PARAMETER">parser</replaceable> ) returns SETOF <type>tokentype</type>
</synopsis> </synopsis>
</term> </term>
...@@ -1530,7 +1543,7 @@ type the table gives the <varname>tokid</varname> which the ...@@ -1530,7 +1543,7 @@ type the table gives the <varname>tokid</varname> which the
parser uses to label a <varname>token</varname> of that type, the <varname>alias</varname> which parser uses to label a <varname>token</varname> of that type, the <varname>alias</varname> which
names the token type, and a short <varname>description</varname>: names the token type, and a short <varname>description</varname>:
<programlisting> <programlisting>
SELECT * FROM token_type('default'); SELECT * FROM ts_token_type('default');
tokid | alias | description tokid | alias | description
-------+--------------+----------------------------------- -------+--------------+-----------------------------------
1 | lword | Latin word 1 | lword | Latin word
...@@ -1598,12 +1611,12 @@ The two ranking functions currently available are: ...@@ -1598,12 +1611,12 @@ The two ranking functions currently available are:
<varlistentry> <varlistentry>
<indexterm zone="textsearch-ranking"> <indexterm zone="textsearch-ranking">
<primary>rank</primary> <primary>ts_rank</primary>
</indexterm> </indexterm>
<term> <term>
<synopsis> <synopsis>
rank(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[]</optional>, <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4 ts_rank(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[]</optional>, <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
</synopsis> </synopsis>
</term> </term>
...@@ -1630,12 +1643,12 @@ than words in the document body. ...@@ -1630,12 +1643,12 @@ than words in the document body.
<varlistentry> <varlistentry>
<indexterm zone="textsearch-ranking"> <indexterm zone="textsearch-ranking">
<primary>rank_cd</primary> <primary>ts_rank_cd</primary>
</indexterm> </indexterm>
<term> <term>
<synopsis> <synopsis>
rank_cd(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[], </optional> <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4 ts_rank_cd(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[], </optional> <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
</synopsis> </synopsis>
</term> </term>
...@@ -1699,7 +1712,7 @@ a cosmetic change, i.e., the ordering of the search results will not change. ...@@ -1699,7 +1712,7 @@ a cosmetic change, i.e., the ordering of the search results will not change.
Several examples are shown below; note that the second example uses Several examples are shown below; note that the second example uses
normalized ranking: normalized ranking:
<programlisting> <programlisting>
SELECT title, rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query) AS rnk SELECT title, ts_rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query) AS rnk
FROM apod, to_tsquery('neutrino|(dark &amp; matter)') query FROM apod, to_tsquery('neutrino|(dark &amp; matter)') query
WHERE query @@ textsearch WHERE query @@ textsearch
ORDER BY rnk DESC LIMIT 10; ORDER BY rnk DESC LIMIT 10;
...@@ -1716,8 +1729,8 @@ ORDER BY rnk DESC LIMIT 10; ...@@ -1716,8 +1729,8 @@ ORDER BY rnk DESC LIMIT 10;
Ice Fishing for Cosmic Neutrinos | 1.6 Ice Fishing for Cosmic Neutrinos | 1.6
Weak Lensing Distorts the Universe | 0.818218 Weak Lensing Distorts the Universe | 0.818218
SELECT title, rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query)/ SELECT title, ts_rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query)/
(rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query) + 1) AS rnk (ts_rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query) + 1) AS rnk
FROM apod, to_tsquery('neutrino|(dark &amp; matter)') query FROM apod, to_tsquery('neutrino|(dark &amp; matter)') query
WHERE query @@ textsearch WHERE query @@ textsearch
ORDER BY rnk DESC LIMIT 10; ORDER BY rnk DESC LIMIT 10;
...@@ -1737,7 +1750,7 @@ ORDER BY rnk DESC LIMIT 10; ...@@ -1737,7 +1750,7 @@ ORDER BY rnk DESC LIMIT 10;
</para> </para>
<para> <para>
The first argument in <function>rank_cd</function> (<literal>'{0.1, 0.2, The first argument in <function>ts_rank_cd</function> (<literal>'{0.1, 0.2,
0.4, 1.0}'</literal>) is an optional parameter which specifies the 0.4, 1.0}'</literal>) is an optional parameter which specifies the
weights for labels <literal>D</literal>, <literal>C</literal>, weights for labels <literal>D</literal>, <literal>C</literal>,
<literal>B</literal>, and <literal>A</literal> used in function <literal>B</literal>, and <literal>A</literal> used in function
...@@ -1785,17 +1798,17 @@ implements such functionality. ...@@ -1785,17 +1798,17 @@ implements such functionality.
<term> <term>
<synopsis> <synopsis>
headline(<optional> <replaceable class="PARAMETER">conf_name</replaceable> text</optional>, <replaceable class="PARAMETER">document</replaceable> text, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">options</replaceable> text </optional>) returns text ts_headline(<optional> <replaceable class="PARAMETER">config_name</replaceable> text</optional>, <replaceable class="PARAMETER">document</replaceable> text, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">options</replaceable> text </optional>) returns text
</synopsis> </synopsis>
</term> </term>
<listitem> <listitem>
<para> <para>
The <function>headline()</function> function accepts a document along with The <function>ts_headline</function> function accepts a document along with
a query, and returns one or more ellipsis-separated excerpts from the a query, and returns one or more ellipsis-separated excerpts from the
document in which terms from the query are highlighted. The configuration document in which terms from the query are highlighted. The configuration
used to parse the document can be specified by its used to parse the document can be specified by its
<replaceable>conf_name</replaceable>; if none is specified, the current <replaceable>config_name</replaceable>; if none is specified, the current
configuration is used. configuration is used.
</para> </para>
...@@ -1840,13 +1853,13 @@ StartSel=&lt;b&gt;, StopSel=&lt;/b&gt;, MaxWords=35, MinWords=15, ShortWord=3, H ...@@ -1840,13 +1853,13 @@ StartSel=&lt;b&gt;, StopSel=&lt;/b&gt;, MaxWords=35, MinWords=15, ShortWord=3, H
For example: For example:
<programlisting> <programlisting>
SELECT headline('a b c', 'c'::tsquery); SELECT ts_headline('a b c', 'c'::tsquery);
headline headline
-------------- --------------
a b &lt;b&gt;c&lt;/b&gt; a b &lt;b&gt;c&lt;/b&gt;
SELECT headline('a b c', 'c'::tsquery, 'StartSel=&lt;,StopSel=&gt;'); SELECT ts_headline('a b c', 'c'::tsquery, 'StartSel=&lt;,StopSel=&gt;');
headline ts_headline
---------- -------------
a b &lt;c&gt; a b &lt;c&gt;
</programlisting> </programlisting>
</para> </para>
...@@ -1860,8 +1873,8 @@ shown. <acronym>SQL</acronym> subselects can help here; below is an ...@@ -1860,8 +1873,8 @@ shown. <acronym>SQL</acronym> subselects can help here; below is an
example: example:
<programlisting> <programlisting>
SELECT id,headline(body,q), rank SELECT id,ts_headline(body,q), rank
FROM (SELECT id,body,q, rank_cd (ti,q) AS rank FROM apod, to_tsquery('stars') q FROM (SELECT id,body,q, ts_rank_cd (ti,q) AS rank FROM apod, to_tsquery('stars') q
WHERE ti @@ q WHERE ti @@ q
ORDER BY rank DESC LIMIT 10) AS foo; ORDER BY rank DESC LIMIT 10) AS foo;
</programlisting> </programlisting>
...@@ -1869,8 +1882,8 @@ FROM (SELECT id,body,q, rank_cd (ti,q) AS rank FROM apod, to_tsquery('stars') q ...@@ -1869,8 +1882,8 @@ FROM (SELECT id,body,q, rank_cd (ti,q) AS rank FROM apod, to_tsquery('stars') q
<para> <para>
Note that the cascade dropping of the <function>parser</function> function Note that the cascade dropping of the <function>parser</function> function
causes dropping of the <literal>headline</literal> used in the full text search causes dropping of the <literal>ts_headline</literal> used in the full text search
configuration <replaceable>conf_name</replaceable><!-- TODO I don't get this -->. configuration <replaceable>config_name</replaceable><!-- TODO I don't get this -->.
</para> </para>
</sect2> </sect2>
...@@ -1958,7 +1971,7 @@ linkend="textsearch-rule-dictionary-example">) as an example. ...@@ -1958,7 +1971,7 @@ linkend="textsearch-rule-dictionary-example">) as an example.
</para> </para>
<para> <para>
The <literal>ALTER TEXT SEARCH CONFIGURATION public.pg ADD The <literal>ALTER TEXT SEARCH CONFIGURATION ADD
MAPPING</literal> command binds specific types of lexemes and a set of MAPPING</literal> command binds specific types of lexemes and a set of
dictionaries to process them. (Mappings can also be specified as part of dictionaries to process them. (Mappings can also be specified as part of
configuration creation.) Lexemes are processed by a stack of dictionaries configuration creation.) Lexemes are processed by a stack of dictionaries
...@@ -1979,12 +1992,12 @@ ALTER TEXT SEARCH CONFIGURATION astro_en ADD MAPPING FOR lword WITH astrosyn, en ...@@ -1979,12 +1992,12 @@ ALTER TEXT SEARCH CONFIGURATION astro_en ADD MAPPING FOR lword WITH astrosyn, en
</para> </para>
<para> <para>
Function <function>lexize</function> can be used to test dictionaries, Function <function>ts_lexize</function> can be used to test dictionaries,
for example: for example:
<programlisting> <programlisting>
SELECT lexize('en_stem', 'stars'); SELECT ts_lexize('en_stem', 'stars');
lexize ts_lexize
-------- -----------
{star} {star}
(1 row) (1 row)
</programlisting> </programlisting>
...@@ -2010,14 +2023,14 @@ SELECT to_tsvector('english','in the list of stop words'); ...@@ -2010,14 +2023,14 @@ SELECT to_tsvector('english','in the list of stop words');
The gaps between positions 1-3 and 3-5 are because of stop words, so ranks The gaps between positions 1-3 and 3-5 are because of stop words, so ranks
calculated for documents with and without stop words are quite different: calculated for documents with and without stop words are quite different:
<programlisting> <programlisting>
SELECT rank_cd ('{1,1,1,1}', to_tsvector('english','in the list of stop words'), to_tsquery('list &amp; stop')); SELECT ts_rank_cd ('{1,1,1,1}', to_tsvector('english','in the list of stop words'), to_tsquery('list &amp; stop'));
rank_cd ts_rank_cd
--------- ------------
0.5 0.5
SELECT rank_cd ('{1,1,1,1}', to_tsvector('english','list stop words'), to_tsquery('list &amp; stop')); SELECT ts_rank_cd ('{1,1,1,1}', to_tsvector('english','list stop words'), to_tsquery('list &amp; stop'));
rank_cd ts_rank_cd
--------- ------------
1 1
</programlisting> </programlisting>
...@@ -2033,26 +2046,24 @@ behaviour is an attempt to decrease possible noise. ...@@ -2033,26 +2046,24 @@ behaviour is an attempt to decrease possible noise.
<para> <para>
Here is an example of a dictionary that returns the input word as lowercase Here is an example of a dictionary that returns the input word as lowercase
or <literal>NULL</literal> if it is a stop word; it also specifies the location or <literal>NULL</literal> if it is a stop word; it also specifies the name
of the file of stop words. It uses the <literal>simple</> dictionary as of a file of stop words. It uses the <literal>simple</> dictionary as
a template: a template:
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY public.simple_dict CREATE TEXT SEARCH DICTIONARY public.simple_dict (
TEMPLATE pg_catalog.simple TEMPLATE = pg_catalog.simple,
OPTION 'english.stop'; STOPWORDS = english
);
</programlisting> </programlisting>
Relative paths in <literal>OPTION</literal> resolve relative to Now we can test our dictionary:
<filename>share/</><!-- TODO and "share/" is relative to what? such
references occur elsewhere in this section -->. Now we can test our
dictionary:
<programlisting> <programlisting>
SELECT lexize('public.simple_dict','YeS'); SELECT ts_lexize('public.simple_dict','YeS');
lexize ts_lexize
-------- -----------
{yes} {yes}
SELECT lexize('public.simple_dict','The'); SELECT ts_lexize('public.simple_dict','The');
lexize ts_lexize
-------- -----------
{} {}
</programlisting> </programlisting>
</para> </para>
...@@ -2066,7 +2077,7 @@ SELECT lexize('public.simple_dict','The'); ...@@ -2066,7 +2077,7 @@ SELECT lexize('public.simple_dict','The');
<para> <para>
This dictionary template is used to create dictionaries which replace a This dictionary template is used to create dictionaries which replace a
word with a synonym. Phrases are not supported (use the thesaurus word with a synonym. Phrases are not supported (use the thesaurus
dictionary (<xref linkend="textsearch-thesaurus">) if you need them). Synonym dictionary (<xref linkend="textsearch-thesaurus">) for that). A synonym
dictionary can be used to overcome linguistic problems, for example, to dictionary can be used to overcome linguistic problems, for example, to
prevent an English stemmer dictionary from reducing the word 'Paris' to prevent an English stemmer dictionary from reducing the word 'Paris' to
'pari'. In that case, it is enough to have a <literal>Paris 'pari'. In that case, it is enough to have a <literal>Paris
...@@ -2074,17 +2085,18 @@ paris</literal> line in the synonym dictionary and put it before the ...@@ -2074,17 +2085,18 @@ paris</literal> line in the synonym dictionary and put it before the
<literal>en_stem</> dictionary: <literal>en_stem</> dictionary:
<programlisting> <programlisting>
SELECT * FROM ts_debug('english','Paris'); SELECT * FROM ts_debug('english','Paris');
Alias | Description | Token | Dicts list | Lexized token Alias | Description | Token | Dictionaries | Lexized token
-------+-------------+-------+----------------------+---------------------------- -------+-------------+-------+--------------+-----------------
lword | Latin word | Paris | {pg_catalog.en_stem} | pg_catalog.en_stem: {pari} lword | Latin word | Paris | {english} | english: {pari}
(1 row) (1 row)
ALTER TEXT SEARCH CONFIGURATION ADD MAPPING ON english FOR lword WITH synonym, en_stem;
ALTER TEXT SEARCH MAPPING ALTER TEXT SEARCH CONFIGURATION english
Time: 340.867 ms ADD MAPPING FOR lword WITH synonym, en_stem;
SELECT * FROM ts_debug('english','Paris'); SELECT * FROM ts_debug('english','Paris');
Alias | Description | Token | Dicts list | Lexized token Alias | Description | Token | Dictionaries | Lexized token
-------+-------------+-------+-----------------------------------------+----------------------------- -------+-------------+-------+-------------------+------------------
lword | Latin word | Paris | {pg_catalog.synonym,pg_catalog.en_stem} | pg_catalog.synonym: {paris} lword | Latin word | Paris | {synonym,en_stem} | synonym: {paris}
(1 row) (1 row)
</programlisting> </programlisting>
</para> </para>
...@@ -2171,9 +2183,11 @@ To define a new thesaurus dictionary one can use the thesaurus template. ...@@ -2171,9 +2183,11 @@ To define a new thesaurus dictionary one can use the thesaurus template.
For example: For example:
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY thesaurus_simple CREATE TEXT SEARCH DICTIONARY thesaurus_simple (
TEMPLATE thesaurus_template TEMPLATE = thesaurus,
OPTION 'DictFile="dicts_data/thesaurus.txt.sample", Dictionary="en_stem"'; DictFile = mythesaurus,
Dictionary = pg_catalog.en_stem
);
</programlisting> </programlisting>
Here: Here:
<itemizedlist spacing="compact" mark="bullet"> <itemizedlist spacing="compact" mark="bullet">
...@@ -2181,12 +2195,15 @@ Here: ...@@ -2181,12 +2195,15 @@ Here:
<literal>thesaurus_simple</literal> is the thesaurus dictionary name <literal>thesaurus_simple</literal> is the thesaurus dictionary name
</para></listitem> </para></listitem>
<listitem><para> <listitem><para>
<literal>DictFile="/path/to/thesaurus_simple.txt"</literal> is the location of the thesaurus file <literal>mythesaurus</literal> is the base name of the thesaurus file
(its full name will be <filename>$SHAREDIR/tsearch_data/mythesaurus.ths</>,
where <literal>$SHAREDIR</> means the installation shared-data directory,
often <filename>/usr/local/share</>).
</para></listitem> </para></listitem>
<listitem><para> <listitem><para>
<literal>Dictionary="en_stem"</literal> defines the dictionary (snowball <literal>pg_catalog.en_stem</literal> is the dictionary (snowball
English stemmer) to use for thesaurus normalization. Notice that the English stemmer) to use for thesaurus normalization. Notice that the
<literal>en_stem</> dictionary has it is own configuration (for example, <literal>en_stem</> dictionary has its own configuration (for example,
stop words). stop words).
</para></listitem> </para></listitem>
</itemizedlist> </itemizedlist>
...@@ -2195,7 +2212,8 @@ Now it is possible to bind the thesaurus dictionary <literal>thesaurus_simple</l ...@@ -2195,7 +2212,8 @@ Now it is possible to bind the thesaurus dictionary <literal>thesaurus_simple</l
and selected <literal>tokens</literal>, for example: and selected <literal>tokens</literal>, for example:
<programlisting> <programlisting>
ALTER TEXT SEARCH russian ADD MAPPING FOR lword, lhword, lpart_hword WITH thesaurus_simple; ALTER TEXT SEARCH CONFIGURATION russian
ADD MAPPING FOR lword, lhword, lpart_hword WITH thesaurus_simple;
</programlisting> </programlisting>
</para> </para>
...@@ -2214,15 +2232,17 @@ crab nebulae : crab ...@@ -2214,15 +2232,17 @@ crab nebulae : crab
Below we create a dictionary and bind some token types with Below we create a dictionary and bind some token types with
an astronomical thesaurus and english stemmer: an astronomical thesaurus and english stemmer:
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY thesaurus_astro OPTION CREATE TEXT SEARCH DICTIONARY thesaurus_astro (
TEMPLATE thesaurus_template TEMPLATE = thesaurus,
'DictFile="dicts_data/thesaurus_astro.txt", Dictionary="en_stem"'; DictFile = thesaurus_astro,
ALTER TEXT SEARCH CONFIGURATION russian ADD MAPPING FOR lword, lhword, lpart_hword Dictionary = en_stem
WITH thesaurus_astro, en_stem; );
ALTER TEXT SEARCH CONFIGURATION russian
ADD MAPPING FOR lword, lhword, lpart_hword WITH thesaurus_astro, en_stem;
</programlisting> </programlisting>
Now we can see how it works. Note that <function>lexize</function> cannot Now we can see how it works. Note that <function>ts_lexize</function> cannot
be used for testing the thesaurus (see description of be used for testing the thesaurus (see description of
<function>lexize</function>), but we can use <function>ts_lexize</function>), but we can use
<function>plainto_tsquery</function> and <function>to_tsvector</function> <function>plainto_tsquery</function> and <function>to_tsvector</function>
which accept <literal>text</literal> arguments, not lexemes: which accept <literal>text</literal> arguments, not lexemes:
...@@ -2288,17 +2308,17 @@ conjugations of the search term <literal>bank</literal>, e.g. ...@@ -2288,17 +2308,17 @@ conjugations of the search term <literal>bank</literal>, e.g.
<literal>banking</>, <literal>banked</>, <literal>banks</>, <literal>banking</>, <literal>banked</>, <literal>banks</>,
<literal>banks'</>, and <literal>bank's</>. <literal>banks'</>, and <literal>bank's</>.
<programlisting> <programlisting>
SELECT lexize('en_ispell','banking'); SELECT ts_lexize('en_ispell','banking');
lexize ts_lexize
-------- -----------
{bank} {bank}
SELECT lexize('en_ispell','bank''s'); SELECT ts_lexize('en_ispell','bank''s');
lexize ts_lexize
-------- -----------
{bank} {bank}
SELECT lexize('en_ispell','banked'); SELECT ts_lexize('en_ispell','banked');
lexize ts_lexize
-------- -----------
{bank} {bank}
</programlisting> </programlisting>
...@@ -2306,38 +2326,26 @@ SELECT lexize('en_ispell','banked'); ...@@ -2306,38 +2326,26 @@ SELECT lexize('en_ispell','banked');
<para> <para>
To create an ispell dictionary one should use the built-in To create an ispell dictionary one should use the built-in
<literal>ispell_template</literal> dictionary and specify several <literal>ispell</literal> dictionary and specify several
parameters. parameters.
</para> </para>
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY en_ispell CREATE TEXT SEARCH DICTIONARY en_ispell (
TEMPLATE ispell_template TEMPLATE = ispell,
OPTION 'DictFile="/usr/local/share/dicts/ispell/english.dict", DictFile = english,
AffFile="/usr/local/share/dicts/ispell/english.aff", AffFile = english,
StopFile="/usr/local/share/dicts/ispell/english.stop"'; StopWords = english
);
</programlisting> </programlisting>
<para> <para>
Here, <literal>DictFile</>, <literal>AffFile</>, <literal>StopFile</> Here, <literal>DictFile</>, <literal>AffFile</>, and <literal>StopWords</>
specify the location of the dictionary and stop words files. specify the names of the dictionary, affixes, and stop-words files.
</para>
<para>
Relative paths in <literal>OPTION</literal> resolve relative to
<filename>share/dicts_data</>:
<programlisting>
CREATE TEXT SEARCH DICTIONARY en_ispell
TEMPLATE ispell_template
OPTION 'DictFile="ispell/english.dict",
AffFile="ispell/english.aff",
StopFile="english.stop"';
</programlisting>
</para> </para>
<para> <para>
Ispell dictionaries usually recognize a restricted set of words so it Ispell dictionaries usually recognize a restricted set of words so they
should be used in conjunction with another broader dictionary; for should be used in conjunction with another broader dictionary; for
example, a stemming dictionary, which recognizes everything. example, a stemming dictionary, which recognizes everything.
</para> </para>
<para> <para>
...@@ -2352,9 +2360,9 @@ compoundwords controlled z ...@@ -2352,9 +2360,9 @@ compoundwords controlled z
</programlisting> </programlisting>
Several examples for the Norwegian language: Several examples for the Norwegian language:
<programlisting> <programlisting>
SELECT lexize('norwegian_ispell','overbuljongterningpakkmesterassistent'); SELECT ts_lexize('norwegian_ispell','overbuljongterningpakkmesterassistent');
{over,buljong,terning,pakk,mester,assistent} {over,buljong,terning,pakk,mester,assistent}
SELECT lexize('norwegian_ispell','sjokoladefabrikk'); SELECT ts_lexize('norwegian_ispell','sjokoladefabrikk');
{sjokoladefabrikk,sjokolade,fabrikk} {sjokoladefabrikk,sjokolade,fabrikk}
</programlisting> </programlisting>
</para> </para>
...@@ -2374,27 +2382,18 @@ operations of Hunspell. ...@@ -2374,27 +2382,18 @@ operations of Hunspell.
<title><application>Snowball</> Stemming Dictionary</title> <title><application>Snowball</> Stemming Dictionary</title>
<para> <para>
The <application>Snowball</> template dictionary is based on the project The <application>Snowball</> dictionary template is based on the project
of Martin Porter, an inventor of the popular Porter's stemming algorithm of Martin Porter, inventor of the popular Porter's stemming algorithm
for the English language and now supported in many languages (see the <ulink for the English language and now supported in many languages (see the <ulink
url="http://snowball.tartarus.org">Snowball site</ulink> for more url="http://snowball.tartarus.org">Snowball site</ulink> for more
information). Full text searching contains a large number of stemmers for information). Full text searching contains a large number of stemmers for
many languages. The only option that is accepted by a snowball stemmer is the many languages. A Snowball dictionary requires a language parameter to
location of a file with stop words. It can be defined using the identify which stemmer to use, and optionally can specify a stopword file name.
<literal>ALTER TEXT SEARCH DICTIONARY</literal> command. For example,
</para>
<para>
<programlisting>
ALTER TEXT SEARCH DICTIONARY en_stem
SET OPTION 'StopFile=english-utf8.stop, Language=english';
</programlisting>
</para>
<para>
Relative paths in <literal>OPTION</literal> resolve relative
<filename>share/dicts/data</>:
<programlisting> <programlisting>
ALTER TEXT SEARCH DICTIONARY en_stem OPTION 'english.stop'; ALTER TEXT SEARCH DICTIONARY en_stem (
StopWords = english-utf8, Language = english
);
</programlisting> </programlisting>
</para> </para>
...@@ -2410,18 +2409,18 @@ before any other dictionary because a lexeme will not pass through its stemmer. ...@@ -2410,18 +2409,18 @@ before any other dictionary because a lexeme will not pass through its stemmer.
<title>Dictionary Testing</title> <title>Dictionary Testing</title>
<para> <para>
The <function>lexize</> function facilitates dictionary testing: The <function>ts_lexize</> function facilitates dictionary testing:
<variablelist> <variablelist>
<varlistentry> <varlistentry>
<indexterm zone="textsearch-dictionaries"> <indexterm zone="textsearch-dictionaries">
<primary>lexize</primary> <primary>ts_lexize</primary>
</indexterm> </indexterm>
<term> <term>
<synopsis> <synopsis>
lexize(<optional> <replaceable class="PARAMETER">dict_name</replaceable> text</optional>, <replaceable class="PARAMETER">lexeme</replaceable> text) returns text[] ts_lexize(<optional> <replaceable class="PARAMETER">dict_name</replaceable> text</optional>, <replaceable class="PARAMETER">lexeme</replaceable> text) returns text[]
</synopsis> </synopsis>
</term> </term>
...@@ -2433,13 +2432,13 @@ array if the lexeme is known to the dictionary but it is a stop word, or ...@@ -2433,13 +2432,13 @@ array if the lexeme is known to the dictionary but it is a stop word, or
<literal>NULL</literal> if it is an unknown word. <literal>NULL</literal> if it is an unknown word.
</para> </para>
<programlisting> <programlisting>
SELECT lexize('en_stem', 'stars'); SELECT ts_lexize('en_stem', 'stars');
lexize ts_lexize
-------- -----------
{star} {star}
SELECT lexize('en_stem', 'a'); SELECT ts_lexize('en_stem', 'a');
lexize ts_lexize
-------- -----------
{} {}
</programlisting> </programlisting>
</listitem> </listitem>
...@@ -2450,16 +2449,16 @@ SELECT lexize('en_stem', 'a'); ...@@ -2450,16 +2449,16 @@ SELECT lexize('en_stem', 'a');
<note> <note>
<para> <para>
The <function>lexize</function> function expects a The <function>ts_lexize</function> function expects a
<replaceable>lexeme</replaceable>, not text. Below is an example: <replaceable>lexeme</replaceable>, not text. Below is an example:
<programlisting> <programlisting>
SELECT lexize('thesaurus_astro','supernovae stars') is null; SELECT ts_lexize('thesaurus_astro','supernovae stars') is null;
?column? ?column?
---------- ----------
t t
</programlisting> </programlisting>
Thesaurus dictionary <literal>thesaurus_astro</literal> does know Thesaurus dictionary <literal>thesaurus_astro</literal> does know
<literal>supernovae stars</literal>, but lexize fails since it does not <literal>supernovae stars</literal>, but ts_lexize fails since it does not
parse the input text and considers it as a single lexeme. Use parse the input text and considers it as a single lexeme. Use
<function>plainto_tsquery</> and <function>to_tsvector</> to test thesaurus <function>plainto_tsquery</> and <function>to_tsvector</> to test thesaurus
dictionaries: dictionaries:
...@@ -2489,23 +2488,24 @@ about full text searching objects (<xref linkend="textsearch-psql">). ...@@ -2489,23 +2488,24 @@ about full text searching objects (<xref linkend="textsearch-psql">).
</para> </para>
<para> <para>
The <acronym>GUC</acronym> variable <varname>default_text_search_config</varname> The configuration parameter
(optionally schema-qualified) defines the name of the <emphasis>current <xref linkend="guc-default-text-search-config">
active</emphasis> configuration. It can be defined in specifies the name of the current default configuration, which is the
<literal>postgresql.conf</literal> or using the <command>SET</> command. one used by text search functions when an explicit configuration
parameter is omitted.
It can be set in <filename>postgresql.conf</filename>, or set for an
individual session using the <command>SET</> command.
</para> </para>
<para> <para>
Predefined full text searching objects are available in the Several predefined text searching configurations are available in the
<literal>pg_catalog</literal> schema. If you need a custom configuration <literal>pg_catalog</literal> schema. If you need a custom configuration
you can create a new full text searching object and modify it using SQL you can create a new text searching configuration and modify it using SQL
commands. commands.
New full text searching objects are created in the current schema by default New text searching objects are created in the current schema by default
(usually the <literal>public</literal> schema), but a schema-qualified (usually the <literal>public</literal> schema), but a schema-qualified
name can be used to create objects in the specified schema. It is owned name can be used to create objects in the specified schema.
by the current user and can be changed using the <command>ALTER TEXT
SEARCH OWNER</> command.
</para> </para>
<para> <para>
...@@ -2515,55 +2515,61 @@ As an example, we will create a configuration ...@@ -2515,55 +2515,61 @@ As an example, we will create a configuration
<programlisting> <programlisting>
BEGIN; BEGIN;
CREATE TEXT SEARCH CONFIGURATION public.pg LIKE english WITH MAP; CREATE TEXT SEARCH CONFIGURATION public.pg ( COPY = english );
</programlisting> </programlisting>
</para> </para>
<para> <para>
We will use a PostgreSQL-specific <literal>synonym</literal> dictionary We will use a PostgreSQL-specific synonym list
and store it in the <literal>share/dicts_data</literal> directory. The and store it in <filename>share/tsearch_data/pg_dict.syn</filename>.
dictionary looks like: The file contents look like:
<Programlisting> <Programlisting>
postgres pg postgres pg
pgsql pg pgsql pg
postgresql pg postgresql pg
</programlisting> </programlisting>
We define the dictionary like this:
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY pg_dict CREATE TEXT SEARCH DICTIONARY pg_dict (
TEMPLATE synonym TEMPLATE = synonym
OPTION 'pg_dict.txt'; SYNONYMS = pg_dict
);
</programlisting> </programlisting>
</para> </para>
<para> <para>
Then register the <productname>ispell</> dictionary <literal>en_ispell</literal> using Then register the <productname>ispell</> dictionary
the <literal>ispell_template</literal> template: <literal>en_ispell</literal> using the <literal>ispell</literal> template:
<programlisting> <programlisting>
CREATE TEXT SEARCH DICTIONARY en_ispell CREATE TEXT SEARCH DICTIONARY en_ispell (
TEMPLATE ispell_template TEMPLATE = ispell,
OPTION 'DictFile="english-utf8.dict", DictFile = english-utf8,
AffFile="english-utf8.aff", AffFile = english-utf8,
StopFile="english-utf8.stop"'; StopWords = english-utf8
);
</programlisting> </programlisting>
</para> </para>
<para> <para>
Use the same stop word list for the <application>Snowball</> stemmer <literal>en_stem</literal>, We can use the same stop word list for the <application>Snowball</> stemmer
which is available by default: <literal>en_stem</literal>, which is available by default:
<programlisting> <programlisting>
ALTER TEXT SEARCH DICTIONARY en_stem SET OPTION 'english-utf8.stop'; ALTER TEXT SEARCH DICTIONARY en_stem (
StopWords = english-utf8
);
</programlisting> </programlisting>
</para> </para>
<para> <para>
Modify mappings for Latin words for configuration <literal>'pg'</>: Now modify mappings for Latin words for configuration <literal>pg</>:
<programlisting> <programlisting>
ALTER TEXT SEARCH CONFIGURATION pg ALTER MAPPING FOR lword, lhword, lpart_hword ALTER TEXT SEARCH CONFIGURATION pg
ALTER MAPPING FOR lword, lhword, lpart_hword
WITH pg_dict, en_ispell, en_stem; WITH pg_dict, en_ispell, en_stem;
</programlisting> </programlisting>
</para> </para>
...@@ -2572,7 +2578,8 @@ ALTER TEXT SEARCH CONFIGURATION pg ALTER MAPPING FOR lword, lhword, lpart_hword ...@@ -2572,7 +2578,8 @@ ALTER TEXT SEARCH CONFIGURATION pg ALTER MAPPING FOR lword, lhword, lpart_hword
We do not index or search some tokens: We do not index or search some tokens:
<programlisting> <programlisting>
ALTER TEXT SEARCH CONFIGURATION pg DROP MAPPING FOR email, url, sfloat, uri, float; ALTER TEXT SEARCH CONFIGURATION pg
DROP MAPPING FOR email, url, sfloat, uri, float;
</programlisting> </programlisting>
</para> </para>
...@@ -2582,7 +2589,7 @@ Now, we can test our configuration: ...@@ -2582,7 +2589,7 @@ Now, we can test our configuration:
SELECT * FROM ts_debug('public.pg', ' SELECT * FROM ts_debug('public.pg', '
PostgreSQL, the highly scalable, SQL compliant, open source object-relational PostgreSQL, the highly scalable, SQL compliant, open source object-relational
database management system, is now undergoing beta testing of the next database management system, is now undergoing beta testing of the next
version of our software: PostgreSQL 8.2. version of our software: PostgreSQL 8.3.
'); ');
COMMIT; COMMIT;
...@@ -2603,7 +2610,7 @@ are shown: ...@@ -2603,7 +2610,7 @@ are shown:
path | character varying | not null path | character varying | not null
body | character varying | body | character varying |
title | character varying | title | character varying |
dlm | integer | dlm | date |
</programlisting> </programlisting>
</para> </para>
...@@ -2644,15 +2651,15 @@ DATABASE ... SET</>. ...@@ -2644,15 +2651,15 @@ DATABASE ... SET</>.
However, if you need to use several text search configurations in the same However, if you need to use several text search configurations in the same
database you must be careful to reference the proper text search database you must be careful to reference the proper text search
configuration. This can be done by either setting configuration. This can be done by either setting
<varname>default_text_search_conf</> in each session or supplying the <varname>default_text_search_config</> in each session or supplying the
configuration name in every function call, e.g. to_tsquery('pg', configuration name in every function call, e.g. to_tsquery('french',
'friend'), to_tsvector('pg', col). If you are using an expression index, 'friend'), to_tsvector('english', col). If you are using an expression index,
you must also be sure to use the proper text search configuration every you must also be sure to use the proper text search configuration every
time an <command>INSERT</> or <command>UPDATE</> is executed because these time an <command>INSERT</> or <command>UPDATE</> is executed because these
will modify the index, or you can embed the configuration name into the will modify the index, or you can embed the configuration name into the
expression index, e.g.: expression index, e.g.:
<programlisting> <programlisting>
CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('pg', textcat(title, body))); CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('french', title || body));
</programlisting> </programlisting>
And if you do that, make sure you specify the configuration name in the And if you do that, make sure you specify the configuration name in the
<literal>WHERE</> clause as well so the expression index will be used. <literal>WHERE</> clause as well so the expression index will be used.
...@@ -2680,10 +2687,9 @@ Note that indexes are not mandatory for full text searching. ...@@ -2680,10 +2687,9 @@ Note that indexes are not mandatory for full text searching.
<varlistentry> <varlistentry>
<indexterm zone="textsearch-indexes"> <indexterm zone="textsearch-indexes">
<primary>index</primary> <primary>index</primary>
<secondary>GIST</secondary> <secondary>GIST, for text searching</secondary>
</indexterm> </indexterm>
<term> <term>
...@@ -2695,6 +2701,8 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> ...@@ -2695,6 +2701,8 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable>
<listitem> <listitem>
<para> <para>
Creates a GiST (Generalized Search Tree)-based index. Creates a GiST (Generalized Search Tree)-based index.
The <replaceable>column</replaceable> can be of <type>tsvector</> or
<type>tsquery</> type.
</para> </para>
</listitem> </listitem>
...@@ -2716,9 +2724,7 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> ...@@ -2716,9 +2724,7 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable>
<listitem> <listitem>
<para> <para>
Creates a GIN (Generalized Inverted Index)-based index. Creates a GIN (Generalized Inverted Index)-based index.
<replaceable class="PARAMETER">column</replaceable> is a The <replaceable>column</replaceable> must be of <type>tsvector</> type.
<literal>TSVECTOR</literal>, <literal>TEXT</literal>,
<literal>VARCHAR</literal>, or <literal>CHAR</literal>-type column.
</para> </para>
</listitem> </listitem>
...@@ -2728,10 +2734,11 @@ Creates a GIN (Generalized Inverted Index)-based index. ...@@ -2728,10 +2734,11 @@ Creates a GIN (Generalized Inverted Index)-based index.
</para> </para>
<para> <para>
A GiST index is <literal>lossy</literal>, meaning it is necessary A GiST index is <firstterm>lossy</firstterm>, meaning it is necessary
to consult the <literal>heap</literal> to check for false results. to check the actual table row to eliminate false matches.
<productname>PostgreSQL</productname> does this automatically; see <productname>PostgreSQL</productname> does this automatically; for
<literal>Filter:</literal> in the example below: example, in the query plan below, the <literal>Filter:</literal>
line indicates the index output will be rechecked:
<programlisting> <programlisting>
EXPLAIN SELECT * FROM apod WHERE textsearch @@ to_tsquery('supernovae'); EXPLAIN SELECT * FROM apod WHERE textsearch @@ to_tsquery('supernovae');
QUERY PLAN QUERY PLAN
...@@ -2788,7 +2795,8 @@ the number of unique words. ...@@ -2788,7 +2795,8 @@ the number of unique words.
There is one side-effect of the non-lossiness of a GIN index when using There is one side-effect of the non-lossiness of a GIN index when using
query labels/weights, like <literal>'supernovae:a'</literal>. A GIN index query labels/weights, like <literal>'supernovae:a'</literal>. A GIN index
has all the information necessary to determine a match, so the heap is has all the information necessary to determine a match, so the heap is
not accessed. However, if the query has label information it must access not accessed. However, label information is not stored in the index,
so if the query involves label weights it must access
the heap. Therefore, a special full text search operator <literal>@@@</literal> the heap. Therefore, a special full text search operator <literal>@@@</literal>
was created which forces the use of the heap to get information about was created which forces the use of the heap to get information about
labels. GiST indexes are lossy so it always reads the heap and there is labels. GiST indexes are lossy so it always reads the heap and there is
...@@ -3073,24 +3081,25 @@ configuration. ...@@ -3073,24 +3081,25 @@ configuration.
</para> </para>
<synopsis> <synopsis>
ts_debug(<optional><replaceable class="PARAMETER">conf_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF tsdebug ts_debug(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF ts_debug
</synopsis> </synopsis>
<para> <para>
<function>ts_debug</> displays information about every token of <function>ts_debug</> displays information about every token of
<replaceable class="PARAMETER">document</replaceable> as produced by the <replaceable class="PARAMETER">document</replaceable> as produced by the
parser and processed by the configured dictionaries using the configuration parser and processed by the configured dictionaries using the configuration
specified by <replaceable class="PARAMETER">conf_name</replaceable>. specified by <replaceable class="PARAMETER">config_name</replaceable>.
</para> </para>
<para> <para>
<replaceable class="PARAMETER">tsdebug</replaceable> type defined as: <replaceable class="PARAMETER">ts_debug</replaceable> type defined as:
<programlisting> <programlisting>
CREATE TYPE tsdebug AS ( CREATE TYPE ts_debug AS (
"Alias" text, "Alias" text,
"Description" text, "Description" text,
"Token" text, "Token" text,
"Dicts list" text[], "Dictionaries" regdictionary[],
"Lexized token" text "Lexized token" text
);
</programlisting> </programlisting>
</para> </para>
...@@ -3101,13 +3110,17 @@ ispell dictionary for the English language. You can skip the test step and ...@@ -3101,13 +3110,17 @@ ispell dictionary for the English language. You can skip the test step and
play with the standard <literal>english</literal> configuration. play with the standard <literal>english</literal> configuration.
</para> </para>
<programlisting> <programlisting>
CREATE TEXT SEARCH CONFIGURATION public.english LIKE pg_catalog.english WITH MAP AS DEFAULT; CREATE TEXT SEARCH CONFIGURATION public.english ( COPY = pg_catalog.english );
CREATE TEXT SEARCH DICTIONARY en_ispell
TEMPLATE ispell_template CREATE TEXT SEARCH DICTIONARY en_ispell (
OPTION 'DictFile="/usr/local/share/dicts/ispell/english-utf8.dict", TEMPLATE = ispell,
AffFile="/usr/local/share/dicts/ispell/english-utf8.aff", DictFile = english-utf8,
StopFile="/usr/local/share/dicts/english.stop"'; AffFile = english-utf8,
ALTER TEXT SEARCH MAPPING ON public.english FOR lword WITH en_ispell,en_stem; StopWords = english
);
ALTER TEXT SEARCH CONFIGURATION public.english
ALTER MAPPING FOR lword WITH en_ispell, en_stem;
</programlisting> </programlisting>
<programlisting> <programlisting>
...@@ -3211,9 +3224,9 @@ shortened numbers. ...@@ -3211,9 +3224,9 @@ shortened numbers.
<para> <para>
Examples: Examples:
<programlisting> <programlisting>
SELECT lexize('intdict', 11234567890); SELECT ts_lexize('intdict', 11234567890);
lexize ts_lexize
---------- -----------
{112345} {112345}
</programlisting> </programlisting>
</para> </para>
...@@ -3221,10 +3234,12 @@ SELECT lexize('intdict', 11234567890); ...@@ -3221,10 +3234,12 @@ SELECT lexize('intdict', 11234567890);
Now, we want to ignore long integers: Now, we want to ignore long integers:
<programlisting> <programlisting>
ALTER TEXT SEARCH DICTIONARY intdict SET OPTION 'MAXLEN=6, REJECTLONG=TRUE'; ALTER TEXT SEARCH DICTIONARY intdict (
SELECT lexize('intdict', 11234567890); MAXLEN = 6, REJECTLONG = TRUE
lexize );
-------- SELECT ts_lexize('intdict', 11234567890);
ts_lexize
-----------
{} {}
</programlisting> </programlisting>
</para> </para>
...@@ -3379,9 +3394,14 @@ AS 'MODULE_PATHNAME' ...@@ -3379,9 +3394,14 @@ AS 'MODULE_PATHNAME'
LANGUAGE 'C' LANGUAGE 'C'
WITH (isstrict); WITH (isstrict);
CREATE TEXT SEARCH DICTIONARY intdict CREATE TEXT SEARCH TEMPLATE intdict_template (
LEXIZE 'dlexize_intdict' INIT 'dinit_intdict' LEXIZE = dlexize_intdict, INIT = dinit_intdict
OPTION 'MAXLEN=6,REJECTLONG = false'; );
CREATE TEXT SEARCH DICTIONARY intdict (
TEMPLATE = intdict_template,
MAXLEN = 6, REJECTLONG = false
);
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'Dictionary for Integers'; COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'Dictionary for Integers';
...@@ -3483,7 +3503,7 @@ Below is the source code of our test parser, organized as a <filename>contrib</> ...@@ -3483,7 +3503,7 @@ Below is the source code of our test parser, organized as a <filename>contrib</>
<para> <para>
Testing: Testing:
<programlisting> <programlisting>
SELECT * FROM parse('testparser','That''s my first own parser'); SELECT * FROM ts_parse('testparser','That''s my first own parser');
tokid | token tokid | token
-------+-------- -------+--------
3 | That's 3 | That's
...@@ -3499,7 +3519,7 @@ SELECT to_tsvector('testcfg','That''s my first own parser'); ...@@ -3499,7 +3519,7 @@ SELECT to_tsvector('testcfg','That''s my first own parser');
to_tsvector to_tsvector
------------------------------------------------- -------------------------------------------------
'my':2 'own':4 'first':3 'parser':5 'that''s':1 'my':2 'own':4 'first':3 'parser':5 'that''s':1
SELECT headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'star')); SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'star'));
headline headline
----------------------------------------------------------------- -----------------------------------------------------------------
Supernovae &lt;b&gt;stars&lt;/b&gt; are the brightest phenomena in galaxies Supernovae &lt;b&gt;stars&lt;/b&gt; are the brightest phenomena in galaxies
...@@ -3696,15 +3716,15 @@ AS 'MODULE_PATHNAME' ...@@ -3696,15 +3716,15 @@ AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict); LANGUAGE 'C' with (isstrict);
CREATE TEXT SEARCH PARSER testparser CREATE TEXT SEARCH PARSER testparser (
START 'testprs_start' START = testprs_start,
GETTOKEN 'testprs_getlexeme' GETTOKEN = testprs_getlexeme,
END 'testprs_end' END = testprs_end,
LEXTYPES 'testprs_lextype' LEXTYPES = testprs_lextype
; ;
CREATE TEXT SEARCH CONFIGURATION testcfg PARSER 'testparser'; CREATE TEXT SEARCH CONFIGURATION testcfg ( PARSER = testparser );
CREATE TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple; ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
END; END;
</programlisting> </programlisting>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment