Commit 3e17ef1c authored by Tom Lane

Adjust ts_debug's output as per my proposal of yesterday: show the
active dictionary and its output lexemes as separate columns, instead
of smashing them into one text column, and lowercase the column names.
Also, define the output rowtype using OUT parameters instead of a
composite type, to be consistent with the other built-in functions.
parent 7ec280e1
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.402 2007/10/21 20:04:37 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.403 2007/10/22 20:13:37 tgl Exp $ -->
<chapter id="functions"> <chapter id="functions">
<title>Functions and Operators</title> <title>Functions and Operators</title>
...@@ -7857,11 +7857,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ...@@ -7857,11 +7857,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
</thead> </thead>
<tbody> <tbody>
<row> <row>
<entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>)</literal></entry> <entry><literal><function>ts_debug</function>(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>, OUT <replaceable class="PARAMETER">alias</> <type>text</>, OUT <replaceable class="PARAMETER">description</> <type>text</>, OUT <replaceable class="PARAMETER">token</> <type>text</>, OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>, OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>, OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)</literal></entry>
<entry><type>setof ts_debug</type></entry> <entry><type>setof record</type></entry>
<entry>test a configuration</entry> <entry>test a configuration</entry>
<entry><literal>ts_debug('english', 'The Brightest supernovaes')</literal></entry> <entry><literal>ts_debug('english', 'The Brightest supernovaes')</literal></entry>
<entry><literal>(lword,"Latin word",The,{english_stem},"english_stem: {}") ...</literal></entry> <entry><literal>(lword,"Latin word",The,{english_stem},english_stem,{}) ...</literal></entry>
</row> </row>
<row> <row>
<entry><literal><function>ts_lexize</function>(<replaceable class="PARAMETER">dict</replaceable> <type>regdictionary</>, <replaceable class="PARAMETER">token</replaceable> <type>text</>)</literal></entry> <entry><literal><function>ts_lexize</function>(<replaceable class="PARAMETER">dict</replaceable> <type>regdictionary</>, <replaceable class="PARAMETER">token</replaceable> <type>text</>)</literal></entry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.22 2007/10/22 03:37:04 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.23 2007/10/22 20:13:37 tgl Exp $ -->
<chapter id="textsearch"> <chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title> <title id="textsearch-title">Full Text Search</title>
...@@ -1699,18 +1699,18 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger(); ...@@ -1699,18 +1699,18 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger();
<itemizedlist spacing="compact" mark="bullet"> <itemizedlist spacing="compact" mark="bullet">
<listitem> <listitem>
<para> <para>
<structname>word</> <type>text</> &mdash; the value of a lexeme <replaceable>word</> <type>text</> &mdash; the value of a lexeme
</para> </para>
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<structname>ndoc</> <type>integer</> &mdash; number of documents <replaceable>ndoc</> <type>integer</> &mdash; number of documents
(<type>tsvector</>s) the word occurred in (<type>tsvector</>s) the word occurred in
</para> </para>
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<structname>nentry</> <type>integer</> &mdash; total number of <replaceable>nentry</> <type>integer</> &mdash; total number of
occurrences of the word occurrences of the word
</para> </para>
</listitem> </listitem>
...@@ -1901,8 +1901,8 @@ LIMIT 10; ...@@ -1901,8 +1901,8 @@ LIMIT 10;
as the entire word and as each component: as the entire word and as each component:
<programlisting> <programlisting>
SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1'); SELECT alias, description, token FROM ts_debug('foo-bar-beta1');
Alias | Description | Token alias | description | token
-------------+-------------------------------+--------------- -------------+-------------------------------+---------------
hword | Hyphenated word | foo-bar-beta1 hword | Hyphenated word | foo-bar-beta1
lpart_hword | Latin part of hyphenated word | foo lpart_hword | Latin part of hyphenated word | foo
...@@ -1917,8 +1917,8 @@ SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1'); ...@@ -1917,8 +1917,8 @@ SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1');
instructive example: instructive example:
<programlisting> <programlisting>
SELECT "Alias", "Description", "Token" FROM ts_debug('http://foo.com/stuff/index.html'); SELECT alias, description, token FROM ts_debug('http://foo.com/stuff/index.html');
Alias | Description | Token alias | description | token
----------+---------------+-------------------------- ----------+---------------+--------------------------
protocol | Protocol head | http:// protocol | Protocol head | http://
url | URL | foo.com/stuff/index.html url | URL | foo.com/stuff/index.html
...@@ -2186,25 +2186,23 @@ SELECT ts_lexize('public.simple_dict','The'); ...@@ -2186,25 +2186,23 @@ SELECT ts_lexize('public.simple_dict','The');
synonym dictionary and put it before the <literal>english_stem</> dictionary: synonym dictionary and put it before the <literal>english_stem</> dictionary:
<programlisting> <programlisting>
SELECT * FROM ts_debug('english','Paris'); SELECT * FROM ts_debug('english', 'Paris');
Alias | Description | Token | Dictionaries | Lexized token alias | description | token | dictionaries | dictionary | lexemes
-------+-------------+-------+----------------+---------------------- -------+-------------+-------+----------------+--------------+---------
lword | Latin word | Paris | {english_stem} | english_stem: {pari} lword | Latin word | Paris | {english_stem} | english_stem | {pari}
(1 row)
CREATE TEXT SEARCH DICTIONARY synonym ( CREATE TEXT SEARCH DICTIONARY my_synonym (
TEMPLATE = synonym, TEMPLATE = synonym,
SYNONYMS = my_synonyms SYNONYMS = my_synonyms
); );
ALTER TEXT SEARCH CONFIGURATION english ALTER TEXT SEARCH CONFIGURATION english
ALTER MAPPING FOR lword WITH synonym, english_stem; ALTER MAPPING FOR lword WITH my_synonym, english_stem;
SELECT * FROM ts_debug('english','Paris'); SELECT * FROM ts_debug('english', 'Paris');
Alias | Description | Token | Dictionaries | Lexized token alias | description | token | dictionaries | dictionary | lexemes
-------+-------------+-------+------------------------+------------------ -------+-------------+-------+---------------------------+------------+---------
lword | Latin word | Paris | {synonym,english_stem} | synonym: {paris} lword | Latin word | Paris | {my_synonym,english_stem} | my_synonym | {paris}
(1 row)
</programlisting> </programlisting>
</para> </para>
...@@ -2711,7 +2709,14 @@ SHOW default_text_search_config; ...@@ -2711,7 +2709,14 @@ SHOW default_text_search_config;
</indexterm> </indexterm>
<synopsis> <synopsis>
ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>) returns <type>setof ts_debug</> ts_debug(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>text</>,
OUT <replaceable class="PARAMETER">alias</> <type>text</>,
OUT <replaceable class="PARAMETER">description</> <type>text</>,
OUT <replaceable class="PARAMETER">token</> <type>text</>,
OUT <replaceable class="PARAMETER">dictionaries</> <type>regdictionary[]</>,
OUT <replaceable class="PARAMETER">dictionary</> <type>regdictionary</>,
OUT <replaceable class="PARAMETER">lexemes</> <type>text[]</>)
returns setof record
</synopsis> </synopsis>
<para> <para>
...@@ -2725,23 +2730,47 @@ SHOW default_text_search_config; ...@@ -2725,23 +2730,47 @@ SHOW default_text_search_config;
</para> </para>
<para> <para>
<function>ts_debug</>'s result row type is defined as: <function>ts_debug</> returns one row for each token identified in the text
by the parser. The columns returned are
<programlisting> <itemizedlist spacing="compact" mark="bullet">
CREATE TYPE ts_debug AS ( <listitem>
"Alias" text, <para>
"Description" text, <replaceable>alias</> <type>text</> &mdash; short name of the token type
"Token" text, </para>
"Dictionaries" regdictionary[], </listitem>
"Lexized token" text <listitem>
); <para>
</programlisting> <replaceable>description</> <type>text</> &mdash; description of the
token type
One row is produced for each token identified by the parser. </para>
The first three columns describe the token, and the fourth lists </listitem>
the dictionaries selected by the configuration for that token's type. <listitem>
The last column shows the result of dictionary processing: which <para>
dictionary (if any) recognized the token, and what it produced. <replaceable>token</> <type>text</> &mdash; text of the token
</para>
</listitem>
<listitem>
<para>
<replaceable>dictionaries</> <type>regdictionary[]</> &mdash; the
dictionaries selected by the configuration for this token type
</para>
</listitem>
<listitem>
<para>
<replaceable>dictionary</> <type>regdictionary</> &mdash; the dictionary
that recognized the token, or <literal>NULL</> if none did
</para>
</listitem>
<listitem>
<para>
<replaceable>lexemes</> <type>text[]</> &mdash; the lexeme(s) produced
by the dictionary that recognized the token, or <literal>NULL</> if
none did; an empty array (<literal>{}</>) means it was recognized as a
stop word
</para>
</listitem>
</itemizedlist>
</para> </para>
<para> <para>
...@@ -2749,33 +2778,32 @@ CREATE TYPE ts_debug AS ( ...@@ -2749,33 +2778,32 @@ CREATE TYPE ts_debug AS (
<programlisting> <programlisting>
SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats'); SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats');
Alias | Description | Token | Dictionaries | Lexized token alias | description | token | dictionaries | dictionary | lexemes
-------+---------------+-------+--------------+---------------- -------+---------------+-------+----------------+--------------+---------
lword | Latin word | a | {english} | english: {} lword | Latin word | a | {english_stem} | english_stem | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | fat | {english} | english: {fat} lword | Latin word | fat | {english_stem} | english_stem | {fat}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | cat | {english} | english: {cat} lword | Latin word | cat | {english_stem} | english_stem | {cat}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | sat | {english} | english: {sat} lword | Latin word | sat | {english_stem} | english_stem | {sat}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | on | {english} | english: {} lword | Latin word | on | {english_stem} | english_stem | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | a | {english} | english: {} lword | Latin word | a | {english_stem} | english_stem | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | mat | {english} | english: {mat} lword | Latin word | mat | {english_stem} | english_stem | {mat}
blank | Space symbols | | | blank | Space symbols | | {} | |
blank | Space symbols | - | | blank | Space symbols | - | {} | |
lword | Latin word | it | {english} | english: {} lword | Latin word | it | {english_stem} | english_stem | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | ate | {english} | english: {ate} lword | Latin word | ate | {english_stem} | english_stem | {ate}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | a | {english} | english: {} lword | Latin word | a | {english_stem} | english_stem | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | fat | {english} | english: {fat} lword | Latin word | fat | {english_stem} | english_stem | {fat}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | rats | {english} | english: {rat} lword | Latin word | rats | {english_stem} | english_stem | {rat}
(24 rows)
</programlisting> </programlisting>
</para> </para>
...@@ -2801,34 +2829,33 @@ ALTER TEXT SEARCH CONFIGURATION public.english ...@@ -2801,34 +2829,33 @@ ALTER TEXT SEARCH CONFIGURATION public.english
<programlisting> <programlisting>
SELECT * FROM ts_debug('public.english','The Brightest supernovaes'); SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
Alias | Description | Token | Dictionaries | Lexized token alias | description | token | dictionaries | dictionary | lexemes
-------+---------------+-------------+-------------------------------------------------+------------------------------------- -------+---------------+-------------+-------------------------------+----------------+-------------
lword | Latin word | The | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {} lword | Latin word | The | {english_ispell,english_stem} | english_ispell | {}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | Brightest | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright} lword | Latin word | Brightest | {english_ispell,english_stem} | english_ispell | {bright}
blank | Space symbols | | | blank | Space symbols | | {} | |
lword | Latin word | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova} lword | Latin word | supernovaes | {english_ispell,english_stem} | english_stem | {supernova}
(5 rows)
</programlisting> </programlisting>
<para> <para>
In this example, the word <literal>Brightest</> was recognized by the In this example, the word <literal>Brightest</> was recognized by the
parser as a <literal>Latin word</literal> (alias <literal>lword</literal>). parser as a <literal>Latin word</literal> (alias <literal>lword</literal>).
For this token type the dictionary list is For this token type the dictionary list is
<literal>public.english_ispell</> and <literal>english_ispell</> and
<literal>pg_catalog.english_stem</literal>. The word was recognized by <literal>english_stem</literal>. The word was recognized by
<literal>public.english_ispell</literal>, which reduced it to the noun <literal>english_ispell</literal>, which reduced it to the noun
<literal>bright</literal>. The word <literal>supernovaes</literal> is <literal>bright</literal>. The word <literal>supernovaes</literal> is
unknown to the <literal>public.english_ispell</literal> dictionary so it unknown to the <literal>english_ispell</literal> dictionary so it
was passed to the next dictionary, and, fortunately, was recognized (in was passed to the next dictionary, and, fortunately, was recognized (in
fact, <literal>public.english_stem</literal> is a Snowball dictionary which fact, <literal>english_stem</literal> is a Snowball dictionary which
recognizes everything; that is why it was placed at the end of the recognizes everything; that is why it was placed at the end of the
dictionary list). dictionary list).
</para> </para>
<para> <para>
The word <literal>The</literal> was recognized by the The word <literal>The</literal> was recognized by the
<literal>public.english_ispell</literal> dictionary as a stop word (<xref <literal>english_ispell</literal> dictionary as a stop word (<xref
linkend="textsearch-stopwords">) and will not be indexed. linkend="textsearch-stopwords">) and will not be indexed.
The spaces are discarded too, since the configuration provides no The spaces are discarded too, since the configuration provides no
dictionaries at all for them. dictionaries at all for them.
...@@ -2839,16 +2866,15 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes'); ...@@ -2839,16 +2866,15 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
you want to see: you want to see:
<programlisting> <programlisting>
SELECT "Alias", "Token", "Lexized token" SELECT alias, token, dictionary, lexemes
FROM ts_debug('public.english','The Brightest supernovaes'); FROM ts_debug('public.english','The Brightest supernovaes');
Alias | Token | Lexized token alias | token | dictionary | lexemes
-------+-------------+-------------------------------------- -------+-------------+----------------+-------------
lword | The | public.english_ispell: {} lword | The | english_ispell | {}
blank | | blank | | |
lword | Brightest | public.english_ispell: {bright} lword | Brightest | english_ispell | {bright}
blank | | blank | | |
lword | supernovaes | pg_catalog.english_stem: {supernova} lword | supernovaes | english_stem | {supernova}
(5 rows)
</programlisting> </programlisting>
</para> </para>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Copyright (c) 1996-2007, PostgreSQL Global Development Group * Copyright (c) 1996-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.46 2007/09/25 20:03:37 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.47 2007/10/22 20:13:37 tgl Exp $
*/ */
CREATE VIEW pg_roles AS CREATE VIEW pg_roles AS
...@@ -389,38 +389,36 @@ CREATE VIEW pg_stat_bgwriter AS ...@@ -389,38 +389,36 @@ CREATE VIEW pg_stat_bgwriter AS
-- Tsearch debug function. Defined here because it'd be pretty unwieldy -- Tsearch debug function. Defined here because it'd be pretty unwieldy
-- to put it into pg_proc.h -- to put it into pg_proc.h
CREATE TYPE ts_debug AS ( CREATE FUNCTION ts_debug(IN config regconfig, IN document text,
"Alias" text, OUT alias text,
"Description" text, OUT description text,
"Token" text, OUT token text,
"Dictionaries" regdictionary[], OUT dictionaries regdictionary[],
"Lexized token" text OUT dictionary regdictionary,
); OUT lexemes text[])
RETURNS SETOF record AS
COMMENT ON TYPE ts_debug IS 'type returned from ts_debug() function';
CREATE FUNCTION ts_debug(regconfig, text)
RETURNS SETOF ts_debug AS
$$ $$
SELECT SELECT
tt.alias AS "Alias", tt.alias AS alias,
tt.description AS "Description", tt.description AS description,
parse.token AS "Token", parse.token AS token,
ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary
FROM pg_catalog.pg_ts_config_map AS m FROM pg_catalog.pg_ts_config_map AS m
WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
ORDER BY m.mapseqno ) ORDER BY m.mapseqno )
AS "Dictionaries", AS dictionaries,
( ( SELECT mapdict::pg_catalog.regdictionary
SELECT FROM pg_catalog.pg_ts_config_map AS m
dl.mapdict::pg_catalog.regdictionary || ': ' || dl.lex::pg_catalog.text WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
FROM ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex LIMIT 1
) AS dictionary,
( SELECT pg_catalog.ts_lexize(mapdict, parse.token)
FROM pg_catalog.pg_ts_config_map AS m FROM pg_catalog.pg_ts_config_map AS m
WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno ) dl ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno
LIMIT 1 LIMIT 1
) AS "Lexized token" ) AS lexemes
FROM pg_catalog.ts_parse( FROM pg_catalog.ts_parse(
(SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2
) AS parse, ) AS parse,
...@@ -434,8 +432,14 @@ LANGUAGE SQL STRICT STABLE; ...@@ -434,8 +432,14 @@ LANGUAGE SQL STRICT STABLE;
COMMENT ON FUNCTION ts_debug(regconfig,text) IS COMMENT ON FUNCTION ts_debug(regconfig,text) IS
'debug function for text search configuration'; 'debug function for text search configuration';
CREATE FUNCTION ts_debug(text) CREATE FUNCTION ts_debug(IN document text,
RETURNS SETOF ts_debug AS OUT alias text,
OUT description text,
OUT token text,
OUT dictionaries regdictionary[],
OUT dictionary regdictionary,
OUT lexemes text[])
RETURNS SETOF record AS
$$ $$
SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1); SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1);
$$ $$
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.434 2007/10/19 22:01:45 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.435 2007/10/22 20:13:37 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 200710192 #define CATALOG_VERSION_NO 200710221
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment