Have text search thesaurus files use "?" for stop words.

Throw an error for actual stop words, rather than a warning. This fixes problems with cache reloading causing warning messages. Re-enable stop words in regression tests; they were disabled by Tom. Document "?" as an API change.

Have text search thesaurus files use "?" for stop words.
Throw an error for actual stop words, rather than a warning. This fixes problems with cache reloading causing warning messages. Re-enable stop words in regression tests; they were disabled by Tom. Document "?" as an API change.
d009992b · Bruce Momjian · 82748bc2 · d009992b · d009992b · d009992b
Commit d009992b authored Nov 10, 2007 by Bruce Momjian
4 changed files
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.30 2007/11/05 15:55:53 mha Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.31 2007/11/10 15:39:34 momjian Exp $ -->
 <chapter id="textsearch">
 <title id="textsearch-title">Full Text Search</title>
@@ -2258,20 +2258,17 @@ more sample word(s) : more indexed word(s)
   </para>
   <para>
-    Stop words recognized by the subdictionary are replaced by a <quote>stop
+    Specific stop words recognized by the subdictionary cannot be
-    word placeholder</quote> to record their position. To illustrate this,
+    specified;  instead use <literal>?</> to mark the location where any
-    consider these phrases:
+    stop word can appear.  For example, assuming that <literal>a</> and
+    <literal>the</> are stop words according to the subdictionary:
 <programlisting>
-a one the two : swsw
+? one ? two : swsw
-the one a two : swsw2
 </programlisting>
-    Assuming that <literal>a</> and <literal>the</> are stop words according
+    matches <literal>a one the two</> and <literal>the one a two</>;
-    to the subdictionary, these two phrases are identical to the thesaurus:
+    both would be replaced by <literal>swsw</>.
-    they both look like <replaceable>stopword</> <literal>one</>
-    <replaceable>stopword</> <literal>two</>.  Input matching this pattern
-    will be replaced by <literal>swsw2</>, according to the tie-breaking rule.
   </para>
   <para>
@@ -3576,6 +3573,12 @@ Parser: "pg_catalog.default"
    </para>
   </listitem>
+   <listitem>
+    <para>
+     Thesaurus files now use <literal>?</> for stop words.
+    </para>
+   </listitem>
   <listitem>
    <para>
     What else?

--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.5 2007/11/09 01:32:22 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.6 2007/11/10 15:39:34 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -412,6 +412,10 @@ compileTheLexeme(DictThesaurus * d)
 	{
 		TSLexeme   *ptr;
+		if (strcmp(d->wrds[i].lexeme, "?") == 0)	/* Is stop word marker? */
+			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
+		else
+		{
 			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
 										   PointerGetDatum(d->subdict->dictData),
 											  PointerGetDatum(d->wrds[i].lexeme),
@@ -422,12 +426,8 @@ compileTheLexeme(DictThesaurus * d)
 				elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)",
 					 d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
 			else if (!(ptr->lexeme))
-		{
+				elog(ERROR, "thesaurus word-sample \"%s\" is recognized as stop-word, use \"?\" for stop words instead (rule %d)",
-			elog(NOTICE, "thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)",
 					 d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
-			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
-		}
 			else
 			{
 				while (ptr->lexeme)
@@ -455,6 +455,7 @@ compileTheLexeme(DictThesaurus * d)
 					ptr = remptr;
 				}
 			}
+		}
 		pfree(d->wrds[i].lexeme);
 		pfree(d->wrds[i].entries);

--- a/src/backend/tsearch/thesaurus_sample.ths
+++ b/src/backend/tsearch/thesaurus_sample.ths
@@ -14,4 +14,5 @@ two : *2
 supernovae stars : *sn
 supernovae : *sn
 booking tickets : order invitation cards
-# booking the tickets : order invitation Cards
+booking ? tickets : order invitation Cards
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -312,7 +312,7 @@ SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usuall
 SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
                      to_tsvector                      
---------------------------------------------------------------------
+-------------------------------------------------------
- 'book':8 'card':3 'like':6 'look':5 'invit':2 'order':1 'ticket':10
+ 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
 (1 row)