Commit 7953fdcd authored by Tom Lane

Add a CaseSensitive parameter to synonym dictionaries.

Simon Riggs
parent 2fc27954
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
<chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title>
......@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
dictionary can be used to overcome linguistic problems, for example, to
prevent an English stemmer dictionary from reducing the word 'Paris' to
'pari'. It is enough to have a <literal>Paris paris</literal> line in the
synonym dictionary and put it before the <literal>english_stem</> dictionary:
synonym dictionary and put it before the <literal>english_stem</>
dictionary. For example:
<programlisting>
SELECT * FROM ts_debug('english', 'Paris');
......@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
<productname>PostgreSQL</> installation's shared-data directory).
The file format is just one line
per word to be substituted, with the word followed by its synonym,
separated by white space. Blank lines and trailing spaces are ignored,
and upper case is folded to lower case.
separated by white space. Blank lines and trailing spaces are ignored.
</para>
<para>
The <literal>synonym</> template also has an optional parameter
<literal>CaseSensitive</>, which defaults to <literal>false</>. When
<literal>CaseSensitive</> is <literal>false</>, words in the synonym file
are folded to lower case, as are input tokens. When it is
<literal>true</>, words and tokens are not folded to lower case,
but are compared as-is.
</para>
</sect2>
<sect2 id="textsearch-thesaurus">
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,6 +30,7 @@ typedef struct
{
int len; /* length of syn array */
Syn *syn;
bool case_sensitive;
} DictSyn;
/*
......@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
DictSyn *d;
ListCell *l;
char *filename = NULL;
bool case_sensitive = false;
FILE *fin;
char *starti,
*starto,
......@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
filename = defGetString(defel);
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
case_sensitive = defGetBoolean(defel);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
......@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
}
}
if (case_sensitive)
{
d->syn[cur].in = pstrdup(starti);
d->syn[cur].out = pstrdup(starto);
}
else
{
d->syn[cur].in = lowerstr(starti);
d->syn[cur].out = lowerstr(starto);
}
cur++;
......@@ -168,6 +180,8 @@ skipline:
d->len = cur;
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
d->case_sensitive = case_sensitive;
PG_RETURN_POINTER(d);
}
......@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
if (len <= 0 || d->len <= 0)
PG_RETURN_POINTER(NULL);
if (d->case_sensitive)
key.in = pnstrdup(in, len);
else
key.in = lowerstr_with_len(in, len);
key.out = NULL;
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment