Commit 7953fdcd authored by Tom Lane

Add a CaseSensitive parameter to synonym dictionaries.

Simon Riggs
parent 2fc27954
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
<chapter id="textsearch"> <chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title> <title id="textsearch-title">Full Text Search</title>
...@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The'); ...@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
dictionary can be used to overcome linguistic problems, for example, to dictionary can be used to overcome linguistic problems, for example, to
prevent an English stemmer dictionary from reducing the word 'Paris' to prevent an English stemmer dictionary from reducing the word 'Paris' to
'pari'. It is enough to have a <literal>Paris paris</literal> line in the 'pari'. It is enough to have a <literal>Paris paris</literal> line in the
synonym dictionary and put it before the <literal>english_stem</> dictionary: synonym dictionary and put it before the <literal>english_stem</>
dictionary. For example:
<programlisting> <programlisting>
SELECT * FROM ts_debug('english', 'Paris'); SELECT * FROM ts_debug('english', 'Paris');
...@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris'); ...@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
<productname>PostgreSQL</> installation's shared-data directory). <productname>PostgreSQL</> installation's shared-data directory).
The file format is just one line The file format is just one line
per word to be substituted, with the word followed by its synonym, per word to be substituted, with the word followed by its synonym,
separated by white space. Blank lines and trailing spaces are ignored, separated by white space. Blank lines and trailing spaces are ignored.
and upper case is folded to lower case.
</para> </para>
<para>
The <literal>synonym</> template also has an optional parameter
<literal>CaseSensitive</>, which defaults to <literal>false</>. When
<literal>CaseSensitive</> is <literal>false</>, words in the synonym file
are folded to lower case, as are input tokens. When it is
<literal>true</>, words and tokens are not folded to lower case,
but are compared as-is.
</para>
</sect2> </sect2>
<sect2 id="textsearch-thesaurus"> <sect2 id="textsearch-thesaurus">
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,6 +30,7 @@ typedef struct ...@@ -30,6 +30,7 @@ typedef struct
{ {
int len; /* length of syn array */ int len; /* length of syn array */
Syn *syn; Syn *syn;
bool case_sensitive;
} DictSyn; } DictSyn;
/* /*
...@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS) ...@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
DictSyn *d; DictSyn *d;
ListCell *l; ListCell *l;
char *filename = NULL; char *filename = NULL;
bool case_sensitive = false;
FILE *fin; FILE *fin;
char *starti, char *starti,
*starto, *starto,
...@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS) ...@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
if (pg_strcasecmp("Synonyms", defel->defname) == 0) if (pg_strcasecmp("Synonyms", defel->defname) == 0)
filename = defGetString(defel); filename = defGetString(defel);
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
case_sensitive = defGetBoolean(defel);
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
...@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS) ...@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
} }
} }
d->syn[cur].in = lowerstr(starti); if (case_sensitive)
d->syn[cur].out = lowerstr(starto); {
d->syn[cur].in = pstrdup(starti);
d->syn[cur].out = pstrdup(starto);
}
else
{
d->syn[cur].in = lowerstr(starti);
d->syn[cur].out = lowerstr(starto);
}
cur++; cur++;
...@@ -168,6 +180,8 @@ skipline: ...@@ -168,6 +180,8 @@ skipline:
d->len = cur; d->len = cur;
qsort(d->syn, d->len, sizeof(Syn), compareSyn); qsort(d->syn, d->len, sizeof(Syn), compareSyn);
d->case_sensitive = case_sensitive;
PG_RETURN_POINTER(d); PG_RETURN_POINTER(d);
} }
...@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS) ...@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
if (len <= 0 || d->len <= 0) if (len <= 0 || d->len <= 0)
PG_RETURN_POINTER(NULL); PG_RETURN_POINTER(NULL);
key.in = lowerstr_with_len(in, len); if (d->case_sensitive)
key.in = pnstrdup(in, len);
else
key.in = lowerstr_with_len(in, len);
key.out = NULL; key.out = NULL;
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment