Add thesaurus dictionary which can replace N>0 lexemes by M>0 lexemes.

It required some changes in lexize algorithm, but interface with dictionaries stays compatible with old dictionaries. Funded by Georgia Public Library Service and LibLime, Inc.

Add thesaurus dictionary which can replace N>0 lexemes by M>0 lexemes.
It required some changes in lexize algorithm, but interface with dictionaries stays compatible with old dictionaries. Funded by Georgia Public Library Service and LibLime, Inc.
22505f47 · Teodor Sigaev · 3b7ed9ba · 22505f47 · 22505f47 · 22505f47
Commit 22505f47 authored May 31, 2006 by Teodor Sigaev
13 changed files
--- a/contrib/tsearch2/Makefile
+++ b/contrib/tsearch2/Makefile
-# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.15 2006/05/31 14:05:31 teodor Exp $
 MODULE_big = tsearch2
 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
-       dict_snowball.o dict_ispell.o dict_syn.o \
+       dict_snowball.o dict_ispell.o dict_syn.o dict_thesaurus.o \
       wparser.o wparser_def.o \
       ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
       tsvector_op.o rank.o ts_stat.o \
       query_util.o query_support.o query_rewrite.o query_gist.o \
-       ts_locale.o ginidx.o
+       ts_locale.o ts_lexize.o ginidx.o
 SUBDIRS     := snowball ispell wordparser
 SUBDIROBJS  := $(SUBDIRS:%=%/SUBSYS.o)
@@ -16,7 +16,7 @@ OBJS	+= $(SUBDIROBJS)
 PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
-DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
+DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8 thesaurus
 DATA_built = tsearch2.sql untsearch2.sql
 DOCS = README.tsearch2
 REGRESS = tsearch2

--- a/contrib/tsearch2/common.c
+++ b/contrib/tsearch2/common.c
@@ -5,6 +5,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_namespace.h"
 #include "utils/syscache.h"
+#include "miscadmin.h"
 #include "ts_cfg.h"
 #include "dict.h"
@@ -163,3 +164,23 @@ get_oidnamespace(Oid funcoid)
 	return nspoid;
 }
+/*
+ * Resolve a file name against the installation's share directory.
+ *
+ * An absolute path is returned unchanged (the very same pointer).  A
+ * relative path is joined to the share directory with the platform's
+ * path separator and returned as a newly palloc'ed string; the caller's
+ * original string is neither modified nor freed.
+ */
+char *
+to_absfilename(char *filename) {
+#ifdef  WIN32
+	char		separator = '\\';
+#else
+	char		separator = '/';
+#endif
+	char		sharedir[MAXPGPATH];
+	char	   *joined;
+	/* nothing to do for a path that is already absolute */
+	if (is_absolute_path(filename))
+		return filename;
+	get_share_path(my_exec_path, sharedir);
+	/* +2: one byte for the separator, one for the terminating NUL */
+	joined = palloc(strlen(sharedir) + strlen(filename) + 2);
+	sprintf(joined, "%s%c%s", sharedir, separator, filename);
+	return joined;
+}
--- a/contrib/tsearch2/common.h
+++ b/contrib/tsearch2/common.h
@@ -16,6 +16,8 @@ text	   *mtextdup(text *in);
 int			text_cmp(text *a, text *b);
+char * to_absfilename(char *filename);
 #define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 #define ARRNELEMS(x)  ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))

--- a/contrib/tsearch2/dict.c
+++ b/contrib/tsearch2/dict.c
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.12 2006/05/31 14:05:31 teodor Exp $ */
 /*
 * interface functions to dictionary
@@ -50,16 +50,19 @@ init_dict(Oid id, DictInfo * dict)
 		Datum		opt;
 		Oid			oid = InvalidOid;
+		/* setup dictlexize method */
+		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
+		if (isnull || oid == InvalidOid)
+			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
+		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
+		/* setup and call dictinit method, optionally */
 		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
 		if (!(isnull || oid == InvalidOid))
 		{
 			opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
 			dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
 		}
-		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
-		if (isnull || oid == InvalidOid)
-			ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
-		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
 		dict->dict_id = id;
 	}
 	else
@@ -98,6 +101,29 @@ comparedict(const void *a, const void *b)
 	return (((DictInfo *) a)->dict_id < ((DictInfo *) b)->dict_id) ? -1 : 1;
 }
+/*
+ * Append the dictionary with the given id to the global DList and
+ * re-sort the list by dictionary id.
+ *
+ * The backing array is grown first (16 entries initially, doubling
+ * afterwards).  The new entry is initialised in a local variable and
+ * copied into the array only after init_dict() has returned.
+ */
+static void
+insertdict(Oid id) {
+	DictInfo	fresh;
+	if (DList.len == DList.reallen)
+	{
+		int			newcap = 16;
+		DictInfo   *grown;
+		if (DList.reallen)
+			newcap = 2 * DList.reallen;
+		grown = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * newcap);
+		if (grown == NULL)
+			ts_error(ERROR, "No memory");
+		DList.list = grown;
+		DList.reallen = newcap;
+	}
+	init_dict(id, &fresh);
+	DList.list[DList.len++] = fresh;
+	/* keep the list ordered by dict_id (see comparedict) */
+	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
+}
 DictInfo *
 finddict(Oid id)
 {
@@ -117,23 +143,8 @@ finddict(Oid id)
 			return DList.last_dict;
 	}
-	/* last chance */
+	/* insert new dictionary */ 
-	if (DList.len == DList.reallen)
+	insertdict(id);
-	{
-		DictInfo   *tmp;
-		int			reallen = (DList.reallen) ? 2 * DList.reallen : 16;
-		tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
-		if (!tmp)
-			ts_error(ERROR, "No memory");
-		DList.reallen = reallen;
-		DList.list = tmp;
-	}
-	DList.last_dict = &(DList.list[DList.len]);
-	init_dict(id, DList.last_dict);
-	DList.len++;
-	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
 	return finddict(id); /* qsort changed order!! */ ;
 }
@@ -190,17 +201,32 @@ lexize(PG_FUNCTION_ARGS)
 			   *ptr;
 	Datum	   *da;
 	ArrayType  *a;
+	DictSubState	dstate = { false, false, NULL };
 	SET_FUNCOID();
 	dict = finddict(PG_GETARG_OID(0));
 	ptr = res = (TSLexeme *) DatumGetPointer(
-										  FunctionCall3(&(dict->lexize_info),
+										FunctionCall4(&(dict->lexize_info),
+										PointerGetDatum(dict->dictionary),
+										PointerGetDatum(VARDATA(in)),
+										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+										PointerGetDatum(&dstate)
+														)
+		);
+	if (dstate.getnext)  {
+		dstate.isend = true;	
+		ptr = res = (TSLexeme *) DatumGetPointer(
+										FunctionCall4(&(dict->lexize_info),
 										   PointerGetDatum(dict->dictionary),
 												PointerGetDatum(VARDATA(in)),
-										Int32GetDatum(VARSIZE(in) - VARHDRSZ)
+										Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+										PointerGetDatum(&dstate)
 														)
 		);
+	}
 	PG_FREE_IF_COPY(in, 1);
 	if (!res)
 	{

--- a/contrib/tsearch2/dict.h
+++ b/contrib/tsearch2/dict.h
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.6 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
 #ifndef __DICT_H__
 #define __DICT_H__
 #include "postgres.h"
 #include "fmgr.h"
+#include "ts_cfg.h"
 typedef struct
 {
@@ -29,6 +30,11 @@ DictInfo   *finddict(Oid id);
 Oid			name2id_dict(text *name);
 void		reset_dict(void);
+/*
+ * State exchanged between the lexize driver and a dictionary's lexize
+ * function.  A pointer to it is passed as the fourth argument of the
+ * lexize call, which lets a dictionary ask for further lexemes before it
+ * produces its result.
+ */
+typedef struct {
+	bool isend; /* in: set by the caller when the end of the text has been reached */
+	bool getnext; /* out: set by the dictionary when it wants the next lexeme */
+	void	*private;  /* internal dict state between calls with getnext == true */
+} DictSubState;
 /* simple parser of cfg string */
 typedef struct
@@ -45,17 +51,61 @@ typedef struct
 	/*
 	 * number of variant of split word , for example Word 'fotballklubber'
 	 * (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
-	 * ball, klubb ). So, dictionary should return: nvariant	lexeme 1
+	 * ball, klubb ). So, dictionary should return: 
-	 * fotball 1	   klubb 2		 fot 2		 ball 2		  klubb
+	 * nvariant	lexeme 
-	 *
+	 *   1 		fotball 
+	 *   1	   	klubb 
+	 *	 2		fot 
+	 *	 2		ball 
+	 *   2		klubb
 	 */
 	uint16		nvariant;
-	/* currently unused */
 	uint16		flags;
 	/* C-string */
 	char	   *lexeme;
 }	TSLexeme;
+/*
+ * TSLexeme.flags bit: when set on a lexeme, parsetext_v2() increments the
+ * word position once more before storing that lexeme.
+ */
+#define TSL_ADDPOS		0x01
+/*
+ * Lexize subsystem
+ */
+/*
+ * One token produced by the parser and queued for lexizing; the nodes
+ * form a singly linked list.
+ */
+typedef struct ParsedLex {
+	/* parser token type; LexizeExec treats type 0 as the end-of-text marker */
+    int     	type;
+	/* token text; stored as passed to LexizeAddLemm, not copied */
+    char    	*lemm;
+	/* length of lemm, handed to the dictionary's lexize function */
+    int     	lenlemm;
+	/* NOTE(review): not read or written by any code visible here - confirm its purpose */
+	bool		resfollow;
+	/* next node in the list, NULL for the last one */
+    struct ParsedLex *next;
+} ParsedLex;
+/*
+ * Singly linked list of ParsedLex nodes with a tail pointer, so nodes can
+ * be appended at the tail and removed from the head.  Both pointers are
+ * NULL when the list is empty.
+ */
+typedef struct ListParsedLex {
+	ParsedLex	*head;
+	ParsedLex	*tail;
+} ListParsedLex;
+/*
+ * Working state of the lexize subsystem: set up by LexizeInit(), fed by
+ * LexizeAddLemm() and consumed by LexizeExec().
+ */
+typedef struct {
+	/* configuration whose type -> dictionary-list map is consulted */
+    TSCfgInfo       *cfg;
+	/* InvalidOid in the usual mode; otherwise the dictionary that asked for more lexemes */
+    Oid             curDictId;
+	/* index in the head lexeme's dictionary list at which the usual mode (re)starts */
+    int             posDict;
+	/* state passed to the dictionary's lexize function as its fourth argument */
+    DictSubState    dictState;
+	/* next lexeme to hand to curDictId in multiword mode */
+    ParsedLex       *curSub;
+	ListParsedLex	towork;   /* current list to work */
+	ListParsedLex	waste;    /* list of lexemes that already lexized */
+	/* fields to store last variant to lexize (basically, thesaurus 
+	   or similar to, which wants  several lexemes */	
+	/* lexeme at which tmpRes was produced */
+	ParsedLex		*lastRes;
+	/* last non-NULL result returned by a dictionary that still wanted more lexemes */
+	TSLexeme		*tmpRes;
+} LexizeData;
+/* Reset *ld to an empty state bound to configuration cfg. */
+void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
+/* Queue one parser token (type, text pointer, length) for lexizing. */
+void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
+/*
+ * Return the next array of normalized lexemes, or NULL when the queued
+ * tokens yield nothing more.  If correspondLexem is not NULL it receives
+ * the list of tokens consumed so far; otherwise those tokens are freed.
+ */
+TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
 #endif
--- a/contrib/tsearch2/dict_thesaurus.c
+++ b/contrib/tsearch2/dict_thesaurus.c
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -4,21 +4,21 @@
 --
 \set ECHO none
 psql:tsearch2.sql:13: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"
-psql:tsearch2.sql:158: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
+psql:tsearch2.sql:177: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
-psql:tsearch2.sql:257: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
+psql:tsearch2.sql:276: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
-psql:tsearch2.sql:264: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
+psql:tsearch2.sql:283: NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
-psql:tsearch2.sql:370: NOTICE:  type "tsvector" is not yet defined
+psql:tsearch2.sql:389: NOTICE:  type "tsvector" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:375: NOTICE:  argument type tsvector is only a shell
+psql:tsearch2.sql:394: NOTICE:  argument type tsvector is only a shell
-psql:tsearch2.sql:429: NOTICE:  type "tsquery" is not yet defined
+psql:tsearch2.sql:448: NOTICE:  type "tsquery" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:434: NOTICE:  argument type tsquery is only a shell
+psql:tsearch2.sql:453: NOTICE:  argument type tsquery is only a shell
-psql:tsearch2.sql:592: NOTICE:  type "gtsvector" is not yet defined
+psql:tsearch2.sql:611: NOTICE:  type "gtsvector" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:597: NOTICE:  argument type gtsvector is only a shell
+psql:tsearch2.sql:616: NOTICE:  argument type gtsvector is only a shell
-psql:tsearch2.sql:1087: NOTICE:  type "gtsq" is not yet defined
+psql:tsearch2.sql:1106: NOTICE:  type "gtsq" is not yet defined
 DETAIL:  Creating a shell type definition.
-psql:tsearch2.sql:1092: NOTICE:  argument type gtsq is only a shell
+psql:tsearch2.sql:1111: NOTICE:  argument type gtsq is only a shell
 --tsvector
 SELECT '1'::tsvector;
 tsvector 

--- a/contrib/tsearch2/stopword.c
+++ b/contrib/tsearch2/stopword.c
@@ -4,8 +4,6 @@
 */
 #include "postgres.h"
-#include "miscadmin.h"
 #include "common.h"
 #include "dict.h"
 #include "ts_locale.h"
@@ -36,30 +34,11 @@ readstoplist(text *in, StopList * s)
 	s->len = 0;
 	if (in && VARSIZE(in) - VARHDRSZ > 0)
 	{
-		char	   *filename = text2char(in);
+		char	   *filename = to_absfilename(text2char(in));
 		FILE	   *hin;
 		char		buf[STOPBUFLEN];
 		int			reallen = 0;
-		/* if path is relative, take it as relative to share dir */
-		if (!is_absolute_path(filename))
-		{
-			char		sharepath[MAXPGPATH];
-			char	   *absfn;
-#ifdef	WIN32
-			char	delim = '\\';
-#else
-			char 	delim = '/';
-#endif
-			get_share_path(my_exec_path, sharepath);
-			absfn = palloc(strlen(sharepath) + strlen(filename) + 2);
-			sprintf(absfn, "%s%c%s", sharepath, delim, filename);
-			pfree(filename);
-			filename = absfn;
-		}
 		if ((hin = fopen(filename, "r")) == NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_CONFIG_FILE_ERROR),

--- a/contrib/tsearch2/thesaurus
+++ b/contrib/tsearch2/thesaurus
+#
+# Thesaurus config file. The character ':' splits
+# each line into two parts:
+#     the phrase to be replaced
+#     the phrase to substitute for it
+#
+#one two three : 123
+#one two : 12
+#one : 1
+#two : 2
+#foo bar : blah blah
+#f   bar : fbar
+#e   bar : ebar
+#g   bar bar : gbarbar
+#asd:sdffff
+#qwerty:qwer wert erty
--- a/contrib/tsearch2/ts_cfg.c
+++ b/contrib/tsearch2/ts_cfg.c
@@ -281,15 +281,15 @@ name2id_cfg(text *name)
 	return id;
 }
 void
 parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 {
 	int			type,
-				lenlemm,
+				lenlemm;
-				i;
 	char	   *lemm = NULL;
 	WParserInfo *prsobj = findprs(cfg->prs_id);
+	LexizeData	ldata;
+	TSLexeme   *norms;
 	prsobj->prs = (void *) DatumGetPointer(
 										   FunctionCall2(
@@ -299,14 +299,16 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 														 )
 		);
-	while ((type = DatumGetInt32(FunctionCall3(
+	LexizeInit(&ldata, cfg);
+	do {
+		type = DatumGetInt32(FunctionCall3(
 											   &(prsobj->getlexeme_info),
 											   PointerGetDatum(prsobj->prs),
 											   PointerGetDatum(&lemm),
-										   PointerGetDatum(&lenlemm)))) != 0)
+										   PointerGetDatum(&lenlemm)));
-	{
-		if (lenlemm >= MAXSTRLEN)
+		if (type>0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
@@ -320,25 +322,11 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 #endif
 		}
-		if (type >= cfg->len)	/* skip this type of lexeme */
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
-			continue;
-		for (i = 0; i < cfg->map[type].len; i++)
+		while(  (norms = LexizeExec(&ldata, NULL)) != NULL )
 		{
-			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
+			TSLexeme *ptr = norms;
-			TSLexeme   *norms,
-					   *ptr;
-			norms = ptr = (TSLexeme *) DatumGetPointer(
-													   FunctionCall3(
-														&(dict->lexize_info),
-										   PointerGetDatum(dict->dictionary),
-													   PointerGetDatum(lemm),
-													 PointerGetDatum(lenlemm)
-																	 )
-				);
-			if (!norms)			/* dictionary doesn't know this lexeme */
-				continue;
 			prs->pos++;			/* set pos */
@@ -350,6 +338,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 					prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
 				}
+				if ( ptr->flags & TSL_ADDPOS )
+					prs->pos++;
 				prs->words[prs->curwords].len = strlen(ptr->lexeme);
 				prs->words[prs->curwords].word = ptr->lexeme;
 				prs->words[prs->curwords].nvariant = ptr->nvariant;
@@ -359,9 +349,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 				prs->curwords++;
 			}
 			pfree(norms);
-			break;				/* lexeme already normalized or is stop word */
-		}
 	}
+	} while(type>0);
 	FunctionCall1(
 				  &(prsobj->end_info),
@@ -417,14 +406,47 @@ hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
 	}
 }
+/*
+ * Feed a group of consumed tokens and their normalized lexemes into the
+ * headline text.
+ *
+ * For every token in lexs: a token with type > 0 is added via hladdword(),
+ * then every lexeme in norms (if any) is looked up with hlfinditem(), and
+ * the token node is freed.  Afterwards the norms array and the strings it
+ * holds are freed.  norms may be NULL; otherwise it must end with an
+ * entry whose lexeme is NULL.
+ */
+static void
+addHLParsedLex(HLPRSTEXT *prs, QUERYTYPE * query, ParsedLex *lexs, TSLexeme *norms) {
+	ParsedLex	*cur = lexs;
+	TSLexeme	*item;
+	while ( cur != NULL ) {
+		ParsedLex	*following = cur->next;
+		if ( cur->type > 0 )
+			hladdword(prs, cur->lemm, cur->lenlemm, cur->type);
+		if ( norms != NULL )
+			for ( item = norms; item->lexeme; item++ )
+				hlfinditem(prs, query, item->lexeme, strlen(item->lexeme));
+		pfree( cur );
+		cur = following;
+	}
+	if ( norms == NULL )
+		return;
+	/* release the lexeme strings first, then the array that held them */
+	for ( item = norms; item->lexeme; item++ )
+		pfree( item->lexeme );
+	pfree( norms );
+}
 void
 hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
 {
 	int			type,
-				lenlemm,
+				lenlemm;
-				i;
 	char	   *lemm = NULL;
 	WParserInfo *prsobj = findprs(cfg->prs_id);
+	LexizeData	ldata;
+	TSLexeme	*norms;
+	ParsedLex	*lexs;
 	prsobj->prs = (void *) DatumGetPointer(
 										   FunctionCall2(
@@ -434,14 +456,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
 														 )
 		);
-	while ((type = DatumGetInt32(FunctionCall3(
+	LexizeInit(&ldata, cfg);
+	do {
+		type = DatumGetInt32(FunctionCall3(
 											   &(prsobj->getlexeme_info),
 											   PointerGetDatum(prsobj->prs),
 											   PointerGetDatum(&lemm),
-										   PointerGetDatum(&lenlemm)))) != 0)
+									PointerGetDatum(&lenlemm)));
-	{
-		if (lenlemm >= MAXSTRLEN)
+		if (type>0 && lenlemm >= MAXSTRLEN)
 		{
 #ifdef IGNORE_LONGLEXEME
 			ereport(NOTICE,
@@ -455,38 +479,16 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
 #endif
 		}
-		hladdword(prs, lemm, lenlemm, type);
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
-		if (type >= cfg->len)
+		do {
-			continue;
+			if ( (norms = LexizeExec(&ldata,&lexs)) != NULL ) 
+				addHLParsedLex(prs, query, lexs, norms);
+			else 
+				addHLParsedLex(prs, query, lexs, NULL);
+		} while( norms );
-		for (i = 0; i < cfg->map[type].len; i++)
+	} while( type>0 );
-		{
-			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
-			TSLexeme   *norms,
-					   *ptr;
-			norms = ptr = (TSLexeme *) DatumGetPointer(
-													   FunctionCall3(
-														&(dict->lexize_info),
-										   PointerGetDatum(dict->dictionary),
-													   PointerGetDatum(lemm),
-													 PointerGetDatum(lenlemm)
-																	 )
-				);
-			if (!norms)			/* dictionary doesn't know this lexeme */
-				continue;
-			while (ptr->lexeme)
-			{
-				hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
-				pfree(ptr->lexeme);
-				ptr++;
-			}
-			pfree(norms);
-			break;				/* lexeme already normalized or is stop word */
-		}
-	}
 	FunctionCall1(
 				  &(prsobj->end_info),

--- a/contrib/tsearch2/ts_lexize.c
+++ b/contrib/tsearch2/ts_lexize.c
+/*
+ * lexize stream of lexemes 
+ * Teodor Sigaev <teodor@sigaev.ru>
+ */
+#include "postgres.h"
+#include <ctype.h>
+#include <locale.h>
+#include "ts_cfg.h"
+#include "dict.h"
+/*
+ * Put *ld into its initial state for configuration cfg: usual
+ * (single-word) mode, both lexeme lists empty, no stored intermediate
+ * result.  ld->dictState is not touched here.
+ */
+void
+LexizeInit(LexizeData *ld, TSCfgInfo *cfg) {
+	ld->cfg = cfg;
+	/* no dictionary is waiting for further lexemes */
+	ld->curDictId = InvalidOid;
+	ld->posDict = 0;
+	ld->curSub = NULL;
+	ld->towork.head = NULL;
+	ld->towork.tail = NULL;
+	ld->waste.head = NULL;
+	ld->waste.tail = NULL;
+	ld->lastRes = NULL;
+	ld->tmpRes = NULL;
+}
+/*
+ * Append newpl to the end of list; newpl becomes the new tail and its
+ * next pointer is cleared.
+ */
+static void
+LPLAddTail(ListParsedLex *list, ParsedLex *newpl) {
+	if ( list->tail == NULL )
+		list->head = newpl;		/* list was empty */
+	else
+		list->tail->next = newpl;
+	list->tail = newpl;
+	newpl->next = NULL;
+}
+/*
+ * Unlink and return the first node of list, or NULL if the list is
+ * empty.  The returned node's next pointer is left as it was.
+ */
+static ParsedLex*
+LPLRemoveHead(ListParsedLex *list) {
+	ParsedLex *first = list->head;
+	if ( first == NULL ) {
+		list->tail = NULL;
+		return NULL;
+	}
+	list->head = first->next;
+	/* removing the only node leaves the list empty */
+	if ( list->head == NULL )
+		list->tail = NULL;
+	return first;
+}
+/*
+ * Queue one parser token for lexizing.
+ *
+ * ld       lexize state set up by LexizeInit()
+ * type     parser token type (LexizeExec treats 0 as the end-of-text marker)
+ * lemm     token text; the pointer is stored as is, the text is not copied
+ * lenlemm  length of lemm
+ *
+ * The new node is appended to ld->towork and also becomes ld->curSub, so
+ * that a dictionary waiting for further lexemes sees it next.
+ */
+void
+LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) {
+	/* allocate the node exactly once; a second palloc here would leak the first */
+	ParsedLex *newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
+	newpl->type = type;
+	newpl->lemm = lemm;
+	newpl->lenlemm = lenlemm;
+	/* palloc does not zero memory, so do not leave this field indeterminate */
+	newpl->resfollow = false;
+	LPLAddTail(&ld->towork, newpl);
+	ld->curSub = ld->towork.tail;
+}
+/*
+ * Move the first lexeme of ld->towork to the end of ld->waste and restart
+ * the dictionary index for whatever lexeme is now at the head.
+ */
+static void
+RemoveHead(LexizeData *ld) {
+	ParsedLex *done = LPLRemoveHead(&ld->towork);
+	LPLAddTail(&ld->waste, done);
+	ld->posDict = 0;
+}
+/*
+ * Dispose of the waste list: hand it to the caller through
+ * *correspondLexem when that pointer is given, otherwise free every
+ * node.  In both cases ld->waste is left empty.
+ */
+static void
+setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) {
+	ParsedLex	*cur = ld->waste.head;
+	if ( correspondLexem != NULL )
+		*correspondLexem = cur;
+	else
+		while ( cur != NULL ) {
+			ParsedLex	*following = cur->next;
+			pfree(cur);
+			cur = following;
+		}
+	ld->waste.head = ld->waste.tail = NULL;
+}
+/*
+ * Move lexemes from the head of ld->towork to ld->waste, up to and
+ * including stop.  When stop is reached, ld->curSub is set to the lexeme
+ * that followed it.  If stop is not in the list, the whole list is moved.
+ */
+static void
+moveToWaste(LexizeData *ld, ParsedLex *stop) {
+	while ( ld->towork.head != NULL ) {
+		bool	reached = (ld->towork.head == stop);
+		/* read stop->next before RemoveHead() relinks the node */
+		if ( reached )
+			ld->curSub = stop->next;
+		RemoveHead(ld);
+		if ( reached )
+			break;
+	}
+}
+/*
+ * Remember res as the latest intermediate result and lex as the lexeme
+ * at which it was produced.  A previously stored result, including the
+ * lexeme strings it holds, is freed first.
+ */
+static void
+setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res) {
+	TSLexeme	*old = ld->tmpRes;
+	if ( old != NULL ) {
+		TSLexeme	*item = old;
+		/* the array ends with an entry whose lexeme is NULL */
+		while ( item->lexeme ) {
+			pfree( item->lexeme );
+			item++;
+		}
+		pfree( old );
+	}
+	ld->tmpRes = res;
+	ld->lastRes = lex;
+}
+/*
+ * Produce the next array of normalized lexemes from the tokens queued
+ * with LexizeAddLemm().
+ *
+ * Returns the array handed back by a dictionary's lexize function, or
+ * NULL when the queued tokens yield nothing more for now.
+ *
+ * correspondLexem: if not NULL, receives the list of tokens consumed
+ * (moved to the waste list) during this call; if NULL, those tokens are
+ * freed.  See setCorrLex().
+ *
+ * Two modes are used.  In the usual mode (curDictId == InvalidOid) the
+ * head of the towork list is offered to each dictionary mapped to its
+ * type.  If a dictionary sets dictState.getnext, the function switches
+ * to multiword mode and feeds that dictionary the following tokens until
+ * it stops asking for more.
+ */
+TSLexeme*
+LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
+	int i;
+	ListDictionary	*map;
+	DictInfo *dict;
+	TSLexeme	*res;
+	if ( ld->curDictId == InvalidOid ) {
+		/* 
+		 * usual mode: dictionary wants only one word,
+		 * but we should keep in mind that we should go through
+		 * all stack
+		 */
+		while( ld->towork.head ) {
+			ParsedLex	*curVal = ld->towork.head;
+			/* map is only dereferenced after the range check on type below */
+			map = ld->cfg->map + curVal->type;
+			if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0 ) {	
+				/* skip this type of lexeme */
+				RemoveHead(ld);
+				continue;
+			}
+			/* start at posDict: non-zero when a multiword attempt failed earlier */
+			for (i = ld->posDict; i < map->len; i++) {
+				dict = finddict(DatumGetObjectId(map->dict_id[i]));
+				ld->dictState.isend = ld->dictState.getnext = false;
+				ld->dictState.private = NULL;
+				res = (TSLexeme *) DatumGetPointer( FunctionCall4(
+													&(dict->lexize_info),
+									   				PointerGetDatum(dict->dictionary),
+												   	PointerGetDatum(curVal->lemm),
+												 	Int32GetDatum(curVal->lenlemm),
+													PointerGetDatum(&ld->dictState)
+										 ));
+				if ( ld->dictState.getnext ) {
+					/* 
+					 * dictionary wants next word, so setup and store
+					 * current position and go to multiword  mode
+					 */
+					ld->curDictId = DatumGetObjectId(map->dict_id[i]);
+					/* if multiword mode fails, resume with the dictionary after this one */
+					ld->posDict = i+1;
+					ld->curSub = curVal->next;
+					/* keep a result already available in case no longer match is found */
+					if ( res )
+						setNewTmpRes(ld, curVal, res);
+					return LexizeExec(ld, correspondLexem);
+				}
+				if (!res)			/* dictionary doesn't know this lexeme */
+					continue;
+				RemoveHead(ld);
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+			/* no dictionary recognized the lexeme: drop it and go on */
+			RemoveHead(ld);
+		} 
+	} else { /* curDictId is valid */
+		dict = finddict(ld->curDictId);
+		/*
+		 * Dictionary ld->curDictId asks  us about following words
+		 */
+		while( ld->curSub ) {
+			ParsedLex	*curVal = ld->curSub;
+			map = ld->cfg->map + curVal->type;
+			if (curVal->type != 0) {
+				bool dictExists = false;
+				if (curVal->type >= ld->cfg->len || map->len == 0 ) {	
+					/* skip this type of lexeme */
+					ld->curSub = curVal->next;
+					continue;
+				}
+				/*
+				 * We should be sure that current type of lexeme is recognized by
+				 * our dictionary: we just check is it exist in 
+				 * list of dictionaries ?
+				 */
+				for(i=0;i < map->len && !dictExists; i++) 
+					if ( ld->curDictId == DatumGetObjectId(map->dict_id[i]) )
+						dictExists = true;
+				if ( !dictExists ) {
+					/*
+					 * Dictionary can't work with current type of lexeme,
+					 * return to basic mode and redo all stored lexemes
+					 */
+					ld->curDictId = InvalidOid;
+					return LexizeExec(ld, correspondLexem);
+				}
+			} 
+			/* a lexeme of type 0 is passed on with isend set */
+			ld->dictState.isend = (curVal->type==0) ? true : false;
+			ld->dictState.getnext = false;
+			res = (TSLexeme *) DatumGetPointer( FunctionCall4(
+												&(dict->lexize_info),
+								   				PointerGetDatum(dict->dictionary),
+											   	PointerGetDatum(curVal->lemm),
+											 	Int32GetDatum(curVal->lenlemm),
+												PointerGetDatum(&ld->dictState)
+										 ));
+			if ( ld->dictState.getnext ) {
+				/* Dictionary wants one more */
+				ld->curSub = curVal->next;
+				if ( res )
+					setNewTmpRes(ld, curVal, res);
+				continue;
+			}
+			if ( res || ld->tmpRes ) {
+				/*
+				 * Dictionary normalizes lexemes,
+				 * so we remove from stack all used lexemes ,
+				 * return to basic mode and redo end of stack (if it exists)
+				 */
+				if ( res ) {
+					/* consume everything up to and including the current lexeme */
+					moveToWaste( ld, ld->curSub );
+				} else {
+					/* fall back to the stored result and consume up to its lexeme */
+					res = ld->tmpRes;
+					moveToWaste( ld, ld->lastRes );
+				}
+				/* reset to initial state */
+				ld->curDictId = InvalidOid;
+				ld->posDict = 0;
+				ld->lastRes = NULL;
+				ld->tmpRes = NULL;
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+			/* Dict don't want next lexem and didn't recognize anything,
+			   redo from ld->towork.head */
+			ld->curDictId = InvalidOid;
+			return LexizeExec(ld, correspondLexem);
+		}	
+	}
+	/* nothing more to return; still hand over or free the consumed lexemes */
+	setCorrLex(ld, correspondLexem);
+	return NULL;
+}
--- a/contrib/tsearch2/tsearch.sql.in
+++ b/contrib/tsearch2/tsearch.sql.in
@@ -146,6 +146,25 @@ insert into pg_ts_dict select
 	'Example of synonym dictionary'
 ;
+-- thesaurus dictionary: init function
+CREATE FUNCTION thesaurus_init(internal)
+	RETURNS internal
+	as 'MODULE_PATHNAME' 
+	LANGUAGE C;
+-- thesaurus dictionary: lexize function; note that it is declared with
+-- four arguments, not three like syn_lexize or spell_lexize
+CREATE FUNCTION thesaurus_lexize(internal,internal,int4,internal)
+	RETURNS internal
+	as 'MODULE_PATHNAME'
+	LANGUAGE C
+	RETURNS NULL ON NULL INPUT;
+-- register the template; the NULL value is presumably dict_initoption,
+-- which the commented example at the end of this file sets with UPDATE
+insert into pg_ts_dict select 
+	'thesaurus_template', 
+	'thesaurus_init(internal)',
+	null,
+	'thesaurus_lexize(internal,internal,int4,internal)',
+	'Thesaurus template, must be pointed Dictionary and DictFile'
+;
 --dict conf
 CREATE TABLE pg_ts_parser (
 	prs_name	text not null primary key,
@@ -1193,7 +1212,11 @@ AS
 --example of ISpell dictionary
 --update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
 --example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
+--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
+--example of thesaurus dict
+--update pg_ts_dict set dict_initoption='DictFile="contrib/thesaurus", Dictionary="en_stem"' where dict_name='thesaurus_template';
+--update pg_ts_cfgmap set dict_name = '{thesaurus_template,en_stem}' where dict_name = '{en_stem}';
 END;
--- a/contrib/tsearch2/untsearch.sql.in
+++ b/contrib/tsearch2/untsearch.sql.in
@@ -41,6 +41,8 @@ DROP FUNCTION snb_lexize(internal,internal,int4);
 DROP FUNCTION snb_ru_init(internal);
 DROP FUNCTION spell_init(internal);
 DROP FUNCTION spell_lexize(internal,internal,int4);
+DROP FUNCTION thesaurus_init(internal);
+-- thesaurus_lexize is created with four arguments; DROP must name the same signature
+DROP FUNCTION thesaurus_lexize(internal,internal,int4,internal);
 DROP FUNCTION syn_init(internal);
 DROP FUNCTION syn_lexize(internal,internal,int4);
 DROP FUNCTION set_curprs(int);