/* dict.h 2.3 KB */
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */

#ifndef __DICT_H__
#define __DICT_H__
#include "postgres.h"
#include "fmgr.h"
#include "ts_cfg.h"

/*
 * StopList: container for a list of stop words.
 *
 * NOTE(review): the field descriptions are inferred from the field names
 * and types only; confirm them against the stop-list implementation.
 */
typedef struct
{
	int			len;			/* presumably the number of entries in stop */
	char	  **stop;			/* presumably the stop words, as C strings */
	char	   *(*wordop) (char *);		/* callback taking and returning a
										 * C string; presumably a per-word
										 * transformation -- TODO confirm */
}	StopList;

/*
 * Stop-list routines; the definitions live outside this header.
 *
 * NOTE(review): the one-line summaries are inferred from the function names
 * and signatures only; confirm them against the definitions.
 */
void		sortstoplist(StopList * s);		/* presumably sorts the list */
void		freestoplist(StopList * s);		/* presumably releases the list's
											 * storage */
void		readstoplist(text *in, StopList * s);	/* presumably fills s
													 * from the source
													 * named by in */
bool		searchstoplist(StopList * s, char *key);	/* presumably true
														 * if key is in s */

/*
 * DictInfo: per-dictionary information, filled in by init_dict() and
 * returned by finddict() (see the prototypes in this header).
 *
 * NOTE(review): the field descriptions are inferred from names and types;
 * confirm them against the implementation.
 */
typedef struct
{
	Oid			dict_id;		/* presumably the Oid identifying the
								 * dictionary */
	FmgrInfo	lexize_info;	/* fmgr lookup data, presumably for the
								 * dictionary's lexize function */
	void	   *dictionary;		/* opaque pointer; presumably the
								 * dictionary's own private data */
}	DictInfo;

/*
 * Dictionary lookup routines; the definitions live outside this header.
 *
 * NOTE(review): the one-line summaries are inferred from the function names
 * and signatures only; confirm them against the definitions.
 */
void		init_dict(Oid id, DictInfo * dict);		/* presumably fills *dict
													 * for dictionary id */
DictInfo   *finddict(Oid id);		/* presumably returns the DictInfo for id */
Oid			name2id_dict(text *name);		/* presumably maps a dictionary
											 * name to its Oid */
void		reset_dict(void);		/* presumably discards cached dictionary
									 * state -- TODO confirm */

/*
 * DictSubState: state exchanged with a dictionary's lexize function across
 * successive calls.  Each field is marked as input to ("in") or output from
 * ("out") the lexize function.
 */
typedef struct
{
	bool		isend;			/* in: tells the lexize function that the end
								 * of the text has been reached */
	bool		getnext;		/* out: the dictionary wants the next lexeme */
	void	   *private;		/* internal dictionary state kept between
								 * calls while getnext == true */
}	DictSubState;

/*
 * Simple parser for a configuration string: parse_cfgdict() reads the text
 * value "in" and hands its result back through *m as Map entries, each
 * holding one key and one value.
 *
 * NOTE(review): how the end of the returned Map array is marked, and who
 * owns the strings, is not visible from this header; check the definition.
 */
typedef struct
{
	char	   *key;			/* option name */
	char	   *value;			/* option value */
}	Map;

void		parse_cfgdict(text *in, Map ** m);

/* Return struct for any lexize function */
typedef struct
{
	/*
	 * Number of the split variant this lexeme belongs to.  For example, the
	 * Norwegian word 'fotballklubber' can be split in two ways:
	 * ( fotball, klubb ) and ( fot, ball, klubb ).  So the dictionary should
	 * return:
	 *
	 *		nvariant	lexeme
	 *		1			fotball
	 *		1			klubb
	 *		2			fot
	 *		2			ball
	 *		2			klubb
	 */
	uint16		nvariant;

	/* NOTE(review): presumably a bitmask of TSL_* values -- confirm */
	uint16		flags;

	/* C-string */
	char	   *lexeme;
}	TSLexeme;

/*
 * NOTE(review): presumably a bit for TSLexeme.flags; its exact meaning is
 * not visible in this header -- confirm against the code that uses it.
 */
#define TSL_ADDPOS		0x01


/*
 * Lexize subsystem
 */

/*
 * ParsedLex: one node of a singly linked list (chained through "next").
 * The type, lemm and lenlemm fields mirror the arguments of
 * LexizeAddLemm().
 *
 * NOTE(review): the field descriptions are inferred from names and types;
 * confirm them against the implementation.
 */
typedef struct ParsedLex
{
	int			type;			/* presumably the token type reported by the
								 * parser */
	char	   *lemm;			/* presumably the token text; an explicit
								 * length is stored alongside it */
	int			lenlemm;		/* presumably the length of lemm */
	bool		resfollow;		/* meaning not visible here -- TODO confirm */
	struct ParsedLex *next;		/* next node in the list */
}	ParsedLex;

/*
 * ListParsedLex: a list of ParsedLex nodes, tracked by pointers to its
 * first and last nodes.
 */
typedef struct ListParsedLex
{
	ParsedLex  *head;			/* first node of the list */
	ParsedLex  *tail;			/* last node of the list */
}	ListParsedLex;

/*
 * LexizeData: working state of the lexize subsystem.  A pointer to this
 * struct is the first argument of LexizeInit(), LexizeAddLemm() and
 * LexizeExec().
 *
 * NOTE(review): descriptions marked "presumably" are inferred from names
 * and types only; confirm them against the implementation.
 */
typedef struct
{
	TSCfgInfo  *cfg;			/* configuration passed to LexizeInit() */
	Oid			curDictId;		/* presumably the dictionary currently in use */
	int			posDict;		/* presumably a position in the list of
								 * dictionaries being tried */
	DictSubState dictState;		/* state shared with the lexize function */
	ParsedLex  *curSub;			/* meaning not visible here -- TODO confirm */
	ListParsedLex towork;		/* current list to work on */
	ListParsedLex waste;		/* list of lexemes that have already been
								 * lexized */

	/*
	 * Fields to store the last variant to lexize (basically for a thesaurus
	 * or a similar dictionary, which wants several lexemes).
	 */

	ParsedLex  *lastRes;
	TSLexeme   *tmpRes;
}	LexizeData;


/*
 * Entry points of the lexize subsystem; the definitions live outside this
 * header.
 *
 * NOTE(review): the one-line summaries are inferred from the function names
 * and signatures only; confirm them against the definitions.
 */
/* Presumably prepares *ld for use with configuration cfg. */
void		LexizeInit(LexizeData * ld, TSCfgInfo * cfg);
/* Presumably queues one parsed token (type, text, length) in *ld. */
void		LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm);
/*
 * Presumably runs the dictionaries over the queued tokens and returns the
 * resulting lexemes; *correspondLexem is an output -- TODO confirm its
 * exact meaning.
 */
TSLexeme   *LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem);

#endif