Commit 7351b5fa authored by Tom Lane

Cleanup for some problems in tsearch patch:

- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database
  encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch
configuration files, by doing this in a single t_readline() subroutine
(replacing direct usages of fgets).  Cleaned up API for readstopwords too.

Heikki Linnakangas
parent b918bf86
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -192,7 +192,6 @@ dsnowball_init(PG_FUNCTION_ARGS)
ListCell *l;
d = (DictSnowball *) palloc0(sizeof(DictSnowball));
d->stoplist.wordop = recode_and_lowerstr;
foreach(l, dictoptions)
{
......@@ -204,8 +203,7 @@ dsnowball_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(defGetString(defel), &d->stoplist);
sortstoplist(&d->stoplist);
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
else if (pg_strcasecmp("Language", defel->defname) == 0)
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -39,7 +39,6 @@ dispell_init(PG_FUNCTION_ARGS)
ListCell *l;
d = (DictISpell *) palloc0(sizeof(DictISpell));
d->stoplist.wordop = recode_and_lowerstr;
foreach(l, dictoptions)
{
......@@ -73,8 +72,7 @@ dispell_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(defGetString(defel), &(d->stoplist));
sortstoplist(&(d->stoplist));
readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
stoploaded = true;
}
else
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,19 +23,17 @@
typedef struct
{
StopList stoplist;
} DictExample;
} DictSimple;
Datum
dsimple_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictExample *d = (DictExample *) palloc0(sizeof(DictExample));
DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
bool stoploaded = false;
ListCell *l;
d->stoplist.wordop = recode_and_lowerstr;
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
......@@ -46,8 +44,7 @@ dsimple_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(defGetString(defel), &d->stoplist);
sortstoplist(&d->stoplist);
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
else
......@@ -65,16 +62,16 @@ dsimple_init(PG_FUNCTION_ARGS)
Datum
dsimple_lexize(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *txt = lowerstr_with_len(in, len);
char *txt;
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
txt = lowerstr_with_len(in, len);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
}
else
res[0].lexeme = txt;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.2 2007/08/22 04:13:15 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -20,9 +20,6 @@
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#define SYNBUFLEN 4096
typedef struct
{
char *in;
......@@ -31,23 +28,34 @@ typedef struct
typedef struct
{
int len;
int len; /* length of syn array */
Syn *syn;
} DictSyn;
/*
* Finds the next whitespace-delimited word within the 'in' string.
* Returns a pointer to the first character of the word, and a pointer
* to the next byte after the last character in the word (in *end).
*/
static char *
findwrd(char *in, char **end)
{
char *start;
*end = NULL;
/* Skip leading spaces */
while (*in && t_isspace(in))
in += pg_mblen(in);
/* Return NULL on empty lines */
if (*in == '\0')
{
*end = NULL;
return NULL;
}
start = in;
/* Find end of word */
while (*in && !t_isspace(in))
in += pg_mblen(in);
......@@ -70,12 +78,11 @@ dsynonym_init(PG_FUNCTION_ARGS)
ListCell *l;
char *filename = NULL;
FILE *fin;
char buf[SYNBUFLEN];
char *starti,
*starto,
*end = NULL;
int cur = 0;
int slen;
char *line = NULL;
foreach(l, dictoptions)
{
......@@ -105,10 +112,33 @@ dsynonym_init(PG_FUNCTION_ARGS)
d = (DictSyn *) palloc0(sizeof(DictSyn));
while (fgets(buf, SYNBUFLEN, fin))
while ((line = t_readline(fin)) != NULL)
{
slen = strlen(buf);
pg_verifymbstr(buf, slen, false);
starti = findwrd(line, &end);
if (!starti)
{
/* Empty line */
goto skipline;
}
*end = '\0';
if (end >= line + strlen(line))
{
/* A line with only one word. Ignore silently. */
goto skipline;
}
starto = findwrd(end + 1, &end);
if (!starto)
{
/* A line with only one word. Ignore silently. */
goto skipline;
}
*end = '\0';
/* starti now points to the first word, and starto to the second
* word on the line, with a \0 terminator at the end of both words.
*/
if (cur == d->len)
{
if (d->len == 0)
......@@ -123,36 +153,19 @@ dsynonym_init(PG_FUNCTION_ARGS)
}
}
starti = findwrd(buf, &end);
if (!starti)
continue;
*end = '\0';
if (end >= buf + slen)
continue;
starto = findwrd(end + 1, &end);
if (!starto)
continue;
*end = '\0';
d->syn[cur].in = recode_and_lowerstr(starti);
d->syn[cur].out = recode_and_lowerstr(starto);
if (!(d->syn[cur].in && d->syn[cur].out))
{
FreeFile(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
d->syn[cur].in = lowerstr(starti);
d->syn[cur].out = lowerstr(starto);
cur++;
skipline:
pfree(line);
}
FreeFile(fin);
d->len = cur;
if (cur > 1)
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
PG_RETURN_POINTER(d);
}
......@@ -179,8 +192,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
if (!found)
PG_RETURN_POINTER(NULL);
res = palloc(sizeof(TSLexeme) * 2);
memset(res, 0, sizeof(TSLexeme) * 2);
res = palloc0(sizeof(TSLexeme) * 2);
res[0].lexeme = pstrdup(found->out);
PG_RETURN_POINTER(res);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -170,10 +170,10 @@ static void
thesaurusRead(char *filename, DictThesaurus * d)
{
FILE *fh;
char str[BUFSIZ];
int lineno = 0;
uint16 idsubst = 0;
bool useasis = false;
char *line;
filename = get_tsearch_config_filename(filename, "ths");
fh = AllocateFile(filename, "r");
......@@ -183,27 +183,28 @@ thesaurusRead(char *filename, DictThesaurus * d)
errmsg("could not open thesaurus file \"%s\": %m",
filename)));
while (fgets(str, sizeof(str), fh))
while ((line = t_readline(fh)) != NULL)
{
char *ptr,
*recoded;
char *ptr;
int state = TR_WAITLEX;
char *beginwrd = NULL;
uint16 posinsubst = 0;
uint16 nwrd = 0;
ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
GetDatabaseEncoding(), PG_UTF8);
if (recoded == NULL)
elog(ERROR, "encoding conversion failed");
lineno++;
/* is it comment ? */
while (t_isspace(ptr))
ptr = line;
/* is it a comment? */
while (*ptr && t_isspace(ptr))
ptr += pg_mblen(ptr);
if (t_iseq(recoded, '#') || *recoded == '\0' || t_iseq(recoded, '\n') || t_iseq(recoded, '\r'))
if (t_iseq(ptr, '#') || *ptr == '\0' ||
t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
{
pfree(line);
continue;
}
while (*ptr)
{
......@@ -301,8 +302,7 @@ thesaurusRead(char *filename, DictThesaurus * d)
lineno, filename)));
}
if (recoded != str)
pfree(recoded);
pfree(line);
}
d->nsubst = idsubst;
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -125,28 +125,47 @@ _t_isprint(const char *ptr)
}
#endif /* TS_USE_WIDE */
/*
* Convert C-string from UTF8 to server encoding and
* lower it
* Read the next line from a tsearch data file (expected to be in UTF-8), and
* convert it to database encoding if needed. The returned string is palloc'd.
* NULL return means EOF.
*/
char *
recode_and_lowerstr(char *str)
t_readline(FILE *fp)
{
char *recoded;
char *ret;
int len;
char *recoded;
char buf[4096]; /* lines must not be longer than this */
if (fgets(buf, sizeof(buf), fp) == NULL)
return NULL;
recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
PG_UTF8, GetDatabaseEncoding());
len = strlen(buf);
if (recoded == NULL)
elog(ERROR, "encoding conversion failed");
/* Make sure the input is valid UTF-8 */
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
ret = lowerstr(recoded);
/* And convert */
recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
len,
PG_UTF8,
GetDatabaseEncoding());
if (recoded != str)
pfree(recoded);
if (recoded == NULL) /* should not happen */
elog(ERROR, "encoding conversion failed");
if (recoded == buf)
{
/*
* conversion didn't pstrdup, so we must.
* We can use the length of the original string, because
* no conversion was done.
*/
recoded = pnstrdup(recoded, len);
}
return ret;
return recoded;
}
char *
......@@ -155,6 +174,9 @@ lowerstr(char *str)
return lowerstr_with_len(str, strlen(str));
}
/*
* Returned string is palloc'd
*/
char *
lowerstr_with_len(char *str, int len)
{
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -308,7 +308,7 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
{
/*
* Dictionary normalizes lexemes, so we remove from stack all
* used lexemes , return to basic mode and redo end of stack
* used lexemes, return to basic mode and redo end of stack
* (if it exists)
*/
if (res)
......@@ -427,14 +427,14 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework
*/
static void
hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
}
memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWord));
memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].type = (uint8) type;
prs->words[prs->curwords].len = buflen;
prs->words[prs->curwords].word = palloc(buflen);
......@@ -443,16 +443,16 @@ hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
}
static void
hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{
int i;
QueryItem *item = GETQUERY(query);
HeadlineWord *word;
HeadlineWordEntry *word;
while (prs->curwords + query->size >= prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
}
word = &(prs->words[prs->curwords - 1]);
......@@ -462,7 +462,7 @@ hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
{
if (word->item)
{
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWord));
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
......@@ -475,7 +475,7 @@ hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
}
static void
addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
{
ParsedLex *tmplexs;
TSLexeme *ptr;
......@@ -511,7 +511,7 @@ addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * n
}
void
hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen)
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
{
int type,
lenlemm;
......@@ -571,12 +571,12 @@ hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen
}
text *
generatHeadline(HeadlineText * prs)
generateHeadline(HeadlineParsedText * prs)
{
text *out;
int len = 128;
char *ptr;
HeadlineWord *wrd = prs->words;
HeadlineWordEntry *wrd = prs->words;
out = (text *) palloc(len);
ptr = ((char *) out) + VARHDRSZ;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -63,21 +63,29 @@ get_tsearch_config_filename(const char *basename,
return result;
}
#define STOPBUFLEN 4096
static int
comparestr(const void *a, const void *b)
{
return strcmp(*(char **) a, *(char **) b);
}
/*
* Reads a stopword file. Each word is run through 'wordop'
* function, if given. wordop may either modify the input in-place,
* or palloc a new version.
*/
void
readstoplist(char *in, StopList * s)
readstoplist(const char *fname, StopList *s, char *(*wordop) (char *))
{
char **stop = NULL;
s->len = 0;
if (in && *in)
if (fname && *fname)
{
char *filename = get_tsearch_config_filename(in, "stop");
char *filename = get_tsearch_config_filename(fname, "stop");
FILE *hin;
char buf[STOPBUFLEN];
char *line;
int reallen = 0;
int line = 0;
if ((hin = AllocateFile(filename, "r")) == NULL)
ereport(ERROR,
......@@ -85,65 +93,56 @@ readstoplist(char *in, StopList * s)
errmsg("could not open stopword file \"%s\": %m",
filename)));
while (fgets(buf, STOPBUFLEN, hin))
while ((line = t_readline(hin)) != NULL)
{
char *pbuf = buf;
char *pbuf = line;
line++;
while (*pbuf && !isspace(*pbuf))
/* Trim trailing space */
while (*pbuf && !t_isspace(pbuf))
pbuf++;
*pbuf = '\0';
if (*buf == '\0')
continue;
if (!pg_verifymbstr(buf, strlen(buf), true))
/* Skip empty lines */
if (*line == '\0')
{
FreeFile(hin);
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte encoding at line %d in file \"%s\"",
line, filename)));
pfree(line);
continue;
}
if (s->len >= reallen)
{
if (reallen == 0)
{
reallen = 16;
reallen = 64;
stop = (char **) palloc(sizeof(char *) * reallen);
}
else
{
reallen *= 2;
stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen);
stop = (char **) repalloc((void *) stop,
sizeof(char *) * reallen);
}
}
if (s->wordop)
stop[s->len] = s->wordop(buf);
if (wordop)
{
stop[s->len] = wordop(line);
if (stop[s->len] != line)
pfree(line);
}
else
stop[s->len] = pstrdup(buf);
stop[s->len] = line;
(s->len)++;
}
FreeFile(hin);
pfree(filename);
}
s->stop = stop;
}
static int
comparestr(const void *a, const void *b)
{
return strcmp(*(char **) a, *(char **) b);
}
void
sortstoplist(StopList * s)
{
/* Sort to allow binary searching */
if (s->stop && s->len > 0)
qsort(s->stop, s->len, sizeof(char *), comparestr);
}
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -300,7 +300,7 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
text *in = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
HeadlineText prs;
HeadlineParsedText prs;
List *prsoptions;
text *out;
TSConfigCacheEntry *cfg;
......@@ -309,9 +309,9 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
prsobj = lookup_ts_parser_cache(cfg->prsId);
memset(&prs, 0, sizeof(HeadlineText));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWord *) palloc(sizeof(HeadlineWord) * prs.lenwords);
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
......@@ -325,7 +325,7 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
PointerGetDatum(prsoptions),
PointerGetDatum(query));
out = generatHeadline(&prs);
out = generateHeadline(&prs);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
......
......@@ -6,7 +6,7 @@
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,12 +18,17 @@
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
/*
* Max length of a flag name. Names longer than this will be truncated
* to the maximum.
*/
#define MAXFLAGLEN 16
struct SPNode;
typedef struct
{
uint32
val:8,
uint32 val:8,
isword:1,
compoundflag:4,
affix:19;
......@@ -54,22 +59,25 @@ typedef struct spell_struct
{
union
{
char flag[16];
/*
* flag is filled in by NIImportDictionary. After NISortDictionary,
* d is valid and flag is invalid.
*/
char flag[MAXFLAGLEN];
struct
{
int affix;
int len;
} d;
} p;
char word[1];
char word[1]; /* variable length, null-terminated */
} SPELL;
#define SPELLHDRSZ (offsetof(SPELL, word))
typedef struct aff_struct
{
uint32
flag:8,
uint32 flag:8,
type:1,
flagflags:7,
issimple:1,
......@@ -85,11 +93,16 @@ typedef struct aff_struct
} AFFIX;
/*
* affixes use deictinary flags too
* affixes use dictionary flags too
*/
#define FF_COMPOUNDPERMITFLAG 0x10
#define FF_COMPOUNDFORBIDFLAG 0x20
#define FF_CROSSPRODUCT 0x40
/*
* Don't change the order of these. Initialization sorts by these,
* and expects prefixes to come first after sorting.
*/
#define FF_SUFFIX 1
#define FF_PREFIX 0
......@@ -97,8 +110,7 @@ struct AffixNode;
typedef struct
{
uint32
val:8,
uint32 val:8,
naff:24;
AFFIX **aff;
struct AffixNode *node;
......@@ -126,9 +138,13 @@ typedef struct
int naffixes;
AFFIX *Affix;
int nspell;
int mspell;
/*
* Temporary array of all words in the dict file. Only used during
* initialization
*/
SPELL **Spell;
int nspell; /* number of valid entries in Spell array */
int mspell; /* allocated length of Spell array */
AffixNode *Suffix;
AffixNode *Prefix;
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -38,11 +38,11 @@
#ifdef TS_USE_WIDE
size_t char2wchar(wchar_t *to, const char *from, size_t len);
extern size_t char2wchar(wchar_t *to, const char *from, size_t len);
#ifdef WIN32
size_t wchar2char(char *to, const wchar_t *from, size_t len);
extern size_t wchar2char(char *to, const wchar_t *from, size_t len);
#else /* WIN32 */
/* correct wcstombs */
......@@ -81,8 +81,8 @@ extern int _t_isprint(const char *ptr);
#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s)
#endif
char *lowerstr(char *str);
char *lowerstr_with_len(char *str, int len);
char *recode_and_lowerstr(char *str);
extern char *lowerstr(char *str);
extern char *lowerstr_with_len(char *str, int len);
extern char *t_readline(FILE *fp);
#endif /* __TSLOCALE_H__ */
......@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.2 2007/08/22 01:39:46 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -71,12 +71,11 @@ typedef struct
{
int len;
char **stop;
char *(*wordop) (char *);
} StopList;
extern void sortstoplist(StopList * s);
extern void readstoplist(char *in, StopList * s);
extern bool searchstoplist(StopList * s, char *key);
extern void readstoplist(const char *fname, StopList *s,
char *(*wordop) (char *));
extern bool searchstoplist(StopList *s, char *key);
/*
* Interface with dictionaries
......@@ -102,9 +101,8 @@ typedef struct
#define TSL_ADDPOS 0x01
/*
* Struct for supporting complex dictionaries like
* thesaurus, pointer to is an 4-th argument for
* dictlexize method
* Struct for supporting complex dictionaries like thesaurus.
* 4th argument for dictlexize method is a pointer to this
*/
typedef struct
{
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -13,6 +13,7 @@
#define _PG_TS_UTILS_H_
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
/*
* Common parse definitions for tsvector and tsquery
......@@ -38,7 +39,8 @@ typedef struct
extern bool gettoken_tsvector(TSVectorParseState *state);
struct ParseQueryNode;
struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
typedef struct
{
char *buffer; /* entire string we are scanning */
......@@ -46,7 +48,7 @@ typedef struct
int4 state;
int4 count;
/* reverse polish notation in list (for temprorary usage) */
/* reverse polish notation in list (for temporary usage) */
struct ParseQueryNode *str;
/* number in str */
......@@ -102,36 +104,12 @@ extern void parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen);
* headline framework, flow in common to generate:
* 1 parse text with hlparsetext
* 2 parser-specific function to find part
* 3 generatHeadline to generate result text
* 3 generateHeadline to generate result text
*/
typedef struct
{
uint32 selected:1,
in:1,
replace:1,
repeated:1,
unused:4,
type:8,
len:16;
char *word;
QueryItem *item;
} HeadlineWord;
typedef struct
{
HeadlineWord *words;
int4 lenwords;
int4 curwords;
char *startsel;
char *stopsel;
int2 startsellen;
int2 stopsellen;
} HeadlineText;
extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query,
extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen);
extern text *generatHeadline(HeadlineText * prs);
extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment