Commit dde94572 authored by Teodor Sigaev

Fix and improve compound word support. These changes cannot be applied to

previous versions without recreating tsvector fields...

Thanks to Alexander Presber <aljoscha@weisshuhn.de> for discovering the problem.
parent 21e2544a
...@@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf) ...@@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf)
{ {
if (firstsuffix < 0) if (firstsuffix < 0)
firstsuffix = i; firstsuffix = i;
if (Affix->flagflags & FF_COMPOUNDONLYAFX) if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
{ {
if (!ptr->affix || if (ptr == Conf->CompoundAffix ||
strbncmp((const unsigned char *) (ptr - 1)->affix, strbncmp((const unsigned char *) (ptr - 1)->affix,
(const unsigned char *) Affix->repl, (const unsigned char *) Affix->repl,
(ptr - 1)->len)) (ptr - 1)->len))
...@@ -1024,8 +1024,9 @@ typedef struct SplitVar ...@@ -1024,8 +1024,9 @@ typedef struct SplitVar
} SplitVar; } SplitVar;
static int static int
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len) CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
{ {
if ( CheckInPlace ) {
while ((*ptr)->affix) while ((*ptr)->affix)
{ {
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
...@@ -1036,6 +1037,19 @@ CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len) ...@@ -1036,6 +1037,19 @@ CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
} }
(*ptr)++; (*ptr)++;
} }
} else {
char *affbegin;
while ((*ptr)->affix)
{
if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
{
len = (*ptr)->len + (affbegin-word);
(*ptr)++;
return len;
}
(*ptr)++;
}
}
return 0; return 0;
} }
...@@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, ...@@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
memset(notprobed, 1, wordlen); memset(notprobed, 1, wordlen);
var = CopyVar(orig, 1); var = CopyVar(orig, 1);
while (node && level < wordlen) while (level < wordlen)
{
StopLow = node->data;
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); /* find word with epenthetic or/and compound suffix */
if (StopMiddle->val == ((uint8 *) (word))[level])
break;
else if (StopMiddle->val < ((uint8 *) (word))[level])
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
if (StopLow >= StopHigh)
break;
/* find word with epenthetic */
caff = Conf->CompoundAffix; caff = Conf->CompoundAffix;
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0) while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0)
{ {
/* /*
* there is one of compound suffixes, so check word for existings * there is one of compound suffixes, so check word for existings
...@@ -1143,6 +1142,24 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, ...@@ -1143,6 +1142,24 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
} }
} }
if ( !node )
break;
StopLow = node->data;
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
if (StopMiddle->val == ((uint8 *) (word))[level])
break;
else if (StopMiddle->val < ((uint8 *) (word))[level])
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
if (StopLow < StopHigh) {
/* find infinitive */ /* find infinitive */
if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level]) if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
{ {
...@@ -1176,8 +1193,10 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, ...@@ -1176,8 +1193,10 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
} }
} }
} }
level++;
node = StopMiddle->node; node = StopMiddle->node;
} else
node = NULL;
level++;
} }
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment