Commit dde94572 authored by Teodor Sigaev

Fix and improve compound word support. These changes cannot be applied to

a previous version without recreating tsvector fields...

Thanks to Alexander Presber <aljoscha@weisshuhn.de> for discovering the problem.
parent 21e2544a
......@@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf)
{
if (firstsuffix < 0)
firstsuffix = i;
if (Affix->flagflags & FF_COMPOUNDONLYAFX)
if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
{
if (!ptr->affix ||
if (ptr == Conf->CompoundAffix ||
strbncmp((const unsigned char *) (ptr - 1)->affix,
(const unsigned char *) Affix->repl,
(ptr - 1)->len))
......@@ -1024,8 +1024,9 @@ typedef struct SplitVar
} SplitVar;
static int
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
{
if ( CheckInPlace ) {
while ((*ptr)->affix)
{
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
......@@ -1036,6 +1037,19 @@ CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
}
(*ptr)++;
}
} else {
char *affbegin;
while ((*ptr)->affix)
{
if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
{
len = (*ptr)->len + (affbegin-word);
(*ptr)++;
return len;
}
(*ptr)++;
}
}
return 0;
}
......@@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
memset(notprobed, 1, wordlen);
var = CopyVar(orig, 1);
while (node && level < wordlen)
{
StopLow = node->data;
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
while (level < wordlen)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
if (StopMiddle->val == ((uint8 *) (word))[level])
break;
else if (StopMiddle->val < ((uint8 *) (word))[level])
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
if (StopLow >= StopHigh)
break;
/* find word with epenthetic */
/* find word with epenthetic or/and compound suffix */
caff = Conf->CompoundAffix;
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0)
while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0)
{
/*
* there is one of compound suffixes, so check word for existings
......@@ -1143,6 +1142,24 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
}
}
if ( !node )
break;
StopLow = node->data;
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
if (StopMiddle->val == ((uint8 *) (word))[level])
break;
else if (StopMiddle->val < ((uint8 *) (word))[level])
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
if (StopLow < StopHigh) {
/* find infinitive */
if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
{
......@@ -1176,8 +1193,10 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
}
}
}
level++;
node = StopMiddle->node;
} else
node = NULL;
level++;
}
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment