Commit bcec1e61 authored by Teodor Sigaev

More accuracy work with MinWords option of headline function

parent c090b053
...@@ -189,7 +189,7 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -189,7 +189,7 @@ prsd_headline(PG_FUNCTION_ARGS)
int bestb = -1, int bestb = -1,
beste = -1; beste = -1;
int bestlen = -1; int bestlen = -1;
int pose = 0, int pose = 0, posb,
poslen, poslen,
curlen; curlen;
...@@ -229,15 +229,15 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -229,15 +229,15 @@ prsd_headline(PG_FUNCTION_ARGS)
if (min_words >= max_words) if (min_words >= max_words)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords < MaxWords"))); errmsg("MinWords must be less than MaxWords")));
if (min_words <= 0) if (min_words <= 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords > 0"))); errmsg("MinWords should be positive")));
if (shortword < 0) if (shortword < 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be ShortWord >= 0"))); errmsg("ShortWord hould be = 0")));
} }
while (hlCover(prs, query, &p, &q)) while (hlCover(prs, query, &p, &q))
...@@ -261,6 +261,7 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -261,6 +261,7 @@ prsd_headline(PG_FUNCTION_ARGS)
continue; continue;
} }
posb=p;
if (curlen < max_words) if (curlen < max_words)
{ /* find good end */ { /* find good end */
for (i = i - 1; i < prs->curwords && curlen < max_words; i++) for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
...@@ -278,6 +279,19 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -278,6 +279,19 @@ prsd_headline(PG_FUNCTION_ARGS)
if (curlen >= min_words) if (curlen >= min_words)
break; break;
} }
if ( curlen < min_words && i>=prs->curwords ) { /* reached end of text and our cover is shorter than min_words */
for(i=p; i>= 0; i--) {
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
if (curlen >= min_words)
break;
}
posb=(i>=0) ? i : 0;
}
} }
else else
{ /* shorter cover :((( */ { /* shorter cover :((( */
...@@ -298,7 +312,7 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -298,7 +312,7 @@ prsd_headline(PG_FUNCTION_ARGS)
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && (bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
{ {
bestb = p; bestb = posb;
beste = pose; beste = pose;
bestlen = poslen; bestlen = poslen;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment