Commit 04a2c7f4 authored by Tom Lane

Improve make_tsvector() to handle empty input, and simplify its callers.

It seemed a bit silly that each caller of make_tsvector() was laboriously
special-casing the situation where no lexemes were found, when it would
be easy and much more bullet-proof to make make_tsvector() handle that.
parent b4c6d31c
...@@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l) ...@@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l)
/* /*
* make value of tsvector, given parsed text * make value of tsvector, given parsed text
*
* Note: frees prs->words and subsidiary data.
*/ */
TSVector TSVector
make_tsvector(ParsedText *prs) make_tsvector(ParsedText *prs)
...@@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs) ...@@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs)
char *str; char *str;
int stroff; int stroff;
prs->curwords = uniqueWORD(prs->words, prs->curwords); /* Merge duplicate words */
if (prs->curwords > 0)
prs->curwords = uniqueWORD(prs->words, prs->curwords);
/* Determine space needed */
for (i = 0; i < prs->curwords; i++) for (i = 0; i < prs->curwords; i++)
{ {
lenstr += prs->words[i].len; lenstr += prs->words[i].len;
...@@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs) ...@@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs)
ptr->haspos = 0; ptr->haspos = 0;
ptr++; ptr++;
} }
pfree(prs->words);
if (prs->words)
pfree(prs->words);
return in; return in;
} }
...@@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS) ...@@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's
* number */ * number */
if (prs.lenwords == 0) if (prs.lenwords < 2)
prs.lenwords = 2; prs.lenwords = 2;
prs.curwords = 0; prs.curwords = 0;
prs.pos = 0; prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
PG_FREE_IF_COPY(in, 1); PG_FREE_IF_COPY(in, 1);
if (prs.curwords) out = make_tsvector(&prs);
out = make_tsvector(&prs);
else
{
pfree(prs.words);
out = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(out, CALCDATASIZE(0, 0));
out->size = 0;
}
PG_RETURN_POINTER(out); PG_RETURN_TSVECTOR(out);
} }
Datum Datum
...@@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) ...@@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
iterate_jsonb_string_values(jb, &state, add_to_tsvector); iterate_jsonb_string_values(jb, &state, add_to_tsvector);
if (prs.curwords > 0)
result = make_tsvector(&prs);
else
{
/*
* There weren't any string elements in jsonb, so we need to return an
* empty vector
*/
result = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(result, CALCDATASIZE(0, 0));
result->size = 0;
}
PG_FREE_IF_COPY(jb, 1); PG_FREE_IF_COPY(jb, 1);
result = make_tsvector(&prs);
PG_RETURN_TSVECTOR(result); PG_RETURN_TSVECTOR(result);
} }
...@@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS) ...@@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
iterate_json_string_values(json, &state, add_to_tsvector); iterate_json_string_values(json, &state, add_to_tsvector);
if (prs.curwords > 0)
result = make_tsvector(&prs);
else
{
/*
* There weren't any string elements in json, so we need to return an
* empty vector
*/
result = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(result, CALCDATASIZE(0, 0));
result->size = 0;
}
PG_FREE_IF_COPY(json, 1); PG_FREE_IF_COPY(json, 1);
result = make_tsvector(&prs);
PG_RETURN_TSVECTOR(result); PG_RETURN_TSVECTOR(result);
} }
......
...@@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column) ...@@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
} }
/* make tsvector value */ /* make tsvector value */
if (prs.curwords) datum = TSVectorGetDatum(make_tsvector(&prs));
{ isnull = false;
datum = PointerGetDatum(make_tsvector(&prs));
isnull = false; /* and insert it into tuple */
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att, rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
1, &tsvector_attr_num, 1, &tsvector_attr_num,
&datum, &isnull); &datum, &isnull);
pfree(DatumGetPointer(datum));
} pfree(DatumGetPointer(datum));
else
{
TSVector out = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(out, CALCDATASIZE(0, 0));
out->size = 0;
datum = PointerGetDatum(out);
isnull = false;
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
1, &tsvector_attr_num,
&datum, &isnull);
pfree(prs.words);
}
return PointerGetDatum(rettuple); return PointerGetDatum(rettuple);
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment