Commit bb36c51f authored by Tom Lane

Fix several bugs in tsvectorin, including crash due to uninitialized field and
miscomputation of required palloc size.  The crash could only occur if the
input contained lexemes both with and without positions, which is probably not
common in practice.  The miscomputation would definitely result in wasted
space.  Also fix some inconsistent coding around alignment of strings and
positions in a tsvector value; these errors could also lead to crashes given
mixed with/without position data and a machine that's picky about alignment.
And be more careful about checking for overflow of string offsets.

Patch is only against HEAD --- I have not looked to see if the same bugs are
in back-branch contrib/tsearch2 code.
parent f5513484
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.4 2007/09/26 10:09:57 teodor Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.5 2007/10/23 00:51:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -140,23 +140,30 @@ uniqueWORD(ParsedWord * a, int4 l) ...@@ -140,23 +140,30 @@ uniqueWORD(ParsedWord * a, int4 l)
TSVector TSVector
make_tsvector(ParsedText *prs) make_tsvector(ParsedText *prs)
{ {
int4 i, int i,
j, j,
lenstr = 0, lenstr = 0,
totallen; totallen;
TSVector in; TSVector in;
WordEntry *ptr; WordEntry *ptr;
char *str, char *str;
*cur; int stroff;
prs->curwords = uniqueWORD(prs->words, prs->curwords); prs->curwords = uniqueWORD(prs->words, prs->curwords);
for (i = 0; i < prs->curwords; i++) for (i = 0; i < prs->curwords; i++)
{ {
lenstr += SHORTALIGN(prs->words[i].len); lenstr += prs->words[i].len;
if (prs->words[i].alen) if (prs->words[i].alen)
{
lenstr = SHORTALIGN(lenstr);
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
} }
}
if (lenstr > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector")));
totallen = CALCDATASIZE(prs->curwords, lenstr); totallen = CALCDATASIZE(prs->curwords, lenstr);
in = (TSVector) palloc0(totallen); in = (TSVector) palloc0(totallen);
...@@ -164,31 +171,33 @@ make_tsvector(ParsedText *prs) ...@@ -164,31 +171,33 @@ make_tsvector(ParsedText *prs)
in->size = prs->curwords; in->size = prs->curwords;
ptr = ARRPTR(in); ptr = ARRPTR(in);
cur = str = STRPTR(in); str = STRPTR(in);
stroff = 0;
for (i = 0; i < prs->curwords; i++) for (i = 0; i < prs->curwords; i++)
{ {
ptr->len = prs->words[i].len; ptr->len = prs->words[i].len;
if (cur - str > MAXSTRPOS) ptr->pos = stroff;
ereport(ERROR, memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
(errcode(ERRCODE_SYNTAX_ERROR), stroff += prs->words[i].len;
errmsg("string is too long for tsvector")));
ptr->pos = cur - str;
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
pfree(prs->words[i].word); pfree(prs->words[i].word);
cur += SHORTALIGN(prs->words[i].len);
if (prs->words[i].alen) if (prs->words[i].alen)
{ {
int k = prs->words[i].pos.apos[0];
WordEntryPos *wptr; WordEntryPos *wptr;
if (k > 0xFFFF)
elog(ERROR, "positions array too long");
ptr->haspos = 1; ptr->haspos = 1;
*(uint16 *) cur = prs->words[i].pos.apos[0]; stroff = SHORTALIGN(stroff);
*(uint16 *) (str + stroff) = (uint16) k;
wptr = POSDATAPTR(in, ptr); wptr = POSDATAPTR(in, ptr);
for (j = 0; j < *(uint16 *) cur; j++) for (j = 0; j < k; j++)
{ {
WEP_SETWEIGHT(wptr[j], 0); WEP_SETWEIGHT(wptr[j], 0);
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
} }
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
pfree(prs->words[i].pos.apos); pfree(prs->words[i].pos.apos);
} }
else else
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.6 2007/10/23 00:51:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,16 +22,18 @@ ...@@ -22,16 +22,18 @@
typedef struct typedef struct
{ {
WordEntry entry; /* should be first ! */ WordEntry entry; /* must be first! */
WordEntryPos *pos; WordEntryPos *pos;
int poslen; /* number of elements in pos */ int poslen; /* number of elements in pos */
} WordEntryIN; } WordEntryIN;
/* Compare two WordEntryPos values for qsort */
static int static int
comparePos(const void *a, const void *b) comparePos(const void *a, const void *b)
{ {
int apos = WEP_GETPOS(*(WordEntryPos *) a); int apos = WEP_GETPOS(*(const WordEntryPos *) a);
int bpos = WEP_GETPOS(*(WordEntryPos *) b); int bpos = WEP_GETPOS(*(const WordEntryPos *) b);
if (apos == bpos) if (apos == bpos)
return 0; return 0;
...@@ -53,9 +55,9 @@ uniquePos(WordEntryPos * a, int l) ...@@ -53,9 +55,9 @@ uniquePos(WordEntryPos * a, int l)
if (l <= 1) if (l <= 1)
return l; return l;
res = a;
qsort((void *) a, l, sizeof(WordEntryPos), comparePos); qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
res = a;
ptr = a + 1; ptr = a + 1;
while (ptr - a < l) while (ptr - a < l)
{ {
...@@ -63,7 +65,8 @@ uniquePos(WordEntryPos * a, int l) ...@@ -63,7 +65,8 @@ uniquePos(WordEntryPos * a, int l)
{ {
res++; res++;
*res = *ptr; *res = *ptr;
if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1) if (res - a >= MAXNUMPOS - 1 ||
WEP_GETPOS(*res) == MAXENTRYPOS - 1)
break; break;
} }
else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res)) else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
...@@ -74,12 +77,13 @@ uniquePos(WordEntryPos * a, int l) ...@@ -74,12 +77,13 @@ uniquePos(WordEntryPos * a, int l)
return res + 1 - a; return res + 1 - a;
} }
/* Compare two WordEntryIN values for qsort */
static int static int
compareentry(const void *va, const void *vb, void *arg) compareentry(const void *va, const void *vb, void *arg)
{ {
const WordEntryIN *a = (const WordEntryIN *) va;
const WordEntryIN *b = (const WordEntryIN *) vb;
char *BufferStr = (char *) arg; char *BufferStr = (char *) arg;
WordEntryIN *a = (WordEntryIN *) va;
WordEntryIN *b = (WordEntryIN *) vb;
if (a->entry.len == b->entry.len) if (a->entry.len == b->entry.len)
{ {
...@@ -91,44 +95,40 @@ compareentry(const void *va, const void *vb, void *arg) ...@@ -91,44 +95,40 @@ compareentry(const void *va, const void *vb, void *arg)
return (a->entry.len > b->entry.len) ? 1 : -1; return (a->entry.len > b->entry.len) ? 1 : -1;
} }
/*
* Sort an array of WordEntryIN, remove duplicates.
* *outbuflen receives the amount of space needed for strings and positions.
*/
static int static int
uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen) uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
{ {
int buflen;
WordEntryIN *ptr, WordEntryIN *ptr,
*res; *res;
Assert(l >= 1); Assert(l >= 1);
if (l == 1) if (l > 1)
{ qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
if (a->entry.haspos) (void *) buf);
{
a->poslen = uniquePos(a->pos, a->poslen);
*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
}
else
*outbuflen = a->entry.len;
return l; buflen = 0;
}
res = a; res = a;
ptr = a + 1; ptr = a + 1;
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
while (ptr - a < l) while (ptr - a < l)
{ {
if (!(ptr->entry.len == res->entry.len && if (!(ptr->entry.len == res->entry.len &&
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
res->entry.len) == 0))
{ {
/* done accumulating data into *res, count space needed */
buflen += res->entry.len;
if (res->entry.haspos) if (res->entry.haspos)
{ {
*outbuflen += SHORTALIGN(res->entry.len);
res->poslen = uniquePos(res->pos, res->poslen); res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos); buflen = SHORTALIGN(buflen);
buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
} }
else
*outbuflen += res->entry.len;
res++; res++;
memcpy(res, ptr, sizeof(WordEntryIN)); memcpy(res, ptr, sizeof(WordEntryIN));
} }
...@@ -136,37 +136,37 @@ uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen) ...@@ -136,37 +136,37 @@ uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
{ {
if (res->entry.haspos) if (res->entry.haspos)
{ {
/* append ptr's positions to res's positions */
int newlen = ptr->poslen + res->poslen; int newlen = ptr->poslen + res->poslen;
/* Append res to pos */ res->pos = (WordEntryPos *)
repalloc(res->pos, newlen * sizeof(WordEntryPos));
res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos)); memcpy(&res->pos[res->poslen], ptr->pos,
memcpy(&res->pos[res->poslen], ptr->poslen * sizeof(WordEntryPos));
ptr->pos, ptr->poslen * sizeof(WordEntryPos));
res->poslen = newlen; res->poslen = newlen;
pfree(ptr->pos); pfree(ptr->pos);
} }
else else
{ {
/* just give ptr's positions to pos */
res->entry.haspos = 1; res->entry.haspos = 1;
res->pos = ptr->pos; res->pos = ptr->pos;
res->poslen = ptr->poslen;
} }
} }
ptr++; ptr++;
} }
/* add last item */ /* count space needed for last item */
buflen += res->entry.len;
if (res->entry.haspos) if (res->entry.haspos)
{ {
*outbuflen += SHORTALIGN(res->entry.len);
res->poslen = uniquePos(res->pos, res->poslen); res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos); buflen = SHORTALIGN(buflen);
buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
} }
else
*outbuflen += res->entry.len;
*outbuflen = buflen;
return res + 1 - a; return res + 1 - a;
} }
...@@ -193,6 +193,8 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -193,6 +193,8 @@ tsvectorin(PG_FUNCTION_ARGS)
int toklen; int toklen;
WordEntryPos *pos; WordEntryPos *pos;
int poslen; int poslen;
char *strbuf;
int stroff;
/* /*
* Tokens are appended to tmpbuf, cur is a pointer * Tokens are appended to tmpbuf, cur is a pointer
...@@ -212,19 +214,17 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -212,19 +214,17 @@ tsvectorin(PG_FUNCTION_ARGS)
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL)) while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{ {
if (toklen >= MAXSTRLEN) if (toklen >= MAXSTRLEN)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long (%ld bytes, max %ld bytes)", errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) toklen, (long) toklen,
(long) MAXSTRLEN))); (long) (MAXSTRLEN-1))));
if (cur - tmpbuf > MAXSTRPOS) if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("position value is too large"))); errmsg("string is too long for tsvector")));
/* /*
* Enlarge buffers if needed * Enlarge buffers if needed
...@@ -232,7 +232,8 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -232,7 +232,8 @@ tsvectorin(PG_FUNCTION_ARGS)
if (len >= arrlen) if (len >= arrlen)
{ {
arrlen *= 2; arrlen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen); arr = (WordEntryIN *)
repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
} }
while ((cur - tmpbuf) + toklen >= buflen) while ((cur - tmpbuf) + toklen >= buflen)
{ {
...@@ -254,7 +255,11 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -254,7 +255,11 @@ tsvectorin(PG_FUNCTION_ARGS)
arr[len].poslen = poslen; arr[len].poslen = poslen;
} }
else else
{
arr[len].entry.haspos = 0; arr[len].entry.haspos = 0;
arr[len].pos = NULL;
arr[len].poslen = 0;
}
len++; len++;
} }
...@@ -264,40 +269,45 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -264,40 +269,45 @@ tsvectorin(PG_FUNCTION_ARGS)
len = uniqueentry(arr, len, tmpbuf, &buflen); len = uniqueentry(arr, len, tmpbuf, &buflen);
else else
buflen = 0; buflen = 0;
if (buflen > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector")));
totallen = CALCDATASIZE(len, buflen); totallen = CALCDATASIZE(len, buflen);
in = (TSVector) palloc0(totallen); in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen); SET_VARSIZE(in, totallen);
in->size = len; in->size = len;
cur = STRPTR(in);
inarr = ARRPTR(in); inarr = ARRPTR(in);
strbuf = STRPTR(in);
stroff = 0;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
arr[i].entry.pos = cur - STRPTR(in); arr[i].entry.pos = stroff;
cur += SHORTALIGN(arr[i].entry.len); stroff += arr[i].entry.len;
if (arr[i].entry.haspos) if (arr[i].entry.haspos)
{ {
uint16 tmplen; if (arr[i].poslen > 0xFFFF)
if(arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long"); elog(ERROR, "positions array too long");
tmplen = (uint16) arr[i].poslen; /* Copy number of positions */
stroff = SHORTALIGN(stroff);
/* Copy length to output struct */ *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
memcpy(cur, &tmplen, sizeof(uint16)); stroff += sizeof(uint16);
cur += sizeof(uint16);
/* Copy positions */ /* Copy positions */
memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos)); memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
cur += arr[i].poslen * sizeof(WordEntryPos); stroff += arr[i].poslen * sizeof(WordEntryPos);
pfree(arr[i].pos); pfree(arr[i].pos);
} }
inarr[i] = arr[i].entry; inarr[i] = arr[i].entry;
} }
Assert((strbuf + stroff - (char *) in) == totallen);
PG_RETURN_TSVECTOR(in); PG_RETURN_TSVECTOR(in);
} }
...@@ -495,11 +505,12 @@ tsvectorrecv(PG_FUNCTION_ARGS) ...@@ -495,11 +505,12 @@ tsvectorrecv(PG_FUNCTION_ARGS)
datalen += lex_len; datalen += lex_len;
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0) if (i > 0 && WordEntryCMP(&vec->entries[i],
&vec->entries[i - 1],
STRPTR(vec)) <= 0)
elog(ERROR, "lexemes are misordered"); elog(ERROR, "lexemes are misordered");
/* Receive positions */ /* Receive positions */
if (npos > 0) if (npos > 0)
{ {
uint16 j; uint16 j;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.6 2007/10/23 00:51:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -266,8 +266,14 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b) ...@@ -266,8 +266,14 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
return (a->len > b->len) ? 1 : -1; return (a->len > b->len) ? 1 : -1;
} }
/*
* Add positions from src to dest after offsetting them by maxpos.
* Return the number added (might be less than expected due to overflow)
*/
static int4 static int4
add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos) add_pos(TSVector src, WordEntry * srcptr,
TSVector dest, WordEntry * destptr,
int4 maxpos)
{ {
uint16 *clen = &_POSVECPTR(dest, destptr)->npos; uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
int i; int i;
...@@ -280,7 +286,10 @@ add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, in ...@@ -280,7 +286,10 @@ add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, in
*clen = 0; *clen = 0;
startlen = *clen; startlen = *clen;
for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++) for (i = 0;
i < slen && *clen < MAXNUMPOS &&
(*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
i++)
{ {
WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i])); WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos)); WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
...@@ -307,8 +316,8 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -307,8 +316,8 @@ tsvector_concat(PG_FUNCTION_ARGS)
i, i,
j, j,
i1, i1,
i2; i2,
char *cur; dataoff;
char *data, char *data,
*data1, *data1,
*data2; *data2;
...@@ -336,11 +345,13 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -336,11 +345,13 @@ tsvector_concat(PG_FUNCTION_ARGS)
data2 = STRPTR(in2); data2 = STRPTR(in2);
i1 = in1->size; i1 = in1->size;
i2 = in2->size; i2 = in2->size;
/* conservative estimate of space needed */
out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2)); out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2)); SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
out->size = in1->size + in2->size; out->size = in1->size + in2->size;
data = cur = STRPTR(out);
ptr = ARRPTR(out); ptr = ARRPTR(out);
data = STRPTR(out);
dataoff = 0;
while (i1 && i2) while (i1 && i2)
{ {
int cmp = compareEntry(data1, ptr1, data2, ptr2); int cmp = compareEntry(data1, ptr1, data2, ptr2);
...@@ -349,16 +360,15 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -349,16 +360,15 @@ tsvector_concat(PG_FUNCTION_ARGS)
{ /* in1 first */ { /* in1 first */
ptr->haspos = ptr1->haspos; ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len; ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len); memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data; ptr->pos = dataoff;
dataoff += ptr1->len;
if (ptr->haspos) if (ptr->haspos)
{ {
cur += SHORTALIGN(ptr1->len); dataoff = SHORTALIGN(dataoff);
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
} }
else
cur += ptr1->len;
ptr++; ptr++;
ptr1++; ptr1++;
...@@ -368,21 +378,21 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -368,21 +378,21 @@ tsvector_concat(PG_FUNCTION_ARGS)
{ /* in2 first */ { /* in2 first */
ptr->haspos = ptr2->haspos; ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len; ptr->len = ptr2->len;
memcpy(cur, data2 + ptr2->pos, ptr2->len); memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data; ptr->pos = dataoff;
dataoff += ptr2->len;
if (ptr->haspos) if (ptr->haspos)
{ {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos); int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
cur += SHORTALIGN(ptr2->len);
if (addlen == 0) if (addlen == 0)
ptr->haspos = 0; ptr->haspos = 0;
else else
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); {
dataoff = SHORTALIGN(dataoff);
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
} }
else
cur += ptr2->len;
ptr++; ptr++;
ptr2++; ptr2++;
...@@ -392,30 +402,32 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -392,30 +402,32 @@ tsvector_concat(PG_FUNCTION_ARGS)
{ {
ptr->haspos = ptr1->haspos | ptr2->haspos; ptr->haspos = ptr1->haspos | ptr2->haspos;
ptr->len = ptr1->len; ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len); memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data; ptr->pos = dataoff;
dataoff += ptr1->len;
if (ptr->haspos) if (ptr->haspos)
{ {
cur += SHORTALIGN(ptr1->len);
if (ptr1->haspos) if (ptr1->haspos)
{ {
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); dataoff = SHORTALIGN(dataoff);
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
if (ptr2->haspos) if (ptr2->haspos)
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
} }
else if (ptr2->haspos) else /* must have ptr2->haspos */
{ {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos); int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
if (addlen == 0) if (addlen == 0)
ptr->haspos = 0; ptr->haspos = 0;
else else
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); {
dataoff = SHORTALIGN(dataoff);
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
} }
} }
else
cur += ptr1->len;
ptr++; ptr++;
ptr1++; ptr1++;
...@@ -429,16 +441,15 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -429,16 +441,15 @@ tsvector_concat(PG_FUNCTION_ARGS)
{ {
ptr->haspos = ptr1->haspos; ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len; ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len); memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data; ptr->pos = dataoff;
dataoff += ptr1->len;
if (ptr->haspos) if (ptr->haspos)
{ {
cur += SHORTALIGN(ptr1->len); dataoff = SHORTALIGN(dataoff);
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
} }
else
cur += ptr1->len;
ptr++; ptr++;
ptr1++; ptr1++;
...@@ -449,31 +460,40 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -449,31 +460,40 @@ tsvector_concat(PG_FUNCTION_ARGS)
{ {
ptr->haspos = ptr2->haspos; ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len; ptr->len = ptr2->len;
memcpy(cur, data2 + ptr2->pos, ptr2->len); memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data; ptr->pos = dataoff;
dataoff += ptr2->len;
if (ptr->haspos) if (ptr->haspos)
{ {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos); int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
cur += SHORTALIGN(ptr2->len);
if (addlen == 0) if (addlen == 0)
ptr->haspos = 0; ptr->haspos = 0;
else else
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); {
dataoff = SHORTALIGN(dataoff);
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
} }
else
cur += ptr2->len;
ptr++; ptr++;
ptr2++; ptr2++;
i2--; i2--;
} }
/*
* Instead of checking each offset individually, we check for overflow
* of pos fields once at the end.
*/
if (dataoff > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector")));
out->size = ptr - ARRPTR(out); out->size = ptr - ARRPTR(out);
SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data)); SET_VARSIZE(out, CALCDATASIZE(out->size, dataoff));
if (data != STRPTR(out)) if (data != STRPTR(out))
memmove(STRPTR(out), data, cur - data); memmove(STRPTR(out), data, dataoff);
PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1); PG_FREE_IF_COPY(in2, 1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment