Commit e5be8998 authored by Teodor Sigaev's avatar Teodor Sigaev

Refactoring by Heikki Linnakangas <heikki@enterprisedb.com>, with

minor editing by me

- Break the QueryItem struct into QueryOperator and QueryOperand.
  Type was really the only common field between them. QueryItem still
  exists, and is used in the TSQuery struct as before, but it's now a
  union of the two. Many other changes fell from that, like separation
  of pushval_asis function into pushValue, pushOperator and pushStop.

- Moved some structs that were for internal use only from header files
  to the right .c-files.

- Moved tsvector parser to a new tsvector_parser.c file. Parser code was
  about half of the size of tsvector.c, it's also used from tsquery.c, and
  it has some data structures of its own, so it seems better to separate
  it. Cleaned up the API so that TSVectorParserState is not accessed from
  outside tsvector_parser.c.

- Separated enumerations (#defines, really) used for QueryItem.type
  field and as return codes from gettoken_query. It was just accidental
  code sharing.

- Removed ParseQueryNode struct used internally by makepol and friends.
  push*-functions now construct QueryItems directly.

- Changed int4 variables to just ints for variables like "i" or "array
  size", where the storage-size was not significant.
parent da124840
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
/*
* This function is used for morph parsing
* This function is used for morph parsing.
*
* The value is passed to parsetext which will call the right dictionary to
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
* to the stack.
*
* All words belonging to the same variant are pushed as an ANDed list,
* and different variants are ORred together.
*/
static void
pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
{
int4 count = 0;
ParsedText prs;
......@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
cntvar = 0,
cntpos = 0,
cnt = 0;
Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
prs.lenwords = 4;
prs.curwords = 0;
prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(state->cfg_id, &prs, strval, lenval);
parsetext(cfg_id, &prs, strval, lenval);
if (prs.curwords > 0)
{
......@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pushValue(state, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
if (cnt)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
pushOperator(state, OP_AND);
cnt++;
count++;
}
if (cntvar)
pushquery(state, OPR, (int4) '|', 0, 0, 0);
pushOperator(state, OP_OR);
cntvar++;
}
if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
pushOperator(state, OP_AND);
cntpos++;
}
......@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
}
else
pushval_asis(state, VALSTOP, NULL, 0, 0);
pushStop(state);
}
Datum
......@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res;
int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
......@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res;
int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
}
/*
* Parse string and lexize words
* Parse string and lexize words.
*
* prs will be filled in.
*/
void
parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
{
int type,
lenlemm;
......@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework
*/
static void
hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
......@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++)
{
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
if (item->type == QI_VAL &&
item->operand.length == buflen &&
strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
{
if (word->item)
{
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item;
prs->words[prs->curwords].item = &item->operand;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
}
else
word->item = item;
word->item = &item->operand;
}
item++;
}
......@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
}
void
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{
int type,
lenlemm;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1575,7 +1575,7 @@ typedef struct
} hlCheck;
static bool
checkcondition_HL(void *checkval, QueryItem * val)
checkcondition_HL(void *checkval, QueryOperand * val)
{
int i;
......@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = pos; i < prs->curwords; i++)
{
if (prs->words[i].item == item)
if (prs->words[i].item == &item->operand)
{
if (i > *q)
*q = i;
......@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
item = GETQUERY(query);
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = *q; i >= pos; i--)
{
if (prs->words[i].item == item)
if (prs->words[i].item == &item->operand)
{
if (i < *p)
*p = i;
......
#
# Makefile for utils/adt
#
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
#
subdir = src/backend/utils/adt
......@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \
tsvector.o tsvector_op.o tsvector_parser.o\
uuid.o xml.o
like.o: like.c like_match.c
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
{
text *txt;
QueryOperand *val = &item[i].operand;
txt = (text *) palloc(VARHDRSZ + item[i].length);
txt = (text *) palloc(VARHDRSZ + val->length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
SET_VARSIZE(txt, VARHDRSZ + val->length);
memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"),
......@@ -116,11 +117,11 @@ typedef struct
} GinChkVal;
static bool
checkcondition_gin(void *checkval, QueryItem * val)
checkcondition_gin(void *checkval, QueryOperand * val)
{
GinChkVal *gcv = (GinChkVal *) checkval;
return gcv->mapped_check[val - gcv->frst];
return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
}
Datum
......@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
gcv.mapped_check[i] = check[j++];
res = TS_execute(
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -293,7 +293,7 @@ typedef struct
* is there value 'val' in array or not ?
*/
static bool
checkcondition_arr(void *checkval, QueryItem * val)
checkcondition_arr(void *checkval, QueryOperand * val)
{
int4 *StopLow = ((CHKVAL *) checkval)->arrb;
int4 *StopHigh = ((CHKVAL *) checkval)->arre;
......@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->val)
if (*StopMiddle == val->valcrc)
return (true);
else if (*StopMiddle < val->val)
else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
......@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
}
static bool
checkcondition_bit(void *checkval, QueryItem * val)
checkcondition_bit(void *checkval, QueryOperand * val)
{
return GETBIT(checkval, HASHVAL(val->val));
return GETBIT(checkval, HASHVAL(val->valcrc));
}
Datum
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,6 +23,29 @@
#include "utils/pg_crc.h"
/*
 * Working state of the tsquery parser. It carries the scanner position used
 * by gettoken_query, the list of query items built by the push* functions,
 * and the buffer that accumulates operand strings.
 */
struct TSQueryParserStateData
{
/* State for gettoken_query */
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
int state; /* current parser state, one of the WAIT* constants */
int count; /* nesting count, incremented by (,
decremented by ) */
/* polish (prefix) notation in list, filled in by push* functions */
List *polstr;
/* Strings from operands are collected in op. curop is a pointer to
* the end of used space of op. */
char *op; /* start of the operand string buffer */
char *curop; /* next free byte in op */
int lenop; /* allocated size of op */
int sumlen; /* used size of op */
/* state for value's parser */
TSVectorParseState valstate; /* created by init_tsvector_parser */
};
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
......@@ -30,21 +53,10 @@
#define WAITSINGLEOPERAND 4
/*
* node of query tree, also used
* for storing polish notation in parser
* subroutine to parse the weight part, like ':1AB' of a query.
*/
typedef struct ParseQueryNode
{
int2 weight;
int2 type;
int4 val;
int2 distance;
int2 length;
struct ParseQueryNode *next;
} ParseQueryNode;
static char *
get_weight(char *buf, int2 *weight)
get_weight(char *buf, int16 *weight)
{
*weight = 0;
......@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight)
return buf;
}
/*
 * Token types returned by gettoken_query while scanning a query string.
 *
 * These are deliberately separate from the QI_* codes stored in
 * QueryItem.type; they only describe what the scanner has just seen.
 *
 * Note: no comma after the last enumerator. A trailing comma in an
 * enumerator list is only legal from C99 on, and older compilers reject it.
 */
typedef enum
{
	PT_END = 0,					/* end of input reached */
	PT_ERR = 1,					/* syntax error */
	PT_VAL = 2,					/* an operand (value) */
	PT_OPR = 3,					/* an operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5				/* closing parenthesis */
} ts_tokentype;
/*
* get token from query string
*
* *operator is filled in with OP_* when return value is PT_OPR
* *strval, *lenval and *weight are filled in when return value is PT_VAL
*/
static int4
gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
static ts_tokentype
gettoken_query(TSQueryParserState state,
int8 *operator,
int *lenval, char **strval, int16 *weight)
{
while (1)
{
......@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
{
(state->buf)++; /* can safely ++, t_iseq guarantee
* that pg_mblen()==1 */
*val = (int4) '!';
*operator = OP_NOT;
state->state = WAITOPERAND;
return OPR;
return PT_OPR;
}
else if (t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
state->state = WAITOPERAND;
return OPEN;
return PT_OPEN;
}
else if (t_iseq(state->buf, ':'))
{
......@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
}
else if (!t_isspace(state->buf))
{
state->valstate.prsbuf = state->buf;
if (gettoken_tsvector(&(state->valstate)))
/* We rely on the tsvector parser to parse the value for us */
reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{
*strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word;
state->buf = get_weight(state->valstate.prsbuf, weight);
state->buf = get_weight(state->buf, weight);
state->state = WAITOPERATOR;
return VAL;
return PT_VAL;
}
else if (state->state == WAITFIRSTOPERAND)
return END;
return PT_END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
......@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
}
break;
case WAITOPERATOR:
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
if (t_iseq(state->buf, '&'))
{
state->state = WAITOPERAND;
*operator = OP_AND;
(state->buf)++;
return PT_OPR;
}
if (t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
*val = (int4) *(state->buf);
*operator = OP_OR;
(state->buf)++;
return OPR;
return PT_OPR;
}
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
return (state->count < 0) ? ERR : CLOSE;
return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
else if (*(state->buf) == '\0')
return (state->count) ? ERR : END;
return (state->count) ? PT_ERR : PT_END;
else if (!t_isspace(state->buf))
return ERR;
return PT_ERR;
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
return END;
return PT_END;
*strval = state->buf;
*lenval = strlen(state->buf);
state->buf += strlen(state->buf);
state->count++;
return VAL;
return PT_VAL;
default:
return ERR;
return PT_ERR;
break;
}
state->buf += pg_mblen(state->buf);
}
return END;
return PT_END;
}
/*
* push new one in polish notation reverse view
* Push an operator to state->polstr
*/
void
pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
pushOperator(TSQueryParserState state, int8 oper)
{
ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
QueryOperator *tmp;
Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
tmp->type = QI_OPR;
tmp->oper = oper;
/* left is filled in later with findoprnd */
state->polstr = lcons(tmp, state->polstr);
}
static void
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
{
QueryOperand *tmp;
tmp->weight = weight;
tmp->type = type;
tmp->val = val;
if (distance >= MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
......@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand is too long in tsearch query: \"%s\"",
state->buffer)));
tmp->distance = distance;
tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
tmp->type = QI_VAL;
tmp->weight = weight;
tmp->valcrc = (int32) valcrc;
tmp->length = lenval;
tmp->next = state->str;
state->str = tmp;
state->num++;
tmp->distance = distance;
state->polstr = lcons(tmp, state->polstr);
}
/*
* This function is used for tsquery parsing
* Push an operand to state->polstr.
*
* strval must point to a string equal to state->curop. lenval is the length
* of the string.
*/
void
pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
{
pg_crc32 c;
pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
ereport(ERROR,
......@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
errmsg("word is too long in tsearch query: \"%s\"",
state->buffer)));
INIT_CRC32(c);
COMP_CRC32(c, strval, lenval);
FIN_CRC32(c);
pushquery(state, type, *(int4 *) &c,
state->curop - state->op, lenval, weight);
INIT_CRC32(valcrc);
COMP_CRC32(valcrc, strval, lenval);
FIN_CRC32(valcrc);
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
/* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop)
{
int4 tmp = state->curop - state->op;
int used = state->curop - state->op;
state->lenop *= 2;
state->op = (char *) repalloc((void *) state->op, state->lenop);
state->curop = state->op + tmp;
state->curop = state->op + used;
}
memcpy((void *) state->curop, (void *) strval, lenval);
state->curop += lenval;
*(state->curop) = '\0';
state->curop++;
state->sumlen += lenval + 1 /* \0 */ ;
return;
}
/*
 * Prepend a stopword placeholder item to state->polstr.
 *
 * Only the type field of the new item is set (to QI_VALSTOP); no operand
 * string is recorded for it.
 */
void
pushStop(TSQueryParserState state)
{
	QueryOperand *stopitem = (QueryOperand *) palloc(sizeof(QueryOperand));

	stopitem->type = QI_VALSTOP;
	state->polstr = lcons(stopitem, state->polstr);
}
#define STACKDEPTH 32
/*
* make polish notation of query
* Make polish (prefix) notation of query.
*
* See parse_tsquery for explanation of pushval.
*/
static int4
makepol(TSQueryParserState * state,
void (*pushval) (TSQueryParserState *, int, char *, int, int2))
static void
makepol(TSQueryParserState state,
PushFunction pushval,
void *opaque)
{
int4 val = 0,
type;
int4 lenval = 0;
int8 operator = 0;
ts_tokentype type;
int lenval = 0;
char *strval = NULL;
int4 stack[STACKDEPTH];
int4 lenstack = 0;
int2 weight = 0;
int8 opstack[STACKDEPTH];
int lenstack = 0;
int16 weight = 0;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
{
switch (type)
{
case VAL:
pushval(state, VAL, strval, lenval, weight);
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
case PT_VAL:
pushval(opaque, state, strval, lenval, weight);
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
pushOperator(state, opstack[lenstack]);
}
break;
case OPR:
if (lenstack && val == (int4) '|')
pushquery(state, OPR, val, 0, 0, 0);
case PT_OPR:
if (lenstack && operator == OP_OR)
pushOperator(state, OP_OR);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
stack[lenstack] = val;
opstack[lenstack] = operator;
lenstack++;
}
break;
case OPEN:
if (makepol(state, pushval) == ERR)
return ERR;
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
case PT_OPEN:
makepol(state, pushval, opaque);
if (lenstack && (opstack[lenstack - 1] == OP_AND ||
opstack[lenstack - 1] == OP_NOT))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
pushOperator(state, opstack[lenstack]);
}
break;
case CLOSE:
case PT_CLOSE:
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
pushOperator(state, opstack[lenstack]);
};
return END;
break;
case ERR:
return;
case PT_ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"",
state->buffer)));
return ERR;
}
}
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
pushOperator(state, opstack[lenstack]);
}
}
/*
* Fills in the left-fields previously left unfilled. The input
* QueryItems must be in polish (prefix) notation.
*/
static void
findoprnd(QueryItem * ptr, int4 *pos)
findoprnd(QueryItem *ptr, int *pos)
{
if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
if (ptr[*pos].type == QI_VAL ||
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
* they haven't been cleansed
* away yet.
*/
{
ptr[*pos].left = 0;
(*pos)++;
}
else if (ptr[*pos].val == (int4) '!')
else
{
ptr[*pos].left = 1;
Assert(ptr[*pos].type == QI_OPR);
if (ptr[*pos].operator.oper == OP_NOT)
{
ptr[*pos].operator.left = 1;
(*pos)++;
findoprnd(ptr, pos);
}
else
{
QueryItem *curitem = &ptr[*pos];
int4 tmp = *pos;
QueryOperator *curitem = &ptr[*pos].operator;
int tmp = *pos;
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
(*pos)++;
findoprnd(ptr, pos);
curitem->left = *pos - tmp;
findoprnd(ptr, pos);
}
}
}
/*
* input
* Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue,
* a complete expression with all operands, or a stopword placeholder
* with pushStop, otherwise the prefix notation representation will be broken,
* having an operator with no operand.
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
*
* The returned query might contain QI_VALSTOP nodes. The caller is responsible
* for cleaning them up (with clean_fakeval)
*/
TSQuery
parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
parse_tsquery(char *buf,
PushFunction pushval,
void *opaque,
bool isplain)
{
TSQueryParserState state;
int4 i;
struct TSQueryParserStateData state;
int i;
TSQuery query;
int4 commonlen;
int commonlen;
QueryItem *ptr;
ParseQueryNode *tmp;
int4 pos = 0;
int pos = 0;
ListCell *cell;
/* init state */
state.buffer = buf;
state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
state.num = 0;
state.str = NULL;
state.cfg_id = cfg_id;
state.polstr = NIL;
/* init value parser's state */
state.valstate.oprisdelim = true;
state.valstate.len = 32;
state.valstate.word = (char *) palloc(state.valstate.len);
state.valstate = init_tsvector_parser(NULL, true);
/* init list of operand */
state.sumlen = 0;
......@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval);
pfree(state.valstate.word);
if (!state.num)
makepol(&state, pushval, opaque);
close_tsvector_parser(state.valstate);
if (list_length(state.polstr) == 0)
{
ereport(NOTICE,
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
......@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
return query;
}
/* make finish struct */
commonlen = COMPUTESIZE(state.num, state.sumlen);
query = (TSQuery) palloc(commonlen);
/* Pack the QueryItems in the final TSQuery struct to return to caller */
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
query = (TSQuery) palloc0(commonlen);
SET_VARSIZE(query, commonlen);
query->size = state.num;
query->size = list_length(state.polstr);
ptr = GETQUERY(query);
/* set item in polish notation */
for (i = 0; i < state.num; i++)
/* Copy QueryItems to TSQuery */
i = 0;
foreach(cell, state.polstr)
{
ptr[i].weight = state.str->weight;
ptr[i].type = state.str->type;
ptr[i].val = state.str->val;
ptr[i].distance = state.str->distance;
ptr[i].length = state.str->length;
tmp = state.str->next;
pfree(state.str);
state.str = tmp;
QueryItem *item = (QueryItem *) lfirst(cell);
switch(item->type)
{
case QI_VAL:
memcpy(&ptr[i], item, sizeof(QueryOperand));
break;
case QI_VALSTOP:
ptr[i].type = QI_VALSTOP;
break;
case QI_OPR:
memcpy(&ptr[i], item, sizeof(QueryOperator));
break;
default:
elog(ERROR, "unknown QueryItem type %d", item->type);
}
i++;
}
/* set user friendly-operand view */
/* Copy all the operand strings to TSQuery */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
/* set left operand's position for every operator */
/* Set left operand pointers for every operator. */
pos = 0;
findoprnd(ptr, &pos);
return query;
}
/*
 * Push callback that stores the operand exactly as written: it forwards
 * strval, lenval and weight straight to pushValue.
 *
 * opaque is not used here; tsqueryin passes NULL for it.
 */
static void
pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
int16 weight)
{
pushValue(state, strval, lenval, weight);
}
/*
* in without morphology
*/
......@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
pg_verifymbstr(in, strlen(in), false);
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
}
/*
......@@ -443,13 +556,14 @@ typedef struct
char *buf;
char *cur;
char *op;
int4 buflen;
int buflen;
} INFIX;
#define RESIZEBUF(inf,addsize) \
/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
#define RESIZEBUF(inf, addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \
int4 len = (inf)->cur - (inf)->buf; \
int len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \
......@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
static void
infix(INFIX * in, bool first)
{
if (in->curpol->type == VAL)
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
if (in->curpol->type == QI_VAL)
{
char *op = in->op + in->curpol->distance;
QueryOperand *curpol = &in->curpol->operand;
char *op = in->op + curpol->distance;
int clen;
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\'';
in->cur++;
while (*op)
......@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
}
*(in->cur) = '\'';
in->cur++;
if (in->curpol->weight)
if (curpol->weight)
{
*(in->cur) = ':';
in->cur++;
if (in->curpol->weight & (1 << 3))
if (curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
if (in->curpol->weight & (1 << 2))
if (curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
if (in->curpol->weight & (1 << 1))
if (curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
if (in->curpol->weight & 1)
if (curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
......@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
*(in->cur) = '\0';
in->curpol++;
}
else if (in->curpol->val == (int4) '!')
else if (in->curpol->operator.oper == OP_NOT)
{
bool isopr = false;
......@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
in->cur++;
*(in->cur) = '\0';
in->curpol++;
if (in->curpol->type == OPR)
if (in->curpol->type == QI_OPR)
{
isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
infix(in, isopr);
if (isopr)
{
......@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
}
else
{
int4 op = in->curpol->val;
int8 op = in->curpol->operator.oper;
INFIX nrm;
in->curpol++;
if (op == (int4) '|' && !first)
if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
......@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
/* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
sprintf(in->cur, " %c %s", op, nrm.buf);
switch(op)
{
case OP_OR:
sprintf(in->cur, " | %s", nrm.buf);
break;
case OP_AND:
sprintf(in->cur, " & %s", nrm.buf);
break;
default:
/* OP_NOT is handled in above if-branch*/
elog(ERROR, "unexpected operator type %d", op);
}
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
if (op == (int4) '|' && !first)
if (op == OP_OR && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
......@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
pq_sendint(&buf, query->size, sizeof(int32));
for (i = 0; i < query->size; i++)
{
int tmp;
pq_sendint(&buf, item->type, sizeof(item->type));
pq_sendint(&buf, item->weight, sizeof(item->weight));
pq_sendint(&buf, item->left, sizeof(item->left));
pq_sendint(&buf, item->val, sizeof(item->val));
/*
* We are sure that sizeof(WordEntry) == sizeof(int32), and about
* layout of QueryItem
*/
tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
pq_sendint(&buf, tmp, sizeof(tmp));
switch(item->type)
{
case QI_VAL:
pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
pq_sendint(&buf, item->operand.length, sizeof(int16));
/* istrue flag is just for temporary use in tsrank.c/Cover,
* so we don't need to transfer that */
break;
case QI_OPR:
pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
if (item->operator.oper != OP_NOT)
pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
break;
default:
elog(ERROR, "unknown tsquery node type %d", item->type);
}
item++;
}
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
if (item->type == VAL)
pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
if (item->type == QI_VAL)
pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
item++;
}
......@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
TSQuery query;
int i,
size,
tmp,
len = HDRSIZETQ;
len;
QueryItem *item;
int datalen = 0;
char *ptr;
......@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery");
len += sizeof(QueryItem) * size;
len = HDRSIZETQ + sizeof(QueryItem) * size;
query = (TSQuery) palloc(len);
query->size = size;
......@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
for (i = 0; i < size; i++)
{
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
item->left = (int16) pq_getmsgint(buf, sizeof(int16));
item->val = (int32) pq_getmsgint(buf, sizeof(int32));
tmp = pq_getmsgint(buf, sizeof(int32));
memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
switch(item->type)
{
case QI_VAL:
item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
item->operand.length = pq_getmsgint(buf, sizeof(int16));
/*
* Check that datalen doesn't grow too large. Without the
* check, a malicious client could induce a buffer overflow
* by sending a tsquery whose size exceeds 2GB. datalen
* would overflow, we would allocate a too small buffer below,
* and overflow the buffer. Because operand.length is a 20-bit
* field, adding one such value to datalen must exceed
* MaxAllocSize before wrapping over the 32-bit datalen field,
* so this check will protect from it.
*/
if (datalen > MAXSTRLEN)
elog(ERROR, "invalid tsquery; total operand length exceeded");
/* We can calculate distance from datalen, no need to send it
* through the wire. If we did, we would have to check that
* it's valid anyway.
*/
item->operand.distance = datalen;
datalen += item->operand.length + 1; /* \0 */
break;
case QI_OPR:
item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
if (item->operator.oper != OP_NOT &&
item->operator.oper != OP_OR &&
item->operator.oper != OP_AND)
elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
if(item->operator.oper != OP_NOT)
{
item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
/*
* Sanity checks
*/
if (item->type == VAL)
{
datalen += item->length + 1; /* \0 */
}
else if (item->type == OPR)
{
if (item->val == '|' || item->val == '&')
{
if (item->left <= 0 || i + item->left >= size)
if (item->operator.left <= 0 || i + item->operator.left >= size)
elog(ERROR, "invalid pointer to left operand");
/* XXX: Though there's no way to construct a TSQuery that's
* not in polish notation, we don't enforce that for
* queries received from client in binary mode. Is there
* anything that relies on it?
*
* XXX: The tree could be malformed in other ways too,
* a node could have two parents, for example.
*/
}
if (i == size - 1)
elog(ERROR, "invalid pointer to right operand");
break;
default:
elog(ERROR, "unknown tsquery node type %d", item->type);
}
else
elog(ERROR, "unknown tsquery node type");
item++;
}
......@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
ptr = GETOPERAND(query);
for (i = 0; i < size; i++)
{
if (item->type == VAL)
if (item->type == QI_VAL)
{
item->distance = ptr - GETOPERAND(query);
memcpy(ptr,
pq_getmsgbytes(buf, item->length),
item->length);
ptr += item->length;
pq_getmsgbytes(buf, item->operand.length),
item->operand.length);
ptr += item->operand.length;
*ptr++ = '\0';
}
item++;
......@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
INFIX nrm;
text *res;
QueryItem *q;
int4 len;
int len;
if (query->size == 0)
{
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -35,20 +35,23 @@ maketree(QueryItem * in)
node->valnode = in;
node->right = node->left = NULL;
if (in->type == OPR)
if (in->type == QI_OPR)
{
node->right = maketree(in + 1);
if (in->val != (int4) '!')
node->left = maketree(in + in->left);
if (in->operator.oper != OP_NOT)
node->left = maketree(in + in->operator.left);
}
return node;
}
/*
* Internal state for plaintree and plainnode
*/
typedef struct
{
QueryItem *ptr;
int4 len;
int4 cur;
int len; /* allocated size of ptr */
int cur; /* number of elements in ptr */
} PLAINTREE;
static void
......@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
}
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
state->cur++;
else if (node->valnode->val == (int4) '!')
else if (node->valnode->operator.oper == OP_NOT)
{
state->ptr[state->cur].left = 1;
state->ptr[state->cur].operator.left = 1;
state->cur++;
plainnode(state, node->right);
}
else
{
int4 cur = state->cur;
int cur = state->cur;
state->cur++;
plainnode(state, node->right);
state->ptr[cur].left = state->cur - cur;
state->ptr[cur].operator.left = state->cur - cur;
plainnode(state, node->left);
}
pfree(node);
}
/*
* make plain view of tree from 'normal' view of tree
* make plain view of tree from a NODE-tree representation
*/
static QueryItem *
plaintree(NODE * root, int4 *len)
plaintree(NODE * root, int *len)
{
PLAINTREE pl;
pl.cur = 0;
pl.len = 16;
if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
{
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
plainnode(&pl, root);
......@@ -122,17 +125,17 @@ freetree(NODE * node)
static NODE *
clean_NOT_intree(NODE * node)
{
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
return node;
if (node->valnode->val == (int4) '!')
if (node->valnode->operator.oper == OP_NOT)
{
freetree(node);
return NULL;
}
/* operator & or | */
if (node->valnode->val == (int4) '|')
if (node->valnode->operator.oper == OP_OR)
{
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL)
......@@ -145,6 +148,8 @@ clean_NOT_intree(NODE * node)
{
NODE *res = node;
Assert(node->valnode->operator.oper == OP_AND);
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
if (node->left == NULL && node->right == NULL)
......@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
}
QueryItem *
clean_NOT(QueryItem * ptr, int4 *len)
clean_NOT(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
......@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
#undef V_UNKNOWN
#endif
#define V_UNKNOWN 0
#define V_TRUE 1
#define V_FALSE 2
#define V_STOP 3
/*
* output values for result output parameter of clean_fakeval_intree
*/
#define V_UNKNOWN 0 /* the expression can't be evaluated statically */
#define V_TRUE 1 /* the expression is always true (not implemented) */
#define V_FALSE 2 /* the expression is always false (not implemented) */
#define V_STOP 3 /* the expression is a stop word */
/*
* Clean query tree from values which is always in
......@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
char lresult = V_UNKNOWN,
rresult = V_UNKNOWN;
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
return node;
else if (node->valnode->type == VALSTOP)
else
if (node->valnode->type == QI_VALSTOP)
{
pfree(node);
*result = V_STOP;
return NULL;
}
Assert(node->valnode->type == QI_OPR);
if (node->valnode->val == (int4) '!')
if (node->valnode->operator.oper == OP_NOT)
{
node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right)
......@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult);
if (lresult == V_STOP && rresult == V_STOP)
{
freetree(node);
......@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
}
QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len)
clean_fakeval(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
char result = V_UNKNOWN;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
}
static QTNode *
join_tsqueries(TSQuery a, TSQuery b)
join_tsqueries(TSQuery a, TSQuery b, int8 operator)
{
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR;
res->valnode->type = QI_OPR;
res->valnode->operator.oper = operator;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
......@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
res = join_tsqueries(a, b);
res->valnode->val = '&';
res = join_tsqueries(a, b, OP_AND);
query = QTN2QT(res);
......@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
res = join_tsqueries(a, b);
res->valnode->val = '|';
res = join_tsqueries(a, b, OP_OR);
query = QTN2QT(res);
......@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR;
res->valnode->val = '!';
res->valnode->type = QI_OPR;
res->valnode->operator.oper = OP_NOT;
res->child = (QTNode **) palloc0(sizeof(QTNode *));
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
......@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
for (i = 0; i < a->size; i++)
{
if (ptr->type == VAL)
sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
if (ptr->type == QI_VAL)
sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
ptr++;
}
......@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
for (i = 0; i < ex->size; i++)
{
iq = GETQUERY(query);
if (ie[i].type != VAL)
if (ie[i].type != QI_VAL)
continue;
for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val)
if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
{
j = query->size + 1;
break;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
return 1;
}
/*
* If node is equal to ex, replace it with subs. Replacement is actually done
* by returning either node or a copy of subs.
*/
static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
if ((node->sign & ex->sign) != ex->sign ||
node->valnode->type != ex->valnode->type)
return node;
if (node->flags & QTN_NOCHANGE)
return node;
if (node->valnode->type == OPR)
if (node->valnode->type == QI_OPR)
{
if (node->valnode->operator.oper != ex->valnode->operator.oper)
return node;
if (node->nchild == ex->nchild)
{
if (QTNEq(node, ex))
......@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
}
else if (node->nchild > ex->nchild)
{
/*
* AND and OR are commutative, so we check if a subset of the
* children match. For example, if tnode is A | B | C, and
* ex is B | C, we have a match after we convert tnode to
* A | (B | C).
*/
int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
......@@ -131,6 +145,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
pfree(counters);
}
}
else
{
Assert(node->valnode->type == QI_VAL);
if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
return node;
else if (QTNEq(node, ex))
{
QTNFree(node);
......@@ -145,6 +165,7 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
}
*isfind = true;
}
}
return node;
}
......@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
root = findeq(root, ex, subs, isfind);
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
{
int i;
......@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
if (!root)
return NULL;
if (root->valnode->type == OPR)
if (root->valnode->type == QI_OPR)
{
int i,
j = 0;
......@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
root->nchild = j;
if (root->valnode->val == (int4) '!' && root->nchild == 0)
if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
{
QTNFree(root);
root = NULL;
......@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements");
elog(ERROR, "array must have three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type");
elog(ERROR, "array must contain tsquery elements");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
......@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
tree = findsubquery(tree, qex, subs, NULL);
QTNFree(qex);
QTNFree(subs);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -17,7 +17,6 @@
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
QTNode *
QT2QTN(QueryItem * in, char *operand)
{
......@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
node->valnode = in;
if (in->type == OPR)
if (in->type == QI_OPR)
{
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
node->child[0] = QT2QTN(in + 1, operand);
node->sign = node->child[0]->sign;
if (in->val == (int4) '!')
if (in->operator.oper == OP_NOT)
node->nchild = 1;
else
{
node->nchild = 2;
node->child[1] = QT2QTN(in + in->left, operand);
node->child[1] = QT2QTN(in + in->operator.left, operand);
node->sign |= node->child[1]->sign;
}
}
else if (operand)
{
node->word = operand + in->distance;
node->sign = 1 << (in->val % 32);
node->word = operand + in->operand.distance;
node->sign = 1 << (in->operand.valcrc % 32);
}
return node;
......@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
if (!in)
return;
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word);
if (in->child)
{
if (in->valnode)
{
if (in->valnode->type == OPR && in->nchild > 0)
if (in->valnode->type == QI_OPR && in->nchild > 0)
{
int i;
......@@ -82,20 +81,18 @@ QTNodeCompare(QTNode * an, QTNode * bn)
{
if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
else if (an->valnode->val != bn->valnode->val)
return (an->valnode->val > bn->valnode->val) ? -1 : 1;
else if (an->valnode->type == VAL)
{
if (an->valnode->length == bn->valnode->length)
return strncmp(an->word, bn->word, an->valnode->length);
else
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
}
else if (an->nchild != bn->nchild)
if (an->valnode->type == QI_OPR)
{
QueryOperator *ao = &an->valnode->operator;
QueryOperator *bo = &bn->valnode->operator;
if(ao->oper != bo->oper)
return (ao->oper > bo->oper) ? -1 : 1;
if (an->nchild != bn->nchild)
return (an->nchild > bn->nchild) ? -1 : 1;
}
else
{
int i,
res;
......@@ -104,8 +101,25 @@ QTNodeCompare(QTNode * an, QTNode * bn)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
return 0;
}
else
{
QueryOperand *ao = &an->valnode->operand;
QueryOperand *bo = &bn->valnode->operand;
Assert(an->valnode->type == QI_VAL);
if (ao->valcrc != bo->valcrc)
{
return (ao->valcrc > bo->valcrc) ? -1 : 1;
}
if (ao->length == bo->length)
return strncmp(an->word, bn->word, ao->length);
else
return (ao->length > bo->length) ? -1 : 1;
}
}
static int
......@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
......@@ -139,22 +153,30 @@ QTNEq(QTNode * a, QTNode * b)
return (QTNodeCompare(a, b) == 0) ? true : false;
}
/*
* Remove unnecessary intermediate nodes. For example:
*
*     OR               OR
*   a    OR    ->    a b c
*       b  c
*/
void
QTNTernary(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
QTNTernary(in->child[i]);
for (i = 0; i < in->nchild; i++)
{
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
{
QTNode *cc = in->child[i];
if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
{
int oldnchild = in->nchild;
in->nchild += cc->nchild - 1;
......@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
i += cc->nchild - 1;
if(cc->flags & QTN_NEEDFREE)
pfree(cc->valnode);
pfree(cc);
}
}
}
/*
* Convert a tree to binary tree by inserting intermediate nodes.
* (Opposite of QTNTernary)
*/
void
QTNBinary(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
......@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
nn->sign = nn->child[0]->sign | nn->child[1]->sign;
nn->valnode->type = in->valnode->type;
nn->valnode->val = in->valnode->val;
nn->valnode->operator.oper = in->valnode->operator.oper;
in->child[0] = nn;
in->child[1] = in->child[in->nchild - 1];
......@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
}
}
/*
* Count the total length of the operand strings in the tree, including
* their '\0' terminators, and the number of nodes.
*/
static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
cntsize(QTNode * in, int *sumlen, int *nnode)
{
*nnode += 1;
if (in->valnode->type == OPR)
if (in->valnode->type == QI_OPR)
{
int i;
......@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
}
else
{
*sumlen += in->valnode->length + 1;
*sumlen += in->valnode->operand.length + 1;
}
}
......@@ -234,22 +266,26 @@ typedef struct
} QTN2QTState;
static void
fillQT(QTN2QTState * state, QTNode * in)
fillQT(QTN2QTState *state, QTNode *in)
{
*(state->curitem) = *(in->valnode);
if (in->valnode->type == VAL)
if (in->valnode->type == QI_VAL)
{
memcpy(state->curoperand, in->word, in->valnode->length);
state->curitem->distance = state->curoperand - state->operand;
state->curoperand[in->valnode->length] = '\0';
state->curoperand += in->valnode->length + 1;
memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
memcpy(state->curoperand, in->word, in->valnode->operand.length);
state->curitem->operand.distance = state->curoperand - state->operand;
state->curoperand[in->valnode->operand.length] = '\0';
state->curoperand += in->valnode->operand.length + 1;
state->curitem++;
}
else
{
QueryItem *curitem = state->curitem;
Assert(in->valnode->type == QI_OPR);
memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
Assert(in->nchild <= 2);
state->curitem++;
......@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
if (in->nchild == 2)
{
curitem->left = state->curitem - curitem;
curitem->operator.left = state->curitem - curitem;
fillQT(state, in->child[1]);
}
}
......@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
*(out->valnode) = *(in->valnode);
out->flags |= QTN_NEEDFREE;
if (in->valnode->type == VAL)
if (in->valnode->type == QI_VAL)
{
out->word = palloc(in->valnode->length + 1);
memcpy(out->word, in->word, in->valnode->length);
out->word[in->valnode->length] = '\0';
out->word = palloc(in->valnode->operand.length + 1);
memcpy(out->word, in->word, in->valnode->operand.length);
out->word[in->valnode->operand.length] = '\0';
out->flags |= QTN_WORDFREE;
}
else
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -68,7 +68,7 @@ cnt_length(TSVector t)
}
static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{
if (ptr->len == item->length)
return strncmp(
......@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
}
static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item)
find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{
WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
......@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
}
/*
* sort QueryOperands by (length, word)
*/
static int
compareQueryItem(const void *a, const void *b, void *arg)
compareQueryOperand(const void *a, const void *b, void *arg)
{
char *operand = (char *) arg;
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
return strncmp(operand + (*(QueryItem **) a)->distance,
operand + (*(QueryItem **) b)->distance,
(*(QueryItem **) b)->length);
if (qa->length == qb->length)
return strncmp(operand + qa->distance,
operand + qb->distance,
qb->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
return (qa->length > qb->length) ? 1 : -1;
}
static QueryItem **
SortAndUniqItems(char *operand, QueryItem * item, int *size)
/*
* Returns a sorted, de-duplicated array of QueryOperands in a query.
* The returned QueryOperands are pointers to the original QueryOperands
* in the query.
*
* Length of the returned array is stored in *size
*/
static QueryOperand **
SortAndUniqItems(TSQuery q, int *size)
{
QueryItem **res,
char *operand = GETOPERAND(q);
QueryItem * item = GETQUERY(q);
QueryOperand **res,
**ptr,
**prevptr;
ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
/* Collect all operands from the tree to res */
while ((*size)--)
{
if (item->type == VAL)
if (item->type == QI_VAL)
{
*ptr = item;
*ptr = (QueryOperand *) item;
ptr++;
}
item++;
......@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
if (*size < 2)
return res;
qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
ptr = res + 1;
prevptr = res;
/* remove duplicates */
while (ptr - res < *size)
{
if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{
prevptr++;
*prevptr = *ptr;
......@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
lenct,
dist;
float res = -1.0;
QueryItem **item;
QueryOperand **item;
int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
item = SortAndUniqItems(q, &size);
if (size < 2)
{
pfree(item);
......@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
j,
i;
float res = 0.0;
QueryItem **item;
QueryOperand **item;
int size = q->size;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++)
{
......@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
if (!t->size || !q->size)
return 0.0;
res = (item->type != VAL && item->val == (int4) '&') ?
/* XXX: What about NOT? */
res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0)
......@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
}
static bool
checkcondition_QueryItem(void *checkval, QueryItem * val)
checkcondition_QueryOperand(void *checkval, QueryOperand *val)
{
return (bool) (val->istrue);
}
......@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
/* reset istrue flag */
for (i = 0; i < query->size; i++)
{
if (item->type == VAL)
item->istrue = 0;
if (item->type == QI_VAL)
item->operand.istrue = 0;
item++;
}
}
......@@ -484,7 +501,7 @@ typedef struct
static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
{
DocRepresentation *ptr;
int lastpos = ext->pos;
......@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr - doc < len)
{
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
{
if(ptr->item[i]->type == QI_VAL)
ptr->item[i]->operand.istrue = 1;
}
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
{
if (ptr->pos > ext->q)
{
......@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr >= doc + ext->pos)
{
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
if(ptr->item[i]->type == QI_VAL) /* XXX */
ptr->item[i]->operand.istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{
if (ptr->pos < ext->p)
{
......@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (i = 0; i < query->size; i++)
{
if (item[i].type != VAL || item[i].istrue)
QueryOperand *curoperand;
if (item[i].type != QI_VAL)
continue;
entry = find_wordentry(txt, query, &(item[i]));
curoperand = &item[i].operand;
if(item[i].operand.istrue)
continue;
entry = find_wordentry(txt, query, curoperand);
if (!entry)
continue;
......@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
{
if (j == 0)
{
QueryItem *kptr,
*iptr = item + i;
int k;
doc[cur].needfree = false;
......@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (k = 0; k < query->size; k++)
{
kptr = item + k;
QueryOperand *kptr = &item[k].operand;
QueryOperand *iptr = &item[i].operand;
if (k == i ||
(item[k].type == VAL &&
compareQueryItem(&kptr, &iptr, operand) == 0))
(item[k].type == QI_VAL &&
compareQueryOperand(&kptr, &iptr, operand) == 0))
{
/* if k == i, we've already checked above that its type is QI_VAL */
doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++;
kptr->istrue = 1;
item[k].operand.istrue = 1;
}
}
}
......@@ -640,7 +669,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
if (cur > 0)
{
if (cur > 1)
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
return doc;
}
......@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(3);
float res;
......@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
......@@ -778,7 +806,7 @@ Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(2);
float res;
......@@ -793,7 +821,7 @@ Datum
ts_rankcd_tt(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -20,22 +20,37 @@
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
typedef struct
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
int poslen; /* number of elements in pos */
} WordEntryIN;
static int
comparePos(const void *a, const void *b)
{
if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
int apos = WEP_GETPOS(*(WordEntryPos *) a);
int bpos = WEP_GETPOS(*(WordEntryPos *) b);
if (apos == bpos)
return 0;
return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
return (apos > bpos) ? 1 : -1;
}
/*
* Removes duplicate pos entries. If there are two entries with the same pos
* but different weights, the higher weight is retained.
*
* Returns new length.
*/
static int
uniquePos(WordEntryPos * a, int4 l)
uniquePos(WordEntryPos * a, int l)
{
WordEntryPos *ptr,
*res;
if (l == 1)
if (l <= 1)
return l;
res = a;
......@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg)
}
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
{
WordEntryIN *ptr,
*res;
res = a;
Assert(l >= 1);
if (l == 1)
{
if (a->entry.haspos)
{
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
a->poslen = uniquePos(a->pos, a->poslen);
*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
}
return l;
}
res = a;
ptr = a + 1;
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
......@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
res++;
......@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
if (res->entry.haspos)
{
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
int newlen = ptr->poslen + res->poslen;
/* Append ptr's positions to the end of res->pos */
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
memcpy(&res->pos[res->poslen],
ptr->pos, ptr->poslen * sizeof(WordEntryPos));
res->poslen = newlen;
pfree(ptr->pos);
}
else
......@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
}
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
......@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
return compareentry(a, b, buf);
}
#define WAITWORD 1
#define WAITENDWORD 2
#define WAITNEXTCHAR 3
#define WAITENDCMPLX 4
#define WAITPOSINFO 5
#define INPOSINFO 6
#define WAITPOSDELIM 7
#define WAITCHARCMPLX 8
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
bool
gettoken_tsvector(TSVectorParseState *state)
{
int4 oldstate = 0;
state->curpos = state->word;
state->state = WAITWORD;
state->alen = 0;
while (1)
{
if (state->state == WAITWORD)
{
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
state->state = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf))
{
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDWORD;
}
}
else if (state->state == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = oldstate;
}
}
else if (state->state == WAITENDWORD)
{
if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
RESIZEPRSBUF;
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
return true;
}
else if (t_iseq(state->prsbuf, ':'))
{
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
if (state->oprisdelim)
return true;
else
state->state = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITENDCMPLX)
{
if (t_iseq(state->prsbuf, '\''))
{
state->state = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITCHARCMPLX)
{
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDCMPLX;
}
else
{
RESIZEPRSBUF;
*(state->curpos) = '\0';
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
return true;
}
else
state->state = WAITPOSINFO;
continue; /* recheck current character */
}
}
else if (state->state == WAITPOSINFO)
{
if (t_iseq(state->prsbuf, ':'))
state->state = INPOSINFO;
else
return true;
}
else if (state->state == INPOSINFO)
{
if (t_isdigit(state->prsbuf))
{
if (state->alen == 0)
{
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
(*(uint16 *) (state->pos))++;
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
state->state = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (state->state == WAITPOSDELIM)
{
if (t_iseq(state->prsbuf, ','))
state->state = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
return true;
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
return false;
}
Datum
tsvectorin(PG_FUNCTION_ARGS)
......@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS)
char *buf = PG_GETARG_CSTRING(0);
TSVectorParseState state;
WordEntryIN *arr;
int totallen;
int arrlen; /* allocated size of arr */
WordEntry *inarr;
int4 len = 0,
totallen = 64;
int len = 0;
TSVector in;
char *tmpbuf,
*cur;
int4 i,
buflen = 256;
int i;
char *token;
int toklen;
WordEntryPos *pos;
int poslen;
/*
* Tokens are appended to tmpbuf, cur is a pointer
* to the end of used space in tmpbuf.
*/
char *tmpbuf;
char *cur;
int buflen = 256; /* allocated size of tmpbuf */
pg_verifymbstr(buf, strlen(buf), false);
state.prsbuf = buf;
state.len = 32;
state.word = (char *) palloc(state.len);
state.oprisdelim = false;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
cur = tmpbuf = (char *) palloc(buflen);
state = init_tsvector_parser(buf, false);
while (gettoken_tsvector(&state))
{
/*
* Realloc buffers if it's needed
*/
if (len >= totallen)
{
totallen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
}
arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
cur = tmpbuf = (char *) palloc(buflen);
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{
int4 dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
if (state.curpos - state.word >= MAXSTRLEN)
if (toklen >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) (state.curpos - state.word),
(long) toklen,
(long) MAXSTRLEN)));
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large")));
/*
* Enlarge buffers if needed
*/
if (len >= arrlen)
{
arrlen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
}
while ((cur - tmpbuf) + toklen >= buflen)
{
int dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
arr[len].entry.len = toklen;
arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
cur += arr[len].entry.len;
memcpy((void *) cur, (void *) token, toklen);
cur += toklen;
if (state.alen)
if (poslen != 0)
{
arr[len].entry.haspos = 1;
arr[len].pos = state.pos;
arr[len].pos = pos;
arr[len].poslen = poslen;
}
else
arr[len].entry.haspos = 0;
len++;
}
pfree(state.word);
close_tsvector_parser(state);
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
......@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS)
cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos)
{
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
uint16 tmplen;
if(arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long");
tmplen = (uint16) arr[i].poslen;
/* Copy length to output struct */
memcpy(cur, &tmplen, sizeof(uint16));
cur += sizeof(uint16);
/* Copy positions */
memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
cur += arr[i].poslen * sizeof(WordEntryPos);
pfree(arr[i].pos);
}
inarr[i] = arr[i].entry;
......@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec;
int i,
size,
len = DATAHDRSIZE;
int i;
uint32 size;
WordEntry *weptr;
int datalen = 0;
Size len;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
elog(ERROR, "invalid size of tsvector");
len += sizeof(WordEntry) * size;
len = DATAHDRSIZE + sizeof(WordEntry) * size;
len *= 2;
len = len * 2; /* times two to make room for lexemes */
vec = (TSVector) palloc0(len);
vec->size = size;
weptr = ARRPTR(vec);
for (i = 0; i < size; i++)
{
int tmp;
int32 tmp;
weptr = ARRPTR(vec) + i;
......@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
npos;
WordEntryPos *wepptr;
npos = (uint16) pq_getmsgint(buf, sizeof(int16));
npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
if (npos > MAXNUMPOS)
elog(ERROR, "unexpected number of positions");
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -66,6 +66,9 @@ typedef struct
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
/*
* Order: haspos, len, word, for all positions (pos, weight)
*/
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
{
......@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
* compare 2 string values
*/
static int4
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
{
if (ptr->len == item->length)
return strncmp(
......@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
* check weight info
*/
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
......@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
* is there value 'val' in array or not ?
*/
static bool
checkcondition_str(void *checkval, QueryItem * val)
checkcondition_str(void *checkval, QueryOperand * val)
{
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle;
int difference;
......@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
difference = ValCompare(chkval, StopMiddle, val);
if (difference == 0)
return (val->weight && StopMiddle->haspos) ?
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
checkclass_str(chkval, StopMiddle, val) : true;
else if (difference < 0)
StopLow = StopMiddle + 1;
else
......@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
}
/*
* check for boolean condition
* check for boolean condition.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query.
*
*/
bool
TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val))
bool (*chkcond) (void *checkval, QueryOperand * val))
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == VAL)
return chkcond(checkval, curitem);
else if (curitem->val == (int4) '!')
{
return (calcnot) ?
!TS_execute(curitem + 1, checkval, calcnot, chkcond)
: true;
}
else if (curitem->val == (int4) '&')
if (curitem->type == QI_VAL)
return chkcond(checkval, (QueryOperand *) curitem);
switch(curitem->operator.oper)
{
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
case OP_NOT:
if (calcnot)
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return false;
}
else
{ /* |-operator */
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
case OP_OR:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
default:
elog(ERROR, "unknown operator %d", curitem->operator.oper);
}
/* not reachable, but keep compiler quiet */
return false;
}
......
/*-------------------------------------------------------------------------
*
* tsvector_parser.c
* Parser for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * Private state of a tsvector parser. Callers only see the opaque
 * TSVectorParseState pointer; the fields are set up by init_tsvector_parser
 * and read/updated by gettoken_tsvector.
 */
struct TSVectorParseStateData
{
char *prsbuf; /* current read position in the input string */
char *word; /* buffer to hold the current word */
int len; /* size in bytes allocated for 'word' */
bool oprisdelim; /* if true, ! | & ( ) also act as delimiters */
};
/*
 * Creates a parser that will read tokens from 'input'.
 *
 * When oprisdelim is true, the characters ! | & ( ) are treated as
 * delimiters in addition to whitespace.
 *
 * The returned state is palloc'd, together with an initial word buffer;
 * release both with close_tsvector_parser.
 */
TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim)
{
    const int   initial_word_size = 32; /* starting size of the word buffer */
    TSVectorParseState parser;

    parser = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));

    parser->oprisdelim = oprisdelim;
    parser->prsbuf = input;
    parser->len = initial_word_size;
    parser->word = (char *) palloc(parser->len);

    return parser;
}
/*
 * Reinitializes parser for parsing 'input', instead of previous input.
 *
 * Only the read position is replaced: the word buffer (and its allocated
 * size) and the oprisdelim setting are left untouched.
 */
void
reset_tsvector_parser(TSVectorParseState state, char *input)
{
state->prsbuf = input;
}
/*
 * Shuts down a tsvector parser.
 *
 * Frees the word buffer and then the state struct itself, so 'state' must
 * not be used after this call. Token strings handed out by
 * gettoken_tsvector point at the word buffer (see RETURN_TOKEN), so they
 * become invalid as well. The input string is not freed here.
 */
void
close_tsvector_parser(TSVectorParseState state)
{
pfree(state->word);
pfree(state);
}
/*
 * Grow state->word if appending one more character of the maximum length
 * allowed by the database encoding would reach the end of the buffer.
 * The buffer size is doubled with repalloc, and curpos is re-pointed to the
 * same offset inside the (possibly moved) buffer.
 *
 * Expects 'state' and a local 'char *curpos' to be in scope where the macro
 * is used.
 */
#define RESIZEPRSBUF \
do { \
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int clen = curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
curpos = state->word + clen; \
} \
} while (0)
/*
 * True if x points at a single-byte character that is one of ! & | ( ).
 * Note that x is evaluated several times.
 */
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/*
 * Fills the output parameters, and returns true from the enclosing function.
 *
 * Must be used inside gettoken_tsvector: it relies on that function's
 * parameters (state, strval, lenval, pos_ptr, poslen, endptr) and locals
 * (curpos, pos, npos).
 *
 * - If the caller supplied pos_ptr, ownership of the collected position
 *   array (possibly NULL) is handed over through it, and the element count
 *   is stored in *poslen when poslen was supplied too.
 * - If the caller did not supply pos_ptr, any collected position array is
 *   freed here so that it is not leaked.
 * - strval, lenval and endptr are each optional; a NULL pointer simply
 *   means the caller is not interested in that value.
 *
 * Every output pointer, including poslen, is checked for NULL before it is
 * written through.
 */
#define RETURN_TOKEN \
do { \
    if (pos_ptr != NULL) \
    { \
        *pos_ptr = pos; \
        if (poslen != NULL) \
            *poslen = npos; \
    } \
    else if (pos != NULL) \
        pfree(pos); \
    \
    if (strval != NULL) \
        *strval = state->word; \
    if (lenval != NULL) \
        *lenval = curpos - state->word; \
    if (endptr != NULL) \
        *endptr = state->prsbuf; \
    return true; \
} while(0)
/* State codes used in gettoken_tsvector */
#define WAITWORD 1 /* before a token: skipping until a word starts */
#define WAITENDWORD 2 /* inside an unquoted word */
#define WAITNEXTCHAR 3 /* after a backslash: next character is copied as-is */
#define WAITENDCMPLX 4 /* inside a quoted word, waiting for the closing quote */
#define WAITPOSINFO 5 /* quoted word finished: is it followed by ':' ? */
#define INPOSINFO 6 /* after ':' or ',': a position number must start here */
#define WAITPOSDELIM 7 /* after a position number: weight, ',' or end of token */
#define WAITCHARCMPLX 8 /* saw a quote inside a quoted word: doubled quote or end */
/*
 * Get next token from string being parsed. Returns false if
 * end of input string is reached, otherwise returns true and fills in the
 * strval, lenval, pos_ptr, poslen and endptr output parameters. Each of
 * strval, lenval, pos_ptr and endptr may be NULL if the caller is not
 * interested in that value.
 *
 * *strval		token, '\0'-terminated. It points at the parser's internal
 *				word buffer, which is overwritten by the next call.
 * *lenval		length of *strval in bytes
 * *pos_ptr		pointer to a palloc'd array of positions and weights
 *				associated with the token, or NULL if the token had none.
 *				If the caller is not interested in the information, NULL
 *				can be supplied (the array is then freed here). Otherwise
 *				the caller is responsible for pfreeing the array.
 * *poslen		number of elements in *pos_ptr. Only written when pos_ptr is
 *				supplied; pass a valid poslen together with pos_ptr.
 * *endptr		read position in the input string at the time the token was
 *				returned. The character that ended the token has not been
 *				consumed yet at that point.
 *
 * Syntax problems are reported with ereport(ERROR).
 */
bool
gettoken_tsvector(TSVectorParseState state,
char **strval, int *lenval,
WordEntryPos **pos_ptr, int *poslen,
char **endptr)
{
/* state to return to once an escaped character has been copied */
int oldstate = 0;
/* write position inside state->word; every call starts a fresh word */
char *curpos = state->word;
int statecode = WAITWORD;
/*
 * pos collects the comma delimited list of positions (and weights) that
 * may follow the actual token after a ':'.
 */
WordEntryPos *pos = NULL;
int npos = 0; /* elements of pos used */
int posalen = 0; /* allocated size of pos */
while (1)
{
if (statecode == WAITWORD)
{
/*
 * Looking for the start of a token. Whitespace matches none of the
 * branches below and is simply skipped by the advance at loop end.
 */
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf))
{
/* first character of an unquoted word */
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDWORD;
}
}
else if (statecode == WAITNEXTCHAR)
{
/* Previous character was a backslash: take this one literally. */
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
Assert(oldstate != 0);
statecode = oldstate;
}
}
else if (statecode == WAITENDWORD)
{
/* Inside an unquoted word. */
if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
/* delimiter reached: terminate the word and hand it out */
RESIZEPRSBUF;
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0';
RETURN_TOKEN;
}
else if (t_iseq(state->prsbuf, ':'))
{
/*
 * ':' ends the word. With oprisdelim set the token is returned
 * right away, leaving the ':' unconsumed for the caller;
 * otherwise position info is parsed here.
 */
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0';
if (state->oprisdelim)
RETURN_TOKEN;
else
statecode = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
else if (statecode == WAITENDCMPLX)
{
/* Inside a quoted word. */
if (t_iseq(state->prsbuf, '\''))
{
statecode = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
/* input ended before the closing quote */
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
else if (statecode == WAITCHARCMPLX)
{
/*
 * A quote was seen inside a quoted word. A second quote right after
 * it stands for one literal quote; anything else means the first
 * quote closed the word.
 */
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDCMPLX;
}
else
{
RESIZEPRSBUF;
*(curpos) = '\0';
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
RETURN_TOKEN;
}
else
statecode = WAITPOSINFO;
continue; /* recheck current character */
}
}
else if (statecode == WAITPOSINFO)
{
/* After a quoted word: ':' introduces positions, else we are done. */
if (t_iseq(state->prsbuf, ':'))
statecode = INPOSINFO;
else
RETURN_TOKEN;
}
else if (statecode == INPOSINFO)
{
/* A position number must start at the current character. */
if (t_isdigit(state->prsbuf))
{
if (posalen == 0)
{
posalen = 4;
pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
npos = 0;
}
else if (npos + 1 >= posalen)
{
posalen *= 2;
pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
}
npos++;
/*
 * atoi reads the whole number starting here; its remaining digits
 * are skipped one by one in WAITPOSDELIM. LIMITPOS is defined
 * elsewhere -- presumably it clamps the value to the largest
 * storable position (verify against its definition).
 */
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 0);
statecode = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (statecode == WAITPOSDELIM)
{
/*
 * After (or still within the digits of) a position number. Accepts
 * ',' to start another position, one weight letter for the position
 * just read (A or * = 3, B = 2, C = 1, D = 0), or whitespace / end
 * of input to finish the token. A weight letter is rejected if a
 * non-zero weight has already been set for this position.
 */
if (t_iseq(state->prsbuf, ','))
statecode = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
else if (!t_isdigit(state->prsbuf))
/* digits are passed over silently; anything else is an error */
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
/* not reached: the loop above only exits through return or an error */
return false;
}
......@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -42,7 +42,7 @@ typedef struct
type:8,
len:16;
char *word;
QueryItem *item;
QueryOperand *item;
} HeadlineWordEntry;
typedef struct
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -13,6 +13,8 @@
#define _PG_TSTYPE_H_
#include "fmgr.h"
#include "utils/pg_crc.h"
/*
* TSVector type.
......@@ -27,8 +29,8 @@ typedef struct
pos:20; /* MAX 1Mb */
} WordEntry;
#define MAXSTRLEN ( 1<<11 )
#define MAXSTRPOS ( 1<<20 )
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
/*
* Equivalent to
......@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int4 size;
uint32 size;
char data[1];
} TSVectorData;
......@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
/*
* TSQuery
*
*
*/
typedef int8 QueryItemType;
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3 /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
/*
* QueryItem is one node in tsquery - operator or operand.
*/
typedef struct QueryItem
typedef struct
{
int8 type; /* operand or kind of operator */
int8 weight; /* weights of operand to search */
int2 left; /* pointer to left operand Right operand is
* item + 1, left operand is placed
* item+item->left */
int4 val; /* crc32 value of operand's value */
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
* if it =0 then any weight are allowed */
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type,
* but we use comparisons to signed integers in the code.
* They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */
uint32
istrue:1, /* use for ranking in Cover */
length:11,
distance:20;
} QueryItem;
} QueryOperand;
/* Legal values for QueryOperator.oper */
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
int16 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
/*
* It's impossible to use offsetof(QueryItem, istrue)
* Note: TSQuery is 4-bytes aligned, so make sure there's no fields
* inside QueryItem requiring 8-byte alignment, like int64.
*/
#define HDRSIZEQI ( sizeof(int8) + sizeof(int8) + sizeof(int2) + sizeof(int4) )
typedef union
{
QueryItemType type;
QueryOperator operator;
QueryOperand operand;
} QueryItem;
/*
* Storage:
* (len)(size)(array of ITEM)(array of operand in text form)
* operands are always finished by '\0'
* (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
*/
typedef struct
......@@ -182,13 +213,17 @@ typedef struct
typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int4) )
#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
#define OPERANDSSIZE(x) ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/* Computes the size of header and all QueryItems. size is the number of
* QueryItems, and lenofoperand is the total length of all operands
*/
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/*
* fmgr interface macros
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -14,65 +14,41 @@
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
/*
* Common parse definitions for tsvector and tsquery
*/
typedef struct
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
} WordEntryIN;
typedef struct
{
char *prsbuf;
char *word;
char *curpos;
int4 len;
int4 state;
int4 alen;
WordEntryPos *pos;
bool oprisdelim;
} TSVectorParseState;
/* tsvector parser support. */
extern bool gettoken_tsvector(TSVectorParseState *state);
struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len,
WordEntryPos **pos, int *poslen,
char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
typedef struct
{
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
int4 state;
int4 count;
/* parse_tsquery */
/* reverse polish notation in list (for temporary usage) */
struct ParseQueryNode *str;
struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
typedef struct TSQueryParserStateData *TSQueryParserState;
/* number in str */
int4 num;
/* text-form operand */
int4 lenop;
int4 sumlen;
char *op;
char *curop;
/* state for value's parser */
TSVectorParseState valstate;
/* tscfg */
Oid cfg_id;
} TSQueryParserState;
typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
extern TSQuery parse_tsquery(char *buf,
void (*pushval) (TSQueryParserState *, int, char *, int, int2),
Oid cfg_id, bool isplain);
extern void pushval_asis(TSQueryParserState * state,
int type, char *strval, int lenval, int2 weight);
extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
int4 distance, int4 lenval, int2 weight);
PushFunction pushval,
void *opaque, bool isplain);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int2 weight);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 operator);
/*
* parse plain text and lexize words
......@@ -84,6 +60,11 @@ typedef struct
union
{
uint16 pos;
/*
* When apos array is used, apos[0] is the number of elements
* in the array (excluding apos[0]), and alen is the allocated
* size of the array.
*/
uint16 *apos;
} pos;
char *word;
......@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen);
extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing
*/
#define END 0
#define ERR 1
#define VAL 2
#define OPR 3
#define OPEN 4
#define CLOSE 5
#define VALSTOP 6 /* for stop words */
/*
* Common check function for tsvector @@ tsquery
*/
extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val));
bool (*chkcond) (void *checkval, QueryOperand * val));
/*
* Useful conversion macros
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment