Commit e5be8998 authored by Teodor Sigaev's avatar Teodor Sigaev

Refactoring by Heikki Linnakangas <heikki@enterprisedb.com> with

minor edits by me

- Break the QueryItem struct into QueryOperator and QueryOperand.
  Type was really the only common field between them. QueryItem still
  exists, and is used in the TSQuery struct as before, but it's now a
  union of the two. Many other changes fell from that, like separation
  of pushval_asis function into pushValue, pushOperator and pushStop.

- Moved some structs that were for internal use only from header files
  to the right .c-files.

- Moved tsvector parser to a new tsvector_parser.c file. Parser code was
  about half of the size of tsvector.c, it's also used from tsquery.c, and
  it has some data structures of its own, so it seems better to separate
  it. Cleaned up the API so that TSVectorParserState is not accessed from
  outside tsvector_parser.c.

- Separated enumerations (#defines, really) used for QueryItem.type
  field and as return codes from gettoken_query. It was just accidental
  code sharing.

- Removed ParseQueryNode struct used internally by makepol and friends.
  push*-functions now construct QueryItems directly.

- Changed int4 variables to just ints for variables like "i" or "array
  size", where the storage-size was not significant.
parent da124840
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS) ...@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
/* /*
* This function is used for morph parsing * This function is used for morph parsing.
*
* The value is passed to parsetext which will call the right dictionary to
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
* to the stack.
*
* All words belonging to the same variant are pushed as an ANDed list,
* and different variants are ORred together.
*/ */
static void static void
pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight) pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
{ {
int4 count = 0; int4 count = 0;
ParsedText prs; ParsedText prs;
...@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
cntvar = 0, cntvar = 0,
cntpos = 0, cntpos = 0,
cnt = 0; cnt = 0;
Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
prs.lenwords = 4; prs.lenwords = 4;
prs.curwords = 0; prs.curwords = 0;
prs.pos = 0; prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(state->cfg_id, &prs, strval, lenval); parsetext(cfg_id, &prs, strval, lenval);
if (prs.curwords > 0) if (prs.curwords > 0)
{ {
...@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{ {
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); pushValue(state, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word); pfree(prs.words[count].word);
if (cnt) if (cnt)
pushquery(state, OPR, (int4) '&', 0, 0, 0); pushOperator(state, OP_AND);
cnt++; cnt++;
count++; count++;
} }
if (cntvar) if (cntvar)
pushquery(state, OPR, (int4) '|', 0, 0, 0); pushOperator(state, OP_OR);
cntvar++; cntvar++;
} }
if (cntpos) if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0); pushOperator(state, OP_AND);
cntpos++; cntpos++;
} }
...@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
} }
else else
pushval_asis(state, VALSTOP, NULL, 0, 0); pushStop(state);
} }
Datum Datum
...@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res; QueryItem *res;
int4 len; int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false); query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
if (query->size == 0) if (query->size == 0)
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
...@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res; QueryItem *res;
int4 len; int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true); query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
if (query->size == 0) if (query->size == 0)
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem) ...@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
} }
/* /*
* Parse string and lexize words * Parse string and lexize words.
*
* prs will be filled in.
*/ */
void void
parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen) parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
{ {
int type, int type,
lenlemm; lenlemm;
...@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen) ...@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework * Headline framework
*/ */
static void static void
hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type) hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
{ {
while (prs->curwords >= prs->lenwords) while (prs->curwords >= prs->lenwords)
{ {
...@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen) ...@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
word = &(prs->words[prs->curwords - 1]); word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0) if (item->type == QI_VAL &&
item->operand.length == buflen &&
strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
{ {
if (word->item) if (word->item)
{ {
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry)); memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item; prs->words[prs->curwords].item = &item->operand;
prs->words[prs->curwords].repeated = 1; prs->words[prs->curwords].repeated = 1;
prs->curwords++; prs->curwords++;
} }
else else
word->item = item; word->item = &item->operand;
} }
item++; item++;
} }
...@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe ...@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
} }
void void
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen) hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{ {
int type, int type,
lenlemm; lenlemm;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1575,7 +1575,7 @@ typedef struct ...@@ -1575,7 +1575,7 @@ typedef struct
} hlCheck; } hlCheck;
static bool static bool
checkcondition_HL(void *checkval, QueryItem * val) checkcondition_HL(void *checkval, QueryOperand * val)
{ {
int i; int i;
...@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q) ...@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
{ {
if (item->type != VAL) if (item->type != QI_VAL)
{ {
item++; item++;
continue; continue;
} }
for (i = pos; i < prs->curwords; i++) for (i = pos; i < prs->curwords; i++)
{ {
if (prs->words[i].item == item) if (prs->words[i].item == &item->operand)
{ {
if (i > *q) if (i > *q)
*q = i; *q = i;
...@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q) ...@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
item = GETQUERY(query); item = GETQUERY(query);
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
{ {
if (item->type != VAL) if (item->type != QI_VAL)
{ {
item++; item++;
continue; continue;
} }
for (i = *q; i >= pos; i--) for (i = *q; i >= pos; i--)
{ {
if (prs->words[i].item == item) if (prs->words[i].item == &item->operand)
{ {
if (i < *p) if (i < *p)
*p = i; *p = i;
......
# #
# Makefile for utils/adt # Makefile for utils/adt
# #
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $ # $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
# #
subdir = src/backend/utils/adt subdir = src/backend/utils/adt
...@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ ...@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \ ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \ tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \ tsvector.o tsvector_op.o tsvector_parser.o\
uuid.o xml.o uuid.o xml.o
like.o: like.c like_match.c like.o: like.c like_match.c
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS) ...@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
item = GETQUERY(query); item = GETQUERY(query);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
(*nentries)++; (*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
{ {
text *txt; text *txt;
QueryOperand *val = &item[i].operand;
txt = (text *) palloc(VARHDRSZ + item[i].length); txt = (text *) palloc(VARHDRSZ + val->length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length); SET_VARSIZE(txt, VARHDRSZ + val->length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length); memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
entries[j++] = PointerGetDatum(txt); entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0) if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"), errmsg("@@ operator does not support lexeme class restrictions"),
...@@ -116,11 +117,11 @@ typedef struct ...@@ -116,11 +117,11 @@ typedef struct
} GinChkVal; } GinChkVal;
static bool static bool
checkcondition_gin(void *checkval, QueryItem * val) checkcondition_gin(void *checkval, QueryOperand * val)
{ {
GinChkVal *gcv = (GinChkVal *) checkval; GinChkVal *gcv = (GinChkVal *) checkval;
return gcv->mapped_check[val - gcv->frst]; return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
} }
Datum Datum
...@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS) ...@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
gcv.mapped_check[i] = check[j++]; gcv.mapped_check[i] = check[j++];
res = TS_execute( res = TS_execute(
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -293,7 +293,7 @@ typedef struct ...@@ -293,7 +293,7 @@ typedef struct
* is there value 'val' in array or not ? * is there value 'val' in array or not ?
*/ */
static bool static bool
checkcondition_arr(void *checkval, QueryItem * val) checkcondition_arr(void *checkval, QueryOperand * val)
{ {
int4 *StopLow = ((CHKVAL *) checkval)->arrb; int4 *StopLow = ((CHKVAL *) checkval)->arrb;
int4 *StopHigh = ((CHKVAL *) checkval)->arre; int4 *StopHigh = ((CHKVAL *) checkval)->arre;
...@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val) ...@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
while (StopLow < StopHigh) while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + (StopHigh - StopLow) / 2; StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->val) if (*StopMiddle == val->valcrc)
return (true); return (true);
else if (*StopMiddle < val->val) else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1; StopLow = StopMiddle + 1;
else else
StopHigh = StopMiddle; StopHigh = StopMiddle;
...@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val) ...@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
} }
static bool static bool
checkcondition_bit(void *checkval, QueryItem * val) checkcondition_bit(void *checkval, QueryOperand * val)
{ {
return GETBIT(checkval, HASHVAL(val->val)); return GETBIT(checkval, HASHVAL(val->valcrc));
} }
Datum Datum
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,6 +23,29 @@ ...@@ -23,6 +23,29 @@
#include "utils/pg_crc.h" #include "utils/pg_crc.h"
/*
 * Working state of the tsquery parser: the scanner position used by
 * gettoken_query, the list of items built by the push* functions, and the
 * buffer in which operand strings are accumulated.
 */
struct TSQueryParserStateData
{
/* State for gettoken_query */
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
/* current scanner state, one of the WAIT* "parser's states" constants */
int state;
int count; /* nesting count, incremented by (,
decremented by ) */
/*
 * Query items in polish (prefix) notation. The push* functions prepend
 * each new item to this list.
 */
List *polstr;
/*
 * Operand strings are collected in op, each followed by a terminating
 * '\0'. curop points to the end of the used space of op.
 */
char *op;
char *curop;
int lenop; /* allocated size of op */
int sumlen; /* used size of op */
/* state of the tsvector parser, which is used to parse operand values */
TSVectorParseState valstate;
};
/* parser's states */ /* parser's states */
#define WAITOPERAND 1 #define WAITOPERAND 1
#define WAITOPERATOR 2 #define WAITOPERATOR 2
...@@ -30,21 +53,10 @@ ...@@ -30,21 +53,10 @@
#define WAITSINGLEOPERAND 4 #define WAITSINGLEOPERAND 4
/* /*
* node of query tree, also used * subroutine to parse the weight part, like ':1AB' of a query.
* for storing polish notation in parser
*/ */
typedef struct ParseQueryNode
{
int2 weight;
int2 type;
int4 val;
int2 distance;
int2 length;
struct ParseQueryNode *next;
} ParseQueryNode;
static char * static char *
get_weight(char *buf, int2 *weight) get_weight(char *buf, int16 *weight)
{ {
*weight = 0; *weight = 0;
...@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight) ...@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight)
return buf; return buf;
} }
/*
 * Token kinds reported by the query tokenizer (gettoken_query).
 *
 * These are kept separate from the QI_* item types stored in a query.
 * The numeric values are spelled out explicitly and must not change.
 */
typedef enum
{
	PT_END = 0,					/* no more tokens in the input */
	PT_ERR = 1,					/* syntax error */
	PT_VAL = 2,					/* an operand (value) */
	PT_OPR = 3,					/* an operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5				/* closing parenthesis */
} ts_tokentype;
/* /*
* get token from query string * get token from query string
*
* *operator is filled in with OP_* when return value is PT_OPR
* *strval, *lenval and *weight are filled in when return value is PT_VAL
*/ */
static int4 static ts_tokentype
gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight) gettoken_query(TSQueryParserState state,
int8 *operator,
int *lenval, char **strval, int16 *weight)
{ {
while (1) while (1)
{ {
...@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva ...@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
{ {
(state->buf)++; /* can safely ++, t_iseq guarantee (state->buf)++; /* can safely ++, t_iseq guarantee
* that pg_mblen()==1 */ * that pg_mblen()==1 */
*val = (int4) '!'; *operator = OP_NOT;
state->state = WAITOPERAND; state->state = WAITOPERAND;
return OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, '(')) else if (t_iseq(state->buf, '('))
{ {
state->count++; state->count++;
(state->buf)++; (state->buf)++;
state->state = WAITOPERAND; state->state = WAITOPERAND;
return OPEN; return PT_OPEN;
} }
else if (t_iseq(state->buf, ':')) else if (t_iseq(state->buf, ':'))
{ {
...@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva ...@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
} }
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
{ {
state->valstate.prsbuf = state->buf; /* We rely on the tsvector parser to parse the value for us */
if (gettoken_tsvector(&(state->valstate))) reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{ {
*strval = state->valstate.word; state->buf = get_weight(state->buf, weight);
*lenval = state->valstate.curpos - state->valstate.word;
state->buf = get_weight(state->valstate.prsbuf, weight);
state->state = WAITOPERATOR; state->state = WAITOPERATOR;
return VAL; return PT_VAL;
} }
else if (state->state == WAITFIRSTOPERAND) else if (state->state == WAITFIRSTOPERAND)
return END; return PT_END;
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
...@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva ...@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
} }
break; break;
case WAITOPERATOR: case WAITOPERATOR:
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')) if (t_iseq(state->buf, '&'))
{
state->state = WAITOPERAND;
*operator = OP_AND;
(state->buf)++;
return PT_OPR;
}
if (t_iseq(state->buf, '|'))
{ {
state->state = WAITOPERAND; state->state = WAITOPERAND;
*val = (int4) *(state->buf); *operator = OP_OR;
(state->buf)++; (state->buf)++;
return OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, ')')) else if (t_iseq(state->buf, ')'))
{ {
(state->buf)++; (state->buf)++;
state->count--; state->count--;
return (state->count < 0) ? ERR : CLOSE; return (state->count < 0) ? PT_ERR : PT_CLOSE;
} }
else if (*(state->buf) == '\0') else if (*(state->buf) == '\0')
return (state->count) ? ERR : END; return (state->count) ? PT_ERR : PT_END;
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
return ERR; return PT_ERR;
break; break;
case WAITSINGLEOPERAND: case WAITSINGLEOPERAND:
if (*(state->buf) == '\0') if (*(state->buf) == '\0')
return END; return PT_END;
*strval = state->buf; *strval = state->buf;
*lenval = strlen(state->buf); *lenval = strlen(state->buf);
state->buf += strlen(state->buf); state->buf += strlen(state->buf);
state->count++; state->count++;
return VAL; return PT_VAL;
default: default:
return ERR; return PT_ERR;
break; break;
} }
state->buf += pg_mblen(state->buf); state->buf += pg_mblen(state->buf);
} }
return END; return PT_END;
} }
/* /*
* push new one in polish notation reverse view * Push an operator to state->polstr
*/ */
void void
pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight) pushOperator(TSQueryParserState state, int8 oper)
{ {
ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode)); QueryOperator *tmp;
Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
tmp->type = QI_OPR;
tmp->oper = oper;
/* left is filled in later with findoprnd */
state->polstr = lcons(tmp, state->polstr);
}
static void
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
{
QueryOperand *tmp;
tmp->weight = weight;
tmp->type = type;
tmp->val = val;
if (distance >= MAXSTRPOS) if (distance >= MAXSTRPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
...@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l ...@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand is too long in tsearch query: \"%s\"", errmsg("operand is too long in tsearch query: \"%s\"",
state->buffer))); state->buffer)));
tmp->distance = distance;
tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
tmp->type = QI_VAL;
tmp->weight = weight;
tmp->valcrc = (int32) valcrc;
tmp->length = lenval; tmp->length = lenval;
tmp->next = state->str; tmp->distance = distance;
state->str = tmp;
state->num++; state->polstr = lcons(tmp, state->polstr);
} }
/* /*
* This function is used for tsquery parsing * Push an operand to state->polstr.
*
* strval must point to a string equal to state->curop. lenval is the length
* of the string.
*/ */
void void
pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight) pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
{ {
pg_crc32 c; pg_crc32 valcrc;
if (lenval >= MAXSTRLEN) if (lenval >= MAXSTRLEN)
ereport(ERROR, ereport(ERROR,
...@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int ...@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
errmsg("word is too long in tsearch query: \"%s\"", errmsg("word is too long in tsearch query: \"%s\"",
state->buffer))); state->buffer)));
INIT_CRC32(c); INIT_CRC32(valcrc);
COMP_CRC32(c, strval, lenval); COMP_CRC32(valcrc, strval, lenval);
FIN_CRC32(c); FIN_CRC32(valcrc);
pushquery(state, type, *(int4 *) &c, pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
state->curop - state->op, lenval, weight);
/* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop) while (state->curop - state->op + lenval + 1 >= state->lenop)
{ {
int4 tmp = state->curop - state->op; int used = state->curop - state->op;
state->lenop *= 2; state->lenop *= 2;
state->op = (char *) repalloc((void *) state->op, state->lenop); state->op = (char *) repalloc((void *) state->op, state->lenop);
state->curop = state->op + tmp; state->curop = state->op + used;
} }
memcpy((void *) state->curop, (void *) strval, lenval); memcpy((void *) state->curop, (void *) strval, lenval);
state->curop += lenval; state->curop += lenval;
*(state->curop) = '\0'; *(state->curop) = '\0';
state->curop++; state->curop++;
state->sumlen += lenval + 1 /* \0 */ ; state->sumlen += lenval + 1 /* \0 */ ;
return;
} }
/*
 * Push a stopword placeholder to state->polstr.
 *
 * The placeholder is a QueryOperand whose type is QI_VALSTOP. It carries no
 * value, so every other field is zeroed rather than left as whatever the
 * allocator returned; that way no uninitialized bytes can end up in a query
 * if the item is copied before the placeholder is removed.
 *
 * state: parser state whose polstr list receives the new item (prepended).
 */
void
pushStop(TSQueryParserState state)
{
	QueryOperand *tmp;

	tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
	/* zero weight, valcrc, length and distance; only type is meaningful */
	memset(tmp, 0, sizeof(QueryOperand));
	tmp->type = QI_VALSTOP;

	state->polstr = lcons(tmp, state->polstr);
}
#define STACKDEPTH 32 #define STACKDEPTH 32
/* /*
* make polish notation of query * Make polish (prefix) notation of query.
*
* See parse_tsquery for explanation of pushval.
*/ */
static int4 static void
makepol(TSQueryParserState * state, makepol(TSQueryParserState state,
void (*pushval) (TSQueryParserState *, int, char *, int, int2)) PushFunction pushval,
void *opaque)
{ {
int4 val = 0, int8 operator = 0;
type; ts_tokentype type;
int4 lenval = 0; int lenval = 0;
char *strval = NULL; char *strval = NULL;
int4 stack[STACKDEPTH]; int8 opstack[STACKDEPTH];
int4 lenstack = 0; int lenstack = 0;
int2 weight = 0; int16 weight = 0;
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END) while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
{ {
switch (type) switch (type)
{ {
case VAL: case PT_VAL:
pushval(state, VAL, strval, lenval, weight); pushval(opaque, state, strval, lenval, weight);
while (lenstack && (stack[lenstack - 1] == (int4) '&' || while (lenstack && (opstack[lenstack - 1] == OP_AND ||
stack[lenstack - 1] == (int4) '!')) opstack[lenstack - 1] == OP_NOT))
{ {
lenstack--; lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0); pushOperator(state, opstack[lenstack]);
} }
break; break;
case OPR: case PT_OPR:
if (lenstack && val == (int4) '|') if (lenstack && operator == OP_OR)
pushquery(state, OPR, val, 0, 0, 0); pushOperator(state, OP_OR);
else else
{ {
if (lenstack == STACKDEPTH) /* internal error */ if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small"); elog(ERROR, "tsquery stack too small");
stack[lenstack] = val; opstack[lenstack] = operator;
lenstack++; lenstack++;
} }
break; break;
case OPEN: case PT_OPEN:
if (makepol(state, pushval) == ERR) makepol(state, pushval, opaque);
return ERR;
if (lenstack && (stack[lenstack - 1] == (int4) '&' || if (lenstack && (opstack[lenstack - 1] == OP_AND ||
stack[lenstack - 1] == (int4) '!')) opstack[lenstack - 1] == OP_NOT))
{ {
lenstack--; lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0); pushOperator(state, opstack[lenstack]);
} }
break; break;
case CLOSE: case PT_CLOSE:
while (lenstack) while (lenstack)
{ {
lenstack--; lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0); pushOperator(state, opstack[lenstack]);
}; };
return END; return;
break; case PT_ERR:
case ERR:
default: default:
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"", errmsg("syntax error in tsearch query: \"%s\"",
state->buffer))); state->buffer)));
return ERR;
} }
} }
while (lenstack) while (lenstack)
{ {
lenstack--; lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0); pushOperator(state, opstack[lenstack]);
}; }
return END;
} }
/*
* Fills in the left-fields previously left unfilled. The input
* QueryItems must be in polish (prefix) notation.
*/
static void static void
findoprnd(QueryItem * ptr, int4 *pos) findoprnd(QueryItem *ptr, int *pos)
{ {
if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP) /* since this function recurses, it could be driven to stack overflow. */
{ check_stack_depth();
ptr[*pos].left = 0;
(*pos)++; if (ptr[*pos].type == QI_VAL ||
} ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
else if (ptr[*pos].val == (int4) '!') * they haven't been cleansed
* away yet.
*/
{ {
ptr[*pos].left = 1;
(*pos)++; (*pos)++;
findoprnd(ptr, pos);
} }
else else
{ {
QueryItem *curitem = &ptr[*pos]; Assert(ptr[*pos].type == QI_OPR);
int4 tmp = *pos;
(*pos)++; if (ptr[*pos].operator.oper == OP_NOT)
findoprnd(ptr, pos); {
curitem->left = *pos - tmp; ptr[*pos].operator.left = 1;
findoprnd(ptr, pos); (*pos)++;
findoprnd(ptr, pos);
}
else
{
QueryOperator *curitem = &ptr[*pos].operator;
int tmp = *pos;
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
(*pos)++;
findoprnd(ptr, pos);
curitem->left = *pos - tmp;
findoprnd(ptr, pos);
}
} }
} }
/* /*
* input * Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue,
* a complete expression with all operands, or a stopword placeholder
* with pushStop, otherwise the prefix notation representation will be broken,
* having an operator with no operand.
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
*
* The returned query might contain QI_VALSTOP nodes. The caller is responsible
* for cleaning them up (with clean_fakeval)
*/ */
TSQuery TSQuery
parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain) parse_tsquery(char *buf,
PushFunction pushval,
void *opaque,
bool isplain)
{ {
TSQueryParserState state; struct TSQueryParserStateData state;
int4 i; int i;
TSQuery query; TSQuery query;
int4 commonlen; int commonlen;
QueryItem *ptr; QueryItem *ptr;
ParseQueryNode *tmp; int pos = 0;
int4 pos = 0; ListCell *cell;
/* init state */ /* init state */
state.buffer = buf; state.buffer = buf;
state.buf = buf; state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0; state.count = 0;
state.num = 0; state.polstr = NIL;
state.str = NULL;
state.cfg_id = cfg_id;
/* init value parser's state */ /* init value parser's state */
state.valstate.oprisdelim = true; state.valstate = init_tsvector_parser(NULL, true);
state.valstate.len = 32;
state.valstate.word = (char *) palloc(state.valstate.len);
/* init list of operand */ /* init list of operand */
state.sumlen = 0; state.sumlen = 0;
...@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int ...@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
*(state.curop) = '\0'; *(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */ /* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval); makepol(&state, pushval, opaque);
pfree(state.valstate.word);
if (!state.num) close_tsvector_parser(state.valstate);
if (list_length(state.polstr) == 0)
{ {
ereport(NOTICE, ereport(NOTICE,
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"", (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
...@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int ...@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
return query; return query;
} }
/* make finish struct */ /* Pack the QueryItems in the final TSQuery struct to return to caller */
commonlen = COMPUTESIZE(state.num, state.sumlen); commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
query = (TSQuery) palloc(commonlen); query = (TSQuery) palloc0(commonlen);
SET_VARSIZE(query, commonlen); SET_VARSIZE(query, commonlen);
query->size = state.num; query->size = list_length(state.polstr);
ptr = GETQUERY(query); ptr = GETQUERY(query);
/* set item in polish notation */ /* Copy QueryItems to TSQuery */
for (i = 0; i < state.num; i++) i = 0;
foreach(cell, state.polstr)
{ {
ptr[i].weight = state.str->weight; QueryItem *item = (QueryItem *) lfirst(cell);
ptr[i].type = state.str->type;
ptr[i].val = state.str->val; switch(item->type)
ptr[i].distance = state.str->distance; {
ptr[i].length = state.str->length; case QI_VAL:
tmp = state.str->next; memcpy(&ptr[i], item, sizeof(QueryOperand));
pfree(state.str); break;
state.str = tmp; case QI_VALSTOP:
ptr[i].type = QI_VALSTOP;
break;
case QI_OPR:
memcpy(&ptr[i], item, sizeof(QueryOperator));
break;
default:
elog(ERROR, "unknown QueryItem type %d", item->type);
}
i++;
} }
/* set user friendly-operand view */ /* Copy all the operand strings to TSQuery */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op); pfree(state.op);
/* set left operand's position for every operator */ /* Set left operand pointers for every operator. */
pos = 0; pos = 0;
findoprnd(ptr, &pos); findoprnd(ptr, &pos);
return query; return query;
} }
static void
pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
int16 weight)
{
pushValue(state, strval, lenval, weight);
}
/* /*
* in without morphology * in without morphology
*/ */
...@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS) ...@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
pg_verifymbstr(in, strlen(in), false); pg_verifymbstr(in, strlen(in), false);
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false)); PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
} }
/* /*
...@@ -443,13 +556,14 @@ typedef struct ...@@ -443,13 +556,14 @@ typedef struct
char *buf; char *buf;
char *cur; char *cur;
char *op; char *op;
int4 buflen; int buflen;
} INFIX; } INFIX;
#define RESIZEBUF(inf,addsize) \ /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
#define RESIZEBUF(inf, addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \ { \
int4 len = (inf)->cur - (inf)->buf; \ int len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \ (inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \ (inf)->cur = (inf)->buf + len; \
...@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ ...@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
static void static void
infix(INFIX * in, bool first) infix(INFIX * in, bool first)
{ {
if (in->curpol->type == VAL) /* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
if (in->curpol->type == QI_VAL)
{ {
char *op = in->op + in->curpol->distance; QueryOperand *curpol = &in->curpol->operand;
char *op = in->op + curpol->distance;
int clen; int clen;
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\''; *(in->cur) = '\'';
in->cur++; in->cur++;
while (*op) while (*op)
...@@ -485,26 +603,26 @@ infix(INFIX * in, bool first) ...@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
} }
*(in->cur) = '\''; *(in->cur) = '\'';
in->cur++; in->cur++;
if (in->curpol->weight) if (curpol->weight)
{ {
*(in->cur) = ':'; *(in->cur) = ':';
in->cur++; in->cur++;
if (in->curpol->weight & (1 << 3)) if (curpol->weight & (1 << 3))
{ {
*(in->cur) = 'A'; *(in->cur) = 'A';
in->cur++; in->cur++;
} }
if (in->curpol->weight & (1 << 2)) if (curpol->weight & (1 << 2))
{ {
*(in->cur) = 'B'; *(in->cur) = 'B';
in->cur++; in->cur++;
} }
if (in->curpol->weight & (1 << 1)) if (curpol->weight & (1 << 1))
{ {
*(in->cur) = 'C'; *(in->cur) = 'C';
in->cur++; in->cur++;
} }
if (in->curpol->weight & 1) if (curpol->weight & 1)
{ {
*(in->cur) = 'D'; *(in->cur) = 'D';
in->cur++; in->cur++;
...@@ -513,7 +631,7 @@ infix(INFIX * in, bool first) ...@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
*(in->cur) = '\0'; *(in->cur) = '\0';
in->curpol++; in->curpol++;
} }
else if (in->curpol->val == (int4) '!') else if (in->curpol->operator.oper == OP_NOT)
{ {
bool isopr = false; bool isopr = false;
...@@ -522,13 +640,15 @@ infix(INFIX * in, bool first) ...@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
in->cur++; in->cur++;
*(in->cur) = '\0'; *(in->cur) = '\0';
in->curpol++; in->curpol++;
if (in->curpol->type == OPR)
if (in->curpol->type == QI_OPR)
{ {
isopr = true; isopr = true;
RESIZEBUF(in, 2); RESIZEBUF(in, 2);
sprintf(in->cur, "( "); sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0'); in->cur = strchr(in->cur, '\0');
} }
infix(in, isopr); infix(in, isopr);
if (isopr) if (isopr)
{ {
...@@ -539,11 +659,11 @@ infix(INFIX * in, bool first) ...@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
} }
else else
{ {
int4 op = in->curpol->val; int8 op = in->curpol->operator.oper;
INFIX nrm; INFIX nrm;
in->curpol++; in->curpol++;
if (op == (int4) '|' && !first) if (op == OP_OR && !first)
{ {
RESIZEBUF(in, 2); RESIZEBUF(in, 2);
sprintf(in->cur, "( "); sprintf(in->cur, "( ");
...@@ -564,11 +684,22 @@ infix(INFIX * in, bool first) ...@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
/* print operator & right operand */ /* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
sprintf(in->cur, " %c %s", op, nrm.buf); switch(op)
{
case OP_OR:
sprintf(in->cur, " | %s", nrm.buf);
break;
case OP_AND:
sprintf(in->cur, " & %s", nrm.buf);
break;
default:
/* OP_NOT is handled in above if-branch*/
elog(ERROR, "unexpected operator type %d", op);
}
in->cur = strchr(in->cur, '\0'); in->cur = strchr(in->cur, '\0');
pfree(nrm.buf); pfree(nrm.buf);
if (op == (int4) '|' && !first) if (op == OP_OR && !first)
{ {
RESIZEBUF(in, 2); RESIZEBUF(in, 2);
sprintf(in->cur, " )"); sprintf(in->cur, " )");
...@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS) ...@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
pq_sendint(&buf, query->size, sizeof(int32)); pq_sendint(&buf, query->size, sizeof(int32));
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
int tmp;
pq_sendint(&buf, item->type, sizeof(item->type)); pq_sendint(&buf, item->type, sizeof(item->type));
pq_sendint(&buf, item->weight, sizeof(item->weight));
pq_sendint(&buf, item->left, sizeof(item->left));
pq_sendint(&buf, item->val, sizeof(item->val));
/*
* We are sure that sizeof(WordEntry) == sizeof(int32), and about
* layout of QueryItem
*/
tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
pq_sendint(&buf, tmp, sizeof(tmp));
switch(item->type)
{
case QI_VAL:
pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
pq_sendint(&buf, item->operand.length, sizeof(int16));
/* istrue flag is just for temporary use in tsrank.c/Cover,
* so we don't need to transfer that */
break;
case QI_OPR:
pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
if (item->operator.oper != OP_NOT)
pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
break;
default:
elog(ERROR, "unknown tsquery node type %d", item->type);
}
item++; item++;
} }
item = GETQUERY(query); item = GETQUERY(query);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length); pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
item++; item++;
} }
...@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS) ...@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
TSQuery query; TSQuery query;
int i, int i,
size, size,
tmp, len;
len = HDRSIZETQ;
QueryItem *item; QueryItem *item;
int datalen = 0; int datalen = 0;
char *ptr; char *ptr;
...@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS) ...@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
size = pq_getmsgint(buf, sizeof(uint32)); size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem))) if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery"); elog(ERROR, "invalid size of tsquery");
len += sizeof(QueryItem) * size;
len = HDRSIZETQ + sizeof(QueryItem) * size;
query = (TSQuery) palloc(len); query = (TSQuery) palloc(len);
query->size = size; query->size = size;
...@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS) ...@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
item->type = (int8) pq_getmsgint(buf, sizeof(int8)); item->type = (int8) pq_getmsgint(buf, sizeof(int8));
item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
item->left = (int16) pq_getmsgint(buf, sizeof(int16)); switch(item->type)
item->val = (int32) pq_getmsgint(buf, sizeof(int32));
tmp = pq_getmsgint(buf, sizeof(int32));
memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
/*
* Sanity checks
*/
if (item->type == VAL)
{
datalen += item->length + 1; /* \0 */
}
else if (item->type == OPR)
{ {
if (item->val == '|' || item->val == '&') case QI_VAL:
{ item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
if (item->left <= 0 || i + item->left >= size) item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
elog(ERROR, "invalid pointer to left operand"); item->operand.length = pq_getmsgint(buf, sizeof(int16));
}
/*
* Check that datalen doesn't grow too large. Without the
* check, a malicious client could induce a buffer overflow
* by sending a tsquery whose size exceeds 2GB. datalen
* would overflow, we would allocate a too small buffer below,
* and overflow the buffer. Because operand.length is a 20-bit
* field, adding one such value to datalen must exceed
* MaxAllocSize before wrapping over the 32-bit datalen field,
* so this check will protect from it.
*/
if (datalen > MAXSTRLEN)
elog(ERROR, "invalid tsquery; total operand length exceeded");
/* We can calculate distance from datalen, no need to send it
* through the wire. If we did, we would have to check that
* it's valid anyway.
*/
item->operand.distance = datalen;
datalen += item->operand.length + 1; /* \0 */
if (i == size - 1) break;
elog(ERROR, "invalid pointer to right operand"); case QI_OPR:
item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
if (item->operator.oper != OP_NOT &&
item->operator.oper != OP_OR &&
item->operator.oper != OP_AND)
elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
if(item->operator.oper != OP_NOT)
{
item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
/*
* Sanity checks
*/
if (item->operator.left <= 0 || i + item->operator.left >= size)
elog(ERROR, "invalid pointer to left operand");
/* XXX: Though there's no way to construct a TSQuery that's
* not in polish notation, we don't enforce that for
* queries received from client in binary mode. Is there
* anything that relies on it?
*
* XXX: The tree could be malformed in other ways too,
* a node could have two parents, for example.
*/
}
if (i == size - 1)
elog(ERROR, "invalid pointer to right operand");
break;
default:
elog(ERROR, "unknown tsquery node type %d", item->type);
} }
else
elog(ERROR, "unknown tsquery node type");
item++; item++;
} }
...@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS) ...@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
ptr = GETOPERAND(query); ptr = GETOPERAND(query);
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
{ {
item->distance = ptr - GETOPERAND(query);
memcpy(ptr, memcpy(ptr,
pq_getmsgbytes(buf, item->length), pq_getmsgbytes(buf, item->operand.length),
item->length); item->operand.length);
ptr += item->length; ptr += item->operand.length;
*ptr++ = '\0'; *ptr++ = '\0';
} }
item++; item++;
...@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS) ...@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
INFIX nrm; INFIX nrm;
text *res; text *res;
QueryItem *q; QueryItem *q;
int4 len; int len;
if (query->size == 0) if (query->size == 0)
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -35,20 +35,23 @@ maketree(QueryItem * in) ...@@ -35,20 +35,23 @@ maketree(QueryItem * in)
node->valnode = in; node->valnode = in;
node->right = node->left = NULL; node->right = node->left = NULL;
if (in->type == OPR) if (in->type == QI_OPR)
{ {
node->right = maketree(in + 1); node->right = maketree(in + 1);
if (in->val != (int4) '!') if (in->operator.oper != OP_NOT)
node->left = maketree(in + in->left); node->left = maketree(in + in->operator.left);
} }
return node; return node;
} }
/*
* Internal state for plaintree and plainnode
*/
typedef struct typedef struct
{ {
QueryItem *ptr; QueryItem *ptr;
int4 len; int len; /* allocated size of ptr */
int4 cur; int cur; /* number of elements in ptr */
} PLAINTREE; } PLAINTREE;
static void static void
...@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node) ...@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem)); state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
} }
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem)); memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
state->cur++; state->cur++;
else if (node->valnode->val == (int4) '!') else if (node->valnode->operator.oper == OP_NOT)
{ {
state->ptr[state->cur].left = 1; state->ptr[state->cur].operator.left = 1;
state->cur++; state->cur++;
plainnode(state, node->right); plainnode(state, node->right);
} }
else else
{ {
int4 cur = state->cur; int cur = state->cur;
state->cur++; state->cur++;
plainnode(state, node->right); plainnode(state, node->right);
state->ptr[cur].left = state->cur - cur; state->ptr[cur].operator.left = state->cur - cur;
plainnode(state, node->left); plainnode(state, node->left);
} }
pfree(node); pfree(node);
} }
/* /*
* make plain view of tree from 'normal' view of tree * make plain view of tree from a NODE-tree representation
*/ */
static QueryItem * static QueryItem *
plaintree(NODE * root, int4 *len) plaintree(NODE * root, int *len)
{ {
PLAINTREE pl; PLAINTREE pl;
pl.cur = 0; pl.cur = 0;
pl.len = 16; pl.len = 16;
if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
{ {
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem)); pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
plainnode(&pl, root); plainnode(&pl, root);
...@@ -122,17 +125,17 @@ freetree(NODE * node) ...@@ -122,17 +125,17 @@ freetree(NODE * node)
static NODE * static NODE *
clean_NOT_intree(NODE * node) clean_NOT_intree(NODE * node)
{ {
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
return node; return node;
if (node->valnode->val == (int4) '!') if (node->valnode->operator.oper == OP_NOT)
{ {
freetree(node); freetree(node);
return NULL; return NULL;
} }
/* operator & or | */ /* operator & or | */
if (node->valnode->val == (int4) '|') if (node->valnode->operator.oper == OP_OR)
{ {
if ((node->left = clean_NOT_intree(node->left)) == NULL || if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL) (node->right = clean_NOT_intree(node->right)) == NULL)
...@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node) ...@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
else else
{ {
NODE *res = node; NODE *res = node;
Assert(node->valnode->operator.oper == OP_AND);
node->left = clean_NOT_intree(node->left); node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right); node->right = clean_NOT_intree(node->right);
...@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node) ...@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
} }
QueryItem * QueryItem *
clean_NOT(QueryItem * ptr, int4 *len) clean_NOT(QueryItem * ptr, int *len)
{ {
NODE *root = maketree(ptr); NODE *root = maketree(ptr);
...@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len) ...@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
#undef V_UNKNOWN #undef V_UNKNOWN
#endif #endif
#define V_UNKNOWN 0 /*
#define V_TRUE 1 * output values for result output parameter of clean_fakeval_intree
#define V_FALSE 2 */
#define V_STOP 3 #define V_UNKNOWN 0 /* the expression can't be evaluated statically */
#define V_TRUE 1 /* the expression is always true (not implemented) */
#define V_FALSE 2 /* the expression is always false (not implemented) */
#define V_STOP 3 /* the expression is a stop word */
/* /*
* Clean query tree from values which is always in * Clean query tree from values which is always in
...@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
char lresult = V_UNKNOWN, char lresult = V_UNKNOWN,
rresult = V_UNKNOWN; rresult = V_UNKNOWN;
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
return node; return node;
else if (node->valnode->type == VALSTOP) else
if (node->valnode->type == QI_VALSTOP)
{ {
pfree(node); pfree(node);
*result = V_STOP; *result = V_STOP;
return NULL; return NULL;
} }
Assert(node->valnode->type == QI_OPR);
if (node->valnode->val == (int4) '!') if (node->valnode->operator.oper == OP_NOT)
{ {
node->right = clean_fakeval_intree(node->right, &rresult); node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right) if (!node->right)
...@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
node->left = clean_fakeval_intree(node->left, &lresult); node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult); node->right = clean_fakeval_intree(node->right, &rresult);
if (lresult == V_STOP && rresult == V_STOP) if (lresult == V_STOP && rresult == V_STOP)
{ {
freetree(node); freetree(node);
...@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
} }
QueryItem * QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len) clean_fakeval(QueryItem * ptr, int *len)
{ {
NODE *root = maketree(ptr); NODE *root = maketree(ptr);
char result = V_UNKNOWN; char result = V_UNKNOWN;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS) ...@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
} }
static QTNode * static QTNode *
join_tsqueries(TSQuery a, TSQuery b) join_tsqueries(TSQuery a, TSQuery b, int8 operator)
{ {
QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
res->flags |= QTN_NEEDFREE; res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR; res->valnode->type = QI_OPR;
res->valnode->operator.oper = operator;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
...@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS) ...@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b); res = join_tsqueries(a, b, OP_AND);
res->valnode->val = '&';
query = QTN2QT(res); query = QTN2QT(res);
...@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS) ...@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b); res = join_tsqueries(a, b, OP_OR);
res->valnode->val = '|';
query = QTN2QT(res); query = QTN2QT(res);
...@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS) ...@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
res->flags |= QTN_NEEDFREE; res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR; res->valnode->type = QI_OPR;
res->valnode->val = '!'; res->valnode->operator.oper = OP_NOT;
res->child = (QTNode **) palloc0(sizeof(QTNode *)); res->child = (QTNode **) palloc0(sizeof(QTNode *));
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a)); res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
...@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a) ...@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
for (i = 0; i < a->size; i++) for (i = 0; i < a->size; i++)
{ {
if (ptr->type == VAL) if (ptr->type == QI_VAL)
sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN); sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
ptr++; ptr++;
} }
...@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS) ...@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
for (i = 0; i < ex->size; i++) for (i = 0; i < ex->size; i++)
{ {
iq = GETQUERY(query); iq = GETQUERY(query);
if (ie[i].type != VAL) if (ie[i].type != QI_VAL)
continue; continue;
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val) if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
{ {
j = query->size + 1; j = query->size + 1;
break; break;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -34,18 +34,26 @@ addone(int *counters, int last, int total) ...@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
return 1; return 1;
} }
/*
* If node is equal to ex, replace it with subs. Replacement is actually done
* by returning either node or a copy of subs.
*/
static QTNode * static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{ {
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val) if ((node->sign & ex->sign) != ex->sign ||
node->valnode->type != ex->valnode->type)
return node; return node;
if (node->flags & QTN_NOCHANGE) if (node->flags & QTN_NOCHANGE)
return node; return node;
if (node->valnode->type == OPR) if (node->valnode->type == QI_OPR)
{ {
if (node->valnode->operator.oper != ex->valnode->operator.oper)
return node;
if (node->nchild == ex->nchild) if (node->nchild == ex->nchild)
{ {
if (QTNEq(node, ex)) if (QTNEq(node, ex))
...@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
} }
else if (node->nchild > ex->nchild) else if (node->nchild > ex->nchild)
{ {
/*
* AND and OR are commutative, so we check if a subset of the
* children match. For example, if tnode is A | B | C, and
* ex is B | C, we have a match after we convert tnode to
* A | (B | C).
*/
int *counters = (int *) palloc(sizeof(int) * node->nchild); int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i; int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode)); QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
...@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
pfree(counters); pfree(counters);
} }
} }
else if (QTNEq(node, ex)) else
{ {
QTNFree(node); Assert(node->valnode->type == QI_VAL);
if (subs)
{ if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
node = QTNCopy(subs); return node;
node->flags |= QTN_NOCHANGE; else if (QTNEq(node, ex))
}
else
{ {
node = NULL; QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
{
node = NULL;
}
*isfind = true;
} }
*isfind = true;
} }
return node; return node;
...@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{ {
root = findeq(root, ex, subs, isfind); root = findeq(root, ex, subs, isfind);
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR) if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
{ {
int i; int i;
...@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root) ...@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
if (!root) if (!root)
return NULL; return NULL;
if (root->valnode->type == OPR) if (root->valnode->type == QI_OPR)
{ {
int i, int i,
j = 0; j = 0;
...@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root) ...@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
root->nchild = j; root->nchild = j;
if (root->valnode->val == (int4) '!' && root->nchild == 0) if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
{ {
QTNFree(root); QTNFree(root);
root = NULL; root = NULL;
...@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS) ...@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
elog(ERROR, "array must be one-dimensional, not %d dimensions", elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa)); ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3) if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements"); elog(ERROR, "array must have three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID) if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type"); elog(ERROR, "array must contain tsquery elements");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp); deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
...@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS) ...@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
tree = findsubquery(tree, qex, subs, NULL); tree = findsubquery(tree, qex, subs, NULL);
QTNFree(qex); QTNFree(qex);
QTNFree(subs); QTNFree(subs);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
QTNode * QTNode *
QT2QTN(QueryItem * in, char *operand) QT2QTN(QueryItem * in, char *operand)
{ {
...@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand) ...@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
node->valnode = in; node->valnode = in;
if (in->type == OPR) if (in->type == QI_OPR)
{ {
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
node->child[0] = QT2QTN(in + 1, operand); node->child[0] = QT2QTN(in + 1, operand);
node->sign = node->child[0]->sign; node->sign = node->child[0]->sign;
if (in->val == (int4) '!') if (in->operator.oper == OP_NOT)
node->nchild = 1; node->nchild = 1;
else else
{ {
node->nchild = 2; node->nchild = 2;
node->child[1] = QT2QTN(in + in->left, operand); node->child[1] = QT2QTN(in + in->operator.left, operand);
node->sign |= node->child[1]->sign; node->sign |= node->child[1]->sign;
} }
} }
else if (operand) else if (operand)
{ {
node->word = operand + in->distance; node->word = operand + in->operand.distance;
node->sign = 1 << (in->val % 32); node->sign = 1 << (in->operand.valcrc % 32);
} }
return node; return node;
...@@ -54,14 +53,14 @@ QTNFree(QTNode * in) ...@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
if (!in) if (!in)
return; return;
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0) if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word); pfree(in->word);
if (in->child) if (in->child)
{ {
if (in->valnode) if (in->valnode)
{ {
if (in->valnode->type == OPR && in->nchild > 0) if (in->valnode->type == QI_OPR && in->nchild > 0)
{ {
int i; int i;
...@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn) ...@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
{ {
if (an->valnode->type != bn->valnode->type) if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1; return (an->valnode->type > bn->valnode->type) ? -1 : 1;
else if (an->valnode->val != bn->valnode->val)
return (an->valnode->val > bn->valnode->val) ? -1 : 1; if (an->valnode->type == QI_OPR)
else if (an->valnode->type == VAL)
{
if (an->valnode->length == bn->valnode->length)
return strncmp(an->word, bn->word, an->valnode->length);
else
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
}
else if (an->nchild != bn->nchild)
{ {
return (an->nchild > bn->nchild) ? -1 : 1; QueryOperator *ao = &an->valnode->operator;
QueryOperator *bo = &bn->valnode->operator;
if(ao->oper != bo->oper)
return (ao->oper > bo->oper) ? -1 : 1;
if (an->nchild != bn->nchild)
return (an->nchild > bn->nchild) ? -1 : 1;
{
int i,
res;
for (i = 0; i < an->nchild; i++)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
return 0;
} }
else else
{ {
int i, QueryOperand *ao = &an->valnode->operand;
res; QueryOperand *bo = &bn->valnode->operand;
for (i = 0; i < an->nchild; i++) Assert(an->valnode->type == QI_VAL);
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res; if (ao->valcrc != bo->valcrc)
} {
return (ao->valcrc > bo->valcrc) ? -1 : 1;
}
return 0; if (ao->length == bo->length)
return strncmp(an->word, bn->word, ao->length);
else
return (ao->length > bo->length) ? -1 : 1;
}
} }
static int static int
...@@ -119,7 +133,7 @@ QTNSort(QTNode * in) ...@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b) ...@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
return (QTNodeCompare(a, b) == 0) ? true : false; return (QTNodeCompare(a, b) == 0) ? true : false;
} }
/*
* Remove unnecessary intermediate nodes. For example:
*
* OR OR
* a OR -> a b c
* b c
*/
void void
QTNTernary(QTNode * in) QTNTernary(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -152,9 +173,10 @@ QTNTernary(QTNode * in) ...@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
{ {
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val) QTNode *cc = in->child[i];
if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
{ {
QTNode *cc = in->child[i];
int oldnchild = in->nchild; int oldnchild = in->nchild;
in->nchild += cc->nchild - 1; in->nchild += cc->nchild - 1;
...@@ -167,17 +189,23 @@ QTNTernary(QTNode * in) ...@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
i += cc->nchild - 1; i += cc->nchild - 1;
if(cc->flags & QTN_NEEDFREE)
pfree(cc->valnode);
pfree(cc); pfree(cc);
} }
} }
} }
/*
* Convert a tree to binary tree by inserting intermediate nodes.
* (Opposite of QTNTernary)
*/
void void
QTNBinary(QTNode * in) QTNBinary(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -201,7 +229,7 @@ QTNBinary(QTNode * in) ...@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
nn->sign = nn->child[0]->sign | nn->child[1]->sign; nn->sign = nn->child[0]->sign | nn->child[1]->sign;
nn->valnode->type = in->valnode->type; nn->valnode->type = in->valnode->type;
nn->valnode->val = in->valnode->val; nn->valnode->operator.oper = in->valnode->operator.oper;
in->child[0] = nn; in->child[0] = nn;
in->child[1] = in->child[in->nchild - 1]; in->child[1] = in->child[in->nchild - 1];
...@@ -209,11 +237,15 @@ QTNBinary(QTNode * in) ...@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
} }
} }
/*
* Count the total length of operand string in tree, including '\0'-
* terminators.
*/
static void static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode) cntsize(QTNode * in, int *sumlen, int *nnode)
{ {
*nnode += 1; *nnode += 1;
if (in->valnode->type == OPR) if (in->valnode->type == QI_OPR)
{ {
int i; int i;
...@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode) ...@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
} }
else else
{ {
*sumlen += in->valnode->length + 1; *sumlen += in->valnode->operand.length + 1;
} }
} }
...@@ -234,22 +266,26 @@ typedef struct ...@@ -234,22 +266,26 @@ typedef struct
} QTN2QTState; } QTN2QTState;
static void static void
fillQT(QTN2QTState * state, QTNode * in) fillQT(QTN2QTState *state, QTNode *in)
{ {
*(state->curitem) = *(in->valnode); if (in->valnode->type == QI_VAL)
if (in->valnode->type == VAL)
{ {
memcpy(state->curoperand, in->word, in->valnode->length); memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
state->curitem->distance = state->curoperand - state->operand;
state->curoperand[in->valnode->length] = '\0'; memcpy(state->curoperand, in->word, in->valnode->operand.length);
state->curoperand += in->valnode->length + 1; state->curitem->operand.distance = state->curoperand - state->operand;
state->curoperand[in->valnode->operand.length] = '\0';
state->curoperand += in->valnode->operand.length + 1;
state->curitem++; state->curitem++;
} }
else else
{ {
QueryItem *curitem = state->curitem; QueryItem *curitem = state->curitem;
Assert(in->valnode->type == QI_OPR);
memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
Assert(in->nchild <= 2); Assert(in->nchild <= 2);
state->curitem++; state->curitem++;
...@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in) ...@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
if (in->nchild == 2) if (in->nchild == 2)
{ {
curitem->left = state->curitem - curitem; curitem->operator.left = state->curitem - curitem;
fillQT(state, in->child[1]); fillQT(state, in->child[1]);
} }
} }
...@@ -296,11 +332,11 @@ QTNCopy(QTNode *in) ...@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
*(out->valnode) = *(in->valnode); *(out->valnode) = *(in->valnode);
out->flags |= QTN_NEEDFREE; out->flags |= QTN_NEEDFREE;
if (in->valnode->type == VAL) if (in->valnode->type == QI_VAL)
{ {
out->word = palloc(in->valnode->length + 1); out->word = palloc(in->valnode->operand.length + 1);
memcpy(out->word, in->word, in->valnode->length); memcpy(out->word, in->word, in->valnode->operand.length);
out->word[in->valnode->length] = '\0'; out->word[in->valnode->operand.length] = '\0';
out->flags |= QTN_WORDFREE; out->flags |= QTN_WORDFREE;
} }
else else
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -68,7 +68,7 @@ cnt_length(TSVector t) ...@@ -68,7 +68,7 @@ cnt_length(TSVector t)
} }
static int4 static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item) WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{ {
if (ptr->len == item->length) if (ptr->len == item->length)
return strncmp( return strncmp(
...@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item) ...@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
} }
static WordEntry * static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item) find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{ {
WordEntry *StopLow = ARRPTR(t); WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t); WordEntry *StopHigh = (WordEntry *) STRPTR(t);
...@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item) ...@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
} }
/*
* sort QueryOperands by (length, word)
*/
static int static int
compareQueryItem(const void *a, const void *b, void *arg) compareQueryOperand(const void *a, const void *b, void *arg)
{ {
char *operand = (char *) arg; char *operand = (char *) arg;
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length) if (qa->length == qb->length)
return strncmp(operand + (*(QueryItem **) a)->distance, return strncmp(operand + qa->distance,
operand + (*(QueryItem **) b)->distance, operand + qb->distance,
(*(QueryItem **) b)->length); qb->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1; return (qa->length > qb->length) ? 1 : -1;
} }
static QueryItem ** /*
SortAndUniqItems(char *operand, QueryItem * item, int *size) * Returns a sorted, de-duplicated array of QueryOperands in a query.
* The returned QueryOperands are pointers to the original QueryOperands
* in the query.
*
* Length of the returned array is stored in *size
*/
static QueryOperand **
SortAndUniqItems(TSQuery q, int *size)
{ {
QueryItem **res, char *operand = GETOPERAND(q);
QueryItem * item = GETQUERY(q);
QueryOperand **res,
**ptr, **ptr,
**prevptr; **prevptr;
ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size); ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
/* Collect all operands from the tree to res */
while ((*size)--) while ((*size)--)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
{ {
*ptr = item; *ptr = (QueryOperand *) item;
ptr++; ptr++;
} }
item++; item++;
...@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size) ...@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
if (*size < 2) if (*size < 2)
return res; return res;
qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand); qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
ptr = res + 1; ptr = res + 1;
prevptr = res; prevptr = res;
/* remove duplicates */
while (ptr - res < *size) while (ptr - res < *size)
{ {
if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0) if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{ {
prevptr++; prevptr++;
*prevptr = *ptr; *prevptr = *ptr;
...@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q) ...@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
lenct, lenct,
dist; dist;
float res = -1.0; float res = -1.0;
QueryItem **item; QueryOperand **item;
int size = q->size; int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); item = SortAndUniqItems(q, &size);
if (size < 2) if (size < 2)
{ {
pfree(item); pfree(item);
...@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q) ...@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
j, j,
i; i;
float res = 0.0; float res = 0.0;
QueryItem **item; QueryOperand **item;
int size = q->size; int size = q->size;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1; *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
...@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method) ...@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
if (!t->size || !q->size) if (!t->size || !q->size)
return 0.0; return 0.0;
res = (item->type != VAL && item->val == (int4) '&') ? /* XXX: What about NOT? */
res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
calc_rank_and(w, t, q) : calc_rank_or(w, t, q); calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0) if (res < 0)
...@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b) ...@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
} }
static bool static bool
checkcondition_QueryItem(void *checkval, QueryItem * val) checkcondition_QueryOperand(void *checkval, QueryOperand *val)
{ {
return (bool) (val->istrue); return (bool) (val->istrue);
} }
...@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query) ...@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
/* reset istrue flag */ /* reset istrue flag */
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
item->istrue = 0; item->operand.istrue = 0;
item++; item++;
} }
} }
...@@ -484,7 +501,7 @@ typedef struct ...@@ -484,7 +501,7 @@ typedef struct
static bool static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
{ {
DocRepresentation *ptr; DocRepresentation *ptr;
int lastpos = ext->pos; int lastpos = ext->pos;
...@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) ...@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr - doc < len) while (ptr - doc < len)
{ {
for (i = 0; i < ptr->nitem; i++) for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1; {
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem)) if(ptr->item[i]->type == QI_VAL)
ptr->item[i]->operand.istrue = 1;
}
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
{ {
if (ptr->pos > ext->q) if (ptr->pos > ext->q)
{ {
...@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) ...@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr >= doc + ext->pos) while (ptr >= doc + ext->pos)
{ {
for (i = 0; i < ptr->nitem; i++) for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1; if(ptr->item[i]->type == QI_VAL) /* XXX */
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem)) ptr->item[i]->operand.istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{ {
if (ptr->pos < ext->p) if (ptr->pos < ext->p)
{ {
...@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item[i].type != VAL || item[i].istrue) QueryOperand *curoperand;
if (item[i].type != QI_VAL)
continue;
curoperand = &item[i].operand;
if(item[i].operand.istrue)
continue; continue;
entry = find_wordentry(txt, query, &(item[i])); entry = find_wordentry(txt, query, curoperand);
if (!entry) if (!entry)
continue; continue;
...@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
{ {
if (j == 0) if (j == 0)
{ {
QueryItem *kptr,
*iptr = item + i;
int k; int k;
doc[cur].needfree = false; doc[cur].needfree = false;
...@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (k = 0; k < query->size; k++) for (k = 0; k < query->size; k++)
{ {
kptr = item + k; QueryOperand *kptr = &item[k].operand;
QueryOperand *iptr = &item[i].operand;
if (k == i || if (k == i ||
(item[k].type == VAL && (item[k].type == QI_VAL &&
compareQueryItem(&kptr, &iptr, operand) == 0)) compareQueryOperand(&kptr, &iptr, operand) == 0))
{ {
/* if k == i, we've already checked above that its type == QI_VAL */
doc[cur].item[doc[cur].nitem] = item + k; doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++; doc[cur].nitem++;
kptr->istrue = 1; item[k].operand.istrue = 1;
} }
} }
} }
...@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
if (cur > 0) if (cur > 0)
{ {
if (cur > 1) qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
return doc; return doc;
} }
...@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS) ...@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
{ {
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1); TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(3); int method = PG_GETARG_INT32(3);
float res; float res;
...@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS) ...@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
{ {
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1); TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
float res; float res;
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
...@@ -778,7 +806,7 @@ Datum ...@@ -778,7 +806,7 @@ Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS) ts_rankcd_ttf(PG_FUNCTION_ARGS)
{ {
TSVector txt = PG_GETARG_TSVECTOR(0); TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(2); int method = PG_GETARG_INT32(2);
float res; float res;
...@@ -793,7 +821,7 @@ Datum ...@@ -793,7 +821,7 @@ Datum
ts_rankcd_tt(PG_FUNCTION_ARGS) ts_rankcd_tt(PG_FUNCTION_ARGS)
{ {
TSVector txt = PG_GETARG_TSVECTOR(0); TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
float res; float res;
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,22 +20,37 @@ ...@@ -20,22 +20,37 @@
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
#include "utils/memutils.h" #include "utils/memutils.h"
typedef struct
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
int poslen; /* number of elements in pos */
} WordEntryIN;
static int static int
comparePos(const void *a, const void *b) comparePos(const void *a, const void *b)
{ {
if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b)) int apos = WEP_GETPOS(*(WordEntryPos *) a);
int bpos = WEP_GETPOS(*(WordEntryPos *) b);
if (apos == bpos)
return 0; return 0;
return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1; return (apos > bpos) ? 1 : -1;
} }
/*
* Removes duplicate pos entries. If there are two entries with the same pos
* but different weights, the higher weight is retained.
*
* Returns new length.
*/
static int static int
uniquePos(WordEntryPos * a, int4 l) uniquePos(WordEntryPos * a, int l)
{ {
WordEntryPos *ptr, WordEntryPos *ptr,
*res; *res;
if (l == 1) if (l <= 1)
return l; return l;
res = a; res = a;
...@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg) ...@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg)
} }
static int static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
{ {
WordEntryIN *ptr, WordEntryIN *ptr,
*res; *res;
res = a; Assert(l >= 1);
if (l == 1) if (l == 1)
{ {
if (a->entry.haspos) if (a->entry.haspos)
{ {
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos)); a->poslen = uniquePos(a->pos, a->poslen);
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos); *outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
} }
return l; return l;
} }
res = a;
ptr = a + 1; ptr = a + 1;
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf); qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
...@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) ...@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{ {
if (res->entry.haspos) if (res->entry.haspos)
{ {
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); *outbuflen += res->poslen * sizeof(WordEntryPos);
} }
*outbuflen += SHORTALIGN(res->entry.len); *outbuflen += SHORTALIGN(res->entry.len);
res++; res++;
...@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) ...@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{ {
if (res->entry.haspos) if (res->entry.haspos)
{ {
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos); int newlen = ptr->poslen + res->poslen;
/* Append ptr's positions to the end of res's position array */
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos)); res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]), memcpy(&res->pos[res->poslen],
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos)); ptr->pos, ptr->poslen * sizeof(WordEntryPos));
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos); res->poslen = newlen;
pfree(ptr->pos); pfree(ptr->pos);
} }
else else
...@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) ...@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
} }
if (res->entry.haspos) if (res->entry.haspos)
{ {
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); *outbuflen += res->poslen * sizeof(WordEntryPos);
} }
*outbuflen += SHORTALIGN(res->entry.len); *outbuflen += SHORTALIGN(res->entry.len);
...@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf) ...@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
return compareentry(a, b, buf); return compareentry(a, b, buf);
} }
/*
 * States of the gettoken_tsvector() state machine (stored in state->state):
 *
 * WAITWORD      - initial state: skipping whitespace before a token
 * WAITENDWORD   - inside an unquoted word
 * WAITNEXTCHAR  - previous character was a backslash; the next character
 *                 is copied literally and the saved state is restored
 * WAITENDCMPLX  - inside a single-quoted word
 * WAITPOSINFO   - quoted word has ended; a ':' may introduce position info
 * INPOSINFO     - a position number (digit) is expected
 * WAITPOSDELIM  - after a position number: ',', a weight letter, or the
 *                 end of the token is expected
 * WAITCHARCMPLX - a quote was seen inside a quoted word; a second quote
 *                 means a literal quote, anything else ends the word
 */
#define WAITWORD 1
#define WAITENDWORD 2
#define WAITNEXTCHAR 3
#define WAITENDCMPLX 4
#define WAITPOSINFO 5
#define INPOSINFO 6
#define WAITPOSDELIM 7
#define WAITCHARCMPLX 8
/*
 * Make sure the word buffer has room for one more character.
 *
 * Expects a variable named "state" in the expanding scope. If fewer than
 * pg_database_encoding_max_length() bytes remain between state->curpos and
 * the end of the state->len-byte buffer at state->word, the buffer size is
 * doubled with repalloc() and state->curpos is re-pointed at the same
 * offset inside the (possibly moved) buffer.
 */
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
/*
 * Read the next token from state->prsbuf.
 *
 * The input is consumed one (possibly multibyte) character at a time by a
 * state machine that starts in WAITWORD. The token text is accumulated in
 * state->word (state->curpos points past the last byte written) and is
 * NUL-terminated before returning true. Position info following a ':' is
 * collected in state->pos; element 0 of that array is used as a uint16
 * counter of the entries stored after it. state->alen is reset to 0 on
 * entry and is set non-zero only when a position array is allocated.
 *
 * Returns true when a token has been read, and false when the end of the
 * input is reached while still waiting for a token to start. Malformed
 * input is reported with ereport(ERROR).
 */
bool
gettoken_tsvector(TSVectorParseState *state)
{
/* state to return to after a backslash-escaped character is copied */
int4 oldstate = 0;
state->curpos = state->word;
state->state = WAITWORD;
state->alen = 0;
while (1)
{
if (state->state == WAITWORD)
{
/* end of input before any token started: nothing more to return */
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
state->state = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
/* whitespace is skipped; any other character starts an unquoted word */
else if (!t_isspace(state->prsbuf))
{
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDWORD;
}
}
else if (state->state == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
/* copy the escaped character as-is and resume the previous state */
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = oldstate;
}
}
else if (state->state == WAITENDWORD)
{
if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
/* whitespace, end of input, or (if oprisdelim) an operator ends the word */
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
RESIZEPRSBUF;
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
return true;
}
/* ':' also ends the word; position info follows unless oprisdelim is set */
else if (t_iseq(state->prsbuf, ':'))
{
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
if (state->oprisdelim)
return true;
else
state->state = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITENDCMPLX)
{
/* a quote may close the word or be the first half of a doubled quote */
if (t_iseq(state->prsbuf, '\''))
{
state->state = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
/* input ended inside a quoted word */
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITCHARCMPLX)
{
/* doubled quote: store one literal quote and stay inside the quoted word */
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDCMPLX;
}
else
{
/* the previous quote closed the word; an empty quoted word is an error */
RESIZEPRSBUF;
*(state->curpos) = '\0';
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
return true;
}
else
state->state = WAITPOSINFO;
/* skips the pointer advance at the bottom of the loop */
continue; /* recheck current character */
}
}
else if (state->state == WAITPOSINFO)
{
if (t_iseq(state->prsbuf, ':'))
state->state = INPOSINFO;
else
return true;
}
else if (state->state == INPOSINFO)
{
if (t_isdigit(state->prsbuf))
{
/* first position for this token: allocate the array, count (slot 0) = 0 */
if (state->alen == 0)
{
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
/* no free slot for the next entry: double the array */
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
/* bump the count, then fill in the new last entry */
(*(uint16 *) (state->pos))++;
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
/* a stored position of 0 is rejected */
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
state->state = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (state->state == WAITPOSDELIM)
{
/*
 * After a position number: ',' starts another position; a weight
 * letter sets the weight of the last position ('a'/'A'/'*' = 3,
 * 'b'/'B' = 2, 'c'/'C' = 1, 'd'/'D' = 0) and is an error if a
 * non-zero weight was already set; whitespace or end of input ends
 * the token. Digits fall through with no action (presumably the
 * remaining digits of the number already read by atoi() above).
 */
if (t_iseq(state->prsbuf, ','))
state->state = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
return true;
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
/* not reached by the loop above, which only exits via return or an error report */
return false;
}
Datum Datum
tsvectorin(PG_FUNCTION_ARGS) tsvectorin(PG_FUNCTION_ARGS)
...@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS)
char *buf = PG_GETARG_CSTRING(0); char *buf = PG_GETARG_CSTRING(0);
TSVectorParseState state; TSVectorParseState state;
WordEntryIN *arr; WordEntryIN *arr;
int totallen;
int arrlen; /* allocated size of arr */
WordEntry *inarr; WordEntry *inarr;
int4 len = 0, int len = 0;
totallen = 64;
TSVector in; TSVector in;
char *tmpbuf, int i;
*cur; char *token;
int4 i, int toklen;
buflen = 256; WordEntryPos *pos;
int poslen;
/*
* Tokens are appended to tmpbuf, cur is a pointer
* to the end of used space in tmpbuf.
*/
char *tmpbuf;
char *cur;
int buflen = 256; /* allocated size of tmpbuf */
pg_verifymbstr(buf, strlen(buf), false); pg_verifymbstr(buf, strlen(buf), false);
state.prsbuf = buf;
state.len = 32;
state.word = (char *) palloc(state.len);
state.oprisdelim = false;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); state = init_tsvector_parser(buf, false);
arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
cur = tmpbuf = (char *) palloc(buflen); cur = tmpbuf = (char *) palloc(buflen);
while (gettoken_tsvector(&state)) while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{ {
/*
* Realloc buffers if it's needed
*/
if (len >= totallen)
{
totallen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
}
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
{
int4 dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
if (state.curpos - state.word >= MAXSTRLEN) if (toklen >= MAXSTRLEN)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long (%ld bytes, max %ld bytes)", errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) (state.curpos - state.word), (long) toklen,
(long) MAXSTRLEN))); (long) MAXSTRLEN)));
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS) if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large"))); errmsg("position value too large")));
/*
* Enlarge buffers if needed
*/
if (len >= arrlen)
{
arrlen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
}
while ((cur - tmpbuf) + toklen >= buflen)
{
int dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
arr[len].entry.len = toklen;
arr[len].entry.pos = cur - tmpbuf; arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len); memcpy((void *) cur, (void *) token, toklen);
cur += arr[len].entry.len; cur += toklen;
if (state.alen) if (poslen != 0)
{ {
arr[len].entry.haspos = 1; arr[len].entry.haspos = 1;
arr[len].pos = state.pos; arr[len].pos = pos;
arr[len].poslen = poslen;
} }
else else
arr[len].entry.haspos = 0; arr[len].entry.haspos = 0;
len++; len++;
} }
pfree(state.word);
close_tsvector_parser(state);
if (len > 0) if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen); len = uniqueentry(arr, len, tmpbuf, &buflen);
...@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS)
cur += SHORTALIGN(arr[i].entry.len); cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos) if (arr[i].entry.haspos)
{ {
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); uint16 tmplen;
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
if(arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long");
tmplen = (uint16) arr[i].poslen;
/* Copy length to output struct */
memcpy(cur, &tmplen, sizeof(uint16));
cur += sizeof(uint16);
/* Copy positions */
memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
cur += arr[i].poslen * sizeof(WordEntryPos);
pfree(arr[i].pos); pfree(arr[i].pos);
} }
inarr[i] = arr[i].entry; inarr[i] = arr[i].entry;
...@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS) ...@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS)
{ {
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec; TSVector vec;
int i, int i;
size, uint32 size;
len = DATAHDRSIZE;
WordEntry *weptr; WordEntry *weptr;
int datalen = 0; int datalen = 0;
Size len;
size = pq_getmsgint(buf, sizeof(uint32)); size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry))) if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
elog(ERROR, "invalid size of tsvector"); elog(ERROR, "invalid size of tsvector");
len += sizeof(WordEntry) * size; len = DATAHDRSIZE + sizeof(WordEntry) * size;
len *= 2; len = len * 2; /* times two to make room for lexemes */
vec = (TSVector) palloc0(len); vec = (TSVector) palloc0(len);
vec->size = size; vec->size = size;
weptr = ARRPTR(vec); weptr = ARRPTR(vec);
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
int tmp; int32 tmp;
weptr = ARRPTR(vec) + i; weptr = ARRPTR(vec) + i;
...@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS) ...@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
npos; npos;
WordEntryPos *wepptr; WordEntryPos *wepptr;
npos = (uint16) pq_getmsgint(buf, sizeof(int16)); npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
if (npos > MAXNUMPOS) if (npos > MAXNUMPOS)
elog(ERROR, "unexpected number of positions"); elog(ERROR, "unexpected number of positions");
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,6 +66,9 @@ typedef struct ...@@ -66,6 +66,9 @@ typedef struct
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
/*
* Order: haspos, len, word, for all positions (pos, weight)
*/
static int static int
silly_cmp_tsvector(const TSVector a, const TSVector b) silly_cmp_tsvector(const TSVector a, const TSVector b)
{ {
...@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
* compare 2 string values * compare 2 string values
*/ */
static int4 static int4
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item) ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
{ {
if (ptr->len == item->length) if (ptr->len == item->length)
return strncmp( return strncmp(
...@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item) ...@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
* check weight info * check weight info
*/ */
static bool static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item) checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{ {
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16)); WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len))); uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
...@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item) ...@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
* is there value 'val' in array or not ? * is there value 'val' in array or not ?
*/ */
static bool static bool
checkcondition_str(void *checkval, QueryItem * val) checkcondition_str(void *checkval, QueryOperand * val)
{ {
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle; WordEntry *StopMiddle;
int difference; int difference;
...@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val) ...@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
while (StopLow < StopHigh) while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + (StopHigh - StopLow) / 2; StopMiddle = StopLow + (StopHigh - StopLow) / 2;
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); difference = ValCompare(chkval, StopMiddle, val);
if (difference == 0) if (difference == 0)
return (val->weight && StopMiddle->haspos) ? return (val->weight && StopMiddle->haspos) ?
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true; checkclass_str(chkval, StopMiddle, val) : true;
else if (difference < 0) else if (difference < 0)
StopLow = StopMiddle + 1; StopLow = StopMiddle + 1;
else else
...@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val) ...@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
} }
/* /*
* check for boolean condition * check for boolean condition.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query.
*
*/ */
bool bool
TS_execute(QueryItem * curitem, void *checkval, bool calcnot, TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val)) bool (*chkcond) (void *checkval, QueryOperand * val))
{ {
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
if (curitem->type == VAL) if (curitem->type == QI_VAL)
return chkcond(checkval, curitem); return chkcond(checkval, (QueryOperand *) curitem);
else if (curitem->val == (int4) '!')
{ switch(curitem->operator.oper)
return (calcnot) ?
!TS_execute(curitem + 1, checkval, calcnot, chkcond)
: true;
}
else if (curitem->val == (int4) '&')
{ {
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) case OP_NOT:
return TS_execute(curitem + 1, checkval, calcnot, chkcond); if (calcnot)
else return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
return false; else
} return true;
else case OP_AND:
{ /* |-operator */ if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) return TS_execute(curitem + 1, checkval, calcnot, chkcond);
return true; else
else return false;
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
case OP_OR:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
default:
elog(ERROR, "unknown operator %d", curitem->operator.oper);
} }
/* not reachable, but keep compiler quiet */
return false; return false;
} }
......
/*-------------------------------------------------------------------------
*
* tsvector_parser.c
* Parser for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * Private state of a tsvector parser. Created by init_tsvector_parser(),
 * released by close_tsvector_parser().
 */
struct TSVectorParseStateData
{
/* next unread character of the input; advanced as gettoken_tsvector scans */
char *prsbuf;
char *word; /* buffer to hold the current word */
int len; /* size in bytes allocated for 'word' */
/* if true, ! | & ( ) end a word in addition to whitespace */
bool oprisdelim;
};
/*
 * Allocate and set up a parser that will scan 'input'.
 *
 * The word buffer starts out at 32 bytes. When 'oprisdelim' is true the
 * characters ! | & ( ) act as word delimiters, just like whitespace does.
 * The returned state must be released with close_tsvector_parser().
 */
TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim)
{
	TSVectorParseState parser = (TSVectorParseState) palloc(sizeof(*parser));

	parser->oprisdelim = oprisdelim;
	parser->prsbuf = input;
	parser->len = 32;
	parser->word = (char *) palloc(parser->len);

	return parser;
}
/*
 * Point an existing parser at a new input string.
 *
 * Only the scan position is replaced; the word buffer and the oprisdelim
 * setting are left as they are.
 */
void
reset_tsvector_parser(TSVectorParseState state, char *input)
{
	state->prsbuf = input;
}
/*
 * Release a parser created by init_tsvector_parser(): first its word
 * buffer, then the state struct itself.
 */
void
close_tsvector_parser(TSVectorParseState state)
{
	char	   *wordbuf = state->word;

	pfree(wordbuf);
	pfree(state);
}
/*
 * Ensure the word buffer can take one more character of the maximum
 * encoded length at 'curpos'; if not, double the buffer and re-point
 * 'curpos' into the reallocated memory.
 *
 * Expects 'state' and 'curpos' to be in scope at the expansion site.
 */
#define RESIZEPRSBUF \
do { \
	int		used_ = curpos - state->word; \
	\
	if (used_ + pg_database_encoding_max_length() >= state->len) \
	{ \
		state->len *= 2; \
		state->word = (char *) repalloc((void *) state->word, state->len); \
		curpos = state->word + used_; \
	} \
} while (0)
/*
 * True if the character at x is a single-byte one and is one of ! & | ( ).
 * Note that x is evaluated more than once.
 */
#define ISOPERATOR(x) \
	( pg_mblen(x)==1 && \
	  ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/*
 * Hand the collected token back to the caller and return true from the
 * enclosing function.
 *
 * The position array goes to *pos_ptr / *poslen when the caller supplied
 * pos_ptr; otherwise any array that was built is freed here. strval, lenval
 * and endptr are each filled in only if non-NULL.
 *
 * Expects state, curpos, pos, npos and the output parameters to be in scope.
 */
#define RETURN_TOKEN \
do { \
	if (pos_ptr == NULL) \
	{ \
		if (pos != NULL) \
			pfree(pos); \
	} \
	else \
	{ \
		*pos_ptr = pos; \
		*poslen = npos; \
	} \
	\
	if (strval != NULL) \
		*strval = state->word; \
	if (lenval != NULL) \
		*lenval = curpos - state->word; \
	if (endptr != NULL) \
		*endptr = state->prsbuf; \
	return true; \
} while(0)
/* State codes used in gettoken_tsvector */
enum
{
	WAITWORD = 1,		/* looking for the first character of a word */
	WAITENDWORD = 2,	/* inside an unquoted word */
	WAITNEXTCHAR = 3,	/* just saw a backslash; next char is taken as-is */
	WAITENDCMPLX = 4,	/* inside a quoted word */
	WAITPOSINFO = 5,	/* quoted word ended; ':' would start positions */
	INPOSINFO = 6,		/* expecting the first digit of a position */
	WAITPOSDELIM = 7,	/* after a position: ',', weight letter or end */
	WAITCHARCMPLX = 8	/* saw a quote inside a quoted word */
};
/*
 * Get next token from string being parsed. Returns false if
 * end of input string is reached before any token starts; otherwise
 * returns true and fills in the output parameters (each of strval,
 * lenval and endptr is filled in only if it is not NULL):
 *
 * *strval		token, '\0'-terminated. It points into the parser's
 *				internal word buffer, which is reused by the next call.
 * *lenval		length of *strval in bytes
 * *pos_ptr		pointer to a palloc'd array of positions and weights
 *				associated with the token, or NULL if the token had no
 *				position list. If the caller is not interested
 *				in the information, NULL can be supplied. Otherwise
 *				the caller is responsible for pfreeing the array.
 * *poslen		number of elements in *pos_ptr. Written whenever pos_ptr
 *				is not NULL, so it must be a valid pointer in that case.
 * *endptr		the parser's scan position in the input at the moment
 *				the token is returned
 *
 * Syntax errors in the input are reported with ereport(ERROR).
 */
bool
gettoken_tsvector(TSVectorParseState state,
char **strval, int *lenval,
WordEntryPos **pos_ptr, int *poslen,
char **endptr)
{
/* state to return to after an escaped character has been copied */
int oldstate = 0;
/* write position in state->word; every call starts at the buffer's start */
char *curpos = state->word;
int statecode = WAITWORD;
/*
 * pos collects the comma-delimited list of positions that follows the
 * token. It stays NULL until the first position is seen.
 */
WordEntryPos *pos = NULL;
int npos = 0; /* elements of pos used */
int posalen = 0; /* allocated size of pos */
/*
 * One iteration per input character. Each state either returns, raises
 * an error, or falls through to the "get next char" step at the bottom.
 */
while (1)
{
if (statecode == WAITWORD)
{
/* whitespace is skipped here by falling through to the advance step */
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf))
{
/* first character of an unquoted word */
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDWORD;
}
}
else if (statecode == WAITNEXTCHAR)
{
/* character after a backslash: copied literally, whatever it is */
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
Assert(oldstate != 0);
statecode = oldstate;
}
}
else if (statecode == WAITENDWORD)
{
if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
/*
 * End of an unquoted word. The delimiter itself is not consumed:
 * RETURN_TOKEN returns before the advance step below.
 */
RESIZEPRSBUF;
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0';
RETURN_TOKEN;
}
else if (t_iseq(state->prsbuf, ':'))
{
/*
 * ':' ends the word. With oprisdelim the token is returned right
 * away (the ':' is left unconsumed); otherwise a position list
 * must follow.
 */
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0';
if (state->oprisdelim)
RETURN_TOKEN;
else
statecode = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
else if (statecode == WAITENDCMPLX)
{
/* inside a quoted word */
if (t_iseq(state->prsbuf, '\''))
{
statecode = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
}
}
else if (statecode == WAITCHARCMPLX)
{
/*
 * A quote was seen inside a quoted word. A second quote is a doubled
 * (literal) quote; anything else means the first quote closed the word.
 */
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITENDCMPLX;
}
else
{
RESIZEPRSBUF;
*(curpos) = '\0';
if (curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
RETURN_TOKEN;
}
else
statecode = WAITPOSINFO;
/* skip the advance step so WAITPOSINFO examines this same character */
continue; /* recheck current character */
}
}
else if (statecode == WAITPOSINFO)
{
/* after a quoted word: ':' starts a position list, else token is done */
if (t_iseq(state->prsbuf, ':'))
statecode = INPOSINFO;
else
RETURN_TOKEN;
}
else if (statecode == INPOSINFO)
{
/* a position must start with a digit */
if (t_isdigit(state->prsbuf))
{
if (posalen == 0)
{
posalen = 4;
pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
npos = 0;
}
else if (npos + 1 >= posalen)
{
posalen *= 2;
pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
}
npos++;
/*
 * atoi reads the whole number starting at this first digit; the
 * remaining digits are skipped one by one in WAITPOSDELIM.
 * NOTE(review): atoi gives no overflow indication for very long
 * digit strings; presumably LIMITPOS is relied on to clamp the
 * value -- confirm.
 */
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 0);
statecode = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (statecode == WAITPOSDELIM)
{
/*
 * After the first digit of a position. Weight letters map to
 * A (or '*') = 3, B = 2, C = 1, D = 0. A weight letter is rejected
 * if the position already has a non-zero weight.
 */
if (t_iseq(state->prsbuf, ','))
statecode = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
/* remaining digits of the current number are simply stepped over */
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
/* the loop above has no break; it is left only through the returns inside it */
return false;
}
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,7 +42,7 @@ typedef struct ...@@ -42,7 +42,7 @@ typedef struct
type:8, type:8,
len:16; len:16;
char *word; char *word;
QueryItem *item; QueryOperand *item;
} HeadlineWordEntry; } HeadlineWordEntry;
typedef struct typedef struct
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#define _PG_TSTYPE_H_ #define _PG_TSTYPE_H_
#include "fmgr.h" #include "fmgr.h"
#include "utils/pg_crc.h"
/* /*
* TSVector type. * TSVector type.
...@@ -27,8 +29,8 @@ typedef struct ...@@ -27,8 +29,8 @@ typedef struct
pos:20; /* MAX 1Mb */ pos:20; /* MAX 1Mb */
} WordEntry; } WordEntry;
#define MAXSTRLEN ( 1<<11 ) #define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( 1<<20 ) #define MAXSTRPOS ( (1<<20) - 1)
/* /*
* Equivalent to * Equivalent to
...@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos; ...@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
typedef struct typedef struct
{ {
int32 vl_len_; /* varlena header (do not touch directly!) */ int32 vl_len_; /* varlena header (do not touch directly!) */
int4 size; uint32 size;
char data[1]; char data[1];
} TSVectorData; } TSVectorData;
...@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS); ...@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
/* /*
* TSQuery * TSQuery
*
*
*/ */
typedef int8 QueryItemType;
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3 /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
/* /*
* QueryItem is one node in tsquery - operator or operand. * QueryItem is one node in tsquery - operator or operand.
*/ */
typedef struct
typedef struct QueryItem
{ {
int8 type; /* operand or kind of operator */ QueryItemType type; /* operand or kind of operator (ts_tokentype) */
int8 weight; /* weights of operand to search */ int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
int2 left; /* pointer to left operand Right operand is * if it =0 then any weight are allowed */
* item + 1, left operand is placed int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type,
* item+item->left */ * but we use comparisons to signed integers in the code.
int4 val; /* crc32 value of operand's value */ * They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */ /* pointer to text value of operand, must correlate with WordEntry */
uint32 uint32
istrue:1, /* use for ranking in Cover */ istrue:1, /* use for ranking in Cover */
length:11, length:11,
distance:20; distance:20;
} QueryItem; } QueryOperand;
/* Legal values for QueryOperator.operator */
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
int16 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
/* /*
* It's impossible to use offsetof(QueryItem, istrue) * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
* inside QueryItem requiring 8-byte alignment, like int64.
*/ */
#define HDRSIZEQI ( sizeof(int8) + sizeof(int8) + sizeof(int2) + sizeof(int4) ) typedef union
{
QueryItemType type;
QueryOperator operator;
QueryOperand operand;
} QueryItem;
/* /*
* Storage: * Storage:
* (len)(size)(array of ITEM)(array of operand in text form) * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
* operands are always finished by '\0'
*/ */
typedef struct typedef struct
...@@ -182,13 +213,17 @@ typedef struct ...@@ -182,13 +213,17 @@ typedef struct
typedef TSQueryData *TSQuery; typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int4) ) #define HDRSIZETQ ( VARHDRSZ + sizeof(int4) )
#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
#define OPERANDSSIZE(x) ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) /* Computes the size of header and all QueryItems. size is the number of
* QueryItems, and lenofoperand is the total length of all operands
*/
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
/* Returns a pointer to the first QueryItem in a TSVector */
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
/* Returns a pointer to the beginning of operands in a TSVector */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/* /*
* fmgr interface macros * fmgr interface macros
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -14,65 +14,41 @@ ...@@ -14,65 +14,41 @@
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_public.h" #include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
/* /*
* Common parse definitions for tsvector and tsquery * Common parse definitions for tsvector and tsquery
*/ */
typedef struct /* tsvector parser support. */
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
} WordEntryIN;
typedef struct
{
char *prsbuf;
char *word;
char *curpos;
int4 len;
int4 state;
int4 alen;
WordEntryPos *pos;
bool oprisdelim;
} TSVectorParseState;
extern bool gettoken_tsvector(TSVectorParseState *state);
struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */ struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
typedef struct extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
{ extern void reset_tsvector_parser(TSVectorParseState state, char *input);
char *buffer; /* entire string we are scanning */ extern bool gettoken_tsvector(TSVectorParseState state,
char *buf; /* current scan point */ char **token, int *len,
int4 state; WordEntryPos **pos, int *poslen,
int4 count; char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
/* reverse polish notation in list (for temporary usage) */ /* parse_tsquery */
struct ParseQueryNode *str;
/* number in str */ struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
int4 num; typedef struct TSQueryParserStateData *TSQueryParserState;
/* text-form operand */ typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
int4 lenop;
int4 sumlen;
char *op;
char *curop;
/* state for value's parser */
TSVectorParseState valstate;
/* tscfg */
Oid cfg_id;
} TSQueryParserState;
extern TSQuery parse_tsquery(char *buf, extern TSQuery parse_tsquery(char *buf,
void (*pushval) (TSQueryParserState *, int, char *, int, int2), PushFunction pushval,
Oid cfg_id, bool isplain); void *opaque, bool isplain);
extern void pushval_asis(TSQueryParserState * state,
int type, char *strval, int lenval, int2 weight); /* Functions for use by PushFunction implementations */
extern void pushquery(TSQueryParserState * state, int4 type, int4 val, extern void pushValue(TSQueryParserState state,
int4 distance, int4 lenval, int2 weight); char *strval, int lenval, int2 weight);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 operator);
/* /*
* parse plain text and lexize words * parse plain text and lexize words
...@@ -84,6 +60,11 @@ typedef struct ...@@ -84,6 +60,11 @@ typedef struct
union union
{ {
uint16 pos; uint16 pos;
/*
* When apos array is used, apos[0] is the number of elements
* in the array (excluding apos[0]), and alen is the allocated
* size of the array.
*/
uint16 *apos; uint16 *apos;
} pos; } pos;
char *word; char *word;
...@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, ...@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen); char *buf, int4 buflen);
extern text *generateHeadline(HeadlineParsedText * prs); extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing
*/
#define END 0
#define ERR 1
#define VAL 2
#define OPR 3
#define OPEN 4
#define CLOSE 5
#define VALSTOP 6 /* for stop words */
/* /*
* Common check function for tsvector @@ tsquery * Common check function for tsvector @@ tsquery
*/ */
extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot, extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val)); bool (*chkcond) (void *checkval, QueryOperand * val));
/* /*
* Useful conversion macros * Useful conversion macros
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment