Commit e5be8998 authored by Teodor Sigaev

Refactoring by Heikki Linnakangas <heikki@enterprisedb.com> with

minor edits by me

- Break the QueryItem struct into QueryOperator and QueryOperand.
  Type was really the only common field between them. QueryItem still
  exists, and is used in the TSQuery struct as before, but it's now a
  union of the two. Many other changes fell from that, like separation
  of pushval_asis function into pushValue, pushOperator and pushStop.

- Moved some structs that were for internal use only from header files
  to the right .c-files.

- Moved tsvector parser to a new tsvector_parser.c file. Parser code was
  about half of the size of tsvector.c, it's also used from tsquery.c, and
  it has some data structures of its own, so it seems better to separate
  it. Cleaned up the API so that TSVectorParserState is not accessed from
  outside tsvector_parser.c.

- Separated enumerations (#defines, really) used for QueryItem.type
  field and as return codes from gettoken_query. It was just accidental
  code sharing.

- Removed ParseQueryNode struct used internally by makepol and friends.
  push*-functions now construct QueryItems directly.

- Changed int4 variables to just ints for variables like "i" or "array
  size", where the storage-size was not significant.
parent da124840
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS) ...@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
/* /*
* This function is used for morph parsing * This function is used for morph parsing.
*
* The value is passed to parsetext which will call the right dictionary to
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
* to the stack.
*
* All words belonging to the same variant are pushed as an ANDed list,
* and different variants are ORred together.
*/ */
static void static void
pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight) pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
{ {
int4 count = 0; int4 count = 0;
ParsedText prs; ParsedText prs;
...@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
cntvar = 0, cntvar = 0,
cntpos = 0, cntpos = 0,
cnt = 0; cnt = 0;
Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
prs.lenwords = 4; prs.lenwords = 4;
prs.curwords = 0; prs.curwords = 0;
prs.pos = 0; prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(state->cfg_id, &prs, strval, lenval); parsetext(cfg_id, &prs, strval, lenval);
if (prs.curwords > 0) if (prs.curwords > 0)
{ {
...@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{ {
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); pushValue(state, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word); pfree(prs.words[count].word);
if (cnt) if (cnt)
pushquery(state, OPR, (int4) '&', 0, 0, 0); pushOperator(state, OP_AND);
cnt++; cnt++;
count++; count++;
} }
if (cntvar) if (cntvar)
pushquery(state, OPR, (int4) '|', 0, 0, 0); pushOperator(state, OP_OR);
cntvar++; cntvar++;
} }
if (cntpos) if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0); pushOperator(state, OP_AND);
cntpos++; cntpos++;
} }
...@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, ...@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
} }
else else
pushval_asis(state, VALSTOP, NULL, 0, 0); pushStop(state);
} }
Datum Datum
...@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res; QueryItem *res;
int4 len; int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false); query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
if (query->size == 0) if (query->size == 0)
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
...@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res; QueryItem *res;
int4 len; int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true); query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
if (query->size == 0) if (query->size == 0)
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem) ...@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
} }
/* /*
* Parse string and lexize words * Parse string and lexize words.
*
* prs will be filled in.
*/ */
void void
parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen) parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
{ {
int type, int type,
lenlemm; lenlemm;
...@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen) ...@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework * Headline framework
*/ */
static void static void
hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type) hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
{ {
while (prs->curwords >= prs->lenwords) while (prs->curwords >= prs->lenwords)
{ {
...@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen) ...@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
word = &(prs->words[prs->curwords - 1]); word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0) if (item->type == QI_VAL &&
item->operand.length == buflen &&
strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
{ {
if (word->item) if (word->item)
{ {
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry)); memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item; prs->words[prs->curwords].item = &item->operand;
prs->words[prs->curwords].repeated = 1; prs->words[prs->curwords].repeated = 1;
prs->curwords++; prs->curwords++;
} }
else else
word->item = item; word->item = &item->operand;
} }
item++; item++;
} }
...@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe ...@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
} }
void void
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen) hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{ {
int type, int type,
lenlemm; lenlemm;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1575,7 +1575,7 @@ typedef struct ...@@ -1575,7 +1575,7 @@ typedef struct
} hlCheck; } hlCheck;
static bool static bool
checkcondition_HL(void *checkval, QueryItem * val) checkcondition_HL(void *checkval, QueryOperand * val)
{ {
int i; int i;
...@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q) ...@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
{ {
if (item->type != VAL) if (item->type != QI_VAL)
{ {
item++; item++;
continue; continue;
} }
for (i = pos; i < prs->curwords; i++) for (i = pos; i < prs->curwords; i++)
{ {
if (prs->words[i].item == item) if (prs->words[i].item == &item->operand)
{ {
if (i > *q) if (i > *q)
*q = i; *q = i;
...@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q) ...@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
item = GETQUERY(query); item = GETQUERY(query);
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
{ {
if (item->type != VAL) if (item->type != QI_VAL)
{ {
item++; item++;
continue; continue;
} }
for (i = *q; i >= pos; i--) for (i = *q; i >= pos; i--)
{ {
if (prs->words[i].item == item) if (prs->words[i].item == &item->operand)
{ {
if (i < *p) if (i < *p)
*p = i; *p = i;
......
# #
# Makefile for utils/adt # Makefile for utils/adt
# #
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $ # $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
# #
subdir = src/backend/utils/adt subdir = src/backend/utils/adt
...@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ ...@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \ ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \ tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \ tsvector.o tsvector_op.o tsvector_parser.o\
uuid.o xml.o uuid.o xml.o
like.o: like.c like_match.c like.o: like.c like_match.c
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS) ...@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
item = GETQUERY(query); item = GETQUERY(query);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
(*nentries)++; (*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
{ {
text *txt; text *txt;
QueryOperand *val = &item[i].operand;
txt = (text *) palloc(VARHDRSZ + item[i].length); txt = (text *) palloc(VARHDRSZ + val->length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length); SET_VARSIZE(txt, VARHDRSZ + val->length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length); memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
entries[j++] = PointerGetDatum(txt); entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0) if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"), errmsg("@@ operator does not support lexeme class restrictions"),
...@@ -116,11 +117,11 @@ typedef struct ...@@ -116,11 +117,11 @@ typedef struct
} GinChkVal; } GinChkVal;
static bool static bool
checkcondition_gin(void *checkval, QueryItem * val) checkcondition_gin(void *checkval, QueryOperand * val)
{ {
GinChkVal *gcv = (GinChkVal *) checkval; GinChkVal *gcv = (GinChkVal *) checkval;
return gcv->mapped_check[val - gcv->frst]; return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
} }
Datum Datum
...@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS) ...@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
if (item[i].type == VAL) if (item[i].type == QI_VAL)
gcv.mapped_check[i] = check[j++]; gcv.mapped_check[i] = check[j++];
res = TS_execute( res = TS_execute(
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -293,7 +293,7 @@ typedef struct ...@@ -293,7 +293,7 @@ typedef struct
* is there value 'val' in array or not ? * is there value 'val' in array or not ?
*/ */
static bool static bool
checkcondition_arr(void *checkval, QueryItem * val) checkcondition_arr(void *checkval, QueryOperand * val)
{ {
int4 *StopLow = ((CHKVAL *) checkval)->arrb; int4 *StopLow = ((CHKVAL *) checkval)->arrb;
int4 *StopHigh = ((CHKVAL *) checkval)->arre; int4 *StopHigh = ((CHKVAL *) checkval)->arre;
...@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val) ...@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
while (StopLow < StopHigh) while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + (StopHigh - StopLow) / 2; StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->val) if (*StopMiddle == val->valcrc)
return (true); return (true);
else if (*StopMiddle < val->val) else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1; StopLow = StopMiddle + 1;
else else
StopHigh = StopMiddle; StopHigh = StopMiddle;
...@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val) ...@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
} }
static bool static bool
checkcondition_bit(void *checkval, QueryItem * val) checkcondition_bit(void *checkval, QueryOperand * val)
{ {
return GETBIT(checkval, HASHVAL(val->val)); return GETBIT(checkval, HASHVAL(val->valcrc));
} }
Datum Datum
......
This diff is collapsed.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -35,20 +35,23 @@ maketree(QueryItem * in) ...@@ -35,20 +35,23 @@ maketree(QueryItem * in)
node->valnode = in; node->valnode = in;
node->right = node->left = NULL; node->right = node->left = NULL;
if (in->type == OPR) if (in->type == QI_OPR)
{ {
node->right = maketree(in + 1); node->right = maketree(in + 1);
if (in->val != (int4) '!') if (in->operator.oper != OP_NOT)
node->left = maketree(in + in->left); node->left = maketree(in + in->operator.left);
} }
return node; return node;
} }
/*
* Internal state for plaintree and plainnode
*/
typedef struct typedef struct
{ {
QueryItem *ptr; QueryItem *ptr;
int4 len; int len; /* allocated size of ptr */
int4 cur; int cur; /* number of elements in ptr */
} PLAINTREE; } PLAINTREE;
static void static void
...@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node) ...@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem)); state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
} }
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem)); memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
state->cur++; state->cur++;
else if (node->valnode->val == (int4) '!') else if (node->valnode->operator.oper == OP_NOT)
{ {
state->ptr[state->cur].left = 1; state->ptr[state->cur].operator.left = 1;
state->cur++; state->cur++;
plainnode(state, node->right); plainnode(state, node->right);
} }
else else
{ {
int4 cur = state->cur; int cur = state->cur;
state->cur++; state->cur++;
plainnode(state, node->right); plainnode(state, node->right);
state->ptr[cur].left = state->cur - cur; state->ptr[cur].operator.left = state->cur - cur;
plainnode(state, node->left); plainnode(state, node->left);
} }
pfree(node); pfree(node);
} }
/* /*
* make plain view of tree from 'normal' view of tree * make plain view of tree from a NODE-tree representation
*/ */
static QueryItem * static QueryItem *
plaintree(NODE * root, int4 *len) plaintree(NODE * root, int *len)
{ {
PLAINTREE pl; PLAINTREE pl;
pl.cur = 0; pl.cur = 0;
pl.len = 16; pl.len = 16;
if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
{ {
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem)); pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
plainnode(&pl, root); plainnode(&pl, root);
...@@ -122,17 +125,17 @@ freetree(NODE * node) ...@@ -122,17 +125,17 @@ freetree(NODE * node)
static NODE * static NODE *
clean_NOT_intree(NODE * node) clean_NOT_intree(NODE * node)
{ {
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
return node; return node;
if (node->valnode->val == (int4) '!') if (node->valnode->operator.oper == OP_NOT)
{ {
freetree(node); freetree(node);
return NULL; return NULL;
} }
/* operator & or | */ /* operator & or | */
if (node->valnode->val == (int4) '|') if (node->valnode->operator.oper == OP_OR)
{ {
if ((node->left = clean_NOT_intree(node->left)) == NULL || if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL) (node->right = clean_NOT_intree(node->right)) == NULL)
...@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node) ...@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
else else
{ {
NODE *res = node; NODE *res = node;
Assert(node->valnode->operator.oper == OP_AND);
node->left = clean_NOT_intree(node->left); node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right); node->right = clean_NOT_intree(node->right);
...@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node) ...@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
} }
QueryItem * QueryItem *
clean_NOT(QueryItem * ptr, int4 *len) clean_NOT(QueryItem * ptr, int *len)
{ {
NODE *root = maketree(ptr); NODE *root = maketree(ptr);
...@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len) ...@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
#undef V_UNKNOWN #undef V_UNKNOWN
#endif #endif
#define V_UNKNOWN 0 /*
#define V_TRUE 1 * output values for result output parameter of clean_fakeval_intree
#define V_FALSE 2 */
#define V_STOP 3 #define V_UNKNOWN 0 /* the expression can't be evaluated statically */
#define V_TRUE 1 /* the expression is always true (not implemented) */
#define V_FALSE 2 /* the expression is always false (not implemented) */
#define V_STOP 3 /* the expression is a stop word */
/* /*
* Clean query tree from values which are always in
...@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
char lresult = V_UNKNOWN, char lresult = V_UNKNOWN,
rresult = V_UNKNOWN; rresult = V_UNKNOWN;
if (node->valnode->type == VAL) if (node->valnode->type == QI_VAL)
return node; return node;
else if (node->valnode->type == VALSTOP) else
if (node->valnode->type == QI_VALSTOP)
{ {
pfree(node); pfree(node);
*result = V_STOP; *result = V_STOP;
return NULL; return NULL;
} }
Assert(node->valnode->type == QI_OPR);
if (node->valnode->val == (int4) '!') if (node->valnode->operator.oper == OP_NOT)
{ {
node->right = clean_fakeval_intree(node->right, &rresult); node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right) if (!node->right)
...@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
node->left = clean_fakeval_intree(node->left, &lresult); node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult); node->right = clean_fakeval_intree(node->right, &rresult);
if (lresult == V_STOP && rresult == V_STOP) if (lresult == V_STOP && rresult == V_STOP)
{ {
freetree(node); freetree(node);
...@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result) ...@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
} }
QueryItem * QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len) clean_fakeval(QueryItem * ptr, int *len)
{ {
NODE *root = maketree(ptr); NODE *root = maketree(ptr);
char result = V_UNKNOWN; char result = V_UNKNOWN;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS) ...@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
} }
static QTNode * static QTNode *
join_tsqueries(TSQuery a, TSQuery b) join_tsqueries(TSQuery a, TSQuery b, int8 operator)
{ {
QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
res->flags |= QTN_NEEDFREE; res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR; res->valnode->type = QI_OPR;
res->valnode->operator.oper = operator;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
...@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS) ...@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b); res = join_tsqueries(a, b, OP_AND);
res->valnode->val = '&';
query = QTN2QT(res); query = QTN2QT(res);
...@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS) ...@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a); PG_RETURN_POINTER(a);
} }
res = join_tsqueries(a, b); res = join_tsqueries(a, b, OP_OR);
res->valnode->val = '|';
query = QTN2QT(res); query = QTN2QT(res);
...@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS) ...@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
res->flags |= QTN_NEEDFREE; res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR; res->valnode->type = QI_OPR;
res->valnode->val = '!'; res->valnode->operator.oper = OP_NOT;
res->child = (QTNode **) palloc0(sizeof(QTNode *)); res->child = (QTNode **) palloc0(sizeof(QTNode *));
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a)); res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
...@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a) ...@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
for (i = 0; i < a->size; i++) for (i = 0; i < a->size; i++)
{ {
if (ptr->type == VAL) if (ptr->type == QI_VAL)
sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN); sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
ptr++; ptr++;
} }
...@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS) ...@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
for (i = 0; i < ex->size; i++) for (i = 0; i < ex->size; i++)
{ {
iq = GETQUERY(query); iq = GETQUERY(query);
if (ie[i].type != VAL) if (ie[i].type != QI_VAL)
continue; continue;
for (j = 0; j < query->size; j++) for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val) if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
{ {
j = query->size + 1; j = query->size + 1;
break; break;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -34,18 +34,26 @@ addone(int *counters, int last, int total) ...@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
return 1; return 1;
} }
/*
* If node is equal to ex, replace it with subs. Replacement is actually done
* by returning either node or a copy of subs.
*/
static QTNode * static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{ {
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val) if ((node->sign & ex->sign) != ex->sign ||
node->valnode->type != ex->valnode->type)
return node; return node;
if (node->flags & QTN_NOCHANGE) if (node->flags & QTN_NOCHANGE)
return node; return node;
if (node->valnode->type == OPR) if (node->valnode->type == QI_OPR)
{ {
if (node->valnode->operator.oper != ex->valnode->operator.oper)
return node;
if (node->nchild == ex->nchild) if (node->nchild == ex->nchild)
{ {
if (QTNEq(node, ex)) if (QTNEq(node, ex))
...@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
} }
else if (node->nchild > ex->nchild) else if (node->nchild > ex->nchild)
{ {
/*
* AND and OR are commutative, so we check if a subset of the
* children match. For example, if tnode is A | B | C, and
* ex is B | C, we have a match after we convert tnode to
* A | (B | C).
*/
int *counters = (int *) palloc(sizeof(int) * node->nchild); int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i; int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode)); QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
...@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
pfree(counters); pfree(counters);
} }
} }
else if (QTNEq(node, ex)) else
{ {
QTNFree(node); Assert(node->valnode->type == QI_VAL);
if (subs)
{ if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
node = QTNCopy(subs); return node;
node->flags |= QTN_NOCHANGE; else if (QTNEq(node, ex))
}
else
{ {
node = NULL; QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
{
node = NULL;
}
*isfind = true;
} }
*isfind = true;
} }
return node; return node;
...@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) ...@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{ {
root = findeq(root, ex, subs, isfind); root = findeq(root, ex, subs, isfind);
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR) if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
{ {
int i; int i;
...@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root) ...@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
if (!root) if (!root)
return NULL; return NULL;
if (root->valnode->type == OPR) if (root->valnode->type == QI_OPR)
{ {
int i, int i,
j = 0; j = 0;
...@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root) ...@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
root->nchild = j; root->nchild = j;
if (root->valnode->val == (int4) '!' && root->nchild == 0) if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
{ {
QTNFree(root); QTNFree(root);
root = NULL; root = NULL;
...@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS) ...@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
elog(ERROR, "array must be one-dimensional, not %d dimensions", elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa)); ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3) if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements"); elog(ERROR, "array must have three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID) if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type"); elog(ERROR, "array must contain tsquery elements");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp); deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
...@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS) ...@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
tree = findsubquery(tree, qex, subs, NULL); tree = findsubquery(tree, qex, subs, NULL);
QTNFree(qex); QTNFree(qex);
QTNFree(subs); QTNFree(subs);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
QTNode * QTNode *
QT2QTN(QueryItem * in, char *operand) QT2QTN(QueryItem * in, char *operand)
{ {
...@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand) ...@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
node->valnode = in; node->valnode = in;
if (in->type == OPR) if (in->type == QI_OPR)
{ {
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
node->child[0] = QT2QTN(in + 1, operand); node->child[0] = QT2QTN(in + 1, operand);
node->sign = node->child[0]->sign; node->sign = node->child[0]->sign;
if (in->val == (int4) '!') if (in->operator.oper == OP_NOT)
node->nchild = 1; node->nchild = 1;
else else
{ {
node->nchild = 2; node->nchild = 2;
node->child[1] = QT2QTN(in + in->left, operand); node->child[1] = QT2QTN(in + in->operator.left, operand);
node->sign |= node->child[1]->sign; node->sign |= node->child[1]->sign;
} }
} }
else if (operand) else if (operand)
{ {
node->word = operand + in->distance; node->word = operand + in->operand.distance;
node->sign = 1 << (in->val % 32); node->sign = 1 << (in->operand.valcrc % 32);
} }
return node; return node;
...@@ -54,14 +53,14 @@ QTNFree(QTNode * in) ...@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
if (!in) if (!in)
return; return;
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0) if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word); pfree(in->word);
if (in->child) if (in->child)
{ {
if (in->valnode) if (in->valnode)
{ {
if (in->valnode->type == OPR && in->nchild > 0) if (in->valnode->type == QI_OPR && in->nchild > 0)
{ {
int i; int i;
...@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn) ...@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
{ {
if (an->valnode->type != bn->valnode->type) if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1; return (an->valnode->type > bn->valnode->type) ? -1 : 1;
else if (an->valnode->val != bn->valnode->val)
return (an->valnode->val > bn->valnode->val) ? -1 : 1; if (an->valnode->type == QI_OPR)
else if (an->valnode->type == VAL)
{
if (an->valnode->length == bn->valnode->length)
return strncmp(an->word, bn->word, an->valnode->length);
else
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
}
else if (an->nchild != bn->nchild)
{ {
return (an->nchild > bn->nchild) ? -1 : 1; QueryOperator *ao = &an->valnode->operator;
QueryOperator *bo = &bn->valnode->operator;
if(ao->oper != bo->oper)
return (ao->oper > bo->oper) ? -1 : 1;
if (an->nchild != bn->nchild)
return (an->nchild > bn->nchild) ? -1 : 1;
{
int i,
res;
for (i = 0; i < an->nchild; i++)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
return 0;
} }
else else
{ {
int i, QueryOperand *ao = &an->valnode->operand;
res; QueryOperand *bo = &bn->valnode->operand;
for (i = 0; i < an->nchild; i++) Assert(an->valnode->type == QI_VAL);
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res; if (ao->valcrc != bo->valcrc)
} {
return (ao->valcrc > bo->valcrc) ? -1 : 1;
}
return 0; if (ao->length == bo->length)
return strncmp(an->word, bn->word, ao->length);
else
return (ao->length > bo->length) ? -1 : 1;
}
} }
static int static int
...@@ -119,7 +133,7 @@ QTNSort(QTNode * in) ...@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b) ...@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
return (QTNodeCompare(a, b) == 0) ? true : false; return (QTNodeCompare(a, b) == 0) ? true : false;
} }
/*
* Remove unnecessary intermediate nodes. For example:
*
* OR OR
* a OR -> a b c
* b c
*/
void void
QTNTernary(QTNode * in) QTNTernary(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -152,9 +173,10 @@ QTNTernary(QTNode * in) ...@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
{ {
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val) QTNode *cc = in->child[i];
if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
{ {
QTNode *cc = in->child[i];
int oldnchild = in->nchild; int oldnchild = in->nchild;
in->nchild += cc->nchild - 1; in->nchild += cc->nchild - 1;
...@@ -167,17 +189,23 @@ QTNTernary(QTNode * in) ...@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
i += cc->nchild - 1; i += cc->nchild - 1;
if(cc->flags & QTN_NEEDFREE)
pfree(cc->valnode);
pfree(cc); pfree(cc);
} }
} }
} }
/*
* Convert a tree to binary tree by inserting intermediate nodes.
* (Opposite of QTNTernary)
*/
void void
QTNBinary(QTNode * in) QTNBinary(QTNode * in)
{ {
int i; int i;
if (in->valnode->type != OPR) if (in->valnode->type != QI_OPR)
return; return;
for (i = 0; i < in->nchild; i++) for (i = 0; i < in->nchild; i++)
...@@ -201,7 +229,7 @@ QTNBinary(QTNode * in) ...@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
nn->sign = nn->child[0]->sign | nn->child[1]->sign; nn->sign = nn->child[0]->sign | nn->child[1]->sign;
nn->valnode->type = in->valnode->type; nn->valnode->type = in->valnode->type;
nn->valnode->val = in->valnode->val; nn->valnode->operator.oper = in->valnode->operator.oper;
in->child[0] = nn; in->child[0] = nn;
in->child[1] = in->child[in->nchild - 1]; in->child[1] = in->child[in->nchild - 1];
...@@ -209,11 +237,15 @@ QTNBinary(QTNode * in) ...@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
} }
} }
/*
* Count the total length of operand string in tree, including '\0'-
* terminators.
*/
static void static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode) cntsize(QTNode * in, int *sumlen, int *nnode)
{ {
*nnode += 1; *nnode += 1;
if (in->valnode->type == OPR) if (in->valnode->type == QI_OPR)
{ {
int i; int i;
...@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode) ...@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
} }
else else
{ {
*sumlen += in->valnode->length + 1; *sumlen += in->valnode->operand.length + 1;
} }
} }
...@@ -234,22 +266,26 @@ typedef struct ...@@ -234,22 +266,26 @@ typedef struct
} QTN2QTState; } QTN2QTState;
static void static void
fillQT(QTN2QTState * state, QTNode * in) fillQT(QTN2QTState *state, QTNode *in)
{ {
*(state->curitem) = *(in->valnode); if (in->valnode->type == QI_VAL)
if (in->valnode->type == VAL)
{ {
memcpy(state->curoperand, in->word, in->valnode->length); memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
state->curitem->distance = state->curoperand - state->operand;
state->curoperand[in->valnode->length] = '\0'; memcpy(state->curoperand, in->word, in->valnode->operand.length);
state->curoperand += in->valnode->length + 1; state->curitem->operand.distance = state->curoperand - state->operand;
state->curoperand[in->valnode->operand.length] = '\0';
state->curoperand += in->valnode->operand.length + 1;
state->curitem++; state->curitem++;
} }
else else
{ {
QueryItem *curitem = state->curitem; QueryItem *curitem = state->curitem;
Assert(in->valnode->type == QI_OPR);
memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
Assert(in->nchild <= 2); Assert(in->nchild <= 2);
state->curitem++; state->curitem++;
...@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in) ...@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
if (in->nchild == 2) if (in->nchild == 2)
{ {
curitem->left = state->curitem - curitem; curitem->operator.left = state->curitem - curitem;
fillQT(state, in->child[1]); fillQT(state, in->child[1]);
} }
} }
...@@ -296,11 +332,11 @@ QTNCopy(QTNode *in) ...@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
*(out->valnode) = *(in->valnode); *(out->valnode) = *(in->valnode);
out->flags |= QTN_NEEDFREE; out->flags |= QTN_NEEDFREE;
if (in->valnode->type == VAL) if (in->valnode->type == QI_VAL)
{ {
out->word = palloc(in->valnode->length + 1); out->word = palloc(in->valnode->operand.length + 1);
memcpy(out->word, in->word, in->valnode->length); memcpy(out->word, in->word, in->valnode->operand.length);
out->word[in->valnode->length] = '\0'; out->word[in->valnode->operand.length] = '\0';
out->flags |= QTN_WORDFREE; out->flags |= QTN_WORDFREE;
} }
else else
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -68,7 +68,7 @@ cnt_length(TSVector t) ...@@ -68,7 +68,7 @@ cnt_length(TSVector t)
} }
static int4 static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item) WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{ {
if (ptr->len == item->length) if (ptr->len == item->length)
return strncmp( return strncmp(
...@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item) ...@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
} }
static WordEntry * static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item) find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{ {
WordEntry *StopLow = ARRPTR(t); WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t); WordEntry *StopHigh = (WordEntry *) STRPTR(t);
...@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item) ...@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
} }
/*
* sort QueryOperands by (length, word)
*/
static int static int
compareQueryItem(const void *a, const void *b, void *arg) compareQueryOperand(const void *a, const void *b, void *arg)
{ {
char *operand = (char *) arg; char *operand = (char *) arg;
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length) if (qa->length == qb->length)
return strncmp(operand + (*(QueryItem **) a)->distance, return strncmp(operand + qa->distance,
operand + (*(QueryItem **) b)->distance, operand + qb->distance,
(*(QueryItem **) b)->length); qb->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1; return (qa->length > qb->length) ? 1 : -1;
} }
static QueryItem ** /*
SortAndUniqItems(char *operand, QueryItem * item, int *size) * Returns a sorted, de-duplicated array of QueryOperands in a query.
* The returned QueryOperands are pointers to the original QueryOperands
* in the query.
*
* Length of the returned array is stored in *size
*/
static QueryOperand **
SortAndUniqItems(TSQuery q, int *size)
{ {
QueryItem **res, char *operand = GETOPERAND(q);
QueryItem * item = GETQUERY(q);
QueryOperand **res,
**ptr, **ptr,
**prevptr; **prevptr;
ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size); ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
/* Collect all operands from the tree to res */
while ((*size)--) while ((*size)--)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
{ {
*ptr = item; *ptr = (QueryOperand *) item;
ptr++; ptr++;
} }
item++; item++;
...@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size) ...@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
if (*size < 2) if (*size < 2)
return res; return res;
qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand); qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
ptr = res + 1; ptr = res + 1;
prevptr = res; prevptr = res;
/* remove duplicates */
while (ptr - res < *size) while (ptr - res < *size)
{ {
if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0) if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{ {
prevptr++; prevptr++;
*prevptr = *ptr; *prevptr = *ptr;
...@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q) ...@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
lenct, lenct,
dist; dist;
float res = -1.0; float res = -1.0;
QueryItem **item; QueryOperand **item;
int size = q->size; int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); item = SortAndUniqItems(q, &size);
if (size < 2) if (size < 2)
{ {
pfree(item); pfree(item);
...@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q) ...@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
j, j,
i; i;
float res = 0.0; float res = 0.0;
QueryItem **item; QueryOperand **item;
int size = q->size; int size = q->size;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1; *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
...@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method) ...@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
if (!t->size || !q->size) if (!t->size || !q->size)
return 0.0; return 0.0;
res = (item->type != VAL && item->val == (int4) '&') ? /* XXX: What about NOT? */
res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
calc_rank_and(w, t, q) : calc_rank_or(w, t, q); calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0) if (res < 0)
...@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b) ...@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
} }
static bool static bool
checkcondition_QueryItem(void *checkval, QueryItem * val) checkcondition_QueryOperand(void *checkval, QueryOperand *val)
{ {
return (bool) (val->istrue); return (bool) (val->istrue);
} }
...@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query) ...@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
/* reset istrue flag */ /* reset istrue flag */
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item->type == VAL) if (item->type == QI_VAL)
item->istrue = 0; item->operand.istrue = 0;
item++; item++;
} }
} }
...@@ -484,7 +501,7 @@ typedef struct ...@@ -484,7 +501,7 @@ typedef struct
static bool static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
{ {
DocRepresentation *ptr; DocRepresentation *ptr;
int lastpos = ext->pos; int lastpos = ext->pos;
...@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) ...@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr - doc < len) while (ptr - doc < len)
{ {
for (i = 0; i < ptr->nitem; i++) for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1; {
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem)) if(ptr->item[i]->type == QI_VAL)
ptr->item[i]->operand.istrue = 1;
}
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
{ {
if (ptr->pos > ext->q) if (ptr->pos > ext->q)
{ {
...@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) ...@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr >= doc + ext->pos) while (ptr >= doc + ext->pos)
{ {
for (i = 0; i < ptr->nitem; i++) for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1; if(ptr->item[i]->type == QI_VAL) /* XXX */
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem)) ptr->item[i]->operand.istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{ {
if (ptr->pos < ext->p) if (ptr->pos < ext->p)
{ {
...@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{ {
if (item[i].type != VAL || item[i].istrue) QueryOperand *curoperand;
if (item[i].type != QI_VAL)
continue;
curoperand = &item[i].operand;
if(item[i].operand.istrue)
continue; continue;
entry = find_wordentry(txt, query, &(item[i])); entry = find_wordentry(txt, query, curoperand);
if (!entry) if (!entry)
continue; continue;
...@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
{ {
if (j == 0) if (j == 0)
{ {
QueryItem *kptr,
*iptr = item + i;
int k; int k;
doc[cur].needfree = false; doc[cur].needfree = false;
...@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (k = 0; k < query->size; k++) for (k = 0; k < query->size; k++)
{ {
kptr = item + k; QueryOperand *kptr = &item[k].operand;
QueryOperand *iptr = &item[i].operand;
if (k == i || if (k == i ||
(item[k].type == VAL && (item[k].type == QI_VAL &&
compareQueryItem(&kptr, &iptr, operand) == 0)) compareQueryOperand(&kptr, &iptr, operand) == 0))
{ {
/* if k == i, we've already checked above that its type == QI_VAL */
doc[cur].item[doc[cur].nitem] = item + k; doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++; doc[cur].nitem++;
kptr->istrue = 1; item[k].operand.istrue = 1;
} }
} }
} }
...@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen) ...@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
if (cur > 0) if (cur > 0)
{ {
if (cur > 1) qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
return doc; return doc;
} }
...@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS) ...@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
{ {
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1); TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(3); int method = PG_GETARG_INT32(3);
float res; float res;
...@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS) ...@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
{ {
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1); TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
float res; float res;
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
...@@ -778,7 +806,7 @@ Datum ...@@ -778,7 +806,7 @@ Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS) ts_rankcd_ttf(PG_FUNCTION_ARGS)
{ {
TSVector txt = PG_GETARG_TSVECTOR(0); TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(2); int method = PG_GETARG_INT32(2);
float res; float res;
...@@ -793,7 +821,7 @@ Datum ...@@ -793,7 +821,7 @@ Datum
ts_rankcd_tt(PG_FUNCTION_ARGS) ts_rankcd_tt(PG_FUNCTION_ARGS)
{ {
TSVector txt = PG_GETARG_TSVECTOR(0); TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
float res; float res;
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
......
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,6 +66,9 @@ typedef struct ...@@ -66,6 +66,9 @@ typedef struct
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
/*
* Order: haspos, len, word, for all positions (pos, weight)
*/
static int static int
silly_cmp_tsvector(const TSVector a, const TSVector b) silly_cmp_tsvector(const TSVector a, const TSVector b)
{ {
...@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS) ...@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
* compare 2 string values * compare 2 string values
*/ */
static int4 static int4
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item) ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
{ {
if (ptr->len == item->length) if (ptr->len == item->length)
return strncmp( return strncmp(
...@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item) ...@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
* check weight info * check weight info
*/ */
static bool static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item) checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{ {
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16)); WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len))); uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
...@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item) ...@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
* is there value 'val' in array or not ? * is there value 'val' in array or not ?
*/ */
static bool static bool
checkcondition_str(void *checkval, QueryItem * val) checkcondition_str(void *checkval, QueryOperand * val)
{ {
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle; WordEntry *StopMiddle;
int difference; int difference;
...@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val) ...@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
while (StopLow < StopHigh) while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + (StopHigh - StopLow) / 2; StopMiddle = StopLow + (StopHigh - StopLow) / 2;
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); difference = ValCompare(chkval, StopMiddle, val);
if (difference == 0) if (difference == 0)
return (val->weight && StopMiddle->haspos) ? return (val->weight && StopMiddle->haspos) ?
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true; checkclass_str(chkval, StopMiddle, val) : true;
else if (difference < 0) else if (difference < 0)
StopLow = StopMiddle + 1; StopLow = StopMiddle + 1;
else else
...@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val) ...@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
} }
/* /*
* check for boolean condition * check for boolean condition.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query.
*
*/ */
bool bool
TS_execute(QueryItem * curitem, void *checkval, bool calcnot, TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val)) bool (*chkcond) (void *checkval, QueryOperand * val))
{ {
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
if (curitem->type == VAL) if (curitem->type == QI_VAL)
return chkcond(checkval, curitem); return chkcond(checkval, (QueryOperand *) curitem);
else if (curitem->val == (int4) '!')
{ switch(curitem->operator.oper)
return (calcnot) ?
!TS_execute(curitem + 1, checkval, calcnot, chkcond)
: true;
}
else if (curitem->val == (int4) '&')
{ {
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) case OP_NOT:
return TS_execute(curitem + 1, checkval, calcnot, chkcond); if (calcnot)
else return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
return false; else
} return true;
else case OP_AND:
{ /* |-operator */ if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) return TS_execute(curitem + 1, checkval, calcnot, chkcond);
return true; else
else return false;
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
case OP_OR:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
default:
elog(ERROR, "unknown operator %d", curitem->operator.oper);
} }
/* not reachable, but keep compiler quiet */
return false; return false;
} }
......
/*-------------------------------------------------------------------------
*
* tsvector_parser.c
* Parser for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * Private state of a tsvector parser.  Created by init_tsvector_parser,
 * advanced by gettoken_tsvector and released by close_tsvector_parser.
 */
struct TSVectorParseStateData
{
char *prsbuf; /* current read position in the input string */
char *word; /* buffer to hold the current word */
int len; /* size in bytes allocated for 'word' */
bool oprisdelim; /* if true, ! | & ( ) also terminate a word */
};
/*
* Initializes parser for the input string. If oprisdelim is set, the
* following characters are treated as delimiters in addition to whitespace:
* ! | & ( )
*/
TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim)
{
TSVectorParseState state;
state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
state->prsbuf = input;
state->len = 32;
state->word = (char *) palloc(state->len);
state->oprisdelim = oprisdelim;
return state;
}
/*
 * Reinitializes parser for parsing 'input', instead of previous input.
 *
 * Only the read position is replaced; the word buffer and the oprisdelim
 * setting of 'state' are left as they are.
 */
void
reset_tsvector_parser(TSVectorParseState state, char *input)
{
state->prsbuf = input;
}
/*
 * Shuts down a tsvector parser.
 *
 * Frees the word buffer and the state struct itself; 'state' must not be
 * used afterwards.  The input string is not freed.
 */
void
close_tsvector_parser(TSVectorParseState state)
{
/* the word buffer must go first, while 'state' is still valid */
pfree(state->word);
pfree(state);
}
/*
 * Make sure the word buffer has room for one more character of the
 * maximum length of the database encoding.  If not, the buffer is doubled
 * with repalloc and 'curpos' is re-pointed to the same offset in the new
 * buffer.
 *
 * Relies on the locals 'state' and 'curpos' of the function it is
 * expanded in.
 */
#define RESIZEPRSBUF \
do { \
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int clen = curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
curpos = state->word + clen; \
} \
} while (0)
/*
 * True if 'x' points at a single-byte character that is one of
 * ! & | ( ).  Note that 'x' is evaluated several times.
 */
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/*
 * Fills the output parameters, and returns true.
 *
 * The 'return' leaves the function the macro is expanded in.  The macro
 * relies on that function's locals (state, curpos, pos, npos) and output
 * parameters (strval, lenval, pos_ptr, poslen, endptr).
 *
 * If pos_ptr is NULL, any collected position array is pfree'd and *poslen
 * is not written.  strval, lenval and endptr are each filled only when
 * non-NULL.  *strval points into the parser's own word buffer.
 */
#define RETURN_TOKEN \
do { \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; \
*poslen = npos; \
} \
else if (pos != NULL) \
pfree(pos); \
\
if (strval != NULL) \
*strval = state->word; \
if (lenval != NULL) \
*lenval = curpos - state->word; \
if (endptr != NULL) \
*endptr = state->prsbuf; \
return true; \
} while(0)
/* State codes used in gettoken_tsvector */
#define WAITWORD 1 /* skipping whitespace, looking for the start of a token */
#define WAITENDWORD 2 /* inside an unquoted word */
#define WAITNEXTCHAR 3 /* after a backslash: copy the next character literally */
#define WAITENDCMPLX 4 /* inside a quoted word, looking for a quote */
#define WAITPOSINFO 5 /* quoted word ended: ':' starts positions, else token is done */
#define INPOSINFO 6 /* expecting the first digit of a position */
#define WAITPOSDELIM 7 /* after a position: more digits, a weight, ',' or end of token */
#define WAITCHARCMPLX 8 /* quote seen in a quoted word: doubled quote or end of word */
/*
 * Fetch the next token from the parser's input.
 *
 * Returns false when the end of the input string is reached before a new
 * token starts.  Otherwise returns true and fills the output parameters
 * (each of strval, lenval and endptr may be NULL if not wanted):
 *
 *	*strval		the token, '\0'-terminated.  It points into the parser's
 *				own word buffer, which is reused by the next call.
 *	*lenval		length of *strval in bytes
 *	*pos_ptr	palloc'd array of positions and weights attached to the
 *				token, or NULL if the token had none.  The caller must
 *				pfree it.  Pass NULL if the information is not wanted; the
 *				array is then freed here.
 *	*poslen		number of elements in *pos_ptr; written only when pos_ptr
 *				is not NULL
 *	*endptr		input position at which parsing stopped
 *
 * Malformed input is reported with ereport(ERROR).
 *
 * The locals 'curpos', 'pos' and 'npos' are referenced by the
 * RESIZEPRSBUF and RETURN_TOKEN macros and must keep these names.
 */
bool
gettoken_tsvector(TSVectorParseState state,
				  char **strval, int *lenval,
				  WordEntryPos **pos_ptr, int *poslen,
				  char **endptr)
{
	char	   *curpos = state->word;	/* write position in the word buffer */
	int			fsm_state = WAITWORD;	/* current state of the scanner */
	int			resume_state = 0;		/* state to return to after an escape */

	/* positions collected from the ":1,2A,..." list following the token */
	WordEntryPos *pos = NULL;
	int			npos = 0;		/* elements of pos used */
	int			pos_alloc = 0;	/* allocated size of pos */

	for (;;)
	{
		switch (fsm_state)
		{
			case WAITWORD:
				if (*(state->prsbuf) == '\0')
					return false;
				else if (t_iseq(state->prsbuf, '\''))
					fsm_state = WAITENDCMPLX;
				else if (t_iseq(state->prsbuf, '\\'))
				{
					fsm_state = WAITNEXTCHAR;
					resume_state = WAITENDWORD;
				}
				else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				else if (!t_isspace(state->prsbuf))
				{
					/* first character of an unquoted word */
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					fsm_state = WAITENDWORD;
				}
				/* whitespace is simply skipped */
				break;

			case WAITNEXTCHAR:
				if (*(state->prsbuf) == '\0')
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("there is no escaped character")));
				else
				{
					/* take the escaped character as is, then resume */
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					Assert(resume_state != 0);
					fsm_state = resume_state;
				}
				break;

			case WAITENDWORD:
				if (t_iseq(state->prsbuf, '\\'))
				{
					fsm_state = WAITNEXTCHAR;
					resume_state = WAITENDWORD;
				}
				else if (t_isspace(state->prsbuf) ||
						 *(state->prsbuf) == '\0' ||
						 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
				{
					/* delimiter: the word is complete */
					RESIZEPRSBUF;
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					*(curpos) = '\0';
					RETURN_TOKEN;
				}
				else if (t_iseq(state->prsbuf, ':'))
				{
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					*(curpos) = '\0';
					/* with oprisdelim set, ':' ends the token here */
					if (state->oprisdelim)
						RETURN_TOKEN;
					else
						fsm_state = INPOSINFO;
				}
				else
				{
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
				}
				break;

			case WAITENDCMPLX:
				if (t_iseq(state->prsbuf, '\''))
					fsm_state = WAITCHARCMPLX;
				else if (t_iseq(state->prsbuf, '\\'))
				{
					fsm_state = WAITNEXTCHAR;
					resume_state = WAITENDCMPLX;
				}
				else if (*(state->prsbuf) == '\0')
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				else
				{
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
				}
				break;

			case WAITCHARCMPLX:
				if (t_iseq(state->prsbuf, '\''))
				{
					/* doubled quote: a literal quote inside the word */
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					fsm_state = WAITENDCMPLX;
				}
				else
				{
					/* the previous quote closed the word */
					RESIZEPRSBUF;
					*(curpos) = '\0';
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					if (state->oprisdelim)
					{
						/* state->prsbuf+=pg_mblen(state->prsbuf); */
						RETURN_TOKEN;
					}
					else
						fsm_state = WAITPOSINFO;
					continue;	/* recheck current character */
				}
				break;

			case WAITPOSINFO:
				if (t_iseq(state->prsbuf, ':'))
					fsm_state = INPOSINFO;
				else
					RETURN_TOKEN;
				break;

			case INPOSINFO:
				if (!t_isdigit(state->prsbuf))
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				else
				{
					/* make room for one more position */
					if (pos_alloc == 0)
					{
						pos_alloc = 4;
						pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * pos_alloc);
						npos = 0;
					}
					else if (npos + 1 >= pos_alloc)
					{
						pos_alloc *= 2;
						pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * pos_alloc);
					}
					npos++;
					WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
					if (WEP_GETPOS(pos[npos - 1]) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("wrong position info in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 0);
					fsm_state = WAITPOSDELIM;
				}
				break;

			case WAITPOSDELIM:
				if (t_iseq(state->prsbuf, ','))
					fsm_state = INPOSINFO;
				else
				{
					/* weight selected by the current character, -1 if none */
					int			weight = -1;

					if (t_iseq(state->prsbuf, 'a') ||
						t_iseq(state->prsbuf, 'A') ||
						t_iseq(state->prsbuf, '*'))
						weight = 3;
					else if (t_iseq(state->prsbuf, 'b') ||
							 t_iseq(state->prsbuf, 'B'))
						weight = 2;
					else if (t_iseq(state->prsbuf, 'c') ||
							 t_iseq(state->prsbuf, 'C'))
						weight = 1;
					else if (t_iseq(state->prsbuf, 'd') ||
							 t_iseq(state->prsbuf, 'D'))
						weight = 0;

					if (weight >= 0)
					{
						/* reject a weight if a nonzero one is already set */
						if (WEP_GETWEIGHT(pos[npos - 1]))
							ereport(ERROR,
									(errcode(ERRCODE_SYNTAX_ERROR),
									 errmsg("syntax error in tsvector")));
						WEP_SETWEIGHT(pos[npos - 1], weight);
					}
					else if (t_isspace(state->prsbuf) ||
							 *(state->prsbuf) == '\0')
						RETURN_TOKEN;
					else if (!t_isdigit(state->prsbuf))
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					/* remaining digits of the position number are skipped */
				}
				break;

			default:			/* internal error */
				elog(ERROR, "internal error in gettoken_tsvector");
				break;
		}

		/* get next char */
		state->prsbuf += pg_mblen(state->prsbuf);
	}

	return false;
}
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,7 +42,7 @@ typedef struct ...@@ -42,7 +42,7 @@ typedef struct
type:8, type:8,
len:16; len:16;
char *word; char *word;
QueryItem *item; QueryOperand *item;
} HeadlineWordEntry; } HeadlineWordEntry;
typedef struct typedef struct
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#define _PG_TSTYPE_H_ #define _PG_TSTYPE_H_
#include "fmgr.h" #include "fmgr.h"
#include "utils/pg_crc.h"
/* /*
* TSVector type. * TSVector type.
...@@ -27,8 +29,8 @@ typedef struct ...@@ -27,8 +29,8 @@ typedef struct
pos:20; /* MAX 1Mb */ pos:20; /* MAX 1Mb */
} WordEntry; } WordEntry;
#define MAXSTRLEN ( 1<<11 ) #define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( 1<<20 ) #define MAXSTRPOS ( (1<<20) - 1)
/* /*
* Equivalent to * Equivalent to
...@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos; ...@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
typedef struct typedef struct
{ {
int32 vl_len_; /* varlena header (do not touch directly!) */ int32 vl_len_; /* varlena header (do not touch directly!) */
int4 size; uint32 size;
char data[1]; char data[1];
} TSVectorData; } TSVectorData;
...@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS); ...@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
/* /*
* TSQuery * TSQuery
*
*
*/ */
typedef int8 QueryItemType;
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3 /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
/* /*
* QueryItem is one node in tsquery - operator or operand. * QueryItem is one node in tsquery - operator or operand.
*/ */
typedef struct
typedef struct QueryItem
{ {
int8 type; /* operand or kind of operator */ QueryItemType type; /* operand or kind of operator (ts_tokentype) */
int8 weight; /* weights of operand to search */ int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
int2 left; /* pointer to left operand Right operand is * if it =0 then any weight are allowed */
* item + 1, left operand is placed int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type,
* item+item->left */ * but we use comparisons to signed integers in the code.
int4 val; /* crc32 value of operand's value */ * They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */ /* pointer to text value of operand, must correlate with WordEntry */
uint32 uint32
istrue:1, /* use for ranking in Cover */ istrue:1, /* use for ranking in Cover */
length:11, length:11,
distance:20; distance:20;
} QueryItem; } QueryOperand;
/* Legal values for QueryOperator.oper */
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
int16 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
/* /*
* It's impossible to use offsetof(QueryItem, istrue) * Note: TSQuery is 4-byte aligned, so make sure there are no fields
* inside QueryItem requiring 8-byte alignment, like int64.
*/ */
#define HDRSIZEQI ( sizeof(int8) + sizeof(int8) + sizeof(int2) + sizeof(int4) ) typedef union
{
QueryItemType type;
QueryOperator operator;
QueryOperand operand;
} QueryItem;
/* /*
* Storage: * Storage:
* (len)(size)(array of ITEM)(array of operand in text form) * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
* operands are always finished by '\0'
*/ */
typedef struct typedef struct
...@@ -182,13 +213,17 @@ typedef struct ...@@ -182,13 +213,17 @@ typedef struct
typedef TSQueryData *TSQuery; typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int4) ) #define HDRSIZETQ ( VARHDRSZ + sizeof(int4) )
#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
#define OPERANDSSIZE(x) ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) /* Computes the size of header and all QueryItems. size is the number of
* QueryItems, and lenofoperand is the total length of all operands
*/
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/* /*
* fmgr interface macros * fmgr interface macros
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -14,65 +14,41 @@ ...@@ -14,65 +14,41 @@
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_public.h" #include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
/* /*
* Common parse definitions for tsvector and tsquery * Common parse definitions for tsvector and tsquery
*/ */
typedef struct /* tsvector parser support. */
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
} WordEntryIN;
typedef struct
{
char *prsbuf;
char *word;
char *curpos;
int4 len;
int4 state;
int4 alen;
WordEntryPos *pos;
bool oprisdelim;
} TSVectorParseState;
extern bool gettoken_tsvector(TSVectorParseState *state);
struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */ struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
typedef struct extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
{ extern void reset_tsvector_parser(TSVectorParseState state, char *input);
char *buffer; /* entire string we are scanning */ extern bool gettoken_tsvector(TSVectorParseState state,
char *buf; /* current scan point */ char **token, int *len,
int4 state; WordEntryPos **pos, int *poslen,
int4 count; char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
/* reverse polish notation in list (for temporary usage) */ /* parse_tsquery */
struct ParseQueryNode *str;
/* number in str */ struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
int4 num; typedef struct TSQueryParserStateData *TSQueryParserState;
/* text-form operand */ typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
int4 lenop;
int4 sumlen;
char *op;
char *curop;
/* state for value's parser */
TSVectorParseState valstate;
/* tscfg */
Oid cfg_id;
} TSQueryParserState;
extern TSQuery parse_tsquery(char *buf, extern TSQuery parse_tsquery(char *buf,
void (*pushval) (TSQueryParserState *, int, char *, int, int2), PushFunction pushval,
Oid cfg_id, bool isplain); void *opaque, bool isplain);
extern void pushval_asis(TSQueryParserState * state,
int type, char *strval, int lenval, int2 weight); /* Functions for use by PushFunction implementations */
extern void pushquery(TSQueryParserState * state, int4 type, int4 val, extern void pushValue(TSQueryParserState state,
int4 distance, int4 lenval, int2 weight); char *strval, int lenval, int2 weight);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 operator);
/* /*
* parse plain text and lexize words * parse plain text and lexize words
...@@ -84,6 +60,11 @@ typedef struct ...@@ -84,6 +60,11 @@ typedef struct
union union
{ {
uint16 pos; uint16 pos;
/*
* When apos array is used, apos[0] is the number of elements
* in the array (excluding apos[0]), and alen is the allocated
* size of the array.
*/
uint16 *apos; uint16 *apos;
} pos; } pos;
char *word; char *word;
...@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, ...@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen); char *buf, int4 buflen);
extern text *generateHeadline(HeadlineParsedText * prs); extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing
*/
#define END 0
#define ERR 1
#define VAL 2
#define OPR 3
#define OPEN 4
#define CLOSE 5
#define VALSTOP 6 /* for stop words */
/* /*
* Common check function for tsvector @@ tsquery * Common check function for tsvector @@ tsquery
*/ */
extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot, extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val)); bool (*chkcond) (void *checkval, QueryOperand * val));
/* /*
* Useful conversion macros * Useful conversion macros
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment