Commit e5be8998 authored by Teodor Sigaev

Refactoring by Heikki Linnakangas <heikki@enterprisedb.com>, with
small edits by me.

- Break the QueryItem struct into QueryOperator and QueryOperand.
  Type was really the only common field between them. QueryItem still
  exists, and is used in the TSQuery struct as before, but it's now a
  union of the two. Many other changes followed from that, like the
  separation of the pushval_asis function into pushValue, pushOperator
  and pushStop.

- Moved some structs that were for internal use only from header files
  to the right .c-files.

- Moved tsvector parser to a new tsvector_parser.c file. Parser code was
  about half of the size of tsvector.c, it's also used from tsquery.c, and
  it has some data structures of its own, so it seems better to separate
  it. Cleaned up the API so that TSVectorParserState is not accessed from
  outside tsvector_parser.c.

- Separated enumerations (#defines, really) used for QueryItem.type
  field and as return codes from gettoken_query. It was just accidental
  code sharing.

- Removed ParseQueryNode struct used internally by makepol and friends.
  push*-functions now construct QueryItems directly.

- Changed int4 variables to just ints for variables like "i" or "array
  size", where the storage-size was not significant.
parent da124840
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
/*
* This function is used for morph parsing
* This function is used for morph parsing.
*
* The value is passed to parsetext which will call the right dictionary to
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
* to the stack.
*
* All words belonging to the same variant are pushed as an ANDed list,
* and different variants are ORred together.
*/
static void
pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
{
int4 count = 0;
ParsedText prs;
......@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
cntvar = 0,
cntpos = 0,
cnt = 0;
Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
prs.lenwords = 4;
prs.curwords = 0;
prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(state->cfg_id, &prs, strval, lenval);
parsetext(cfg_id, &prs, strval, lenval);
if (prs.curwords > 0)
{
......@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pushValue(state, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
if (cnt)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
pushOperator(state, OP_AND);
cnt++;
count++;
}
if (cntvar)
pushquery(state, OPR, (int4) '|', 0, 0, 0);
pushOperator(state, OP_OR);
cntvar++;
}
if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
pushOperator(state, OP_AND);
cntpos++;
}
......@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
}
else
pushval_asis(state, VALSTOP, NULL, 0, 0);
pushStop(state);
}
Datum
......@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res;
int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
......@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
QueryItem *res;
int4 len;
query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
if (query->size == 0)
PG_RETURN_TSQUERY(query);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
}
/*
* Parse string and lexize words
* Parse string and lexize words.
*
* prs will be filled in.
*/
void
parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
{
int type,
lenlemm;
......@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
* Headline framework
*/
static void
hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
{
while (prs->curwords >= prs->lenwords)
{
......@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
word = &(prs->words[prs->curwords - 1]);
for (i = 0; i < query->size; i++)
{
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
if (item->type == QI_VAL &&
item->operand.length == buflen &&
strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
{
if (word->item)
{
memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
prs->words[prs->curwords].item = item;
prs->words[prs->curwords].item = &item->operand;
prs->words[prs->curwords].repeated = 1;
prs->curwords++;
}
else
word->item = item;
word->item = &item->operand;
}
item++;
}
......@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
}
void
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
{
int type,
lenlemm;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1575,7 +1575,7 @@ typedef struct
} hlCheck;
static bool
checkcondition_HL(void *checkval, QueryItem * val)
checkcondition_HL(void *checkval, QueryOperand * val)
{
int i;
......@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = pos; i < prs->curwords; i++)
{
if (prs->words[i].item == item)
if (prs->words[i].item == &item->operand)
{
if (i > *q)
*q = i;
......@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
item = GETQUERY(query);
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
if (item->type != QI_VAL)
{
item++;
continue;
}
for (i = *q; i >= pos; i--)
{
if (prs->words[i].item == item)
if (prs->words[i].item == &item->operand)
{
if (i < *p)
*p = i;
......
#
# Makefile for utils/adt
#
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
#
subdir = src/backend/utils/adt
......@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \
tsvector.o tsvector_op.o tsvector_parser.o\
uuid.o xml.o
like.o: like.c like_match.c
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
{
text *txt;
QueryOperand *val = &item[i].operand;
txt = (text *) palloc(VARHDRSZ + item[i].length);
txt = (text *) palloc(VARHDRSZ + val->length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
SET_VARSIZE(txt, VARHDRSZ + val->length);
memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"),
......@@ -116,11 +117,11 @@ typedef struct
} GinChkVal;
static bool
checkcondition_gin(void *checkval, QueryItem * val)
checkcondition_gin(void *checkval, QueryOperand * val)
{
GinChkVal *gcv = (GinChkVal *) checkval;
return gcv->mapped_check[val - gcv->frst];
return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
}
Datum
......@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
if (item[i].type == QI_VAL)
gcv.mapped_check[i] = check[j++];
res = TS_execute(
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -293,7 +293,7 @@ typedef struct
* is there value 'val' in array or not ?
*/
static bool
checkcondition_arr(void *checkval, QueryItem * val)
checkcondition_arr(void *checkval, QueryOperand * val)
{
int4 *StopLow = ((CHKVAL *) checkval)->arrb;
int4 *StopHigh = ((CHKVAL *) checkval)->arre;
......@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->val)
if (*StopMiddle == val->valcrc)
return (true);
else if (*StopMiddle < val->val)
else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
......@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
}
static bool
checkcondition_bit(void *checkval, QueryItem * val)
checkcondition_bit(void *checkval, QueryOperand * val)
{
return GETBIT(checkval, HASHVAL(val->val));
return GETBIT(checkval, HASHVAL(val->valcrc));
}
Datum
......
This diff is collapsed.
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -35,20 +35,23 @@ maketree(QueryItem * in)
node->valnode = in;
node->right = node->left = NULL;
if (in->type == OPR)
if (in->type == QI_OPR)
{
node->right = maketree(in + 1);
if (in->val != (int4) '!')
node->left = maketree(in + in->left);
if (in->operator.oper != OP_NOT)
node->left = maketree(in + in->operator.left);
}
return node;
}
/*
* Internal state for plaintree and plainnode
*/
typedef struct
{
QueryItem *ptr;
int4 len;
int4 cur;
int len; /* allocated size of ptr */
int cur; /* number of elements in ptr */
} PLAINTREE;
static void
......@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
}
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
state->cur++;
else if (node->valnode->val == (int4) '!')
else if (node->valnode->operator.oper == OP_NOT)
{
state->ptr[state->cur].left = 1;
state->ptr[state->cur].operator.left = 1;
state->cur++;
plainnode(state, node->right);
}
else
{
int4 cur = state->cur;
int cur = state->cur;
state->cur++;
plainnode(state, node->right);
state->ptr[cur].left = state->cur - cur;
state->ptr[cur].operator.left = state->cur - cur;
plainnode(state, node->left);
}
pfree(node);
}
/*
* make plain view of tree from 'normal' view of tree
* make plain view of tree from a NODE-tree representation
*/
static QueryItem *
plaintree(NODE * root, int4 *len)
plaintree(NODE * root, int *len)
{
PLAINTREE pl;
pl.cur = 0;
pl.len = 16;
if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
{
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
plainnode(&pl, root);
......@@ -122,17 +125,17 @@ freetree(NODE * node)
static NODE *
clean_NOT_intree(NODE * node)
{
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
return node;
if (node->valnode->val == (int4) '!')
if (node->valnode->operator.oper == OP_NOT)
{
freetree(node);
return NULL;
}
/* operator & or | */
if (node->valnode->val == (int4) '|')
if (node->valnode->operator.oper == OP_OR)
{
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL)
......@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
else
{
NODE *res = node;
Assert(node->valnode->operator.oper == OP_AND);
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
......@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
}
QueryItem *
clean_NOT(QueryItem * ptr, int4 *len)
clean_NOT(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
......@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
#undef V_UNKNOWN
#endif
#define V_UNKNOWN 0
#define V_TRUE 1
#define V_FALSE 2
#define V_STOP 3
/*
* output values for result output parameter of clean_fakeval_intree
*/
#define V_UNKNOWN 0 /* the expression can't be evaluated statically */
#define V_TRUE 1 /* the expression is always true (not implemented) */
#define V_FALSE 2 /* the expression is always false (not implemented) */
#define V_STOP 3 /* the expression is a stop word */
/*
* Clean query tree from values which is always in
......@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
char lresult = V_UNKNOWN,
rresult = V_UNKNOWN;
if (node->valnode->type == VAL)
if (node->valnode->type == QI_VAL)
return node;
else if (node->valnode->type == VALSTOP)
else
if (node->valnode->type == QI_VALSTOP)
{
pfree(node);
*result = V_STOP;
return NULL;
}
Assert(node->valnode->type == QI_OPR);
if (node->valnode->val == (int4) '!')
if (node->valnode->operator.oper == OP_NOT)
{
node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right)
......@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult);
if (lresult == V_STOP && rresult == V_STOP)
{
freetree(node);
......@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
}
QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len)
clean_fakeval(QueryItem * ptr, int *len)
{
NODE *root = maketree(ptr);
char result = V_UNKNOWN;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
}
static QTNode *
join_tsqueries(TSQuery a, TSQuery b)
join_tsqueries(TSQuery a, TSQuery b, int8 operator)
{
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR;
res->valnode->type = QI_OPR;
res->valnode->operator.oper = operator;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
......@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
res = join_tsqueries(a, b);
res->valnode->val = '&';
res = join_tsqueries(a, b, OP_AND);
query = QTN2QT(res);
......@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(a);
}
res = join_tsqueries(a, b);
res->valnode->val = '|';
res = join_tsqueries(a, b, OP_OR);
query = QTN2QT(res);
......@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
res->flags |= QTN_NEEDFREE;
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = OPR;
res->valnode->val = '!';
res->valnode->type = QI_OPR;
res->valnode->operator.oper = OP_NOT;
res->child = (QTNode **) palloc0(sizeof(QTNode *));
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
......@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
for (i = 0; i < a->size; i++)
{
if (ptr->type == VAL)
sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
if (ptr->type == QI_VAL)
sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
ptr++;
}
......@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
for (i = 0; i < ex->size; i++)
{
iq = GETQUERY(query);
if (ie[i].type != VAL)
if (ie[i].type != QI_VAL)
continue;
for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val)
if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
{
j = query->size + 1;
break;
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
return 1;
}
/*
* If node is equal to ex, replace it with subs. Replacement is actually done
* by returning either node or a copy of subs.
*/
static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
if ((node->sign & ex->sign) != ex->sign ||
node->valnode->type != ex->valnode->type)
return node;
if (node->flags & QTN_NOCHANGE)
return node;
if (node->valnode->type == OPR)
if (node->valnode->type == QI_OPR)
{
if (node->valnode->operator.oper != ex->valnode->operator.oper)
return node;
if (node->nchild == ex->nchild)
{
if (QTNEq(node, ex))
......@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
}
else if (node->nchild > ex->nchild)
{
/*
* AND and OR are commutative and associative, so we check if a subset of the
* children match. For example, if tnode is A | B | C, and
* ex is B | C, we have a match after we convert tnode to
* A | (B | C).
*/
int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
......@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
pfree(counters);
}
}
else if (QTNEq(node, ex))
else
{
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
Assert(node->valnode->type == QI_VAL);
if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
return node;
else if (QTNEq(node, ex))
{
node = NULL;
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
{
node = NULL;
}
*isfind = true;
}
*isfind = true;
}
return node;
......@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
root = findeq(root, ex, subs, isfind);
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
{
int i;
......@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
if (!root)
return NULL;
if (root->valnode->type == OPR)
if (root->valnode->type == QI_OPR)
{
int i,
j = 0;
......@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
root->nchild = j;
if (root->valnode->val == (int4) '!' && root->nchild == 0)
if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
{
QTNFree(root);
root = NULL;
......@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements");
elog(ERROR, "array must have three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type");
elog(ERROR, "array must contain tsquery elements");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
......@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
tree = findsubquery(tree, qex, subs, NULL);
QTNFree(qex);
QTNFree(subs);
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -17,7 +17,6 @@
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
QTNode *
QT2QTN(QueryItem * in, char *operand)
{
......@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
node->valnode = in;
if (in->type == OPR)
if (in->type == QI_OPR)
{
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
node->child[0] = QT2QTN(in + 1, operand);
node->sign = node->child[0]->sign;
if (in->val == (int4) '!')
if (in->operator.oper == OP_NOT)
node->nchild = 1;
else
{
node->nchild = 2;
node->child[1] = QT2QTN(in + in->left, operand);
node->child[1] = QT2QTN(in + in->operator.left, operand);
node->sign |= node->child[1]->sign;
}
}
else if (operand)
{
node->word = operand + in->distance;
node->sign = 1 << (in->val % 32);
node->word = operand + in->operand.distance;
node->sign = 1 << (in->operand.valcrc % 32);
}
return node;
......@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
if (!in)
return;
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word);
if (in->child)
{
if (in->valnode)
{
if (in->valnode->type == OPR && in->nchild > 0)
if (in->valnode->type == QI_OPR && in->nchild > 0)
{
int i;
......@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
{
if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
else if (an->valnode->val != bn->valnode->val)
return (an->valnode->val > bn->valnode->val) ? -1 : 1;
else if (an->valnode->type == VAL)
{
if (an->valnode->length == bn->valnode->length)
return strncmp(an->word, bn->word, an->valnode->length);
else
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
}
else if (an->nchild != bn->nchild)
if (an->valnode->type == QI_OPR)
{
return (an->nchild > bn->nchild) ? -1 : 1;
QueryOperator *ao = &an->valnode->operator;
QueryOperator *bo = &bn->valnode->operator;
if(ao->oper != bo->oper)
return (ao->oper > bo->oper) ? -1 : 1;
if (an->nchild != bn->nchild)
return (an->nchild > bn->nchild) ? -1 : 1;
{
int i,
res;
for (i = 0; i < an->nchild; i++)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
return 0;
}
else
{
int i,
res;
QueryOperand *ao = &an->valnode->operand;
QueryOperand *bo = &bn->valnode->operand;
for (i = 0; i < an->nchild; i++)
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
return res;
}
Assert(an->valnode->type == QI_VAL);
if (ao->valcrc != bo->valcrc)
{
return (ao->valcrc > bo->valcrc) ? -1 : 1;
}
return 0;
if (ao->length == bo->length)
return strncmp(an->word, bn->word, ao->length);
else
return (ao->length > bo->length) ? -1 : 1;
}
}
static int
......@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
......@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
return (QTNodeCompare(a, b) == 0) ? true : false;
}
/*
* Remove unnecessary intermediate nodes. For example:
*
*      OR                 OR
*     /  \              / | \
*    a    OR     ->    a  b  c
*        /  \
*       b    c
*/
void
QTNTernary(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
......@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
for (i = 0; i < in->nchild; i++)
{
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
QTNode *cc = in->child[i];
if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
{
QTNode *cc = in->child[i];
int oldnchild = in->nchild;
in->nchild += cc->nchild - 1;
......@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
i += cc->nchild - 1;
if(cc->flags & QTN_NEEDFREE)
pfree(cc->valnode);
pfree(cc);
}
}
}
/*
* Convert a tree to binary tree by inserting intermediate nodes.
* (Opposite of QTNTernary)
*/
void
QTNBinary(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
if (in->valnode->type != QI_OPR)
return;
for (i = 0; i < in->nchild; i++)
......@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
nn->sign = nn->child[0]->sign | nn->child[1]->sign;
nn->valnode->type = in->valnode->type;
nn->valnode->val = in->valnode->val;
nn->valnode->operator.oper = in->valnode->operator.oper;
in->child[0] = nn;
in->child[1] = in->child[in->nchild - 1];
......@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
}
}
/*
* Count the number of nodes in the tree (*nnode) and the total length of
* the operand strings (*sumlen), including '\0' terminators.
*/
static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
cntsize(QTNode * in, int *sumlen, int *nnode)
{
*nnode += 1;
if (in->valnode->type == OPR)
if (in->valnode->type == QI_OPR)
{
int i;
......@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
}
else
{
*sumlen += in->valnode->length + 1;
*sumlen += in->valnode->operand.length + 1;
}
}
......@@ -234,22 +266,26 @@ typedef struct
} QTN2QTState;
static void
fillQT(QTN2QTState * state, QTNode * in)
fillQT(QTN2QTState *state, QTNode *in)
{
*(state->curitem) = *(in->valnode);
if (in->valnode->type == VAL)
if (in->valnode->type == QI_VAL)
{
memcpy(state->curoperand, in->word, in->valnode->length);
state->curitem->distance = state->curoperand - state->operand;
state->curoperand[in->valnode->length] = '\0';
state->curoperand += in->valnode->length + 1;
memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
memcpy(state->curoperand, in->word, in->valnode->operand.length);
state->curitem->operand.distance = state->curoperand - state->operand;
state->curoperand[in->valnode->operand.length] = '\0';
state->curoperand += in->valnode->operand.length + 1;
state->curitem++;
}
else
{
QueryItem *curitem = state->curitem;
Assert(in->valnode->type == QI_OPR);
memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
Assert(in->nchild <= 2);
state->curitem++;
......@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
if (in->nchild == 2)
{
curitem->left = state->curitem - curitem;
curitem->operator.left = state->curitem - curitem;
fillQT(state, in->child[1]);
}
}
......@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
*(out->valnode) = *(in->valnode);
out->flags |= QTN_NEEDFREE;
if (in->valnode->type == VAL)
if (in->valnode->type == QI_VAL)
{
out->word = palloc(in->valnode->length + 1);
memcpy(out->word, in->word, in->valnode->length);
out->word[in->valnode->length] = '\0';
out->word = palloc(in->valnode->operand.length + 1);
memcpy(out->word, in->word, in->valnode->operand.length);
out->word[in->valnode->operand.length] = '\0';
out->flags |= QTN_WORDFREE;
}
else
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -68,7 +68,7 @@ cnt_length(TSVector t)
}
static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
{
if (ptr->len == item->length)
return strncmp(
......@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
}
static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item)
find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
{
WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
......@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
}
/*
* sort QueryOperands by (length, word)
*/
static int
compareQueryItem(const void *a, const void *b, void *arg)
compareQueryOperand(const void *a, const void *b, void *arg)
{
char *operand = (char *) arg;
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
return strncmp(operand + (*(QueryItem **) a)->distance,
operand + (*(QueryItem **) b)->distance,
(*(QueryItem **) b)->length);
if (qa->length == qb->length)
return strncmp(operand + qa->distance,
operand + qb->distance,
qb->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
return (qa->length > qb->length) ? 1 : -1;
}
static QueryItem **
SortAndUniqItems(char *operand, QueryItem * item, int *size)
/*
* Returns a sorted, de-duplicated array of QueryOperands in a query.
* The returned QueryOperands are pointers to the original QueryOperands
* in the query.
*
* Length of the returned array is stored in *size
*/
static QueryOperand **
SortAndUniqItems(TSQuery q, int *size)
{
QueryItem **res,
char *operand = GETOPERAND(q);
QueryItem * item = GETQUERY(q);
QueryOperand **res,
**ptr,
**prevptr;
ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
/* Collect all operands from the tree to res */
while ((*size)--)
{
if (item->type == VAL)
if (item->type == QI_VAL)
{
*ptr = item;
*ptr = (QueryOperand *) item;
ptr++;
}
item++;
......@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
if (*size < 2)
return res;
qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
ptr = res + 1;
prevptr = res;
/* remove duplicates */
while (ptr - res < *size)
{
if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{
prevptr++;
*prevptr = *ptr;
......@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
lenct,
dist;
float res = -1.0;
QueryItem **item;
QueryOperand **item;
int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
item = SortAndUniqItems(q, &size);
if (size < 2)
{
pfree(item);
......@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
j,
i;
float res = 0.0;
QueryItem **item;
QueryOperand **item;
int size = q->size;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++)
{
......@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
if (!t->size || !q->size)
return 0.0;
res = (item->type != VAL && item->val == (int4) '&') ?
/* XXX: What about NOT? */
res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0)
......@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
}
static bool
checkcondition_QueryItem(void *checkval, QueryItem * val)
checkcondition_QueryOperand(void *checkval, QueryOperand *val)
{
return (bool) (val->istrue);
}
......@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
/* reset istrue flag */
for (i = 0; i < query->size; i++)
{
if (item->type == VAL)
item->istrue = 0;
if (item->type == QI_VAL)
item->operand.istrue = 0;
item++;
}
}
......@@ -484,7 +501,7 @@ typedef struct
static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
{
DocRepresentation *ptr;
int lastpos = ext->pos;
......@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr - doc < len)
{
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
{
if(ptr->item[i]->type == QI_VAL)
ptr->item[i]->operand.istrue = 1;
}
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
{
if (ptr->pos > ext->q)
{
......@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
while (ptr >= doc + ext->pos)
{
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
if(ptr->item[i]->type == QI_VAL) /* XXX */
ptr->item[i]->operand.istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
{
if (ptr->pos < ext->p)
{
......@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (i = 0; i < query->size; i++)
{
if (item[i].type != VAL || item[i].istrue)
QueryOperand *curoperand;
if (item[i].type != QI_VAL)
continue;
curoperand = &item[i].operand;
if(item[i].operand.istrue)
continue;
entry = find_wordentry(txt, query, &(item[i]));
entry = find_wordentry(txt, query, curoperand);
if (!entry)
continue;
......@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
{
if (j == 0)
{
QueryItem *kptr,
*iptr = item + i;
int k;
doc[cur].needfree = false;
......@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
for (k = 0; k < query->size; k++)
{
kptr = item + k;
QueryOperand *kptr = &item[k].operand;
QueryOperand *iptr = &item[i].operand;
if (k == i ||
(item[k].type == VAL &&
compareQueryItem(&kptr, &iptr, operand) == 0))
(item[k].type == QI_VAL &&
compareQueryOperand(&kptr, &iptr, operand) == 0))
{
/* if k == i, we've already checked above that its type == QI_VAL */
doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++;
kptr->istrue = 1;
item[k].operand.istrue = 1;
}
}
}
......@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
if (cur > 0)
{
if (cur > 1)
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
return doc;
}
......@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(3);
float res;
......@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
TSVector txt = PG_GETARG_TSVECTOR(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
TSQuery query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
......@@ -778,7 +806,7 @@ Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
int method = PG_GETARG_INT32(2);
float res;
......@@ -793,7 +821,7 @@ Datum
ts_rankcd_tt(PG_FUNCTION_ARGS)
{
TSVector txt = PG_GETARG_TSVECTOR(0);
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
TSQuery query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
float res;
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -66,6 +66,9 @@ typedef struct
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
/*
* Order: haspos, len, word, for all positions (pos, weight)
*/
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
{
......@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
* compare 2 string values
*/
static int4
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
{
if (ptr->len == item->length)
return strncmp(
......@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
* check weight info
*/
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
......@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
* is there value 'val' in array or not ?
*/
static bool
checkcondition_str(void *checkval, QueryItem * val)
checkcondition_str(void *checkval, QueryOperand * val)
{
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle;
int difference;
......@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
difference = ValCompare(chkval, StopMiddle, val);
if (difference == 0)
return (val->weight && StopMiddle->haspos) ?
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
checkclass_str(chkval, StopMiddle, val) : true;
else if (difference < 0)
StopLow = StopMiddle + 1;
else
......@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
}
/*
* check for boolean condition
* check for boolean condition.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query.
*
*/
bool
TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val))
bool (*chkcond) (void *checkval, QueryOperand * val))
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == VAL)
return chkcond(checkval, curitem);
else if (curitem->val == (int4) '!')
{
return (calcnot) ?
!TS_execute(curitem + 1, checkval, calcnot, chkcond)
: true;
}
else if (curitem->val == (int4) '&')
if (curitem->type == QI_VAL)
return chkcond(checkval, (QueryOperand *) curitem);
switch(curitem->operator.oper)
{
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return false;
}
else
{ /* |-operator */
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
case OP_NOT:
if (calcnot)
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return false;
case OP_OR:
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
default:
elog(ERROR, "unknown operator %d", curitem->operator.oper);
}
/* not reachable, but keep compiler quiet */
return false;
}
......
/*-------------------------------------------------------------------------
*
* tsvector_parser.c
* Parser for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * Private state of the tsvector parser. Code outside this file only sees
 * the opaque TSVectorParseState pointer declared in ts_utils.h.
 */
struct TSVectorParseStateData
{
char *prsbuf; /* next character of the input string to be scanned */
char *word; /* buffer to hold the current word */
int len; /* size in bytes allocated for 'word' */
bool oprisdelim; /* treat ! | & ( ) as delimiters in addition to whitespace */
};
/*
 * Creates a parser that will scan 'input'.
 *
 * When oprisdelim is true, the characters ! | & ( ) terminate a word in
 * the same way whitespace does.
 *
 * The returned state is palloc'd; release it with close_tsvector_parser.
 */
TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim)
{
	TSVectorParseState parser = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));

	/* Start with a small word buffer; RESIZEPRSBUF enlarges it on demand. */
	parser->len = 32;
	parser->word = (char *) palloc(parser->len);

	parser->prsbuf = input;
	parser->oprisdelim = oprisdelim;

	return parser;
}
/*
 * Reinitializes parser for parsing 'input', instead of previous input.
 *
 * Only the scan position is changed: the word buffer (and its current
 * allocated size) and the oprisdelim setting chosen in
 * init_tsvector_parser are kept as they are.
 */
void
reset_tsvector_parser(TSVectorParseState state, char *input)
{
state->prsbuf = input;
}
/*
 * Releases a parser created by init_tsvector_parser: first its word
 * buffer, then the state struct itself. 'state' must not be used
 * afterwards.
 */
void
close_tsvector_parser(TSVectorParseState state)
{
	char	   *wordbuf = state->word;

	pfree(wordbuf);
	pfree(state);
}
/*
 * Makes sure the word buffer has room for one more character of the
 * database encoding after 'curpos'. If not, the buffer is doubled and
 * 'curpos' is re-pointed at the same offset in the (possibly moved) buffer.
 *
 * Expects 'state' and 'curpos' to be in scope at the point of use.
 */
#define RESIZEPRSBUF \
do { \
	int			used = curpos - state->word; \
	\
	if ( used + pg_database_encoding_max_length() >= state->len ) \
	{ \
		state->len *= 2; \
		state->word = (char *) repalloc( (void *) state->word, state->len ); \
		curpos = state->word + used; \
	} \
} while (0)
/*
 * True if the character at x is a single-byte character and is one of
 * the tsquery operator/parenthesis characters: ! & | ( )
 */
#define ISOPERATOR(x) \
	( pg_mblen(x) == 1 && \
	  ( *(x) == '(' || *(x) == ')' || *(x) == '!' || *(x) == '&' || *(x) == '|' ) )
/*
 * Fills the output parameters of gettoken_tsvector and returns true from it.
 *
 * strval, lenval and endptr are each filled only if non-NULL. The
 * collected position array is handed over through pos_ptr/poslen when
 * pos_ptr is non-NULL; otherwise it is freed here, if one was allocated.
 */
#define RETURN_TOKEN \
do { \
	if (strval != NULL) \
		*strval = state->word; \
	if (lenval != NULL) \
		*lenval = curpos - state->word; \
	if (endptr != NULL) \
		*endptr = state->prsbuf; \
	\
	if (pos_ptr == NULL) \
	{ \
		if (pos != NULL) \
			pfree(pos); \
	} \
	else \
	{ \
		*pos_ptr = pos; \
		*poslen = npos; \
	} \
	return true; \
} while(0)
/* State codes used in gettoken_tsvector */
enum
{
	WAITWORD = 1,				/* skipping whitespace until a token starts */
	WAITENDWORD = 2,			/* inside an unquoted word */
	WAITNEXTCHAR = 3,			/* saw a backslash; next char is copied as-is */
	WAITENDCMPLX = 4,			/* inside a quoted word */
	WAITPOSINFO = 5,			/* quoted word ended; ':' starts position info */
	INPOSINFO = 6,				/* expecting a position number */
	WAITPOSDELIM = 7,			/* after a position: ',', weight or end of token */
	WAITCHARCMPLX = 8			/* saw a quote inside a quoted word */
};
/*
 * Get the next token from the string being parsed.
 *
 * Returns false if the end of the input string is reached before a token
 * starts. Otherwise returns true and fills in the output parameters:
 *
 * *strval		the token, '\0'-terminated. It points into the parser's own
 *				word buffer, which is reused by the next call.
 * *lenval		length of *strval in bytes
 * *pos_ptr		pointer to a palloc'd array of positions and weights
 *				associated with the token, or NULL if the token had none.
 *				If the caller is not interested in the information, NULL
 *				can be supplied. Otherwise the caller is responsible for
 *				pfreeing the array.
 * *poslen		number of elements in *pos_ptr; must be supplied whenever
 *				pos_ptr is.
 * *endptr		scan position in the input when the token was returned
 *
 * strval, lenval and endptr may each be NULL if the caller has no use for
 * them.
 */
bool
gettoken_tsvector(TSVectorParseState state,
				  char **strval, int *lenval,
				  WordEntryPos **pos_ptr, int *poslen,
				  char **endptr)
{
	int			resume_state = 0;	/* state to go back to after an escape */
	char	   *curpos = state->word;
	int			cur_state = WAITWORD;

	/*
	 * pos collects the comma-delimited list of positions that follows the
	 * actual token.
	 */
	WordEntryPos *pos = NULL;
	int			npos = 0;		/* elements of pos used */
	int			pos_alloc = 0;	/* allocated size of pos */

	for (;;)
	{
		switch (cur_state)
		{
			case WAITWORD:
				if (*(state->prsbuf) == '\0')
					return false;
				else if (t_iseq(state->prsbuf, '\''))
					cur_state = WAITENDCMPLX;
				else if (t_iseq(state->prsbuf, '\\'))
				{
					cur_state = WAITNEXTCHAR;
					resume_state = WAITENDWORD;
				}
				else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				else if (!t_isspace(state->prsbuf))
				{
					/* first character of an unquoted word */
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					cur_state = WAITENDWORD;
				}
				break;

			case WAITNEXTCHAR:
				if (*(state->prsbuf) == '\0')
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("there is no escaped character")));
				else
				{
					/* copy the escaped character verbatim */
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					Assert(resume_state != 0);
					cur_state = resume_state;
				}
				break;

			case WAITENDWORD:
				if (t_iseq(state->prsbuf, '\\'))
				{
					cur_state = WAITNEXTCHAR;
					resume_state = WAITENDWORD;
				}
				else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
						 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
				{
					/* delimiter: the word is complete */
					RESIZEPRSBUF;
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					*(curpos) = '\0';
					RETURN_TOKEN;
				}
				else if (t_iseq(state->prsbuf, ':'))
				{
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					*(curpos) = '\0';
					if (state->oprisdelim)
						RETURN_TOKEN;
					else
						cur_state = INPOSINFO;
				}
				else
				{
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
				}
				break;

			case WAITENDCMPLX:
				if (t_iseq(state->prsbuf, '\''))
				{
					cur_state = WAITCHARCMPLX;
				}
				else if (t_iseq(state->prsbuf, '\\'))
				{
					cur_state = WAITNEXTCHAR;
					resume_state = WAITENDCMPLX;
				}
				else if (*(state->prsbuf) == '\0')
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				else
				{
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
				}
				break;

			case WAITCHARCMPLX:
				if (t_iseq(state->prsbuf, '\''))
				{
					/* doubled quote: a literal quote inside the word */
					RESIZEPRSBUF;
					COPYCHAR(curpos, state->prsbuf);
					curpos += pg_mblen(state->prsbuf);
					cur_state = WAITENDCMPLX;
				}
				else
				{
					/* the previous quote closed the word */
					RESIZEPRSBUF;
					*(curpos) = '\0';
					if (curpos == state->word)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					if (state->oprisdelim)
					{
						/* state->prsbuf+=pg_mblen(state->prsbuf); */
						RETURN_TOKEN;
					}
					else
						cur_state = WAITPOSINFO;
					continue;	/* recheck current character */
				}
				break;

			case WAITPOSINFO:
				if (t_iseq(state->prsbuf, ':'))
					cur_state = INPOSINFO;
				else
					RETURN_TOKEN;
				break;

			case INPOSINFO:
				if (t_isdigit(state->prsbuf))
				{
					if (pos_alloc == 0)
					{
						pos_alloc = 4;
						pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * pos_alloc);
						npos = 0;
					}
					else if (npos + 1 >= pos_alloc)
					{
						pos_alloc *= 2;
						pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * pos_alloc);
					}
					npos++;
					WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
					if (WEP_GETPOS(pos[npos - 1]) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("wrong position info in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 0);
					cur_state = WAITPOSDELIM;
				}
				else
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				break;

			case WAITPOSDELIM:
				if (t_iseq(state->prsbuf, ','))
					cur_state = INPOSINFO;
				else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
				{
					if (WEP_GETWEIGHT(pos[npos - 1]))
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 3);
				}
				else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
				{
					if (WEP_GETWEIGHT(pos[npos - 1]))
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 2);
				}
				else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
				{
					if (WEP_GETWEIGHT(pos[npos - 1]))
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 1);
				}
				else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
				{
					if (WEP_GETWEIGHT(pos[npos - 1]))
						ereport(ERROR,
								(errcode(ERRCODE_SYNTAX_ERROR),
								 errmsg("syntax error in tsvector")));
					WEP_SETWEIGHT(pos[npos - 1], 0);
				}
				else if (t_isspace(state->prsbuf) ||
						 *(state->prsbuf) == '\0')
					RETURN_TOKEN;
				else if (!t_isdigit(state->prsbuf))
					ereport(ERROR,
							(errcode(ERRCODE_SYNTAX_ERROR),
							 errmsg("syntax error in tsvector")));
				/* remaining digits of the current position are just skipped */
				break;

			default:			/* internal error */
				elog(ERROR, "internal error in gettoken_tsvector");
				break;
		}

		/* get next char */
		state->prsbuf += pg_mblen(state->prsbuf);
	}

	/* not reachable, but keep compiler quiet */
	return false;
}
......@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -42,7 +42,7 @@ typedef struct
type:8,
len:16;
char *word;
QueryItem *item;
QueryOperand *item;
} HeadlineWordEntry;
typedef struct
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -13,6 +13,8 @@
#define _PG_TSTYPE_H_
#include "fmgr.h"
#include "utils/pg_crc.h"
/*
* TSVector type.
......@@ -27,8 +29,8 @@ typedef struct
pos:20; /* MAX 1Mb */
} WordEntry;
#define MAXSTRLEN ( 1<<11 )
#define MAXSTRPOS ( 1<<20 )
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
/*
* Equivalent to
......@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int4 size;
uint32 size;
char data[1];
} TSVectorData;
......@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
/*
* TSQuery
*
*
*/
typedef int8 QueryItemType;
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3 /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
/*
* QueryItem is one node in tsquery - operator or operand.
*/
typedef struct QueryItem
typedef struct
{
int8 type; /* operand or kind of operator */
int8 weight; /* weights of operand to search */
int2 left; /* pointer to left operand Right operand is
* item + 1, left operand is placed
* item+item->left */
int4 val; /* crc32 value of operand's value */
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
* If it is 0, then any weight is allowed. */
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type,
* but we use comparisons to signed integers in the code.
* They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */
uint32
istrue:1, /* use for ranking in Cover */
length:11,
distance:20;
} QueryItem;
} QueryOperand;
/* Legal values for QueryOperator.oper */
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
int16 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
/*
* It's impossible to use offsetof(QueryItem, istrue)
* Note: TSQuery is 4-byte aligned, so make sure there are no fields
* inside QueryItem requiring 8-byte alignment, like int64.
*/
#define HDRSIZEQI ( sizeof(int8) + sizeof(int8) + sizeof(int2) + sizeof(int4) )
typedef union
{
QueryItemType type;
QueryOperator operator;
QueryOperand operand;
} QueryItem;
/*
* Storage:
* (len)(size)(array of ITEM)(array of operand in text form)
* operands are always finished by '\0'
* (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
*/
typedef struct
......@@ -182,13 +213,17 @@ typedef struct
typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int4) )
#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
#define OPERANDSSIZE(x) ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/* Computes the size of header and all QueryItems. size is the number of
* QueryItems, and lenofoperand is the total length of all operands
*/
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/*
* fmgr interface macros
......
......@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -14,65 +14,41 @@
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
/*
* Common parse definitions for tsvector and tsquery
*/
typedef struct
{
WordEntry entry; /* should be first ! */
WordEntryPos *pos;
} WordEntryIN;
typedef struct
{
char *prsbuf;
char *word;
char *curpos;
int4 len;
int4 state;
int4 alen;
WordEntryPos *pos;
bool oprisdelim;
} TSVectorParseState;
extern bool gettoken_tsvector(TSVectorParseState *state);
/* tsvector parser support. */
struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
typedef struct
{
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
int4 state;
int4 count;
extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len,
WordEntryPos **pos, int *poslen,
char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
/* reverse polish notation in list (for temporary usage) */
struct ParseQueryNode *str;
/* parse_tsquery */
/* number in str */
int4 num;
struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
typedef struct TSQueryParserStateData *TSQueryParserState;
/* text-form operand */
int4 lenop;
int4 sumlen;
char *op;
char *curop;
/* state for value's parser */
TSVectorParseState valstate;
/* tscfg */
Oid cfg_id;
} TSQueryParserState;
typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
extern TSQuery parse_tsquery(char *buf,
void (*pushval) (TSQueryParserState *, int, char *, int, int2),
Oid cfg_id, bool isplain);
extern void pushval_asis(TSQueryParserState * state,
int type, char *strval, int lenval, int2 weight);
extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
int4 distance, int4 lenval, int2 weight);
PushFunction pushval,
void *opaque, bool isplain);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int2 weight);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 operator);
/*
* parse plain text and lexize words
......@@ -84,6 +60,11 @@ typedef struct
union
{
uint16 pos;
/*
* When apos array is used, apos[0] is the number of elements
* in the array (excluding apos[0]), and alen is the allocated
* size of the array.
*/
uint16 *apos;
} pos;
char *word;
......@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen);
extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing
*/
#define END 0
#define ERR 1
#define VAL 2
#define OPR 3
#define OPEN 4
#define CLOSE 5
#define VALSTOP 6 /* for stop words */
/*
* Common check function for tsvector @@ tsquery
*/
extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryItem * val));
bool (*chkcond) (void *checkval, QueryOperand * val));
/*
* Useful conversion macros
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment