Commit 21b748e7 authored by Teodor Sigaev

1. Fix loss of precision in rank with OR-ed lexemes

2. Allow tsquery_in to accept an empty (void) tsquery; this resolves the dump/restore problem with tsquery
parent fbff2e96
...@@ -747,20 +747,20 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright'); ...@@ -747,20 +747,20 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
select rank(' a:1 s:2C d g'::tsvector, 'a | s'); select rank(' a:1 s:2C d g'::tsvector, 'a | s');
rank rank
------ -----------
0.28 0.0911891
(1 row) (1 row)
select rank(' a:1 s:2B d g'::tsvector, 'a | s'); select rank(' a:1 s:2B d g'::tsvector, 'a | s');
rank rank
------ ----------
0.46 0.151982
(1 row) (1 row)
select rank(' a:1 s:2 d g'::tsvector, 'a | s'); select rank(' a:1 s:2 d g'::tsvector, 'a | s');
rank rank
------ -----------
0.19 0.0607927
(1 row) (1 row)
select rank(' a:1 s:2C d g'::tsvector, 'a & s'); select rank(' a:1 s:2C d g'::tsvector, 'a & s');
......
...@@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS); ...@@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS);
/* parser's states */ /* parser's states */
#define WAITOPERAND 1 #define WAITOPERAND 1
#define WAITOPERATOR 2 #define WAITOPERATOR 2
#define WAITFIRSTOPERAND 3
/* /*
* node of query tree, also used * node of query tree, also used
...@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 ...@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
{ {
switch (state->state) switch (state->state)
{ {
case WAITFIRSTOPERAND:
case WAITOPERAND: case WAITOPERAND:
if (*(state->buf) == '!') if (*(state->buf) == '!')
{ {
...@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 ...@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
else if (*(state->buf) != ' ') else if (*(state->buf) != ' ')
{ {
state->valstate.prsbuf = state->buf; state->valstate.prsbuf = state->buf;
state->state = WAITOPERATOR;
if (gettoken_tsvector(&(state->valstate))) if (gettoken_tsvector(&(state->valstate)))
{ {
*strval = state->valstate.word; *strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word; *lenval = state->valstate.curpos - state->valstate.word;
state->buf = get_weight(state->valstate.prsbuf, weight); state->buf = get_weight(state->valstate.prsbuf, weight);
state->state = WAITOPERATOR;
return VAL; return VAL;
} }
else if ( state->state == WAITFIRSTOPERAND )
return END;
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
...@@ -596,7 +600,7 @@ static QUERYTYPE * ...@@ -596,7 +600,7 @@ static QUERYTYPE *
/* init state */ /* init state */
state.buf = buf; state.buf = buf;
state.state = WAITOPERAND; state.state = WAITFIRSTOPERAND;
state.count = 0; state.count = 0;
state.num = 0; state.num = 0;
state.str = NULL; state.str = NULL;
...@@ -616,10 +620,13 @@ static QUERYTYPE * ...@@ -616,10 +620,13 @@ static QUERYTYPE *
/* parse query & make polish notation (postfix, but in reverse order) */ /* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval); makepol(&state, pushval);
pfree(state.valstate.word); pfree(state.valstate.word);
if (!state.num) if (!state.num) {
ereport(ERROR, elog(NOTICE, "Query doesn't contain lexem(s)");
(errcode(ERRCODE_SYNTAX_ERROR), query = (QUERYTYPE*)palloc( HDRSIZEQT );
errmsg("empty query"))); query->len = HDRSIZEQT;
query->size = 0;
return query;
}
/* make finish struct */ /* make finish struct */
commonlen = COMPUTESIZE(state.num, state.sumlen); commonlen = COMPUTESIZE(state.num, state.sumlen);
...@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS) ...@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(in, 1); PG_FREE_IF_COPY(in, 1);
query = queryin(str, pushval_morph, PG_GETARG_INT32(0)); query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
if ( query->size == 0 )
PG_RETURN_POINTER(query);
res = clean_fakeval_v2(GETQUERY(query), &len); res = clean_fakeval_v2(GETQUERY(query), &len);
if (!res) if (!res)
{ {
......
...@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) ...@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
int4 dimt, int4 dimt,
j, j,
i; i;
float res = -1.0; float res = 0.0;
ITEM **item; ITEM **item;
int size = q->size; int size = q->size;
...@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) ...@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
for (i = 0; i < size; i++) for (i = 0; i < size; i++)
{ {
float resj,wjm;
int4 jm;
entry = find_wordentry(t, q, item[i]); entry = find_wordentry(t, q, item[i]);
if (!entry) if (!entry)
continue; continue;
...@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) ...@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
post = POSNULL + 1; post = POSNULL + 1;
} }
resj = 0.0;
wjm = -1.0;
jm = 0;
for (j = 0; j < dimt; j++) for (j = 0; j < dimt; j++)
{ {
if (res < 0) resj = resj + wpos(post[j])/((j+1)*(j+1));
res = wpos(post[j]); if ( wpos(post[j]) > wjm ) {
else wjm = wpos(post[j]);
res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j])); jm = j;
} }
} }
/*
limit (sum(1/i^2),i->inf) = pi^2/6
resj = sum(wi/i^2),i=1,noccurrence,
wi - should be sorted desc,
don't sort for now, just choose maximum weight. This should be corrected
Oleg Bartunov
*/
res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685;
}
res = res /size;
pfree(item); pfree(item);
return res; return res;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment