Commit 3dbbd0f0 authored by Teodor Sigaev's avatar Teodor Sigaev

Do not fallback to AND for FTS phrase operator.

If there is no positional information for the lexemes, then the phrase operator
will no longer fall back to the AND operator. This change requires modifying the
TS_execute() interface, because in some places (in indexes, for example)
positional information is inaccessible, and in those cases we need to force the
fallback to AND.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
parent 028350f6
......@@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
res = TS_execute(GETQUERY(query),
&gcv,
true,
TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
checkcondition_gin);
}
......
......@@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) GETSIGN(key), false,
(void *) GETSIGN(key),
TS_EXEC_PHRASE_AS_AND,
checkcondition_bit
));
}
......@@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
chkval.arre = chkval.arrb + ARRNELEM(key);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) &chkval, true,
(void *) &chkval,
TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
checkcondition_arr
));
}
......
......@@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
{
fillQueryRepresentationData(qr, ptr);
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
if (TS_execute(GETQUERY(qr->query), (void *) qr,
TS_EXEC_EMPTY, checkcondition_QueryOperand))
{
if (WEP_GETPOS(ptr->pos) > ext->q)
{
......@@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
*/
fillQueryRepresentationData(qr, ptr);
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
if (TS_execute(GETQUERY(qr->query), (void *) qr,
TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
{
if (WEP_GETPOS(ptr->pos) < ext->p)
{
......
......@@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
*/
static bool
TS_phrase_execute(QueryItem *curitem,
void *checkval, bool calcnot, ExecPhraseData *data,
void *checkval, uint32 flags, ExecPhraseData *data,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
{
/* since this function recurses, it could be driven to stack overflow */
......@@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem,
Assert(curitem->qoperator.oper == OP_PHRASE);
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
checkval, calcnot, &Ldata, chkcond))
checkval, flags, &Ldata, chkcond))
return false;
if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
return false;
/*
* if at least one of the operands has no position information,
* fallback to AND operation.
* then return false. But if the TS_EXEC_PHRASE_AS_AND flag is set then
* we return true, as if it were an AND operation
*/
if (Ldata.npos == 0 || Rdata.npos == 0)
return true;
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
/*
* Result of the operation is a list of the corresponding positions of
......@@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem,
* chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* if calcnot is false, NOT expressions are always evaluated to be true. This
* is used in ranking.
* It assumes that ordinary operators are always closer to the root than the
* phrase operator, so TS_execute() need not take care of lexeme positions at all.
*/
bool
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
{
/* since this function recurses, it could be driven to stack overflow */
......@@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
switch (curitem->qoperator.oper)
{
case OP_NOT:
if (calcnot)
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
if (flags & TS_EXEC_CALC_NOT)
return !TS_execute(curitem + 1, checkval, flags, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
return TS_execute(curitem + 1, checkval, flags, chkcond);
else
return false;
case OP_OR:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
return TS_execute(curitem + 1, checkval, flags, chkcond);
case OP_PHRASE:
return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
/*
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond()
* could do something more if it's called from TS_phrase_execute()
*/
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
......@@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS)
result = TS_execute(
GETQUERY(query),
&chkval,
true,
TS_EXEC_CALC_NOT,
checkcondition_str
);
......
......@@ -111,8 +111,25 @@ typedef struct ExecPhraseData
WordEntryPos *pos;
} ExecPhraseData;
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
/*
* Evaluates a tsquery; the available flags are described below
*/
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
#define TS_EXEC_EMPTY (0x00)
/*
* If TS_EXEC_CALC_NOT is not set then NOT expressions are always evaluated
* to be true. This is used in cases where NOT cannot be accurately computed
* (GiST) or where it isn't important (ranking)
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
* Treat OP_PHRASE as OP_AND. Used when positional information is not
* accessible, as in the consistent methods of GIN/GiST indexes
*/
#define TS_EXEC_PHRASE_AS_AND (0x02)
extern bool tsquery_requires_match(QueryItem *curitem);
/*
......
......@@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
--check OP_PHRASE on index
create temp table phrase_index_test(fts tsvector);
insert into phrase_index_test values('A fat cat has just eaten a rat.');
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
fts
-------------------------------------------------
'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
fts
-----------------------------------
'cat':3 'eaten':6 'fat':2 'rat':8
(1 row)
set enable_seqscan = on;
......@@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
--check OP_PHRASE on index
create temp table phrase_index_test(fts tsvector);
insert into phrase_index_test values('A fat cat has just eaten a rat.');
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment