Commit 2f2007fb authored by Tom Lane

Fix assorted bugs by changing TS_execute's callback API to ternary logic.

Text search sometimes failed to find valid matches, for instance
'!crew:A'::tsquery might fail to locate 'crew:1B'::tsvector during
an index search.  The root of the issue is that TS_execute's callback
functions were not changed to use ternary (yes/no/maybe) reporting
when we made the search logic itself do so.  It's somewhat annoying
to break that API, but on the other hand we now see that any code
using plain boolean logic is almost certainly broken since the
addition of phrase search.  There seem to be very few outside callers
of this code anyway, so we'll just break them intentionally to get
them to adapt.

This allows removal of tsginidx.c's private re-implementation of
TS_execute, since that's now entirely duplicative.  It's also no
longer necessary to avoid use of CALC_NOT in tsgistidx.c, since
the underlying callbacks can now do something reasonable.

Back-patch into v13.  We can't change this in stable branches,
but it seems not quite too late to fix it in v13.

Tom Lane and Pavel Borisov

Discussion: https://postgr.es/m/CALT9ZEE-aLotzBg-pOp2GFTesGWVYzXA3=mZKzRDa_OKnLF7Mg@mail.gmail.com
parent 25244b89
...@@ -1962,7 +1962,7 @@ typedef struct ...@@ -1962,7 +1962,7 @@ typedef struct
/* /*
* TS_execute callback for matching a tsquery operand to headline words * TS_execute callback for matching a tsquery operand to headline words
*/ */
static bool static TSTernaryValue
checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
{ {
hlCheck *checkval = (hlCheck *) opaque; hlCheck *checkval = (hlCheck *) opaque;
...@@ -1975,7 +1975,7 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) ...@@ -1975,7 +1975,7 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
{ {
/* if data == NULL, don't need to report positions */ /* if data == NULL, don't need to report positions */
if (!data) if (!data)
return true; return TS_YES;
if (!data->pos) if (!data->pos)
{ {
...@@ -1992,9 +1992,9 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) ...@@ -1992,9 +1992,9 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
} }
if (data && data->npos > 0) if (data && data->npos > 0)
return true; return TS_YES;
return false; return TS_NO;
} }
/* /*
......
...@@ -178,9 +178,13 @@ typedef struct ...@@ -178,9 +178,13 @@ typedef struct
bool *need_recheck; bool *need_recheck;
} GinChkVal; } GinChkVal;
static GinTernaryValue /*
checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) * TS_execute callback for matching a tsquery operand to GIN index data
*/
static TSTernaryValue
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
GinChkVal *gcv = (GinChkVal *) checkval;
int j; int j;
/* /*
...@@ -193,112 +197,22 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d ...@@ -193,112 +197,22 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d
/* convert item's number to corresponding entry's (operand's) number */ /* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
/* return presence of current entry in indexed value */
return gcv->check[j];
}
/*
* Wrapper of check condition function for TS_execute.
*/
static bool
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
return checkcondition_gin_internal((GinChkVal *) checkval,
val,
data) != GIN_FALSE;
}
/*
* Evaluate tsquery boolean expression using ternary logic.
*
* Note: the reason we can't use TS_execute() for this is that its API
* for the checkcondition callback doesn't allow a MAYBE result to be
* returned, but we might have MAYBEs in the gcv->check array.
* Perhaps we should change that API.
*/
static GinTernaryValue
TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
{
GinTernaryValue val1,
val2,
result;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
return
checkcondition_gin_internal(gcv,
(QueryOperand *) curitem,
NULL /* don't have position info */ );
switch (curitem->qoperator.oper)
{
case OP_NOT:
/* /*
* Below a phrase search, force NOT's result to MAYBE. We cannot * return presence of current entry in indexed value; but TRUE becomes
* invert a TRUE result from the subexpression to FALSE, since * MAYBE in the presence of a query requiring recheck
* TRUE only says that the subexpression matches somewhere, not
* that it matches everywhere, so there might be positions where
* the NOT will match. We could invert FALSE to TRUE, but there's
* little point in distinguishing TRUE from MAYBE, since a recheck
* will have been forced already.
*/ */
if (in_phrase) if (gcv->check[j] == GIN_TRUE)
return GIN_MAYBE; {
if (val->weight != 0 || data != NULL)
result = TS_execute_ternary(gcv, curitem + 1, in_phrase); return TS_MAYBE;
if (result == GIN_MAYBE) }
return result;
return !result;
case OP_PHRASE:
/* /*
* GIN doesn't contain any information about positions, so treat * We rely on GinTernaryValue and TSTernaryValue using equivalent value
* OP_PHRASE as OP_AND with recheck requirement, and always * assignments. We could use a switch statement to map the values if that
* reporting MAYBE not TRUE. * ever stops being true, but it seems unlikely to happen.
*/ */
*(gcv->need_recheck) = true; return (TSTernaryValue) gcv->check[j];
/* Pass down in_phrase == true in case there's a NOT below */
in_phrase = true;
/* FALL THRU */
case OP_AND:
val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
in_phrase);
if (val1 == GIN_FALSE)
return GIN_FALSE;
val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_FALSE)
return GIN_FALSE;
if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
curitem->qoperator.oper != OP_PHRASE)
return GIN_TRUE;
else
return GIN_MAYBE;
case OP_OR:
val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
in_phrase);
if (val1 == GIN_TRUE)
return GIN_TRUE;
val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_TRUE)
return GIN_TRUE;
if (val1 == GIN_FALSE && val2 == GIN_FALSE)
return GIN_FALSE;
else
return GIN_MAYBE;
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
}
/* not reachable, but keep compiler quiet */
return false;
} }
Datum Datum
...@@ -370,10 +284,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) ...@@ -370,10 +284,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]); gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck; gcv.need_recheck = &recheck;
res = TS_execute_ternary(&gcv, GETQUERY(query), false); if (TS_execute(GETQUERY(query),
&gcv,
if (res == GIN_TRUE && recheck) TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
res = GIN_MAYBE; checkcondition_gin))
res = recheck ? GIN_MAYBE : GIN_TRUE;
} }
PG_RETURN_GIN_TERNARY_VALUE(res); PG_RETURN_GIN_TERNARY_VALUE(res);
......
...@@ -273,9 +273,9 @@ typedef struct ...@@ -273,9 +273,9 @@ typedef struct
} CHKVAL; } CHKVAL;
/* /*
* is there value 'val' in array or not ? * TS_execute callback for matching a tsquery operand to GIST leaf-page data
*/ */
static bool static TSTernaryValue
checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
int32 *StopLow = ((CHKVAL *) checkval)->arrb; int32 *StopLow = ((CHKVAL *) checkval)->arrb;
...@@ -288,23 +288,26 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -288,23 +288,26 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
* we are not able to find a prefix by hash value * we are not able to find a prefix by hash value
*/ */
if (val->prefix) if (val->prefix)
return true; return TS_MAYBE;
while (StopLow < StopHigh) while (StopLow < StopHigh)
{ {
StopMiddle = StopLow + (StopHigh - StopLow) / 2; StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->valcrc) if (*StopMiddle == val->valcrc)
return true; return TS_MAYBE;
else if (*StopMiddle < val->valcrc) else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1; StopLow = StopMiddle + 1;
else else
StopHigh = StopMiddle; StopHigh = StopMiddle;
} }
return false; return TS_NO;
} }
static bool /*
* TS_execute callback for matching a tsquery operand to GIST non-leaf data
*/
static TSTernaryValue
checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
void *key = (SignTSVector *) checkval; void *key = (SignTSVector *) checkval;
...@@ -313,8 +316,12 @@ checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -313,8 +316,12 @@ checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
* we are not able to find a prefix in signature tree * we are not able to find a prefix in signature tree
*/ */
if (val->prefix) if (val->prefix)
return true; return TS_MAYBE;
return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)));
if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))))
return TS_MAYBE;
else
return TS_NO;
} }
Datum Datum
...@@ -339,10 +346,9 @@ gtsvector_consistent(PG_FUNCTION_ARGS) ...@@ -339,10 +346,9 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
if (ISALLTRUE(key)) if (ISALLTRUE(key))
PG_RETURN_BOOL(true); PG_RETURN_BOOL(true);
/* since signature is lossy, cannot specify CALC_NOT here */
PG_RETURN_BOOL(TS_execute(GETQUERY(query), PG_RETURN_BOOL(TS_execute(GETQUERY(query),
key, key,
TS_EXEC_PHRASE_NO_POS, TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT,
checkcondition_bit)); checkcondition_bit));
} }
else else
......
...@@ -556,14 +556,18 @@ typedef struct ...@@ -556,14 +556,18 @@ typedef struct
#define QR_GET_OPERAND_DATA(q, v) \ #define QR_GET_OPERAND_DATA(q, v) \
( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) ) ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
static bool /*
checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data) * TS_execute callback for matching a tsquery operand to QueryRepresentation
*/
static TSTernaryValue
checkcondition_QueryOperand(void *checkval, QueryOperand *val,
ExecPhraseData *data)
{ {
QueryRepresentation *qr = (QueryRepresentation *) checkval; QueryRepresentation *qr = (QueryRepresentation *) checkval;
QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val); QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
if (!opData->operandexists) if (!opData->operandexists)
return false; return TS_NO;
if (data) if (data)
{ {
...@@ -573,7 +577,7 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *d ...@@ -573,7 +577,7 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *d
data->pos += MAXQROPOS - opData->npos; data->pos += MAXQROPOS - opData->npos;
} }
return true; return TS_YES;
} }
typedef struct typedef struct
......
...@@ -67,14 +67,6 @@ typedef struct ...@@ -67,14 +67,6 @@ typedef struct
StatEntry *root; StatEntry *root;
} TSVectorStat; } TSVectorStat;
/* TS_execute requires ternary logic to handle NOT with phrase matches */
typedef enum
{
TS_NO, /* definitely no match */
TS_YES, /* definitely does match */
TS_MAYBE /* can't verify match for lack of pos data */
} TSTernaryValue;
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
uint32 flags, uint32 flags,
...@@ -1188,13 +1180,15 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) ...@@ -1188,13 +1180,15 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
/* /*
* Check weight info or/and fill 'data' with the required positions * Check weight info or/and fill 'data' with the required positions
*/ */
static bool static TSTernaryValue
checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
ExecPhraseData *data) ExecPhraseData *data)
{ {
bool result = false; TSTernaryValue result = TS_NO;
if (entry->haspos && (val->weight || data)) Assert(data == NULL || data->npos == 0);
if (entry->haspos)
{ {
WordEntryPosVector *posvec; WordEntryPosVector *posvec;
...@@ -1232,7 +1226,13 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ...@@ -1232,7 +1226,13 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
data->npos = dptr - data->pos; data->npos = dptr - data->pos;
if (data->npos > 0) if (data->npos > 0)
result = true; result = TS_YES;
else
{
pfree(data->pos);
data->pos = NULL;
data->allocated = false;
}
} }
else if (val->weight) else if (val->weight)
{ {
...@@ -1243,40 +1243,57 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ...@@ -1243,40 +1243,57 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
{ {
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter))) if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
{ {
result = true; result = TS_YES;
break; /* no need to go further */ break; /* no need to go further */
} }
posvec_iter++; posvec_iter++;
} }
} }
else /* data != NULL */ else if (data)
{ {
data->npos = posvec->npos; data->npos = posvec->npos;
data->pos = posvec->pos; data->pos = posvec->pos;
data->allocated = false; data->allocated = false;
result = true; result = TS_YES;
}
else
{
/* simplest case: no weight check, positions not needed */
result = TS_YES;
} }
} }
else else
{ {
result = true; /*
* Position info is lacking, so if the caller requires it, we can only
* say that maybe there is a match.
*
* Notice, however, that we *don't* check val->weight here.
* Historically, stripped tsvectors are considered to match queries
* whether or not the query has a weight restriction; that's a little
* dubious but we'll preserve the behavior.
*/
if (data)
result = TS_MAYBE;
else
result = TS_YES;
} }
return result; return result;
} }
/* /*
* is there value 'val' in array or not ? * TS_execute callback for matching a tsquery operand to plain tsvector data
*/ */
static bool static TSTernaryValue
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{ {
CHKVAL *chkval = (CHKVAL *) checkval; CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb; WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre; WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle = StopHigh; WordEntry *StopMiddle = StopHigh;
bool res = false; TSTernaryValue res = TS_NO;
/* Loop invariant: StopLow <= val < StopHigh */ /* Loop invariant: StopLow <= val < StopHigh */
while (StopLow < StopHigh) while (StopLow < StopHigh)
...@@ -1302,36 +1319,69 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -1302,36 +1319,69 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
StopHigh = StopMiddle; StopHigh = StopMiddle;
} }
if ((!res || data) && val->prefix) /*
* If it's a prefix search, we should also consider lexemes that the
* search term is a prefix of (which will necessarily immediately follow
* the place we found in the above loop). But we can skip them if there
* was a definite match on the exact term AND the caller doesn't need
* position info.
*/
if (val->prefix && (res != TS_YES || data))
{ {
WordEntryPos *allpos = NULL; WordEntryPos *allpos = NULL;
int npos = 0, int npos = 0,
totalpos = 0; totalpos = 0;
/* /* adjust start position for corner case */
* there was a failed exact search, so we should scan further to find
* a prefix match. We also need to do so if caller needs position info
*/
if (StopLow >= StopHigh) if (StopLow >= StopHigh)
StopMiddle = StopHigh; StopMiddle = StopHigh;
while ((!res || data) && StopMiddle < chkval->arre && /* we don't try to re-use any data from the initial match */
if (data)
{
if (data->allocated)
pfree(data->pos);
data->pos = NULL;
data->allocated = false;
data->npos = 0;
}
res = TS_NO;
while ((res != TS_YES || data) &&
StopMiddle < chkval->arre &&
tsCompareString(chkval->operand + val->distance, tsCompareString(chkval->operand + val->distance,
val->length, val->length,
chkval->values + StopMiddle->pos, chkval->values + StopMiddle->pos,
StopMiddle->len, StopMiddle->len,
true) == 0) true) == 0)
{
TSTernaryValue subres;
subres = checkclass_str(chkval, StopMiddle, val, data);
if (subres != TS_NO)
{ {
if (data) if (data)
{ {
/* /*
* We need to join position information * We need to join position information
*/ */
res = checkclass_str(chkval, StopMiddle, val, data); if (subres == TS_MAYBE)
if (res)
{ {
while (npos + data->npos >= totalpos) /*
* No position info for this match, so we must report
* MAYBE overall.
*/
res = TS_MAYBE;
/* forget any previous positions */
npos = 0;
/* don't leak storage */
if (allpos)
pfree(allpos);
break;
}
while (npos + data->npos > totalpos)
{ {
if (totalpos == 0) if (totalpos == 0)
{ {
...@@ -1347,22 +1397,27 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -1347,22 +1397,27 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos); memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
npos += data->npos; npos += data->npos;
/* don't leak storage from individual matches */
if (data->allocated)
pfree(data->pos);
data->pos = NULL;
data->allocated = false;
/* it's important to reset data->npos before next loop */
data->npos = 0;
} }
else else
{ {
/* at loop exit, res must be true if we found matches */ /* Don't need positions, just handle YES/MAYBE */
res = (npos > 0); if (subres == TS_YES || res == TS_NO)
res = subres;
} }
} }
else
{
res = checkclass_str(chkval, StopMiddle, val, NULL);
}
StopMiddle++; StopMiddle++;
} }
if (res && data) if (data && npos > 0)
{ {
/* Sort and make unique array of found positions */ /* Sort and make unique array of found positions */
data->pos = allpos; data->pos = allpos;
...@@ -1370,6 +1425,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -1370,6 +1425,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
data->npos = qunique(data->pos, npos, sizeof(WordEntryPos), data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
compareWordEntryPos); compareWordEntryPos);
data->allocated = true; data->allocated = true;
res = TS_YES;
} }
} }
...@@ -1561,14 +1617,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, ...@@ -1561,14 +1617,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
check_stack_depth(); check_stack_depth();
if (curitem->type == QI_VAL) if (curitem->type == QI_VAL)
{ return chkcond(arg, (QueryOperand *) curitem, data);
if (!chkcond(arg, (QueryOperand *) curitem, data))
return TS_NO;
if (data->npos > 0 || data->negate)
return TS_YES;
/* If we have no position data, we must return TS_MAYBE */
return TS_MAYBE;
}
switch (curitem->qoperator.oper) switch (curitem->qoperator.oper)
{ {
...@@ -1821,7 +1870,7 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, ...@@ -1821,7 +1870,7 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
if (curitem->type == QI_VAL) if (curitem->type == QI_VAL)
return chkcond(arg, (QueryOperand *) curitem, return chkcond(arg, (QueryOperand *) curitem,
NULL /* don't need position info */ ) ? TS_YES : TS_NO; NULL /* don't need position info */ );
switch (curitem->qoperator.oper) switch (curitem->qoperator.oper)
{ {
......
...@@ -124,13 +124,21 @@ extern text *generateHeadline(HeadlineParsedText *prs); ...@@ -124,13 +124,21 @@ extern text *generateHeadline(HeadlineParsedText *prs);
* whether a given primitive tsquery value is matched in the data. * whether a given primitive tsquery value is matched in the data.
*/ */
/* TS_execute requires ternary logic to handle NOT with phrase matches */
typedef enum
{
TS_NO, /* definitely no match */
TS_YES, /* definitely does match */
TS_MAYBE /* can't verify match for lack of pos data */
} TSTernaryValue;
/* /*
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery). * lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns true (success). * The callback should fill in position data when it returns TS_YES (success).
* If it cannot return position data, it may leave "data" unchanged, but * If it cannot return position data, it should leave "data" unchanged and
* then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
* and must arrange for a later recheck with position data available. * recheck with position data available.
* *
* The reported lexeme positions must be sorted and unique. Callers must only * The reported lexeme positions must be sorted and unique. Callers must only
* consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]). * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
...@@ -162,11 +170,12 @@ typedef struct ExecPhraseData ...@@ -162,11 +170,12 @@ typedef struct ExecPhraseData
* val: lexeme to test for presence of * val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed * data: to be filled with lexeme positions; NULL if position data not needed
* *
* Return true if lexeme is present in data, else false. If data is not * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
* NULL, it should be filled with lexeme positions, but function can leave * present, TS_NO if it definitely is not present. If data is not NULL,
* it as zeroes if position data is not available. * it must be filled with lexeme positions if available. If position data
* is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
*/ */
typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
ExecPhraseData *data); ExecPhraseData *data);
/* /*
...@@ -175,10 +184,7 @@ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, ...@@ -175,10 +184,7 @@ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
#define TS_EXEC_EMPTY (0x00) #define TS_EXEC_EMPTY (0x00)
/* /*
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
* evaluated to be true. Useful in cases where NOT cannot be accurately * evaluated to be true. Useful in cases where NOT isn't important (ranking).
* computed (GiST) or it isn't important (ranking). From TS_execute's
* perspective, !CALC_NOT means that the TSExecuteCallback function might
* return false-positive indications of a lexeme's presence.
*/ */
#define TS_EXEC_CALC_NOT (0x01) #define TS_EXEC_CALC_NOT (0x01)
/* /*
......
...@@ -176,6 +176,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -176,6 +176,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
create index wowidx on test_tsvector using gist (a); create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF; SET enable_seqscan=OFF;
SET enable_indexscan=ON; SET enable_indexscan=ON;
...@@ -308,6 +332,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -308,6 +332,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
SET enable_indexscan=OFF; SET enable_indexscan=OFF;
SET enable_bitmapscan=ON; SET enable_bitmapscan=ON;
explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
...@@ -440,6 +488,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -440,6 +488,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
-- Test siglen parameter of GiST tsvector_ops -- Test siglen parameter of GiST tsvector_ops
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1)); CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
ERROR: unrecognized parameter "foo" ERROR: unrecognized parameter "foo"
...@@ -595,6 +667,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -595,6 +667,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
DROP INDEX wowidx2; DROP INDEX wowidx2;
CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484)); CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484));
\d test_tsvector \d test_tsvector
...@@ -736,6 +832,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -736,6 +832,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
RESET enable_seqscan; RESET enable_seqscan;
RESET enable_indexscan; RESET enable_indexscan;
RESET enable_bitmapscan; RESET enable_bitmapscan;
...@@ -873,6 +993,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; ...@@ -873,6 +993,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
507 507
(1 row) (1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
count
-------
56
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
count
-------
58
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
count
-------
452
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
count
-------
450
(1 row)
-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries -- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries
EXPLAIN (COSTS OFF) EXPLAIN (COSTS OFF)
SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; SELECT count(*) FROM test_tsvector WHERE a @@ '!qh';
......
...@@ -551,6 +551,55 @@ SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false"; ...@@ -551,6 +551,55 @@ SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false";
f f
(1 row) (1 row)
SELECT 'wa:1A'::tsvector @@ 'w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT 'wa:1A'::tsvector @@ 'w:*D'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'wa:1A'::tsvector @@ '!w:*A'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'wa:1A'::tsvector @@ '!w:*D'::tsquery as "true";
true
------
t
(1 row)
-- historically, a stripped tsvector matches queries ignoring weights:
SELECT strip('wa:1A'::tsvector) @@ 'w:*A'::tsquery as "true";
true
------
t
(1 row)
SELECT strip('wa:1A'::tsvector) @@ 'w:*D'::tsquery as "true";
true
------
t
(1 row)
SELECT strip('wa:1A'::tsvector) @@ '!w:*A'::tsquery as "false";
false
-------
f
(1 row)
SELECT strip('wa:1A'::tsvector) @@ '!w:*D'::tsquery as "false";
false
-------
f
(1 row)
SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false"; SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
false false
------- -------
......
...@@ -61,6 +61,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -61,6 +61,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
create index wowidx on test_tsvector using gist (a); create index wowidx on test_tsvector using gist (a);
...@@ -90,6 +94,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -90,6 +94,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
SET enable_indexscan=OFF; SET enable_indexscan=OFF;
SET enable_bitmapscan=ON; SET enable_bitmapscan=ON;
...@@ -116,6 +124,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -116,6 +124,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
-- Test siglen parameter of GiST tsvector_ops -- Test siglen parameter of GiST tsvector_ops
CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1)); CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
...@@ -152,6 +164,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -152,6 +164,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
DROP INDEX wowidx2; DROP INDEX wowidx2;
...@@ -181,6 +197,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -181,6 +197,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
RESET enable_seqscan; RESET enable_seqscan;
RESET enable_indexscan; RESET enable_indexscan;
...@@ -215,6 +235,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; ...@@ -215,6 +235,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)';
SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A';
SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D';
-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries -- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries
EXPLAIN (COSTS OFF) EXPLAIN (COSTS OFF)
......
...@@ -104,6 +104,15 @@ SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true"; ...@@ -104,6 +104,15 @@ SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D & w:*A'::tsquery as "true"; SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D & w:*A'::tsquery as "true";
SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "true"; SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "true";
SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false"; SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false";
SELECT 'wa:1A'::tsvector @@ 'w:*A'::tsquery as "true";
SELECT 'wa:1A'::tsvector @@ 'w:*D'::tsquery as "false";
SELECT 'wa:1A'::tsvector @@ '!w:*A'::tsquery as "false";
SELECT 'wa:1A'::tsvector @@ '!w:*D'::tsquery as "true";
-- historically, a stripped tsvector matches queries ignoring weights:
SELECT strip('wa:1A'::tsvector) @@ 'w:*A'::tsquery as "true";
SELECT strip('wa:1A'::tsvector) @@ 'w:*D'::tsquery as "true";
SELECT strip('wa:1A'::tsvector) @@ '!w:*A'::tsquery as "false";
SELECT strip('wa:1A'::tsvector) @@ '!w:*D'::tsquery as "false";
SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false"; SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false"; SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment