Commit 23c75b55 authored by Tom Lane

Improve documentation around TS_execute().

I got frustrated by the lack of commentary in this area, so here is some
reverse-engineered documentation, along with minor stylistic cleanup.
No code changes more significant than removal of unused variables.

Back-patch to 9.6, not because that's useful in itself, but because
we have some bugs to fix in phrase search and this would cause merge
failures if it's only in HEAD.
parent 3761fe3c
...@@ -2123,7 +2123,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) ...@@ -2123,7 +2123,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
ch.words = &(prs->words[*p]); ch.words = &(prs->words[*p]);
ch.len = *q - *p + 1; ch.len = *q - *p + 1;
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL)) if (TS_execute(GETQUERY(query), &ch, TS_EXEC_EMPTY, checkcondition_HL))
return true; return true;
else else
{ {
......
...@@ -188,7 +188,7 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d ...@@ -188,7 +188,7 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d
* information then set recheck flag * information then set recheck flag
*/ */
if (val->weight != 0 || data != NULL) if (val->weight != 0 || data != NULL)
*gcv->need_recheck = true; *(gcv->need_recheck) = true;
/* convert item's number to corresponding entry's (operand's) number */ /* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
...@@ -289,19 +289,18 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) ...@@ -289,19 +289,18 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
bool *recheck = (bool *) PG_GETARG_POINTER(5); bool *recheck = (bool *) PG_GETARG_POINTER(5);
bool res = FALSE; bool res = FALSE;
/* The query requires recheck only if it involves weights */ /* Initially assume query doesn't require recheck */
*recheck = false; *recheck = false;
if (query->size > 0) if (query->size > 0)
{ {
QueryItem *item;
GinChkVal gcv; GinChkVal gcv;
/* /*
* check-parameter array has one entry for each value (operand) in the * check-parameter array has one entry for each value (operand) in the
* query. * query.
*/ */
gcv.first_item = item = GETQUERY(query); gcv.first_item = GETQUERY(query);
gcv.check = check; gcv.check = check;
gcv.map_item_operand = (int *) (extra_data[0]); gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = recheck; gcv.need_recheck = recheck;
...@@ -328,19 +327,18 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) ...@@ -328,19 +327,18 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
GinTernaryValue res = GIN_FALSE; GinTernaryValue res = GIN_FALSE;
bool recheck; bool recheck;
/* The query requires recheck only if it involves weights */ /* Initially assume query doesn't require recheck */
recheck = false; recheck = false;
if (query->size > 0) if (query->size > 0)
{ {
QueryItem *item;
GinChkVal gcv; GinChkVal gcv;
/* /*
* check-parameter array has one entry for each value (operand) in the * check-parameter array has one entry for each value (operand) in the
* query. * query.
*/ */
gcv.first_item = item = GETQUERY(query); gcv.first_item = GETQUERY(query);
gcv.check = check; gcv.check = check;
gcv.map_item_operand = (int *) (extra_data[0]); gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck; gcv.need_recheck = &recheck;
......
...@@ -1405,20 +1405,26 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) ...@@ -1405,20 +1405,26 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
} }
/* /*
* Check for phrase condition. Fallback to the AND operation * Execute tsquery at or below an OP_PHRASE operator.
* if there is no positional information. *
* This handles the recursion at levels where we need to care about
* match locations. In addition to the same arguments used for TS_execute,
* the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
* be filled with lexeme match positions on success. data == NULL if no
* match data need be returned. (In practice, outside callers pass NULL,
* and only the internal recursion cases pass a data pointer.)
*/ */
static bool static bool
TS_phrase_execute(QueryItem *curitem, TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
void *checkval, uint32 flags, ExecPhraseData *data, ExecPhraseData *data,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)) TSExecuteCallback chkcond)
{ {
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
if (curitem->type == QI_VAL) if (curitem->type == QI_VAL)
{ {
return chkcond(checkval, (QueryOperand *) curitem, data); return chkcond(arg, (QueryOperand *) curitem, data);
} }
else else
{ {
...@@ -1432,33 +1438,31 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1432,33 +1438,31 @@ TS_phrase_execute(QueryItem *curitem,
Assert(curitem->qoperator.oper == OP_PHRASE); Assert(curitem->qoperator.oper == OP_PHRASE);
if (!TS_phrase_execute(curitem + curitem->qoperator.left, if (!TS_phrase_execute(curitem + curitem->qoperator.left,
checkval, flags, &Ldata, chkcond)) arg, flags, &Ldata, chkcond))
return false; return false;
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond)) if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
return false; return false;
/* /*
* if at least one of the operands has no position information, then * If either operand has no position information, then we normally
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
* return true as it is a AND operation * return true, treating OP_PHRASE as if it were OP_AND.
*/ */
if (Ldata.npos == 0 || Rdata.npos == 0) if (Ldata.npos == 0 || Rdata.npos == 0)
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false; return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
/* /*
* Result of the operation is a list of the corresponding positions of * Prepare output position array if needed.
* RIGHT operand.
*/ */
if (data) if (data)
{ {
/*
* We can recycle the righthand operand's result array if it was
* palloc'd, else must allocate our own. The number of matches
* couldn't be more than the smaller of the two operands' matches.
*/
if (!Rdata.allocated) if (!Rdata.allocated)
/*
* OP_PHRASE is based on the OP_AND, so the number of
* resulting positions could not be greater than the total
* amount of operands.
*/
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos)); data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
else else
data->pos = Rdata.pos; data->pos = Rdata.pos;
...@@ -1469,10 +1473,12 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1469,10 +1473,12 @@ TS_phrase_execute(QueryItem *curitem,
} }
/* /*
* Find matches by distance, WEP_GETPOS() is needed because * Find matches by distance. WEP_GETPOS() is needed because
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
*
* Note that the output positions are those of the matching RIGHT
* operands.
*/ */
Rpos = Rdata.pos; Rpos = Rdata.pos;
LposStart = Ldata.pos; LposStart = Ldata.pos;
while (Rpos < Rdata.pos + Rdata.npos) while (Rpos < Rdata.pos + Rdata.npos)
...@@ -1505,8 +1511,9 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1505,8 +1511,9 @@ TS_phrase_execute(QueryItem *curitem,
else else
{ {
/* /*
* We are in the root of the phrase tree and hence we * We are at the root of the phrase tree and hence we
* don't have to store the resulting positions * don't have to identify all the match positions.
* Just report success.
*/ */
return true; return true;
} }
...@@ -1546,42 +1553,45 @@ TS_phrase_execute(QueryItem *curitem, ...@@ -1546,42 +1553,45 @@ TS_phrase_execute(QueryItem *curitem,
/* /*
* Evaluate tsquery boolean expression. * Evaluate tsquery boolean expression.
* *
* chkcond is a callback function used to evaluate each VAL node in the query. * curitem: current tsquery item (initially, the first one)
* checkval can be used to pass information to the callback. TS_execute doesn't * arg: opaque value to pass through to callback function
* do anything with it. * flags: bitmask of flag bits shown in ts_utils.h
* It believes that ordinary operators are always closier to root than phrase * chkcond: callback function to check whether a primitive value is present
* operator, so, TS_execute() may not take care of lexeme's position at all. *
* The logic here deals only with operators above any phrase operator, for
* which we do not need to worry about lexeme positions. As soon as we hit an
* OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
*/ */
bool bool
TS_execute(QueryItem *curitem, void *checkval, uint32 flags, TS_execute(QueryItem *curitem, void *arg, uint32 flags,
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data)) TSExecuteCallback chkcond)
{ {
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
if (curitem->type == QI_VAL) if (curitem->type == QI_VAL)
return chkcond(checkval, (QueryOperand *) curitem, return chkcond(arg, (QueryOperand *) curitem,
NULL /* we don't need position info */ ); NULL /* we don't need position info */ );
switch (curitem->qoperator.oper) switch (curitem->qoperator.oper)
{ {
case OP_NOT: case OP_NOT:
if (flags & TS_EXEC_CALC_NOT) if (flags & TS_EXEC_CALC_NOT)
return !TS_execute(curitem + 1, checkval, flags, chkcond); return !TS_execute(curitem + 1, arg, flags, chkcond);
else else
return true; return true;
case OP_AND: case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
return TS_execute(curitem + 1, checkval, flags, chkcond); return TS_execute(curitem + 1, arg, flags, chkcond);
else else
return false; return false;
case OP_OR: case OP_OR:
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond)) if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
return true; return true;
else else
return TS_execute(curitem + 1, checkval, flags, chkcond); return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE: case OP_PHRASE:
...@@ -1589,7 +1599,7 @@ TS_execute(QueryItem *curitem, void *checkval, uint32 flags, ...@@ -1589,7 +1599,7 @@ TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
* do something more if it's called from TS_phrase_execute() * do something more if it's called from TS_phrase_execute()
*/ */
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond); return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
default: default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
...@@ -1684,12 +1694,10 @@ ts_match_vq(PG_FUNCTION_ARGS) ...@@ -1684,12 +1694,10 @@ ts_match_vq(PG_FUNCTION_ARGS)
chkval.arre = chkval.arrb + val->size; chkval.arre = chkval.arrb + val->size;
chkval.values = STRPTR(val); chkval.values = STRPTR(val);
chkval.operand = GETOPERAND(query); chkval.operand = GETOPERAND(query);
result = TS_execute( result = TS_execute(GETQUERY(query),
GETQUERY(query),
&chkval, &chkval,
TS_EXEC_CALC_NOT, TS_EXEC_CALC_NOT,
checkcondition_str checkcondition_str);
);
PG_FREE_IF_COPY(val, 0); PG_FREE_IF_COPY(val, 0);
PG_FREE_IF_COPY(query, 1); PG_FREE_IF_COPY(query, 1);
......
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
#ifndef _PG_TS_UTILS_H_ #ifndef _PG_TS_UTILS_H_
#define _PG_TS_UTILS_H_ #define _PG_TS_UTILS_H_
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
/* /*
* Common parse definitions for tsvector and tsquery * Common parse definitions for tsvector and tsquery
...@@ -102,34 +102,67 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, ...@@ -102,34 +102,67 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
extern text *generateHeadline(HeadlineParsedText *prs); extern text *generateHeadline(HeadlineParsedText *prs);
/* /*
* Common check function for tsvector @@ tsquery * TSQuery execution support
*
* TS_execute() executes a tsquery against data that can be represented in
* various forms. The TSExecuteCallback callback function is called to check
* whether a given primitive tsquery value is matched in the data.
*/
/*
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns true (success).
* If it cannot return position data, it may ignore its "data" argument, but
* then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
* and must arrange for a later recheck with position data available.
*
* The reported lexeme positions must be sorted and unique. Callers must only
* consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
* This allows the returned "pos" to point directly to the WordEntryPos
* portion of a tsvector value. If "allocated" is true then the pos array
* is palloc'd workspace and caller may free it when done.
*
* All fields of the ExecPhraseData struct are initially zeroed by caller.
*/ */
typedef struct ExecPhraseData typedef struct ExecPhraseData
{ {
int npos; int npos; /* number of positions reported */
bool allocated; bool allocated; /* pos points to palloc'd data? */
WordEntryPos *pos; WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
} ExecPhraseData; } ExecPhraseData;
/* /*
* Evaluates tsquery, flags are followe below * Signature for TSQuery lexeme check functions
*
* arg: opaque value passed through from caller of TS_execute
* val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed
*
* Return TRUE if lexeme is present in data, else FALSE
*/ */
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags, typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)); ExecPhraseData *data);
/*
* Flag bits for TS_execute
*/
#define TS_EXEC_EMPTY (0x00) #define TS_EXEC_EMPTY (0x00)
/* /*
* if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true, * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
* used in cases where NOT cannot be accurately computed (GiST) or * evaluated to be true. Useful in cases where NOT cannot be accurately
* it isn't important (ranking) * computed (GiST) or it isn't important (ranking).
*/ */
#define TS_EXEC_CALC_NOT (0x01) #define TS_EXEC_CALC_NOT (0x01)
/* /*
* Treat OP_PHRASE as OP_AND. Used when posiotional information is not * Treat OP_PHRASE as OP_AND. Used when positional information is not
* accessible, like in consistent methods of GIN/GiST indexes * accessible, like in consistent methods of GIN/GiST indexes; rechecking
* must occur later.
*/ */
#define TS_EXEC_PHRASE_AS_AND (0x02) #define TS_EXEC_PHRASE_AS_AND (0x02)
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond);
extern bool tsquery_requires_match(QueryItem *curitem); extern bool tsquery_requires_match(QueryItem *curitem);
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment