Commit 23c75b55 authored by Tom Lane

Improve documentation around TS_execute().

I got frustrated by the lack of commentary in this area, so here is some
reverse-engineered documentation, along with minor stylistic cleanup.
No code changes more significant than removal of unused variables.

Back-patch to 9.6, not because that's useful in itself, but because
we have some bugs to fix in phrase search and this would cause merge
failures if it's only in HEAD.
parent 3761fe3c
......@@ -2123,7 +2123,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
ch.words = &(prs->words[*p]);
ch.len = *q - *p + 1;
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
if (TS_execute(GETQUERY(query), &ch, TS_EXEC_EMPTY, checkcondition_HL))
return true;
else
{
......
......@@ -188,7 +188,7 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d
* information then set recheck flag
*/
if (val->weight != 0 || data != NULL)
*gcv->need_recheck = true;
*(gcv->need_recheck) = true;
/* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
......@@ -289,19 +289,18 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
bool *recheck = (bool *) PG_GETARG_POINTER(5);
bool res = FALSE;
/* The query requires recheck only if it involves weights */
/* Initially assume query doesn't require recheck */
*recheck = false;
if (query->size > 0)
{
QueryItem *item;
GinChkVal gcv;
/*
* check-parameter array has one entry for each value (operand) in the
* query.
*/
gcv.first_item = item = GETQUERY(query);
gcv.first_item = GETQUERY(query);
gcv.check = check;
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = recheck;
......@@ -328,19 +327,18 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
GinTernaryValue res = GIN_FALSE;
bool recheck;
/* The query requires recheck only if it involves weights */
/* Initially assume query doesn't require recheck */
recheck = false;
if (query->size > 0)
{
QueryItem *item;
GinChkVal gcv;
/*
* check-parameter array has one entry for each value (operand) in the
* query.
*/
gcv.first_item = item = GETQUERY(query);
gcv.first_item = GETQUERY(query);
gcv.check = check;
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck;
......
......@@ -1405,20 +1405,26 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
}
/*
* Check for phrase condition. Fallback to the AND operation
* if there is no positional information.
* Execute tsquery at or below an OP_PHRASE operator.
*
* This handles the recursion at levels where we need to care about
* match locations. In addition to the same arguments used for TS_execute,
* the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
* be filled with lexeme match positions on success. data == NULL if no
* match data need be returned. (In practice, outside callers pass NULL,
* and only the internal recursion cases pass a data pointer.)
*/
static bool
TS_phrase_execute(QueryItem *curitem,
void *checkval, uint32 flags, ExecPhraseData *data,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
ExecPhraseData *data,
TSExecuteCallback chkcond)
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
{
return chkcond(checkval, (QueryOperand *) curitem, data);
return chkcond(arg, (QueryOperand *) curitem, data);
}
else
{
......@@ -1432,33 +1438,31 @@ TS_phrase_execute(QueryItem *curitem,
Assert(curitem->qoperator.oper == OP_PHRASE);
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
checkval, flags, &Ldata, chkcond))
arg, flags, &Ldata, chkcond))
return false;
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
return false;
/*
* if at least one of the operands has no position information, then
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
* return true as it is a AND operation
* If either operand has no position information, then we normally
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
* return true, treating OP_PHRASE as if it were OP_AND.
*/
if (Ldata.npos == 0 || Rdata.npos == 0)
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
/*
* Result of the operation is a list of the corresponding positions of
* RIGHT operand.
* Prepare output position array if needed.
*/
if (data)
{
/*
* We can recycle the righthand operand's result array if it was
* palloc'd, else must allocate our own. The number of matches
* couldn't be more than the smaller of the two operands' matches.
*/
if (!Rdata.allocated)
/*
* OP_PHRASE is based on the OP_AND, so the number of
* resulting positions could not be greater than the total
* amount of operands.
*/
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
else
data->pos = Rdata.pos;
......@@ -1469,10 +1473,12 @@ TS_phrase_execute(QueryItem *curitem,
}
/*
* Find matches by distance, WEP_GETPOS() is needed because
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector
* Find matches by distance. WEP_GETPOS() is needed because
* ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
*
* Note that the output positions are those of the matching RIGHT
* operands.
*/
Rpos = Rdata.pos;
LposStart = Ldata.pos;
while (Rpos < Rdata.pos + Rdata.npos)
......@@ -1505,8 +1511,9 @@ TS_phrase_execute(QueryItem *curitem,
else
{
/*
* We are in the root of the phrase tree and hence we
* don't have to store the resulting positions
* We are at the root of the phrase tree and hence we
* don't have to identify all the match positions.
* Just report success.
*/
return true;
}
......@@ -1546,42 +1553,45 @@ TS_phrase_execute(QueryItem *curitem,
/*
* Evaluate tsquery boolean expression.
*
* chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* It believes that ordinary operators are always closier to root than phrase
* operator, so, TS_execute() may not take care of lexeme's position at all.
* curitem: current tsquery item (initially, the first one)
* arg: opaque value to pass through to callback function
* flags: bitmask of flag bits shown in ts_utils.h
* chkcond: callback function to check whether a primitive value is present
*
* The logic here deals only with operators above any phrase operator, for
* which we do not need to worry about lexeme positions. As soon as we hit an
* OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
*/
bool
TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond)
{
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
return chkcond(checkval, (QueryOperand *) curitem,
return chkcond(arg, (QueryOperand *) curitem,
NULL /* we don't need position info */ );
switch (curitem->qoperator.oper)
{
case OP_NOT:
if (flags & TS_EXEC_CALC_NOT)
return !TS_execute(curitem + 1, checkval, flags, chkcond);
return !TS_execute(curitem + 1, arg, flags, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
return TS_execute(curitem + 1, checkval, flags, chkcond);
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
return TS_execute(curitem + 1, arg, flags, chkcond);
else
return false;
case OP_OR:
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, flags, chkcond);
return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE:
......@@ -1589,7 +1599,7 @@ TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
* do something more if it's called from TS_phrase_execute()
*/
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
......@@ -1684,12 +1694,10 @@ ts_match_vq(PG_FUNCTION_ARGS)
chkval.arre = chkval.arrb + val->size;
chkval.values = STRPTR(val);
chkval.operand = GETOPERAND(query);
result = TS_execute(
GETQUERY(query),
result = TS_execute(GETQUERY(query),
&chkval,
TS_EXEC_CALC_NOT,
checkcondition_str
);
checkcondition_str);
PG_FREE_IF_COPY(val, 0);
PG_FREE_IF_COPY(query, 1);
......
......@@ -12,9 +12,9 @@
#ifndef _PG_TS_UTILS_H_
#define _PG_TS_UTILS_H_
#include "tsearch/ts_type.h"
#include "tsearch/ts_public.h"
#include "nodes/pg_list.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
/*
* Common parse definitions for tsvector and tsquery
......@@ -102,34 +102,67 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
extern text *generateHeadline(HeadlineParsedText *prs);
/*
* Common check function for tsvector @@ tsquery
* TSQuery execution support
*
* TS_execute() executes a tsquery against data that can be represented in
* various forms. The TSExecuteCallback callback function is called to check
* whether a given primitive tsquery value is matched in the data.
*/
/*
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns true (success).
* If it cannot return position data, it may ignore its "data" argument, but
* then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
* and must arrange for a later recheck with position data available.
*
* The reported lexeme positions must be sorted and unique. Callers must only
* consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
* This allows the returned "pos" to point directly to the WordEntryPos
* portion of a tsvector value. If "allocated" is true then the pos array
* is palloc'd workspace and caller may free it when done.
*
* All fields of the ExecPhraseData struct are initially zeroed by caller.
*/
typedef struct ExecPhraseData
{
int npos;
bool allocated;
WordEntryPos *pos;
int npos; /* number of positions reported */
bool allocated; /* pos points to palloc'd data? */
WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
} ExecPhraseData;
/*
* Evaluates tsquery, flags are followe below
* Signature for TSQuery lexeme check functions
*
* arg: opaque value passed through from caller of TS_execute
* val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed
*
* Return TRUE if lexeme is present in data, else FALSE
*/
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
ExecPhraseData *data);
/*
* Flag bits for TS_execute
*/
#define TS_EXEC_EMPTY (0x00)
/*
* if TS_EXEC_CALC_NOT is not set then NOT expression evaluated to be true,
* used in cases where NOT cannot be accurately computed (GiST) or
* it isn't important (ranking)
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
* evaluated to be true. Useful in cases where NOT cannot be accurately
* computed (GiST) or it isn't important (ranking).
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
* Treat OP_PHRASE as OP_AND. Used when posiotional information is not
* accessible, like in consistent methods of GIN/GiST indexes
* Treat OP_PHRASE as OP_AND. Used when positional information is not
* accessible, like in consistent methods of GIN/GiST indexes; rechecking
* must occur later.
*/
#define TS_EXEC_PHRASE_AS_AND (0x02)
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond);
extern bool tsquery_requires_match(QueryItem *curitem);
/*
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment