Commit 1664ae19 authored by Teodor Sigaev's avatar Teodor Sigaev

Add websearch_to_tsquery

Error-tolerant conversion function with web-like syntax for search queries;
it simplifies constructing a search engine with an interface close to what
users are accustomed to.

Bump catalog version

Authors: Victor Drobny, Dmitry Ivanov with editorization by me
Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov
Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
parent fbc27330
...@@ -9630,6 +9630,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple ...@@ -9630,6 +9630,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry> <entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
<entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry> <entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
</row> </row>
<row>
<entry>
<indexterm>
<primary>websearch_to_tsquery</primary>
</indexterm>
<literal><function>websearch_to_tsquery(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type> , </optional> <replaceable class="parameter">query</replaceable> <type>text</type>)</function></literal>
</entry>
<entry><type>tsquery</type></entry>
<entry>produce <type>tsquery</type> from a web search style query</entry>
<entry><literal>websearch_to_tsquery('english', '"fat rat" or rat')</literal></entry>
<entry><literal>'fat' &lt;-&gt; 'rat' | 'rat'</literal></entry>
</row>
<row> <row>
<entry> <entry>
<indexterm> <indexterm>
......
...@@ -797,13 +797,16 @@ UPDATE tt SET ti = ...@@ -797,13 +797,16 @@ UPDATE tt SET ti =
<para> <para>
<productname>PostgreSQL</productname> provides the <productname>PostgreSQL</productname> provides the
functions <function>to_tsquery</function>, functions <function>to_tsquery</function>,
<function>plainto_tsquery</function>, and <function>plainto_tsquery</function>,
<function>phraseto_tsquery</function> <function>phraseto_tsquery</function> and
<function>websearch_to_tsquery</function>
for converting a query to the <type>tsquery</type> data type. for converting a query to the <type>tsquery</type> data type.
<function>to_tsquery</function> offers access to more features <function>to_tsquery</function> offers access to more features
than either <function>plainto_tsquery</function> or than either <function>plainto_tsquery</function> or
<function>phraseto_tsquery</function>, but it is less forgiving <function>phraseto_tsquery</function>, but it is less forgiving about its
about its input. input. <function>websearch_to_tsquery</function> is a simplified version
of <function>to_tsquery</function> with an alternative syntax, similar
to the one used by web search engines.
</para> </para>
<indexterm> <indexterm>
...@@ -962,6 +965,87 @@ SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C'); ...@@ -962,6 +965,87 @@ SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
</screen> </screen>
</para> </para>
<synopsis>
websearch_to_tsquery(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>, </optional> <replaceable class="parameter">querytext</replaceable> <type>text</type>) returns <type>tsquery</type>
</synopsis>
<para>
<function>websearch_to_tsquery</function> creates a <type>tsquery</type>
value from <replaceable>querytext</replaceable> using an alternative
syntax in which simple unformatted text is a valid query.
Unlike <function>plainto_tsquery</function>
and <function>phraseto_tsquery</function>, it also recognizes certain
operators. Moreover, this function should never raise syntax errors,
which makes it possible to use raw user-supplied input for search.
The following syntax is supported:
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<para>
<literal>unquoted text</literal>: text not inside quote marks will be
converted to terms separated by <literal>&amp;</literal> operators, as
if processed by
<function>plainto_tsquery</function>.
</para>
</listitem>
<listitem>
<para>
<literal>"quoted text"</literal>: text inside quote marks will be
converted to terms separated by <literal>&lt;-&gt;</literal>
operators, as if processed by <function>phraseto_tsquery</function>.
</para>
</listitem>
<listitem>
<para>
<literal>OR</literal>: logical or will be converted to
the <literal>|</literal> operator.
</para>
</listitem>
<listitem>
<para>
<literal>-</literal>: the logical not operator, converted to
the <literal>!</literal> operator.
</para>
</listitem>
</itemizedlist>
</para>
<para>
Examples:
<screen>
select websearch_to_tsquery('english', 'The fat rats');
websearch_to_tsquery
-----------------
'fat' &amp; 'rat'
(1 row)
</screen>
<screen>
select websearch_to_tsquery('english', '"supernovae stars" -crab');
websearch_to_tsquery
----------------------------------
'supernova' &lt;-&gt; 'star' &amp; !'crab'
(1 row)
</screen>
<screen>
select websearch_to_tsquery('english', '"sad cat" or "fat rat"');
websearch_to_tsquery
-----------------------------------
'sad' &lt;-&gt; 'cat' | 'fat' &lt;-&gt; 'rat'
(1 row)
</screen>
<screen>
select websearch_to_tsquery('english', 'signal -"segmentation fault"');
websearch_to_tsquery
---------------------------------------
'signal' &amp; !( 'segment' &lt;-&gt; 'fault' )
(1 row)
</screen>
<screen>
select websearch_to_tsquery('english', '""" )( dummy \\ query &lt;-&gt;');
websearch_to_tsquery
----------------------
'dummi' &amp; 'queri'
(1 row)
</screen>
</para>
</sect2> </sect2>
<sect2 id="textsearch-ranking"> <sect2 id="textsearch-ranking">
......
...@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
false); 0);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }
...@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
true); P_TSQ_PLAIN);
PG_RETURN_POINTER(query); PG_RETURN_POINTER(query);
} }
...@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS) ...@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in), query = parse_tsquery(text_to_cstring(in),
pushval_morph, pushval_morph,
PointerGetDatum(&data), PointerGetDatum(&data),
true); P_TSQ_PLAIN);
PG_RETURN_TSQUERY(query); PG_RETURN_TSQUERY(query);
} }
...@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS) ...@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
ObjectIdGetDatum(cfgId), ObjectIdGetDatum(cfgId),
PointerGetDatum(in))); PointerGetDatum(in)));
} }
/*
 * websearch_to_tsquery_byid
 *
 * Two-argument form: argument 0 is the text search configuration OID and
 * argument 1 is the query text.  The text is handed to parse_tsquery() with
 * the P_TSQ_WEB flag, pushval_morph as the push callback, and a MorphOpaque
 * whose qoperator is set to OP_AND.
 */
Datum
websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
{
	text	   *querytext = PG_GETARG_TEXT_PP(1);
	MorphOpaque morph;

	morph.cfg_id = PG_GETARG_OID(0);
	morph.qoperator = OP_AND;

	PG_RETURN_TSQUERY(parse_tsquery(text_to_cstring(querytext),
									pushval_morph,
									PointerGetDatum(&morph),
									P_TSQ_WEB));
}
/*
 * websearch_to_tsquery
 *
 * One-argument form: obtains the configuration OID from
 * getTSCurrentConfig(true) and delegates to websearch_to_tsquery_byid().
 */
Datum
websearch_to_tsquery(PG_FUNCTION_ARGS)
{
	text	   *querytext = PG_GETARG_TEXT_PP(0);
	Oid			currentCfg = getTSCurrentConfig(true);
	Datum		result;

	result = DirectFunctionCall2(websearch_to_tsquery_byid,
								 ObjectIdGetDatum(currentCfg),
								 PointerGetDatum(querytext));

	PG_RETURN_DATUM(result);
}
...@@ -32,14 +32,53 @@ const int tsearch_op_priority[OP_COUNT] = ...@@ -32,14 +32,53 @@ const int tsearch_op_priority[OP_COUNT] =
3 /* OP_PHRASE */ 3 /* OP_PHRASE */
}; };
/*
 * parser's states
 *
 * Kept in the "state" field of TSQueryParserStateData; the tokenizer
 * functions switch on it to decide which kind of token may come next.
 */
typedef enum
{
/* an operand is expected next */
WAITOPERAND = 1,
/* an operand has just been returned; an operator, a close, or end of input is expected */
WAITOPERATOR = 2,
/* initial state set by parse_tsquery(); handled like WAITOPERAND, except that failing to read a value returns PT_END */
WAITFIRSTOPERAND = 3
} ts_parserstate;
/*
 * token types for parsing
 *
 * These are the values a tokenizer function returns to its caller.
 */
typedef enum
{
/* no more tokens */
PT_END = 0,
/* input could not be tokenized */
PT_ERR = 1,
/* operand: *strval, *lenval and *weight are filled in */
PT_VAL = 2,
/* operator: *operator is filled in with OP_* */
PT_OPR = 3,
/* group start: '(' in the standard tokenizer, opening '"' in the websearch tokenizer */
PT_OPEN = 4,
/* group end: ')' in the standard tokenizer, closing '"' in the websearch tokenizer */
PT_CLOSE = 5
} ts_tokentype;
/*
* get token from query string
*
* *operator is filled in with OP_* when the return value is PT_OPR;
* for the phrase operator, *weight additionally holds the distance value.
* *strval, *lenval and *weight are filled in when the return value is PT_VAL
*
*/
typedef ts_tokentype (*ts_tokenizer)(TSQueryParserState state, int8 *operator,
int *lenval, char **strval,
int16 *weight, bool *prefix);
struct TSQueryParserStateData struct TSQueryParserStateData
{ {
/* State for gettoken_query */ /* Tokenizer used for parsing tsquery */
ts_tokenizer gettoken;
/* State of tokenizer function */
char *buffer; /* entire string we are scanning */ char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */ char *buf; /* current scan point */
int state;
int count; /* nesting count, incremented by (, int count; /* nesting count, incremented by (,
* decremented by ) */ * decremented by ) */
bool in_quotes; /* phrase in quotes "" */
ts_parserstate state;
/* polish (prefix) notation in list, filled in by push* functions */ /* polish (prefix) notation in list, filled in by push* functions */
List *polstr; List *polstr;
...@@ -57,12 +96,6 @@ struct TSQueryParserStateData ...@@ -57,12 +96,6 @@ struct TSQueryParserStateData
TSVectorParseState valstate; TSVectorParseState valstate;
}; };
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
#define WAITFIRSTOPERAND 3
#define WAITSINGLEOPERAND 4
/* /*
* subroutine to parse the modifiers (weight and prefix flag currently) * subroutine to parse the modifiers (weight and prefix flag currently)
* part, like ':AB*' of a query. * part, like ':AB*' of a query.
...@@ -118,18 +151,17 @@ get_modifiers(char *buf, int16 *weight, bool *prefix) ...@@ -118,18 +151,17 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
* *
* The buffer should begin with '<' char * The buffer should begin with '<' char
*/ */
static char * static bool
parse_phrase_operator(char *buf, int16 *distance) parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
{ {
enum enum
{ {
PHRASE_OPEN = 0, PHRASE_OPEN = 0,
PHRASE_DIST, PHRASE_DIST,
PHRASE_CLOSE, PHRASE_CLOSE,
PHRASE_ERR,
PHRASE_FINISH PHRASE_FINISH
} state = PHRASE_OPEN; } state = PHRASE_OPEN;
char *ptr = buf; char *ptr = pstate->buf;
char *endptr; char *endptr;
long l = 1; /* default distance */ long l = 1; /* default distance */
...@@ -138,9 +170,13 @@ parse_phrase_operator(char *buf, int16 *distance) ...@@ -138,9 +170,13 @@ parse_phrase_operator(char *buf, int16 *distance)
switch (state) switch (state)
{ {
case PHRASE_OPEN: case PHRASE_OPEN:
Assert(t_iseq(ptr, '<')); if (t_iseq(ptr, '<'))
state = PHRASE_DIST; {
ptr++; state = PHRASE_DIST;
ptr++;
}
else
return false;
break; break;
case PHRASE_DIST: case PHRASE_DIST:
...@@ -148,18 +184,16 @@ parse_phrase_operator(char *buf, int16 *distance) ...@@ -148,18 +184,16 @@ parse_phrase_operator(char *buf, int16 *distance)
{ {
state = PHRASE_CLOSE; state = PHRASE_CLOSE;
ptr++; ptr++;
break; continue;
} }
if (!t_isdigit(ptr)) if (!t_isdigit(ptr))
{ return false;
state = PHRASE_ERR;
break;
}
errno = 0; errno = 0;
l = strtol(ptr, &endptr, 10); l = strtol(ptr, &endptr, 10);
if (ptr == endptr) if (ptr == endptr)
state = PHRASE_ERR; return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
...@@ -179,54 +213,77 @@ parse_phrase_operator(char *buf, int16 *distance) ...@@ -179,54 +213,77 @@ parse_phrase_operator(char *buf, int16 *distance)
ptr++; ptr++;
} }
else else
state = PHRASE_ERR; return false;
break; break;
case PHRASE_FINISH: case PHRASE_FINISH:
*distance = (int16) l; *distance = (int16) l;
return ptr; pstate->buf = ptr;
return true;
case PHRASE_ERR:
default:
goto err;
} }
} }
err: return false;
*distance = -1;
return buf;
} }
/* /*
* token types for parsing * Parse OR operator used in websearch_to_tsquery(), returns true if we
* believe that "OR" literal could be an operator OR
*/ */
typedef enum static bool
parse_or_operator(TSQueryParserState pstate)
{ {
PT_END = 0, char *ptr = pstate->buf;
PT_ERR = 1,
PT_VAL = 2, if (pstate->in_quotes)
PT_OPR = 3, return false;
PT_OPEN = 4,
PT_CLOSE = 5 /* it should begin with "OR" literal */
} ts_tokentype; if (pg_strncasecmp(ptr, "or", 2) != 0)
return false;
ptr += 2;
/*
* it shouldn't be a part of any word but somewhere later it should be some
* operand
*/
if (*ptr == '\0') /* no operand */
return false;
/* it shouldn't be a part of any word */
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
return false;
for(;;)
{
ptr += pg_mblen(ptr);
if (*ptr == '\0') /* got end of string without operand */
return false;
/*
* We have found something that looks like an operand, although it may
* turn out to be an invalid one. Even so, treat the OR literal as an
* operator with a possibly incorrect operand, and do not search for
* it as a lexeme.
*/
if (!t_isspace(ptr))
break;
}
pstate->buf += 2;
return true;
}
/*
* get token from query string
*
* *operator is filled in with OP_* when the return value is PT_OPR;
* for the phrase operator, *weight additionally holds the distance value.
* *strval, *lenval and *weight are filled in when the return value is PT_VAL
*
*/
static ts_tokentype static ts_tokentype
gettoken_query(TSQueryParserState state, gettoken_query_standard(TSQueryParserState state, int8 *operator,
int8 *operator, int *lenval, char **strval,
int *lenval, char **strval, int16 *weight, bool *prefix) int16 *weight, bool *prefix)
{ {
*weight = 0; *weight = 0;
*prefix = false; *prefix = false;
while (1) while (true)
{ {
switch (state->state) switch (state->state)
{ {
...@@ -234,17 +291,16 @@ gettoken_query(TSQueryParserState state, ...@@ -234,17 +291,16 @@ gettoken_query(TSQueryParserState state,
case WAITOPERAND: case WAITOPERAND:
if (t_iseq(state->buf, '!')) if (t_iseq(state->buf, '!'))
{ {
(state->buf)++; /* can safely ++, t_iseq guarantee that state->buf++;
* pg_mblen()==1 */
*operator = OP_NOT;
state->state = WAITOPERAND; state->state = WAITOPERAND;
*operator = OP_NOT;
return PT_OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, '(')) else if (t_iseq(state->buf, '('))
{ {
state->count++; state->buf++;
(state->buf)++;
state->state = WAITOPERAND; state->state = WAITOPERAND;
state->count++;
return PT_OPEN; return PT_OPEN;
} }
else if (t_iseq(state->buf, ':')) else if (t_iseq(state->buf, ':'))
...@@ -256,19 +312,19 @@ gettoken_query(TSQueryParserState state, ...@@ -256,19 +312,19 @@ gettoken_query(TSQueryParserState state,
} }
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
{ {
/* /* We rely on the tsvector parser to parse the value for us */
* We rely on the tsvector parser to parse the value for
* us
*/
reset_tsvector_parser(state->valstate, state->buf); reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf)) if (gettoken_tsvector(state->valstate, strval, lenval,
NULL, NULL, &state->buf))
{ {
state->buf = get_modifiers(state->buf, weight, prefix); state->buf = get_modifiers(state->buf, weight, prefix);
state->state = WAITOPERATOR; state->state = WAITOPERATOR;
return PT_VAL; return PT_VAL;
} }
else if (state->state == WAITFIRSTOPERAND) else if (state->state == WAITFIRSTOPERAND)
{
return PT_END; return PT_END;
}
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
...@@ -276,58 +332,206 @@ gettoken_query(TSQueryParserState state, ...@@ -276,58 +332,206 @@ gettoken_query(TSQueryParserState state,
state->buffer))); state->buffer)));
} }
break; break;
case WAITOPERATOR: case WAITOPERATOR:
if (t_iseq(state->buf, '&')) if (t_iseq(state->buf, '&'))
{ {
state->buf++;
state->state = WAITOPERAND; state->state = WAITOPERAND;
*operator = OP_AND; *operator = OP_AND;
(state->buf)++;
return PT_OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, '|')) else if (t_iseq(state->buf, '|'))
{ {
state->buf++;
state->state = WAITOPERAND; state->state = WAITOPERAND;
*operator = OP_OR; *operator = OP_OR;
(state->buf)++;
return PT_OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, '<')) else if (parse_phrase_operator(state, weight))
{ {
/* weight var is used as storage for distance */
state->state = WAITOPERAND; state->state = WAITOPERAND;
*operator = OP_PHRASE; *operator = OP_PHRASE;
/* weight var is used as storage for distance */
state->buf = parse_phrase_operator(state->buf, weight);
if (*weight < 0)
return PT_ERR;
return PT_OPR; return PT_OPR;
} }
else if (t_iseq(state->buf, ')')) else if (t_iseq(state->buf, ')'))
{ {
(state->buf)++; state->buf++;
state->count--; state->count--;
return (state->count < 0) ? PT_ERR : PT_CLOSE; return (state->count < 0) ? PT_ERR : PT_CLOSE;
} }
else if (*(state->buf) == '\0') else if (*state->buf == '\0')
{
return (state->count) ? PT_ERR : PT_END; return (state->count) ? PT_ERR : PT_END;
}
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
{
return PT_ERR; return PT_ERR;
}
break;
}
state->buf += pg_mblen(state->buf);
}
}
static ts_tokentype
gettoken_query_websearch(TSQueryParserState state, int8 *operator,
int *lenval, char **strval,
int16 *weight, bool *prefix)
{
*weight = 0;
*prefix = false;
while (true)
{
switch (state->state)
{
case WAITFIRSTOPERAND:
case WAITOPERAND:
if (t_iseq(state->buf, '-'))
{
state->buf++;
state->state = WAITOPERAND;
if (state->in_quotes)
continue;
*operator = OP_NOT;
return PT_OPR;
}
else if (t_iseq(state->buf, '"'))
{
state->buf++;
if (!state->in_quotes)
{
state->state = WAITOPERAND;
if (strchr(state->buf, '"'))
{
/* quoted text should be ordered <-> */
state->in_quotes = true;
return PT_OPEN;
}
/* web search tolerates missing quotes */
continue;
}
else
{
/* we have to provide an operand */
state->in_quotes = false;
state->state = WAITOPERATOR;
pushStop(state);
return PT_CLOSE;
}
}
else if (ISOPERATOR(state->buf))
{
/* or else gettoken_tsvector() will raise an error */
state->buf++;
state->state = WAITOPERAND;
continue;
}
else if (!t_isspace(state->buf))
{
/* We rely on the tsvector parser to parse the value for us */
reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval,
NULL, NULL, &state->buf))
{
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
}
else
{
/* finally, we have to provide an operand */
pushStop(state);
return PT_END;
}
}
break; break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0') case WAITOPERATOR:
if (t_iseq(state->buf, '"'))
{
if (!state->in_quotes)
{
/*
* put implicit AND after an operand
* and handle this quote in WAITOPERAND
*/
state->state = WAITOPERAND;
*operator = OP_AND;
return PT_OPR;
}
else
{
state->buf++;
/* just close quotes */
state->in_quotes = false;
return PT_CLOSE;
}
}
else if (parse_or_operator(state))
{
state->state = WAITOPERAND;
*operator = OP_OR;
return PT_OPR;
}
else if (*state->buf == '\0')
{
return PT_END; return PT_END;
*strval = state->buf; }
*lenval = strlen(state->buf); else if (!t_isspace(state->buf))
state->buf += strlen(state->buf); {
state->count++; if (state->in_quotes)
return PT_VAL; {
default: /* put implicit <-> after an operand */
return PT_ERR; *operator = OP_PHRASE;
*weight = 1;
}
else
{
/* put implicit AND after an operand */
*operator = OP_AND;
}
state->state = WAITOPERAND;
return PT_OPR;
}
break; break;
} }
state->buf += pg_mblen(state->buf); state->buf += pg_mblen(state->buf);
} }
} }
/*
 * Tokenizer selected by parse_tsquery() when P_TSQ_PLAIN is set.
 *
 * Hands back everything left in the buffer as one PT_VAL token, and
 * reports PT_END once the buffer is exhausted.  *operator is never
 * written; *weight and *prefix are always reset.
 */
static ts_tokentype
gettoken_query_plain(TSQueryParserState state, int8 *operator,
					 int *lenval, char **strval,
					 int16 *weight, bool *prefix)
{
	char	   *remaining;

	*weight = 0;
	*prefix = false;

	remaining = state->buf;

	/* nothing left to scan */
	if (remaining[0] == '\0')
		return PT_END;

	/* the rest of the string is a single value */
	*strval = remaining;
	*lenval = strlen(remaining);
	state->buf = remaining + *lenval;

	/*
	 * NOTE(review): count is described elsewhere as the parenthesis nesting
	 * counter; why it is bumped here is not evident from this function --
	 * confirm against the caller.
	 */
	state->count++;

	return PT_VAL;
}
/* /*
* Push an operator to state->polstr * Push an operator to state->polstr
*/ */
...@@ -489,7 +693,9 @@ makepol(TSQueryParserState state, ...@@ -489,7 +693,9 @@ makepol(TSQueryParserState state,
/* since this function recurses, it could be driven to stack overflow */ /* since this function recurses, it could be driven to stack overflow */
check_stack_depth(); check_stack_depth();
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END) while ((type = state->gettoken(state, &operator,
&lenval, &strval,
&weight, &prefix)) != PT_END)
{ {
switch (type) switch (type)
{ {
...@@ -605,7 +811,7 @@ TSQuery ...@@ -605,7 +811,7 @@ TSQuery
parse_tsquery(char *buf, parse_tsquery(char *buf,
PushFunction pushval, PushFunction pushval,
Datum opaque, Datum opaque,
bool isplain) int flags)
{ {
struct TSQueryParserStateData state; struct TSQueryParserStateData state;
int i; int i;
...@@ -614,16 +820,32 @@ parse_tsquery(char *buf, ...@@ -614,16 +820,32 @@ parse_tsquery(char *buf,
QueryItem *ptr; QueryItem *ptr;
ListCell *cell; ListCell *cell;
bool needcleanup; bool needcleanup;
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
/* plain should not be used with web */
Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
/* select suitable tokenizer */
if (flags & P_TSQ_PLAIN)
state.gettoken = gettoken_query_plain;
else if (flags & P_TSQ_WEB)
{
state.gettoken = gettoken_query_websearch;
tsv_flags |= P_TSV_IS_WEB;
}
else
state.gettoken = gettoken_query_standard;
/* init state */ /* init state */
state.buffer = buf; state.buffer = buf;
state.buf = buf; state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0; state.count = 0;
state.in_quotes = false;
state.state = WAITFIRSTOPERAND;
state.polstr = NIL; state.polstr = NIL;
/* init value parser's state */ /* init value parser's state */
state.valstate = init_tsvector_parser(state.buffer, true, true); state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
/* init list of operand */ /* init list of operand */
state.sumlen = 0; state.sumlen = 0;
...@@ -716,7 +938,7 @@ tsqueryin(PG_FUNCTION_ARGS) ...@@ -716,7 +938,7 @@ tsqueryin(PG_FUNCTION_ARGS)
{ {
char *in = PG_GETARG_CSTRING(0); char *in = PG_GETARG_CSTRING(0);
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false)); PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
} }
/* /*
......
...@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS)
char *cur; char *cur;
int buflen = 256; /* allocated size of tmpbuf */ int buflen = 256; /* allocated size of tmpbuf */
state = init_tsvector_parser(buf, false, false); state = init_tsvector_parser(buf, 0);
arrlen = 64; arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
......
...@@ -33,6 +33,7 @@ struct TSVectorParseStateData ...@@ -33,6 +33,7 @@ struct TSVectorParseStateData
int eml; /* max bytes per character */ int eml; /* max bytes per character */
bool oprisdelim; /* treat ! | * ( ) as delimiters? */ bool oprisdelim; /* treat ! | * ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */ bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
bool is_web; /* we're in websearch_to_tsquery() */
}; };
...@@ -42,7 +43,7 @@ struct TSVectorParseStateData ...@@ -42,7 +43,7 @@ struct TSVectorParseStateData
* ! | & ( ) * ! | & ( )
*/ */
TSVectorParseState TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) init_tsvector_parser(char *input, int flags)
{ {
TSVectorParseState state; TSVectorParseState state;
...@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) ...@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
state->len = 32; state->len = 32;
state->word = (char *) palloc(state->len); state->word = (char *) palloc(state->len);
state->eml = pg_database_encoding_max_length(); state->eml = pg_database_encoding_max_length();
state->oprisdelim = oprisdelim; state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
state->is_tsquery = is_tsquery; state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
state->is_web = (flags & P_TSV_IS_WEB) != 0;
return state; return state;
} }
...@@ -89,16 +91,6 @@ do { \ ...@@ -89,16 +91,6 @@ do { \
} \ } \
} while (0) } while (0)
/* phrase operator begins with '<' */
#define ISOPERATOR(x) \
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
/* Fills gettoken_tsvector's output parameters, and returns true */ /* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \ #define RETURN_TOKEN \
do { \ do { \
...@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state,
{ {
if (*(state->prsbuf) == '\0') if (*(state->prsbuf) == '\0')
return false; return false;
else if (t_iseq(state->prsbuf, '\'')) else if (!state->is_web && t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX; statecode = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\')) else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{ {
statecode = WAITNEXTCHAR; statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD; oldstate = WAITENDWORD;
} }
else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR; PRSSYNTAXERROR;
else if (!t_isspace(state->prsbuf)) else if (!t_isspace(state->prsbuf))
{ {
...@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state,
} }
else if (statecode == WAITENDWORD) else if (statecode == WAITENDWORD)
{ {
if (t_iseq(state->prsbuf, '\\')) if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{ {
statecode = WAITNEXTCHAR; statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD; oldstate = WAITENDWORD;
} }
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf))) (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
{ {
RESIZEPRSBUF; RESIZEPRSBUF;
if (curpos == state->word) if (curpos == state->word)
...@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state,
} }
else if (statecode == WAITENDCMPLX) else if (statecode == WAITENDCMPLX)
{ {
if (t_iseq(state->prsbuf, '\'')) if (!state->is_web && t_iseq(state->prsbuf, '\''))
{ {
statecode = WAITCHARCMPLX; statecode = WAITCHARCMPLX;
} }
else if (t_iseq(state->prsbuf, '\\')) else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{ {
statecode = WAITNEXTCHAR; statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX; oldstate = WAITENDCMPLX;
...@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state,
} }
else if (statecode == WAITCHARCMPLX) else if (statecode == WAITCHARCMPLX)
{ {
if (t_iseq(state->prsbuf, '\'')) if (!state->is_web && t_iseq(state->prsbuf, '\''))
{ {
RESIZEPRSBUF; RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf); COPYCHAR(curpos, state->prsbuf);
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201804031 #define CATALOG_VERSION_NO 201804051
#endif #endif
...@@ -4971,6 +4971,8 @@ DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s ...@@ -4971,6 +4971,8 @@ DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ )); DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 8889 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector"); DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ )); DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
...@@ -4979,6 +4981,8 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s ...@@ -4979,6 +4981,8 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery"); DESCR("transform to tsquery");
DATA(insert OID = 8890 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ )); DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector"); DESCR("transform jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ )); DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
......
...@@ -25,9 +25,11 @@ ...@@ -25,9 +25,11 @@
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState; typedef struct TSVectorParseStateData *TSVectorParseState;
extern TSVectorParseState init_tsvector_parser(char *input, #define P_TSV_OPR_IS_DELIM (1 << 0)
bool oprisdelim, #define P_TSV_IS_TSQUERY (1 << 1)
bool is_tsquery); #define P_TSV_IS_WEB (1 << 2)
extern TSVectorParseState init_tsvector_parser(char *input, int flags);
extern void reset_tsvector_parser(TSVectorParseState state, char *input); extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state, extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len, char **token, int *len,
...@@ -35,6 +37,16 @@ extern bool gettoken_tsvector(TSVectorParseState state, ...@@ -35,6 +37,16 @@ extern bool gettoken_tsvector(TSVectorParseState state,
char **endptr); char **endptr);
extern void close_tsvector_parser(TSVectorParseState state); extern void close_tsvector_parser(TSVectorParseState state);
/*
 * True if x points at a one-byte character (per pg_mblen) that is one of
 * the tsquery operator characters: ! & | ( ) or '<'.  Only the first
 * character is examined; the phrase operator begins with '<'.
 *
 * x is evaluated more than once, so it must not have side effects.
 */
#define ISOPERATOR(x) \
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
/* parse_tsquery */ /* parse_tsquery */
struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */ struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
...@@ -46,9 +58,13 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, ...@@ -46,9 +58,13 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
* QueryOperand struct */ * QueryOperand struct */
bool prefix); bool prefix);
/*
 * Bit flags; each value occupies its own bit, so they can be combined
 * with bitwise OR and tested with bitwise AND.
 *
 * NOTE(review): presumably these are the values accepted by the "flags"
 * argument of parse_tsquery() declared just below (P_TSQ_PLAIN for
 * plain-text input, P_TSQ_WEB for web-search-style input) -- confirm
 * against backend/utils/adt/tsquery.c.
 */
#define P_TSQ_PLAIN (1 << 0)
#define P_TSQ_WEB (1 << 1)
extern TSQuery parse_tsquery(char *buf, extern TSQuery parse_tsquery(char *buf,
PushFunction pushval, PushFunction pushval,
Datum opaque, bool isplain); Datum opaque,
int flags);
/* Functions for use by PushFunction implementations */ /* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state, extern void pushValue(TSQueryParserState state,
......
...@@ -1672,3 +1672,426 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca ...@@ -1672,3 +1672,426 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
(1 row) (1 row)
set enable_seqscan = on; set enable_seqscan = on;
-- test websearch_to_tsquery function
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
websearch_to_tsquery
---------------------------------------------
'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat'
(1 row)
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
websearch_to_tsquery
-----------------------
'orange' & 'aabbccdd'
(1 row)
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
websearch_to_tsquery
-----------------------------------------
'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c'
(1 row)
select websearch_to_tsquery('simple', 'fat:A : cat:B');
websearch_to_tsquery
---------------------------
'fat' & 'a' & 'cat' & 'b'
(1 row)
select websearch_to_tsquery('simple', 'fat*rat');
websearch_to_tsquery
----------------------
'fat' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat-rat');
websearch_to_tsquery
---------------------------
'fat-rat' & 'fat' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat_rat');
websearch_to_tsquery
----------------------
'fat' & 'rat'
(1 row)
-- weights are completely ignored
select websearch_to_tsquery('simple', 'abc : def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc:def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'a:::b');
websearch_to_tsquery
----------------------
'a' & 'b'
(1 row)
select websearch_to_tsquery('simple', 'abc:d');
websearch_to_tsquery
----------------------
'abc' & 'd'
(1 row)
select websearch_to_tsquery('simple', ':');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
-- these operators are ignored
select websearch_to_tsquery('simple', 'abc & def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc | def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc <-> def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc (pg or class)');
websearch_to_tsquery
------------------------
'abc' & 'pg' | 'class'
(1 row)
-- NOT is ignored in quotes
select websearch_to_tsquery('english', 'My brand new smartphone');
websearch_to_tsquery
-------------------------------
'brand' & 'new' & 'smartphon'
(1 row)
select websearch_to_tsquery('english', 'My brand "new smartphone"');
websearch_to_tsquery
---------------------------------
'brand' & 'new' <-> 'smartphon'
(1 row)
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
websearch_to_tsquery
---------------------------------
'brand' & 'new' <-> 'smartphon'
(1 row)
-- test OR operator
select websearch_to_tsquery('simple', 'cat or rat');
websearch_to_tsquery
----------------------
'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat OR rat');
websearch_to_tsquery
----------------------
'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat "OR" rat');
websearch_to_tsquery
----------------------
'cat' & 'or' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat OR');
websearch_to_tsquery
----------------------
'cat' & 'or'
(1 row)
select websearch_to_tsquery('simple', 'OR rat');
websearch_to_tsquery
----------------------
'or' & 'rat'
(1 row)
select websearch_to_tsquery('simple', '"fat cat OR rat"');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' <-> 'or' <-> 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat (cat OR rat');
websearch_to_tsquery
-----------------------
'fat' & 'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'or OR or');
websearch_to_tsquery
----------------------
'or' | 'or'
(1 row)
-- OR is an operator here ...
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
websearch_to_tsquery
-----------------------------------
'fat' <-> 'cat' | 'fat' <-> 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or(rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or)rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or&rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or|rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or!rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or<rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or>rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or ');
websearch_to_tsquery
----------------------
'fat' & 'or'
(1 row)
-- ... but not here
select websearch_to_tsquery('simple', 'abc orange');
websearch_to_tsquery
----------------------
'abc' & 'orange'
(1 row)
select websearch_to_tsquery('simple', 'abc orтест');
websearch_to_tsquery
----------------------
'abc' & 'orтест'
(1 row)
select websearch_to_tsquery('simple', 'abc OR1234');
websearch_to_tsquery
----------------------
'abc' & 'or1234'
(1 row)
select websearch_to_tsquery('simple', 'abc or-abc');
websearch_to_tsquery
---------------------------------
'abc' & 'or-abc' & 'or' & 'abc'
(1 row)
select websearch_to_tsquery('simple', 'abc OR_abc');
websearch_to_tsquery
----------------------
'abc' & 'or' & 'abc'
(1 row)
-- test quotes
select websearch_to_tsquery('english', '"pg_class pg');
websearch_to_tsquery
-----------------------
'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', 'pg_class pg"');
websearch_to_tsquery
-----------------------
'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', '"pg_class pg"');
websearch_to_tsquery
-----------------------------
( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', 'abc "pg_class pg"');
websearch_to_tsquery
-------------------------------------
'abc' & ( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', '"pg_class pg" def');
websearch_to_tsquery
-------------------------------------
( 'pg' & 'class' ) <-> 'pg' & 'def'
(1 row)
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
websearch_to_tsquery
------------------------------------------------------
'abc' & 'pg' <-> ( 'pg' & 'class' ) <-> 'pg' & 'def'
(1 row)
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
websearch_to_tsquery
--------------------------------------
'pg' <-> ( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', '""pg pg_class pg""');
websearch_to_tsquery
------------------------------
'pg' & 'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', 'abc """"" def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', 'cat -"fat rat"');
websearch_to_tsquery
------------------------------
'cat' & !( 'fat' <-> 'rat' )
(1 row)
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
websearch_to_tsquery
----------------------------------------
'cat' & !( 'fat' <-> 'rat' ) & 'chees'
(1 row)
select websearch_to_tsquery('english', 'abc "def -"');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', 'abc "def :"');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' & 'eaten' & !'rat'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
websearch_to_tsquery
-----------------------------------
'fat' <-> 'cat' & 'eaten' | 'rat'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' & 'eaten' | !'rat'
(1 row)
select websearch_to_tsquery('english', 'this is ----fine');
websearch_to_tsquery
----------------------
!!!!'fine'
(1 row)
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
websearch_to_tsquery
----------------------------------------
!'fine' & 'dear' <-> 'friend' | 'good'
(1 row)
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
websearch_to_tsquery
------------------------
'old' & 'cat' & 'fine'
(1 row)
select websearch_to_tsquery('english', '"A the" OR just on');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
websearch_to_tsquery
---------------------------------
'fat' <-> 'cat' & 'ate' & 'rat'
(1 row)
select to_tsvector('english', 'A fat cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
?column?
----------
t
(1 row)
select to_tsvector('english', 'A fat grey cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
?column?
----------
f
(1 row)
-- cases handled by gettoken_tsvector()
select websearch_to_tsquery('''');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
select websearch_to_tsquery('''abc''''def''');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('\abc');
websearch_to_tsquery
----------------------
'abc'
(1 row)
select websearch_to_tsquery('\');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
...@@ -539,3 +539,97 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts); ...@@ -539,3 +539,97 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off; set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat'); select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
set enable_seqscan = on; set enable_seqscan = on;
-- test websearch_to_tsquery function
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
select websearch_to_tsquery('simple', 'fat:A : cat:B');
select websearch_to_tsquery('simple', 'fat*rat');
select websearch_to_tsquery('simple', 'fat-rat');
select websearch_to_tsquery('simple', 'fat_rat');
-- weights are completely ignored
select websearch_to_tsquery('simple', 'abc : def');
select websearch_to_tsquery('simple', 'abc:def');
select websearch_to_tsquery('simple', 'a:::b');
select websearch_to_tsquery('simple', 'abc:d');
select websearch_to_tsquery('simple', ':');
-- these operators are ignored
select websearch_to_tsquery('simple', 'abc & def');
select websearch_to_tsquery('simple', 'abc | def');
select websearch_to_tsquery('simple', 'abc <-> def');
select websearch_to_tsquery('simple', 'abc (pg or class)');
-- NOT is ignored in quotes
select websearch_to_tsquery('english', 'My brand new smartphone');
select websearch_to_tsquery('english', 'My brand "new smartphone"');
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
-- test OR operator
select websearch_to_tsquery('simple', 'cat or rat');
select websearch_to_tsquery('simple', 'cat OR rat');
select websearch_to_tsquery('simple', 'cat "OR" rat');
select websearch_to_tsquery('simple', 'cat OR');
select websearch_to_tsquery('simple', 'OR rat');
select websearch_to_tsquery('simple', '"fat cat OR rat"');
select websearch_to_tsquery('simple', 'fat (cat OR rat');
select websearch_to_tsquery('simple', 'or OR or');
-- OR is an operator here ...
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
select websearch_to_tsquery('simple', 'fat or(rat');
select websearch_to_tsquery('simple', 'fat or)rat');
select websearch_to_tsquery('simple', 'fat or&rat');
select websearch_to_tsquery('simple', 'fat or|rat');
select websearch_to_tsquery('simple', 'fat or!rat');
select websearch_to_tsquery('simple', 'fat or<rat');
select websearch_to_tsquery('simple', 'fat or>rat');
select websearch_to_tsquery('simple', 'fat or ');
-- ... but not here
select websearch_to_tsquery('simple', 'abc orange');
select websearch_to_tsquery('simple', 'abc orтест');
select websearch_to_tsquery('simple', 'abc OR1234');
select websearch_to_tsquery('simple', 'abc or-abc');
select websearch_to_tsquery('simple', 'abc OR_abc');
-- test quotes
select websearch_to_tsquery('english', '"pg_class pg');
select websearch_to_tsquery('english', 'pg_class pg"');
select websearch_to_tsquery('english', '"pg_class pg"');
select websearch_to_tsquery('english', 'abc "pg_class pg"');
select websearch_to_tsquery('english', '"pg_class pg" def');
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
select websearch_to_tsquery('english', '""pg pg_class pg""');
select websearch_to_tsquery('english', 'abc """"" def');
select websearch_to_tsquery('english', 'cat -"fat rat"');
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
select websearch_to_tsquery('english', 'abc "def -"');
select websearch_to_tsquery('english', 'abc "def :"');
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
select websearch_to_tsquery('english', 'this is ----fine');
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
select websearch_to_tsquery('english', '"A the" OR just on');
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
select to_tsvector('english', 'A fat cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
select to_tsvector('english', 'A fat grey cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
-- cases handled by gettoken_tsvector()
select websearch_to_tsquery('''');
select websearch_to_tsquery('''abc''''def''');
select websearch_to_tsquery('\abc');
select websearch_to_tsquery('\');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment