Commit 1ea47dd8 authored by Tom Lane

Fix shared tsvector/tsquery input code so that we don't say "syntax error in
tsvector" when we are really parsing a tsquery.  Report the bogus input,
too.  Make styles of some related error messages more consistent.
parent dfc6f130
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.7 2007/09/11 16:01:40 teodor Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.8 2007/10/21 22:29:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state, ...@@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state,
{ {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error at start of operand in tsearch query: \"%s\"", errmsg("syntax error in tsquery: \"%s\"",
state->buffer))); state->buffer)));
} }
else if (!t_isspace(state->buf)) else if (!t_isspace(state->buf))
...@@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state, ...@@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state,
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsearch query: \"%s\"", errmsg("no operand in tsquery: \"%s\"",
state->buffer))); state->buffer)));
} }
break; break;
...@@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int ...@@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
if (distance >= MAXSTRPOS) if (distance >= MAXSTRPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("value is too big in tsearch query: \"%s\"", errmsg("value is too big in tsquery: \"%s\"",
state->buffer))); state->buffer)));
if (lenval >= MAXSTRLEN) if (lenval >= MAXSTRLEN)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand is too long in tsearch query: \"%s\"", errmsg("operand is too long in tsquery: \"%s\"",
state->buffer))); state->buffer)));
tmp = (QueryOperand *) palloc(sizeof(QueryOperand)); tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
...@@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight) ...@@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
if (lenval >= MAXSTRLEN) if (lenval >= MAXSTRLEN)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long in tsearch query: \"%s\"", errmsg("word is too long in tsquery: \"%s\"",
state->buffer))); state->buffer)));
INIT_CRC32(valcrc); INIT_CRC32(valcrc);
...@@ -372,7 +372,7 @@ makepol(TSQueryParserState state, ...@@ -372,7 +372,7 @@ makepol(TSQueryParserState state,
default: default:
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"", errmsg("syntax error in tsquery: \"%s\"",
state->buffer))); state->buffer)));
} }
} }
...@@ -478,7 +478,7 @@ parse_tsquery(char *buf, ...@@ -478,7 +478,7 @@ parse_tsquery(char *buf,
state.polstr = NIL; state.polstr = NIL;
/* init value parser's state */ /* init value parser's state */
state.valstate = init_tsvector_parser(NULL, true); state.valstate = init_tsvector_parser(state.buffer, true, true);
/* init list of operand */ /* init list of operand */
state.sumlen = 0; state.sumlen = 0;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS)
pg_verifymbstr(buf, strlen(buf), false); pg_verifymbstr(buf, strlen(buf), false);
state = init_tsvector_parser(buf, false); state = init_tsvector_parser(buf, false, false);
arrlen = 64; arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
...@@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS) ...@@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS)
if (cur - tmpbuf > MAXSTRPOS) if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large"))); errmsg("position value is too large")));
/* /*
* Enlarge buffers if needed * Enlarge buffers if needed
...@@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS) ...@@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
datalen += lex_len; datalen += lex_len;
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0) if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
elog(ERROR, "lexemes are unordered"); elog(ERROR, "lexemes are misordered");
/* Receive positions */ /* Receive positions */
...@@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS) ...@@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
{ {
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos)); wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1])) if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
elog(ERROR, "position information is unordered"); elog(ERROR, "position information is misordered");
} }
datalen += (npos + 1) * sizeof(WordEntry); datalen += (npos + 1) * sizeof(WordEntry);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.2 2007/10/21 22:29:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,35 +20,49 @@ ...@@ -20,35 +20,49 @@
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
#include "utils/memutils.h" #include "utils/memutils.h"
/*
* Private state of tsvector parser. Note that tsquery also uses this code to
* parse its input, hence the boolean flags. The two flags are both true or
* both false in current usage, but we keep them separate for clarity.
* is_tsquery affects *only* the content of error messages.
*/
struct TSVectorParseStateData struct TSVectorParseStateData
{ {
char *prsbuf; char *prsbuf; /* next input character */
char *bufstart; /* whole string (used only for errors) */
char *word; /* buffer to hold the current word */ char *word; /* buffer to hold the current word */
int len; /* size in bytes allocated for 'word' */ int len; /* size in bytes allocated for 'word' */
bool oprisdelim; int eml; /* max bytes per character */
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
}; };
/* /*
* Initializes parser for the input string. If oprisdelim is set, the * Initializes parser for the input string. If oprisdelim is set, the
* following characters are treated as delimiters in addition to whitespace: * following characters are treated as delimiters in addition to whitespace:
* ! | & ( ) * ! | & ( )
*/ */
TSVectorParseState TSVectorParseState
init_tsvector_parser(char *input, bool oprisdelim) init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
{ {
TSVectorParseState state; TSVectorParseState state;
state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData)); state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
state->prsbuf = input; state->prsbuf = input;
state->bufstart = input;
state->len = 32; state->len = 32;
state->word = (char *) palloc(state->len); state->word = (char *) palloc(state->len);
state->eml = pg_database_encoding_max_length();
state->oprisdelim = oprisdelim; state->oprisdelim = oprisdelim;
state->is_tsquery = is_tsquery;
return state; return state;
} }
/* /*
* Reinitializes parser for parsing 'input', instead of previous input. * Reinitializes parser to parse 'input', instead of previous input.
*/ */
void void
reset_tsvector_parser(TSVectorParseState state, char *input) reset_tsvector_parser(TSVectorParseState state, char *input)
...@@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state) ...@@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state)
pfree(state); pfree(state);
} }
/* increase the size of 'word' if needed to hold one more character */
#define RESIZEPRSBUF \ #define RESIZEPRSBUF \
do { \ do { \
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int clen = curpos - state->word; \ int clen = curpos - state->word; \
if ( clen + state->eml >= state->len ) \
{ \
state->len *= 2; \ state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \ state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; \ curpos = state->word + clen; \
} \ } \
} while (0) } while (0)
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) ) #define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
/* Fills the output parameters, and returns true */ /* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \ #define RETURN_TOKEN \
do { \ do { \
if (pos_ptr != NULL) \ if (pos_ptr != NULL) \
...@@ -111,18 +125,34 @@ do { \ ...@@ -111,18 +125,34 @@ do { \
#define WAITPOSDELIM 7 #define WAITPOSDELIM 7
#define WAITCHARCMPLX 8 #define WAITCHARCMPLX 8
#define PRSSYNTAXERROR prssyntaxerror(state)
/*
 * Report a syntax error for the string being parsed.
 *
 * Raises an ERROR-level report with ERRCODE_SYNTAX_ERROR that quotes the
 * whole input string (state->bufstart).  The message names "tsquery" or
 * "tsvector" according to state->is_tsquery.
 */
static void
prssyntaxerror(TSVectorParseState state)
{
	if (state->is_tsquery)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error in tsquery: \"%s\"",
						state->bufstart)));
	else
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("syntax error in tsvector: \"%s\"",
						state->bufstart)));
}
/* /*
* Get next token from string being parsed. Returns false if * Get next token from string being parsed. Returns true if successful,
* end of input string is reached, otherwise strval, lenval, pos_ptr * false if end of input string is reached. On success, these output
* and poslen output parameters are filled in: * parameters are filled in:
* *
* *strval token * *strval pointer to token
* *lenval length of*strval * *lenval length of *strval
* *pos_ptr pointer to a palloc'd array of positions and weights * *pos_ptr pointer to a palloc'd array of positions and weights
* associated with the token. If the caller is not interested * associated with the token. If the caller is not interested
* in the information, NULL can be supplied. Otherwise * in the information, NULL can be supplied. Otherwise
* the caller is responsible for pfreeing the array. * the caller is responsible for pfreeing the array.
* *poslen number of elements in *pos_ptr * *poslen number of elements in *pos_ptr
* *endptr scan resumption point
*
* Pass NULL for unwanted output parameters.
*/ */
bool bool
gettoken_tsvector(TSVectorParseState state, gettoken_tsvector(TSVectorParseState state,
...@@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state,
oldstate = WAITENDWORD; oldstate = WAITENDWORD;
} }
else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf)) else if (!t_isspace(state->prsbuf))
{ {
COPYCHAR(curpos, state->prsbuf); COPYCHAR(curpos, state->prsbuf);
...@@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state,
if (*(state->prsbuf) == '\0') if (*(state->prsbuf) == '\0')
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character"))); errmsg("there is no escaped character: \"%s\"",
state->bufstart)));
else else
{ {
RESIZEPRSBUF; RESIZEPRSBUF;
...@@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state,
{ {
RESIZEPRSBUF; RESIZEPRSBUF;
if (curpos == state->word) if (curpos == state->word)
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0'; *(curpos) = '\0';
RETURN_TOKEN; RETURN_TOKEN;
} }
else if (t_iseq(state->prsbuf, ':')) else if (t_iseq(state->prsbuf, ':'))
{ {
if (curpos == state->word) if (curpos == state->word)
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(curpos) = '\0'; *(curpos) = '\0';
if (state->oprisdelim) if (state->oprisdelim)
RETURN_TOKEN; RETURN_TOKEN;
...@@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state,
oldstate = WAITENDCMPLX; oldstate = WAITENDCMPLX;
} }
else if (*(state->prsbuf) == '\0') else if (*(state->prsbuf) == '\0')
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else else
{ {
RESIZEPRSBUF; RESIZEPRSBUF;
...@@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
RESIZEPRSBUF; RESIZEPRSBUF;
*(curpos) = '\0'; *(curpos) = '\0';
if (curpos == state->word) if (curpos == state->word)
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim) if (state->oprisdelim)
{ {
/* state->prsbuf+=pg_mblen(state->prsbuf); */ /* state->prsbuf+=pg_mblen(state->prsbuf); */
...@@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state,
} }
npos++; npos++;
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf))); WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
/* we cannot get here in tsquery, so no need for 2 errmsgs */
if (WEP_GETPOS(pos[npos - 1]) == 0) if (WEP_GETPOS(pos[npos - 1]) == 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector"))); errmsg("wrong position info in tsvector: \"%s\"",
state->bufstart)));
WEP_SETWEIGHT(pos[npos - 1], 0); WEP_SETWEIGHT(pos[npos - 1], 0);
statecode = WAITPOSDELIM; statecode = WAITPOSDELIM;
} }
else else
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
} }
else if (statecode == WAITPOSDELIM) else if (statecode == WAITPOSDELIM)
{ {
...@@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state, ...@@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state,
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{ {
if (WEP_GETWEIGHT(pos[npos - 1])) if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 3); WEP_SETWEIGHT(pos[npos - 1], 3);
} }
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{ {
if (WEP_GETWEIGHT(pos[npos - 1])) if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 2); WEP_SETWEIGHT(pos[npos - 1], 2);
} }
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{ {
if (WEP_GETWEIGHT(pos[npos - 1])) if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 1); WEP_SETWEIGHT(pos[npos - 1], 1);
} }
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{ {
if (WEP_GETWEIGHT(pos[npos - 1])) if (WEP_GETWEIGHT(pos[npos - 1]))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(pos[npos - 1], 0); WEP_SETWEIGHT(pos[npos - 1], 0);
} }
else if (t_isspace(state->prsbuf) || else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0') *(state->prsbuf) == '\0')
RETURN_TOKEN; RETURN_TOKEN;
else if (!t_isdigit(state->prsbuf)) else if (!t_isdigit(state->prsbuf))
ereport(ERROR, PRSSYNTAXERROR;
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
} }
else /* internal error */ else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector"); elog(ERROR, "internal error in gettoken_tsvector");
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2007, PostgreSQL Global Development Group * Copyright (c) 1998-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.5 2007/10/19 22:01:45 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.6 2007/10/21 22:29:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,10 +22,12 @@ ...@@ -22,10 +22,12 @@
/* tsvector parser support. */ /* tsvector parser support. */
struct TSVectorParseStateData; struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState; typedef struct TSVectorParseStateData *TSVectorParseState;
extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim); extern TSVectorParseState init_tsvector_parser(char *input,
bool oprisdelim,
bool is_tsquery);
extern void reset_tsvector_parser(TSVectorParseState state, char *input); extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state, extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len, char **token, int *len,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment