Commit 21d11e7e authored by Tom Lane's avatar Tom Lane

Avoid unnecessary copying of source string when generating a cloned TParser.

For long source strings the copying results in O(N^2) behavior, and the
multiplier can be significant if wide-char conversion is involved.

Andres Freund, reviewed by Kevin Grittner.
parent a5495cd8
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.25 2009/11/15 13:57:01 petere Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.26 2009/12/15 20:37:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -328,6 +328,46 @@ TParserInit(char *str, int len) ...@@ -328,6 +328,46 @@ TParserInit(char *str, int len)
return prs; return prs;
} }
/*
* As an alternative to a full TParserInit one can create a
* TParserCopy which basically is a regular TParser without a private
* copy of the string - instead it uses the one from another TParser.
* This is useful because at some places TParsers are created
* recursively and the repeated copying around of the strings can
* cause major inefficiency if the source string is long.
* The new parser starts parsing at the original's current position.
*
* Obviously one must not close the original TParser before the copy.
*/
static TParser *
TParserCopyInit(const TParser *orig)
{
TParser *prs = (TParser *) palloc0(sizeof(TParser));
prs->charmaxlen = orig->charmaxlen;
prs->str = orig->str + orig->state->posbyte;
prs->lenstr = orig->lenstr - orig->state->posbyte;
#ifdef USE_WIDE_UPPER_LOWER
prs->usewide = orig->usewide;
if (orig->pgwstr)
prs->pgwstr = orig->pgwstr + orig->state->poschar;
if (orig->wstr)
prs->wstr = orig->wstr + orig->state->poschar;
#endif
prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base;
#ifdef WPARSER_TRACE
fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
#endif
return prs;
}
static void static void
TParserClose(TParser *prs) TParserClose(TParser *prs)
{ {
...@@ -346,9 +386,33 @@ TParserClose(TParser *prs) ...@@ -346,9 +386,33 @@ TParserClose(TParser *prs)
pfree(prs->pgwstr); pfree(prs->pgwstr);
#endif #endif
#ifdef WPARSER_TRACE
fprintf(stderr, "closing parser");
#endif
pfree(prs); pfree(prs);
} }
/*
* Close a parser created with TParserCopyInit
*/
static void
TParserCopyClose(TParser *prs)
{
while (prs->state)
{
TParserPosition *ptr = prs->state->prev;
pfree(prs->state);
prs->state = ptr;
}
#ifdef WPARSER_TRACE
fprintf(stderr, "closing parser copy");
#endif
pfree(prs);
}
/* /*
* Character-type support functions, equivalent to is* macros, but * Character-type support functions, equivalent to is* macros, but
* working with any possible encodings and locales. Notes: * working with any possible encodings and locales. Notes:
...@@ -617,7 +681,7 @@ p_isignore(TParser *prs) ...@@ -617,7 +681,7 @@ p_isignore(TParser *prs)
static int static int
p_ishost(TParser *prs) p_ishost(TParser *prs)
{ {
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); TParser *tmpprs = TParserCopyInit(prs);
int res = 0; int res = 0;
tmpprs->wanthost = true; tmpprs->wanthost = true;
...@@ -631,7 +695,7 @@ p_ishost(TParser *prs) ...@@ -631,7 +695,7 @@ p_ishost(TParser *prs)
prs->state->charlen = tmpprs->state->charlen; prs->state->charlen = tmpprs->state->charlen;
res = 1; res = 1;
} }
TParserClose(tmpprs); TParserCopyClose(tmpprs);
return res; return res;
} }
...@@ -639,7 +703,7 @@ p_ishost(TParser *prs) ...@@ -639,7 +703,7 @@ p_ishost(TParser *prs)
static int static int
p_isURLPath(TParser *prs) p_isURLPath(TParser *prs)
{ {
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); TParser *tmpprs = TParserCopyInit(prs);
int res = 0; int res = 0;
tmpprs->state = newTParserPosition(tmpprs->state); tmpprs->state = newTParserPosition(tmpprs->state);
...@@ -654,7 +718,7 @@ p_isURLPath(TParser *prs) ...@@ -654,7 +718,7 @@ p_isURLPath(TParser *prs)
prs->state->charlen = tmpprs->state->charlen; prs->state->charlen = tmpprs->state->charlen;
res = 1; res = 1;
} }
TParserClose(tmpprs); TParserCopyClose(tmpprs);
return res; return res;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment