Commit 58735947 authored by Tom Lane

Avoid repeated creation/freeing of per-subre DFAs during regex search.

In nested sub-regex trees, lower-level nodes created DFAs and then
destroyed them again before exiting, which is a bit dumb considering that
the recursive search is likely to call those nodes again later.  Instead
cache each created DFA until the end of pg_regexec().  This is basically a
space for time tradeoff, in that it might increase the maximum memory
usage.  However, in most regex patterns there are not all that many subre
nodes, so not that many DFAs --- and in any case, the peak usage occurs
when reaching the bottom recursion level, and except for alternation cases
that's going to be the same anyway.
parent 3cbfe485
...@@ -112,6 +112,7 @@ struct vars ...@@ -112,6 +112,7 @@ struct vars
chr *search_start; /* search start of string */ chr *search_start; /* search start of string */
chr *stop; /* just past end of string */ chr *stop; /* just past end of string */
int err; /* error code if any (0 none) */ int err; /* error code if any (0 none) */
struct dfa **subdfas; /* per-subre DFAs */
struct smalldfa dfa1; struct smalldfa dfa1;
struct smalldfa dfa2; struct smalldfa dfa2;
}; };
...@@ -130,6 +131,7 @@ struct vars ...@@ -130,6 +131,7 @@ struct vars
* forward declarations * forward declarations
*/ */
/* === regexec.c === */ /* === regexec.c === */
static struct dfa *getsubdfa(struct vars *, struct subre *);
static int find(struct vars *, struct cnfa *, struct colormap *); static int find(struct vars *, struct cnfa *, struct colormap *);
static int cfind(struct vars *, struct cnfa *, struct colormap *); static int cfind(struct vars *, struct cnfa *, struct colormap *);
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
...@@ -180,11 +182,15 @@ pg_regexec(regex_t *re, ...@@ -180,11 +182,15 @@ pg_regexec(regex_t *re,
register struct vars *v = &var; register struct vars *v = &var;
int st; int st;
size_t n; size_t n;
size_t i;
int backref; int backref;
#define LOCALMAT 20 #define LOCALMAT 20
regmatch_t mat[LOCALMAT]; regmatch_t mat[LOCALMAT];
#define LOCALDFAS 40
struct dfa *subdfas[LOCALDFAS];
/* sanity checks */ /* sanity checks */
if (re == NULL || string == NULL || re->re_magic != REMAGIC) if (re == NULL || string == NULL || re->re_magic != REMAGIC)
return REG_INVARG; return REG_INVARG;
...@@ -225,6 +231,20 @@ pg_regexec(regex_t *re, ...@@ -225,6 +231,20 @@ pg_regexec(regex_t *re,
v->search_start = (chr *) string + search_start; v->search_start = (chr *) string + search_start;
v->stop = (chr *) string + len; v->stop = (chr *) string + len;
v->err = 0; v->err = 0;
assert(v->g->ntree >= 0);
n = (size_t) v->g->ntree;
if (n <= LOCALDFAS)
v->subdfas = subdfas;
else
v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
if (v->subdfas == NULL)
{
if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
return REG_ESPACE;
}
for (i = 0; i < n; i++)
v->subdfas[i] = NULL;
/* do it */ /* do it */
assert(v->g->tree != NULL); assert(v->g->tree != NULL);
...@@ -244,9 +264,36 @@ pg_regexec(regex_t *re, ...@@ -244,9 +264,36 @@ pg_regexec(regex_t *re,
/* clean up */ /* clean up */
if (v->pmatch != pmatch && v->pmatch != mat) if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch); FREE(v->pmatch);
for (i = 0; i < n; i++)
{
if (v->subdfas[i] != NULL)
freedfa(v->subdfas[i]);
}
if (v->subdfas != subdfas)
FREE(v->subdfas);
return st; return st;
} }
/*
 * getsubdfa - look up the DFA for a subre node, building it on first use
 *
 * v->subdfas[] is a per-execution cache indexed by subre id.  A slot that
 * is still NULL has not had its DFA built yet, so we build it here and
 * remember it; later calls for the same node simply return the cached one.
 * Callers must not free the result: pg_regexec()'s cleanup step releases
 * every cached DFA.
 *
 * Returns NULL if an error is flagged after attempting to build the DFA.
 */
static struct dfa *
getsubdfa(struct vars * v,
		  struct subre * t)
{
	struct dfa **slot = &v->subdfas[t->id];

	/* fast path: already built during this regex execution */
	if (*slot != NULL)
		return *slot;

	*slot = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
	if (ISERR())
		return NULL;

	return *slot;
}
/* /*
* find - find a match for the main NFA (no-complications case) * find - find a match for the main NFA (no-complications case)
*/ */
...@@ -578,15 +625,10 @@ condissect(struct vars * v, ...@@ -578,15 +625,10 @@ condissect(struct vars * v,
assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->left != NULL && t->left->cnfa.nstates > 0);
assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0);
d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); d = getsubdfa(v, t->left);
NOERR();
d2 = getsubdfa(v, t->right);
NOERR(); NOERR();
d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
if (ISERR())
{
assert(d2 == NULL);
freedfa(d);
return v->err;
}
/* pick a tentative midpoint */ /* pick a tentative midpoint */
if (shorter) if (shorter)
...@@ -595,11 +637,7 @@ condissect(struct vars * v, ...@@ -595,11 +637,7 @@ condissect(struct vars * v,
else else
mid = longest(v, d, begin, end, (int *) NULL); mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL) if (mid == NULL)
{
freedfa(d);
freedfa(d2);
return REG_ASSERT; return REG_ASSERT;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */ /* iterate until satisfaction or failure */
...@@ -610,8 +648,6 @@ condissect(struct vars * v, ...@@ -610,8 +648,6 @@ condissect(struct vars * v,
{ {
/* all possibilities exhausted! */ /* all possibilities exhausted! */
MDEBUG(("no midpoint!\n")); MDEBUG(("no midpoint!\n"));
freedfa(d);
freedfa(d2);
return REG_ASSERT; return REG_ASSERT;
} }
if (shorter) if (shorter)
...@@ -623,8 +659,6 @@ condissect(struct vars * v, ...@@ -623,8 +659,6 @@ condissect(struct vars * v,
{ {
/* failed to find a new one! */ /* failed to find a new one! */
MDEBUG(("failed midpoint!\n")); MDEBUG(("failed midpoint!\n"));
freedfa(d);
freedfa(d2);
return REG_ASSERT; return REG_ASSERT;
} }
MDEBUG(("new midpoint %ld\n", LOFF(mid))); MDEBUG(("new midpoint %ld\n", LOFF(mid)));
...@@ -632,8 +666,6 @@ condissect(struct vars * v, ...@@ -632,8 +666,6 @@ condissect(struct vars * v,
/* satisfaction */ /* satisfaction */
MDEBUG(("successful\n")); MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
i = dissect(v, t->left, begin, mid); i = dissect(v, t->left, begin, mid);
if (i != REG_OKAY) if (i != REG_OKAY)
return i; return i;
...@@ -659,16 +691,13 @@ altdissect(struct vars * v, ...@@ -659,16 +691,13 @@ altdissect(struct vars * v,
{ {
MDEBUG(("trying %dth\n", i)); MDEBUG(("trying %dth\n", i));
assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->left != NULL && t->left->cnfa.nstates > 0);
d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); d = getsubdfa(v, t->left);
if (ISERR()) NOERR();
return v->err;
if (longest(v, d, begin, end, (int *) NULL) == end) if (longest(v, d, begin, end, (int *) NULL) == end)
{ {
MDEBUG(("success\n")); MDEBUG(("success\n"));
freedfa(d);
return dissect(v, t->left, begin, end); return dissect(v, t->left, begin, end);
} }
freedfa(d);
} }
return REG_ASSERT; /* none of them matched?!? */ return REG_ASSERT; /* none of them matched?!? */
} }
...@@ -731,7 +760,7 @@ iterdissect(struct vars * v, ...@@ -731,7 +760,7 @@ iterdissect(struct vars * v,
return REG_ESPACE; return REG_ESPACE;
endpts[0] = begin; endpts[0] = begin;
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) if (ISERR())
{ {
FREE(endpts); FREE(endpts);
...@@ -814,7 +843,6 @@ iterdissect(struct vars * v, ...@@ -814,7 +843,6 @@ iterdissect(struct vars * v,
if (er == REG_NOMATCH) if (er == REG_NOMATCH)
break; break;
/* oops, something failed */ /* oops, something failed */
freedfa(d);
FREE(endpts); FREE(endpts);
return er; return er;
} }
...@@ -823,7 +851,6 @@ iterdissect(struct vars * v, ...@@ -823,7 +851,6 @@ iterdissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("%d successful\n", t->id)); MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_OKAY; return REG_OKAY;
} }
...@@ -856,7 +883,6 @@ backtrack: ...@@ -856,7 +883,6 @@ backtrack:
/* all possibilities exhausted - shouldn't happen in uncomplicated mode */ /* all possibilities exhausted - shouldn't happen in uncomplicated mode */
MDEBUG(("%d failed\n", t->id)); MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_ASSERT; return REG_ASSERT;
} }
...@@ -917,7 +943,7 @@ reviterdissect(struct vars * v, ...@@ -917,7 +943,7 @@ reviterdissect(struct vars * v,
return REG_ESPACE; return REG_ESPACE;
endpts[0] = begin; endpts[0] = begin;
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) if (ISERR())
{ {
FREE(endpts); FREE(endpts);
...@@ -1002,7 +1028,6 @@ reviterdissect(struct vars * v, ...@@ -1002,7 +1028,6 @@ reviterdissect(struct vars * v,
if (er == REG_NOMATCH) if (er == REG_NOMATCH)
break; break;
/* oops, something failed */ /* oops, something failed */
freedfa(d);
FREE(endpts); FREE(endpts);
return er; return er;
} }
...@@ -1011,7 +1036,6 @@ reviterdissect(struct vars * v, ...@@ -1011,7 +1036,6 @@ reviterdissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("%d successful\n", t->id)); MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_OKAY; return REG_OKAY;
} }
...@@ -1037,7 +1061,6 @@ backtrack: ...@@ -1037,7 +1061,6 @@ backtrack:
/* all possibilities exhausted - shouldn't happen in uncomplicated mode */ /* all possibilities exhausted - shouldn't happen in uncomplicated mode */
MDEBUG(("%d failed\n", t->id)); MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_ASSERT; return REG_ASSERT;
} }
...@@ -1106,25 +1129,16 @@ ccondissect(struct vars * v, ...@@ -1106,25 +1129,16 @@ ccondissect(struct vars * v,
if (t->left->flags & SHORTER) /* reverse scan */ if (t->left->flags & SHORTER) /* reverse scan */
return crevdissect(v, t, begin, end); return crevdissect(v, t, begin, end);
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) NOERR();
return v->err; d2 = getsubdfa(v, t->right);
d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); NOERR();
if (ISERR())
{
freedfa(d);
return v->err;
}
MDEBUG(("cconcat %d\n", t->id)); MDEBUG(("cconcat %d\n", t->id));
/* pick a tentative midpoint */ /* pick a tentative midpoint */
mid = longest(v, d, begin, end, (int *) NULL); mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL) if (mid == NULL)
{
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */ /* iterate until satisfaction or failure */
...@@ -1141,26 +1155,18 @@ ccondissect(struct vars * v, ...@@ -1141,26 +1155,18 @@ ccondissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("successful\n")); MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return REG_OKAY; return REG_OKAY;
} }
} }
if (er != REG_OKAY && er != REG_NOMATCH) if (er != REG_OKAY && er != REG_NOMATCH)
{
freedfa(d);
freedfa(d2);
return er; return er;
} }
}
/* that midpoint didn't work, find a new one */ /* that midpoint didn't work, find a new one */
if (mid == begin) if (mid == begin)
{ {
/* all possibilities exhausted */ /* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->id)); MDEBUG(("%d no midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
} }
mid = longest(v, d, begin, mid - 1, (int *) NULL); mid = longest(v, d, begin, mid - 1, (int *) NULL);
...@@ -1168,8 +1174,6 @@ ccondissect(struct vars * v, ...@@ -1168,8 +1174,6 @@ ccondissect(struct vars * v,
{ {
/* failed to find a new one */ /* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->id)); MDEBUG(("%d failed midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
} }
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
...@@ -1201,25 +1205,16 @@ crevdissect(struct vars * v, ...@@ -1201,25 +1205,16 @@ crevdissect(struct vars * v,
assert(t->left->flags & SHORTER); assert(t->left->flags & SHORTER);
/* concatenation -- need to split the substring between parts */ /* concatenation -- need to split the substring between parts */
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) NOERR();
return v->err; d2 = getsubdfa(v, t->right);
d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); NOERR();
if (ISERR())
{
freedfa(d);
return v->err;
}
MDEBUG(("crev %d\n", t->id)); MDEBUG(("crev %d\n", t->id));
/* pick a tentative midpoint */ /* pick a tentative midpoint */
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
if (mid == NULL) if (mid == NULL)
{
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */ /* iterate until satisfaction or failure */
...@@ -1236,26 +1231,18 @@ crevdissect(struct vars * v, ...@@ -1236,26 +1231,18 @@ crevdissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("successful\n")); MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return REG_OKAY; return REG_OKAY;
} }
} }
if (er != REG_OKAY && er != REG_NOMATCH) if (er != REG_OKAY && er != REG_NOMATCH)
{
freedfa(d);
freedfa(d2);
return er; return er;
} }
}
/* that midpoint didn't work, find a new one */ /* that midpoint didn't work, find a new one */
if (mid == end) if (mid == end)
{ {
/* all possibilities exhausted */ /* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->id)); MDEBUG(("%d no midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
} }
mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
...@@ -1263,8 +1250,6 @@ crevdissect(struct vars * v, ...@@ -1263,8 +1250,6 @@ crevdissect(struct vars * v,
{ {
/* failed to find a new one */ /* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->id)); MDEBUG(("%d failed midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH; return REG_NOMATCH;
} }
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
...@@ -1377,15 +1362,10 @@ caltdissect(struct vars * v, ...@@ -1377,15 +1362,10 @@ caltdissect(struct vars * v,
MDEBUG(("calt n%d\n", t->id)); MDEBUG(("calt n%d\n", t->id));
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) NOERR();
return v->err;
if (longest(v, d, begin, end, (int *) NULL) != end) if (longest(v, d, begin, end, (int *) NULL) != end)
{
freedfa(d);
return caltdissect(v, t->right, begin, end); return caltdissect(v, t->right, begin, end);
}
freedfa(d);
MDEBUG(("calt matched\n")); MDEBUG(("calt matched\n"));
er = cdissect(v, t->left, begin, end); er = cdissect(v, t->left, begin, end);
...@@ -1453,7 +1433,7 @@ citerdissect(struct vars * v, ...@@ -1453,7 +1433,7 @@ citerdissect(struct vars * v,
return REG_ESPACE; return REG_ESPACE;
endpts[0] = begin; endpts[0] = begin;
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) if (ISERR())
{ {
FREE(endpts); FREE(endpts);
...@@ -1537,7 +1517,6 @@ citerdissect(struct vars * v, ...@@ -1537,7 +1517,6 @@ citerdissect(struct vars * v,
if (er == REG_NOMATCH) if (er == REG_NOMATCH)
break; break;
/* oops, something failed */ /* oops, something failed */
freedfa(d);
FREE(endpts); FREE(endpts);
return er; return er;
} }
...@@ -1546,7 +1525,6 @@ citerdissect(struct vars * v, ...@@ -1546,7 +1525,6 @@ citerdissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("%d successful\n", t->id)); MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_OKAY; return REG_OKAY;
} }
...@@ -1579,7 +1557,6 @@ backtrack: ...@@ -1579,7 +1557,6 @@ backtrack:
/* all possibilities exhausted */ /* all possibilities exhausted */
MDEBUG(("%d failed\n", t->id)); MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_NOMATCH; return REG_NOMATCH;
} }
...@@ -1640,7 +1617,7 @@ creviterdissect(struct vars * v, ...@@ -1640,7 +1617,7 @@ creviterdissect(struct vars * v,
return REG_ESPACE; return REG_ESPACE;
endpts[0] = begin; endpts[0] = begin;
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); d = getsubdfa(v, t->left);
if (ISERR()) if (ISERR())
{ {
FREE(endpts); FREE(endpts);
...@@ -1726,7 +1703,6 @@ creviterdissect(struct vars * v, ...@@ -1726,7 +1703,6 @@ creviterdissect(struct vars * v,
if (er == REG_NOMATCH) if (er == REG_NOMATCH)
break; break;
/* oops, something failed */ /* oops, something failed */
freedfa(d);
FREE(endpts); FREE(endpts);
return er; return er;
} }
...@@ -1735,7 +1711,6 @@ creviterdissect(struct vars * v, ...@@ -1735,7 +1711,6 @@ creviterdissect(struct vars * v,
{ {
/* satisfaction */ /* satisfaction */
MDEBUG(("%d successful\n", t->id)); MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_OKAY; return REG_OKAY;
} }
...@@ -1761,7 +1736,6 @@ backtrack: ...@@ -1761,7 +1736,6 @@ backtrack:
/* all possibilities exhausted */ /* all possibilities exhausted */
MDEBUG(("%d failed\n", t->id)); MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts); FREE(endpts);
return REG_NOMATCH; return REG_NOMATCH;
} }
......
...@@ -409,7 +409,7 @@ struct subre ...@@ -409,7 +409,7 @@ struct subre
#define PREF(f) ((f)&LOCAL) #define PREF(f) ((f)&LOCAL)
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) #define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) #define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short id; /* ID of subre (1..ntree) */ short id; /* ID of subre (1..ntree-1) */
int subno; /* subexpression number (for 'b' and '(') */ int subno; /* subexpression number (for 'b' and '(') */
short min; /* min repetitions for iteration or backref */ short min; /* min repetitions for iteration or backref */
short max; /* max repetitions for iteration or backref */ short max; /* max repetitions for iteration or backref */
...@@ -446,7 +446,7 @@ struct guts ...@@ -446,7 +446,7 @@ struct guts
size_t nsub; /* copy of re_nsub */ size_t nsub; /* copy of re_nsub */
struct subre *tree; struct subre *tree;
struct cnfa search; /* for fast preliminary search */ struct cnfa search; /* for fast preliminary search */
int ntree; int ntree; /* number of subre's, plus one */
struct colormap cmap; struct colormap cmap;
int FUNCPTR(compare, (const chr *, const chr *, size_t)); int FUNCPTR(compare, (const chr *, const chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */ struct subre *lacons; /* lookahead-constraint vector */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment