Commit 6a715366 authored by Tom Lane's avatar Tom Lane

Improve performance of pullback/pushfwd in regular-expression compiler.

The previous coding would create a new intermediate state every time it
wanted to interchange the ordering of two constraint arcs.  Certain regex
features such as \Y can generate large numbers of parallel constraint arcs,
and if we needed to reorder the results of that, we created unreasonable
numbers of intermediate states.  To improve matters, keep a list of
already-created intermediate states associated with the state currently
being considered by the outer loop; we can re-use such states to place all
the new arcs leading to the same destination or source.

I also took the trouble to redefine push() and pull() to have a less risky
API: they no longer delete any state or arc that the caller might possibly
have a pointer to, except for the specifically-passed constraint arc.
This reduces the risk of re-introducing the same type of error seen in
the failed patch for CVE-2007-4772.

Back-patch to all supported branches.
parent f5b7d103
...@@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa, ...@@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa,
struct state *nexts; struct state *nexts;
struct arc *a; struct arc *a;
struct arc *nexta; struct arc *nexta;
struct state *intermediates;
int progress; int progress;
/* find and pull until there are no more */ /* find and pull until there are no more */
...@@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa, ...@@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa,
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{ {
nexts = s->next; nexts = s->next;
intermediates = NULL;
for (a = s->outs; a != NULL && !NISERR(); a = nexta) for (a = s->outs; a != NULL && !NISERR(); a = nexta)
{ {
nexta = a->outchain; nexta = a->outchain;
if (a->type == '^' || a->type == BEHIND) if (a->type == '^' || a->type == BEHIND)
if (pull(nfa, a)) if (pull(nfa, a, &intermediates))
progress = 1; progress = 1;
assert(nexta == NULL || s->no != FREESTATE);
} }
/* clear tmp fields of intermediate states created here */
while (intermediates != NULL)
{
struct state *ns = intermediates->tmp;
intermediates->tmp = NULL;
intermediates = ns;
}
/* if s is now useless, get rid of it */
if ((s->nins == 0 || s->nouts == 0) && !s->flag)
dropstate(nfa, s);
} }
if (progress && f != NULL) if (progress && f != NULL)
dumpnfa(nfa, f); dumpnfa(nfa, f);
...@@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa, ...@@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa,
/* /*
* pull - pull a back constraint backward past its source state * pull - pull a back constraint backward past its source state
* A significant property of this function is that it deletes at most *
* one state -- the constraint's from state -- and only if the constraint * Returns 1 if successful (which it always is unless the source is the
* was that state's last outarc. * start state or we have an internal error), 0 if nothing happened.
*
* A significant property of this function is that it deletes no pre-existing
* states, and no outarcs of the constraint's from state other than the given
* constraint arc. This makes the loops in pullback() safe, at the cost that
* we may leave useless states behind. Therefore, we leave it to pullback()
* to delete such states.
*
* If the from state has multiple back-constraint outarcs, and/or multiple
* compatible constraint inarcs, we only need to create one new intermediate
* state per combination of predecessor and successor states. *intermediates
* points to a list of such intermediate states for this from state (chained
* through their tmp fields).
*/ */
static int /* 0 couldn't, 1 could */ static int
pull(struct nfa * nfa, pull(struct nfa * nfa,
struct arc * con) struct arc * con,
struct state ** intermediates)
{ {
struct state *from = con->from; struct state *from = con->from;
struct state *to = con->to; struct state *to = con->to;
...@@ -1580,7 +1605,11 @@ pull(struct nfa * nfa, ...@@ -1580,7 +1605,11 @@ pull(struct nfa * nfa,
return 1; return 1;
} }
/* first, clone from state if necessary to avoid other outarcs */ /*
* First, clone from state if necessary to avoid other outarcs. This may
* seem wasteful, but it simplifies the logic, and we'll get rid of the
* clone state again at the bottom.
*/
if (from->nouts > 1) if (from->nouts > 1)
{ {
s = newstate(nfa); s = newstate(nfa);
...@@ -1589,13 +1618,15 @@ pull(struct nfa * nfa, ...@@ -1589,13 +1618,15 @@ pull(struct nfa * nfa,
copyins(nfa, from, s, 1); /* duplicate inarcs */ copyins(nfa, from, s, 1); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */ cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con); freearc(nfa, con);
if (NISERR())
return 0;
from = s; from = s;
con = from->outs; con = from->outs;
} }
assert(from->nouts == 1); assert(from->nouts == 1);
/* propagate the constraint into the from state's inarcs */ /* propagate the constraint into the from state's inarcs */
for (a = from->ins; a != NULL; a = nexta) for (a = from->ins; a != NULL && !NISERR(); a = nexta)
{ {
nexta = a->inchain; nexta = a->inchain;
switch (combine(con, a)) switch (combine(con, a))
...@@ -1606,13 +1637,23 @@ pull(struct nfa * nfa, ...@@ -1606,13 +1637,23 @@ pull(struct nfa * nfa,
case SATISFIED: /* no action needed */ case SATISFIED: /* no action needed */
break; break;
case COMPATIBLE: /* swap the two arcs, more or less */ case COMPATIBLE: /* swap the two arcs, more or less */
s = newstate(nfa); /* need an intermediate state, but might have one already */
if (NISERR()) for (s = *intermediates; s != NULL; s = s->tmp)
return 0; {
cparc(nfa, a, s, to); /* anticipate move */ assert(s->nins > 0 && s->nouts > 0);
if (s->ins->from == a->from && s->outs->to == to)
break;
}
if (s == NULL)
{
s = newstate(nfa);
if (NISERR())
return 0;
s->tmp = *intermediates;
*intermediates = s;
}
cparc(nfa, con, a->from, s); cparc(nfa, con, a->from, s);
if (NISERR()) cparc(nfa, a, s, to);
return 0;
freearc(nfa, a); freearc(nfa, a);
break; break;
default: default:
...@@ -1623,7 +1664,8 @@ pull(struct nfa * nfa, ...@@ -1623,7 +1664,8 @@ pull(struct nfa * nfa,
/* remaining inarcs, if any, incorporate the constraint */ /* remaining inarcs, if any, incorporate the constraint */
moveins(nfa, from, to); moveins(nfa, from, to);
dropstate(nfa, from); /* will free the constraint */ freearc(nfa, con);
/* from state is now useless, but we leave it to pullback() to clean up */
return 1; return 1;
} }
...@@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa, ...@@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa,
struct state *nexts; struct state *nexts;
struct arc *a; struct arc *a;
struct arc *nexta; struct arc *nexta;
struct state *intermediates;
int progress; int progress;
/* find and push until there are no more */ /* find and push until there are no more */
...@@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa, ...@@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa,
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{ {
nexts = s->next; nexts = s->next;
intermediates = NULL;
for (a = s->ins; a != NULL && !NISERR(); a = nexta) for (a = s->ins; a != NULL && !NISERR(); a = nexta)
{ {
nexta = a->inchain; nexta = a->inchain;
if (a->type == '$' || a->type == AHEAD) if (a->type == '$' || a->type == AHEAD)
if (push(nfa, a)) if (push(nfa, a, &intermediates))
progress = 1; progress = 1;
assert(nexta == NULL || s->no != FREESTATE);
} }
/* clear tmp fields of intermediate states created here */
while (intermediates != NULL)
{
struct state *ns = intermediates->tmp;
intermediates->tmp = NULL;
intermediates = ns;
}
/* if s is now useless, get rid of it */
if ((s->nins == 0 || s->nouts == 0) && !s->flag)
dropstate(nfa, s);
} }
if (progress && f != NULL) if (progress && f != NULL)
dumpnfa(nfa, f); dumpnfa(nfa, f);
...@@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa, ...@@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa,
/* /*
* push - push a forward constraint forward past its destination state * push - push a forward constraint forward past its destination state
* A significant property of this function is that it deletes at most *
* one state -- the constraint's to state -- and only if the constraint * Returns 1 if successful (which it always is unless the destination is the
* was that state's last inarc. * post state or we have an internal error), 0 if nothing happened.
*
* A significant property of this function is that it deletes no pre-existing
* states, and no inarcs of the constraint's to state other than the given
* constraint arc. This makes the loops in pushfwd() safe, at the cost that
* we may leave useless states behind. Therefore, we leave it to pushfwd()
* to delete such states.
*
* If the to state has multiple forward-constraint inarcs, and/or multiple
* compatible constraint outarcs, we only need to create one new intermediate
* state per combination of predecessor and successor states. *intermediates
* points to a list of such intermediate states for this to state (chained
* through their tmp fields).
*/ */
static int /* 0 couldn't, 1 could */ static int
push(struct nfa * nfa, push(struct nfa * nfa,
struct arc * con) struct arc * con,
struct state ** intermediates)
{ {
struct state *from = con->from; struct state *from = con->from;
struct state *to = con->to; struct state *to = con->to;
...@@ -1705,22 +1772,28 @@ push(struct nfa * nfa, ...@@ -1705,22 +1772,28 @@ push(struct nfa * nfa,
return 1; return 1;
} }
/* first, clone to state if necessary to avoid other inarcs */ /*
* First, clone to state if necessary to avoid other inarcs. This may
* seem wasteful, but it simplifies the logic, and we'll get rid of the
* clone state again at the bottom.
*/
if (to->nins > 1) if (to->nins > 1)
{ {
s = newstate(nfa); s = newstate(nfa);
if (NISERR()) if (NISERR())
return 0; return 0;
copyouts(nfa, to, s, 1); /* duplicate outarcs */ copyouts(nfa, to, s, 1); /* duplicate outarcs */
cparc(nfa, con, from, s); /* move constraint */ cparc(nfa, con, from, s); /* move constraint arc */
freearc(nfa, con); freearc(nfa, con);
if (NISERR())
return 0;
to = s; to = s;
con = to->ins; con = to->ins;
} }
assert(to->nins == 1); assert(to->nins == 1);
/* propagate the constraint into the to state's outarcs */ /* propagate the constraint into the to state's outarcs */
for (a = to->outs; a != NULL; a = nexta) for (a = to->outs; a != NULL && !NISERR(); a = nexta)
{ {
nexta = a->outchain; nexta = a->outchain;
switch (combine(con, a)) switch (combine(con, a))
...@@ -1731,13 +1804,23 @@ push(struct nfa * nfa, ...@@ -1731,13 +1804,23 @@ push(struct nfa * nfa,
case SATISFIED: /* no action needed */ case SATISFIED: /* no action needed */
break; break;
case COMPATIBLE: /* swap the two arcs, more or less */ case COMPATIBLE: /* swap the two arcs, more or less */
s = newstate(nfa); /* need an intermediate state, but might have one already */
if (NISERR()) for (s = *intermediates; s != NULL; s = s->tmp)
return 0; {
cparc(nfa, con, s, a->to); /* anticipate move */ assert(s->nins > 0 && s->nouts > 0);
if (s->ins->from == from && s->outs->to == a->to)
break;
}
if (s == NULL)
{
s = newstate(nfa);
if (NISERR())
return 0;
s->tmp = *intermediates;
*intermediates = s;
}
cparc(nfa, con, s, a->to);
cparc(nfa, a, from, s); cparc(nfa, a, from, s);
if (NISERR())
return 0;
freearc(nfa, a); freearc(nfa, a);
break; break;
default: default:
...@@ -1748,7 +1831,8 @@ push(struct nfa * nfa, ...@@ -1748,7 +1831,8 @@ push(struct nfa * nfa,
/* remaining outarcs, if any, incorporate the constraint */ /* remaining outarcs, if any, incorporate the constraint */
moveouts(nfa, to, from); moveouts(nfa, to, from);
dropstate(nfa, to); /* will free the constraint */ freearc(nfa, con);
/* to state is now useless, but we leave it to pushfwd() to clean up */
return 1; return 1;
} }
......
...@@ -149,9 +149,9 @@ static void cleartraverse(struct nfa *, struct state *); ...@@ -149,9 +149,9 @@ static void cleartraverse(struct nfa *, struct state *);
static void specialcolors(struct nfa *); static void specialcolors(struct nfa *);
static long optimize(struct nfa *, FILE *); static long optimize(struct nfa *, FILE *);
static void pullback(struct nfa *, FILE *); static void pullback(struct nfa *, FILE *);
static int pull(struct nfa *, struct arc *); static int pull(struct nfa *, struct arc *, struct state **);
static void pushfwd(struct nfa *, FILE *); static void pushfwd(struct nfa *, FILE *);
static int push(struct nfa *, struct arc *); static int push(struct nfa *, struct arc *, struct state **);
#define INCOMPATIBLE 1 /* destroys arc */ #define INCOMPATIBLE 1 /* destroys arc */
#define SATISFIED 2 /* constraint satisfied */ #define SATISFIED 2 /* constraint satisfied */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment