Commit 48789c5d authored by Tom Lane's avatar Tom Lane

Fix regular-expression compiler to handle loops of constraint arcs.

It's possible to construct regular expressions that contain loops of
constraint arcs (that is, ^ $ AHEAD BEHIND or LACON arcs).  There's no use
in fully traversing such a loop at execution, since you'd just end up in
the same NFA state without having consumed any input.  Worse, such a loop
leads to infinite looping in the pullback/pushfwd stage of compilation,
because we keep pushing or pulling the same constraints around the loop
in a vain attempt to move them to the pre or post state.  Such looping was
previously recognized in CVE-2007-4772; but the fix only handled the case
of trivial single-state loops (that is, a constraint arc leading back to
its source state) ... and not only that, it was incorrect even for that
case, because it broke the admittedly-not-very-clearly-stated API contract
of the pull() and push() subroutines.  The first two regression test cases
added by this commit exhibit patterns that result in assertion failures
because of that (though there seem to be no ill effects in non-assert
builds).  The other new test cases exhibit multi-state constraint loops;
in an unpatched build they will run until the NFA state-count limit is
exceeded.

To fix, remove the code added for CVE-2007-4772, and instead create a
general-purpose constraint-loop-breaking phase of regex compilation that
executes before we do pullback/pushfwd.  Since we never need to traverse
a constraint loop fully, we can just break the loop at any chosen spot,
if we add clone states that can replicate any sequence of arc transitions
that would've traversed just part of the loop.

Also add some commentary clarifying why we have to have all these
machinations in the first place.

This class of problems has been known for some time --- we had a report
from Marc Mamin about two years ago, for example, and there are related
complaints in the Tcl bug tracker.  I had discussed a fix of this kind
off-list with Henry Spencer, but didn't get around to doing something
about it until the issue was rediscovered by Greg Stark recently.

Back-patch to all supported branches.
parent d53e3d5f
This diff is collapsed.
...@@ -155,6 +155,14 @@ static int combine(struct arc *, struct arc *); ...@@ -155,6 +155,14 @@ static int combine(struct arc *, struct arc *);
static void fixempties(struct nfa *, FILE *); static void fixempties(struct nfa *, FILE *);
static struct state *emptyreachable(struct nfa *, struct state *, struct state *); static struct state *emptyreachable(struct nfa *, struct state *, struct state *);
static void replaceempty(struct nfa *, struct state *, struct state *); static void replaceempty(struct nfa *, struct state *, struct state *);
static int isconstraintarc(struct arc *);
static int hasconstraintout(struct state *);
static void fixconstraintloops(struct nfa *, FILE *);
static int findconstraintloop(struct nfa *, struct state *);
static void breakconstraintloop(struct nfa *, struct state *);
static void clonesuccessorstates(struct nfa *, struct state *, struct state *,
struct state *, struct arc *,
char *, char *, int);
static void cleanup(struct nfa *); static void cleanup(struct nfa *);
static void markreachable(struct nfa *, struct state *, struct state *, struct state *); static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
......
...@@ -160,6 +160,62 @@ select 'a' ~ '($|^)*'; ...@@ -160,6 +160,62 @@ select 'a' ~ '($|^)*';
t t
(1 row) (1 row)
-- These cases expose a bug in the original fix for CVE-2007-4772
select 'a' ~ '(^)+^';
?column?
----------
t
(1 row)
select 'a' ~ '$($$)+';
?column?
----------
t
(1 row)
-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
select 'a' ~ '($^)+';
?column?
----------
f
(1 row)
select 'a' ~ '(^$)*';
?column?
----------
t
(1 row)
select 'aa bb cc' ~ '(^(?!aa))+';
?column?
----------
f
(1 row)
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
?column?
----------
f
(1 row)
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
?column?
----------
f
(1 row)
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
?column?
----------
f
(1 row)
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
?column?
----------
t
(1 row)
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683) -- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
select 'a' ~ '((((((a)*)*)*)*)*)*'; select 'a' ~ '((((((a)*)*)*)*)*)*';
?column? ?column?
......
...@@ -38,6 +38,19 @@ explain (costs off) select * from pg_proc where proname ~ '^(abc)?d'; ...@@ -38,6 +38,19 @@ explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
-- Test for infinite loop in pullback() (CVE-2007-4772) -- Test for infinite loop in pullback() (CVE-2007-4772)
select 'a' ~ '($|^)*'; select 'a' ~ '($|^)*';
-- These cases expose a bug in the original fix for CVE-2007-4772
select 'a' ~ '(^)+^';
select 'a' ~ '$($$)+';
-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
select 'a' ~ '($^)+';
select 'a' ~ '(^$)*';
select 'aa bb cc' ~ '(^(?!aa))+';
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683) -- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
select 'a' ~ '((((((a)*)*)*)*)*)*'; select 'a' ~ '((((((a)*)*)*)*)*)*';
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)'; select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment