Commit b63fc287 authored by Tom Lane

Add recursion depth protections to regular expression matching.

Some of the functions in regex compilation and execution recurse, and
therefore could in principle be driven to stack overflow.  The Tcl crew
has seen this happen in practice in duptraverse(), though their fix was
to put in a hard-wired limit on the number of recursive levels, which is
not too appetizing --- fortunately, we have enough infrastructure to check
the actually available stack.  Greg Stark has also seen it in other places
while fuzz testing on a machine with limited stack space.  Let's put guards
in to prevent crashes in all these places.

Since the regex code would leak memory if we simply threw elog(ERROR),
we have to introduce an API that checks for stack depth without throwing
such an error.  Fortunately that's not difficult.
parent f2c4ffc3
...@@ -683,6 +683,8 @@ delsub(struct nfa * nfa, ...@@ -683,6 +683,8 @@ delsub(struct nfa * nfa,
rp->tmp = rp; /* mark end */ rp->tmp = rp; /* mark end */
deltraverse(nfa, lp, lp); deltraverse(nfa, lp, lp);
if (NISERR())
return; /* asserts might not hold after failure */
assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
...@@ -702,6 +704,13 @@ deltraverse(struct nfa * nfa, ...@@ -702,6 +704,13 @@ deltraverse(struct nfa * nfa,
struct arc *a; struct arc *a;
struct state *to; struct state *to;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return;
}
if (s->nouts == 0) if (s->nouts == 0)
return; /* nothing to do */ return; /* nothing to do */
if (s->tmp != NULL) if (s->tmp != NULL)
...@@ -713,6 +722,8 @@ deltraverse(struct nfa * nfa, ...@@ -713,6 +722,8 @@ deltraverse(struct nfa * nfa,
{ {
to = a->to; to = a->to;
deltraverse(nfa, leftend, to); deltraverse(nfa, leftend, to);
if (NISERR())
return; /* asserts might not hold after failure */
assert(to->nouts == 0 || to->tmp != NULL); assert(to->nouts == 0 || to->tmp != NULL);
freearc(nfa, a); freearc(nfa, a);
if (to->nins == 0 && to->tmp == NULL) if (to->nins == 0 && to->tmp == NULL)
...@@ -767,6 +778,13 @@ duptraverse(struct nfa * nfa, ...@@ -767,6 +778,13 @@ duptraverse(struct nfa * nfa,
{ {
struct arc *a; struct arc *a;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return;
}
if (s->tmp != NULL) if (s->tmp != NULL)
return; /* already done */ return; /* already done */
...@@ -796,6 +814,13 @@ cleartraverse(struct nfa * nfa, ...@@ -796,6 +814,13 @@ cleartraverse(struct nfa * nfa,
{ {
struct arc *a; struct arc *a;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return;
}
if (s->tmp == NULL) if (s->tmp == NULL)
return; return;
s->tmp = NULL; s->tmp = NULL;
...@@ -1284,7 +1309,7 @@ fixempties(struct nfa * nfa, ...@@ -1284,7 +1309,7 @@ fixempties(struct nfa * nfa,
*/ */
for (s = nfa->states; s != NULL && !NISERR(); s = s->next) for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
{ {
for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) for (s2 = emptyreachable(nfa, s, s); s2 != s && !NISERR(); s2 = nexts)
{ {
/* /*
* If s2 is doomed, we decide that (1) we will always push arcs * If s2 is doomed, we decide that (1) we will always push arcs
...@@ -1342,19 +1367,28 @@ fixempties(struct nfa * nfa, ...@@ -1342,19 +1367,28 @@ fixempties(struct nfa * nfa,
* *
* The maximum recursion depth here is equal to the length of the longest * The maximum recursion depth here is equal to the length of the longest
* loop-free chain of EMPTY arcs, which is surely no more than the size of * loop-free chain of EMPTY arcs, which is surely no more than the size of
* the NFA, and in practice will be a lot less than that. * the NFA ... but that could still be enough to cause trouble.
*/ */
static struct state * static struct state *
emptyreachable(struct state * s, struct state * lastfound) emptyreachable(struct nfa * nfa,
struct state * s,
struct state * lastfound)
{ {
struct arc *a; struct arc *a;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return lastfound;
}
s->tmp = lastfound; s->tmp = lastfound;
lastfound = s; lastfound = s;
for (a = s->outs; a != NULL; a = a->outchain) for (a = s->outs; a != NULL; a = a->outchain)
{ {
if (a->type == EMPTY && a->to->tmp == NULL) if (a->type == EMPTY && a->to->tmp == NULL)
lastfound = emptyreachable(a->to, lastfound); lastfound = emptyreachable(nfa, a->to, lastfound);
} }
return lastfound; return lastfound;
} }
...@@ -1433,19 +1467,22 @@ cleanup(struct nfa * nfa) ...@@ -1433,19 +1467,22 @@ cleanup(struct nfa * nfa)
struct state *nexts; struct state *nexts;
int n; int n;
if (NISERR())
return;
/* clear out unreachable or dead-end states */ /* clear out unreachable or dead-end states */
/* use pre to mark reachable, then post to mark can-reach-post */ /* use pre to mark reachable, then post to mark can-reach-post */
markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre); markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
markcanreach(nfa, nfa->post, nfa->pre, nfa->post); markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
for (s = nfa->states; s != NULL; s = nexts) for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{ {
nexts = s->next; nexts = s->next;
if (s->tmp != nfa->post && !s->flag) if (s->tmp != nfa->post && !s->flag)
dropstate(nfa, s); dropstate(nfa, s);
} }
assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
cleartraverse(nfa, nfa->pre); cleartraverse(nfa, nfa->pre);
assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
/* the nins==0 (final unreachable) case will be caught later */ /* the nins==0 (final unreachable) case will be caught later */
/* renumber surviving states */ /* renumber surviving states */
...@@ -1466,6 +1503,13 @@ markreachable(struct nfa * nfa, ...@@ -1466,6 +1503,13 @@ markreachable(struct nfa * nfa,
{ {
struct arc *a; struct arc *a;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return;
}
if (s->tmp != okay) if (s->tmp != okay)
return; return;
s->tmp = mark; s->tmp = mark;
...@@ -1485,6 +1529,13 @@ markcanreach(struct nfa * nfa, ...@@ -1485,6 +1529,13 @@ markcanreach(struct nfa * nfa,
{ {
struct arc *a; struct arc *a;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(nfa->v->re))
{
NERR(REG_ETOOBIG);
return;
}
if (s->tmp != okay) if (s->tmp != okay)
return; return;
s->tmp = mark; s->tmp = mark;
...@@ -1502,6 +1553,9 @@ analyze(struct nfa * nfa) ...@@ -1502,6 +1553,9 @@ analyze(struct nfa * nfa)
struct arc *a; struct arc *a;
struct arc *aa; struct arc *aa;
if (NISERR())
return 0;
if (nfa->pre->outs == NULL) if (nfa->pre->outs == NULL)
return REG_UIMPOSSIBLE; return REG_UIMPOSSIBLE;
for (a = nfa->pre->outs; a != NULL; a = a->outchain) for (a = nfa->pre->outs; a != NULL; a = a->outchain)
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include "regex/regguts.h" #include "regex/regguts.h"
#include "miscadmin.h" /* needed by rcancelrequested() */ #include "miscadmin.h" /* needed by rcancelrequested/rstacktoodeep */
/* /*
* forward declarations, up here so forward datatypes etc. are defined early * forward declarations, up here so forward datatypes etc. are defined early
...@@ -70,6 +70,7 @@ static int newlacon(struct vars *, struct state *, struct state *, int); ...@@ -70,6 +70,7 @@ static int newlacon(struct vars *, struct state *, struct state *, int);
static void freelacons(struct subre *, int); static void freelacons(struct subre *, int);
static void rfree(regex_t *); static void rfree(regex_t *);
static int rcancelrequested(void); static int rcancelrequested(void);
static int rstacktoodeep(void);
#ifdef REG_DEBUG #ifdef REG_DEBUG
static void dump(regex_t *, FILE *); static void dump(regex_t *, FILE *);
...@@ -152,7 +153,7 @@ static int push(struct nfa *, struct arc *); ...@@ -152,7 +153,7 @@ static int push(struct nfa *, struct arc *);
#define COMPATIBLE 3 /* compatible but not satisfied yet */ #define COMPATIBLE 3 /* compatible but not satisfied yet */
static int combine(struct arc *, struct arc *); static int combine(struct arc *, struct arc *);
static void fixempties(struct nfa *, FILE *); static void fixempties(struct nfa *, FILE *);
static struct state *emptyreachable(struct state *, struct state *); static struct state *emptyreachable(struct nfa *, struct state *, struct state *);
static void replaceempty(struct nfa *, struct state *, struct state *); static void replaceempty(struct nfa *, struct state *, struct state *);
static void cleanup(struct nfa *); static void cleanup(struct nfa *);
static void markreachable(struct nfa *, struct state *, struct state *, struct state *); static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
...@@ -279,7 +280,8 @@ struct vars ...@@ -279,7 +280,8 @@ struct vars
/* static function list */ /* static function list */
static const struct fns functions = { static const struct fns functions = {
rfree, /* regfree insides */ rfree, /* regfree insides */
rcancelrequested /* check for cancel request */ rcancelrequested, /* check for cancel request */
rstacktoodeep /* check for stack getting dangerously deep */
}; };
...@@ -1626,6 +1628,16 @@ subre(struct vars * v, ...@@ -1626,6 +1628,16 @@ subre(struct vars * v,
{ {
struct subre *ret = v->treefree; struct subre *ret = v->treefree;
/*
* Checking for stack overflow here is sufficient to protect parse() and
* its recursive subroutines.
*/
if (STACK_TOO_DEEP(v->re))
{
ERR(REG_ETOOBIG);
return NULL;
}
if (ret != NULL) if (ret != NULL)
v->treefree = ret->left; v->treefree = ret->left;
else else
...@@ -1938,6 +1950,22 @@ rcancelrequested(void) ...@@ -1938,6 +1950,22 @@ rcancelrequested(void)
return InterruptPending && (QueryCancelPending || ProcDiePending); return InterruptPending && (QueryCancelPending || ProcDiePending);
} }
/*
 * rstacktoodeep - report whether the execution stack is dangerously deep
 *
 * This is the stack_too_deep callback installed in the static "functions"
 * table, so the regex code reaches it through the STACK_TOO_DEEP() macro.
 *
 * Returns nonzero when the caller should abandon the operation with error
 * code REG_ETOOBIG, and zero when it is safe to keep going.
 *
 * The check is delegated to the backend's stack_is_too_deep(), which makes
 * this implementation Postgres-specific.  Should the regex code ever be
 * split out as a standalone library, some API will be needed to let
 * applications supply their own callback for this.
 */
static int
rstacktoodeep(void)
{
	int			too_deep;

	/* collapse the backend's boolean answer into the int the table expects */
	too_deep = stack_is_too_deep() ? 1 : 0;
	return too_deep;
}
#ifdef REG_DEBUG #ifdef REG_DEBUG
/* /*
......
...@@ -627,6 +627,13 @@ lacon(struct vars * v, ...@@ -627,6 +627,13 @@ lacon(struct vars * v,
struct smalldfa sd; struct smalldfa sd;
chr *end; chr *end;
/* Since this is recursive, it could be driven to stack overflow */
if (STACK_TOO_DEEP(v->re))
{
ERR(REG_ETOOBIG);
return 0;
}
n = co - pcnfa->ncolors; n = co - pcnfa->ncolors;
assert(n < v->g->nlacons && v->g->lacons != NULL); assert(n < v->g->nlacons && v->g->lacons != NULL);
FDEBUG(("=== testing lacon %d\n", n)); FDEBUG(("=== testing lacon %d\n", n));
......
...@@ -624,6 +624,9 @@ cdissect(struct vars * v, ...@@ -624,6 +624,9 @@ cdissect(struct vars * v,
/* handy place to check for operation cancel */ /* handy place to check for operation cancel */
if (CANCEL_REQUESTED(v->re)) if (CANCEL_REQUESTED(v->re))
return REG_CANCEL; return REG_CANCEL;
/* ... and stack overrun */
if (STACK_TOO_DEEP(v->re))
return REG_ETOOBIG;
switch (t->op) switch (t->op)
{ {
......
...@@ -3081,15 +3081,32 @@ restore_stack_base(pg_stack_base_t base) ...@@ -3081,15 +3081,32 @@ restore_stack_base(pg_stack_base_t base)
} }
/* /*
* check_stack_depth: check for excessively deep recursion * check_stack_depth/stack_is_too_deep: check for excessively deep recursion
* *
* This should be called someplace in any recursive routine that might possibly * This should be called someplace in any recursive routine that might possibly
* recurse deep enough to overflow the stack. Most Unixen treat stack * recurse deep enough to overflow the stack. Most Unixen treat stack
* overflow as an unrecoverable SIGSEGV, so we want to error out ourselves * overflow as an unrecoverable SIGSEGV, so we want to error out ourselves
* before hitting the hardware limit. * before hitting the hardware limit.
*
* check_stack_depth() just throws an error summarily. stack_is_too_deep()
* can be used by code that wants to handle the error condition itself.
*/ */
void void
check_stack_depth(void) check_stack_depth(void)
{
if (stack_is_too_deep())
{
ereport(ERROR,
(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
errmsg("stack depth limit exceeded"),
errhint("Increase the configuration parameter \"max_stack_depth\" (currently %dkB), "
"after ensuring the platform's stack depth limit is adequate.",
max_stack_depth)));
}
}
bool
stack_is_too_deep(void)
{ {
char stack_top_loc; char stack_top_loc;
long stack_depth; long stack_depth;
...@@ -3115,14 +3132,7 @@ check_stack_depth(void) ...@@ -3115,14 +3132,7 @@ check_stack_depth(void)
*/ */
if (stack_depth > max_stack_depth_bytes && if (stack_depth > max_stack_depth_bytes &&
stack_base_ptr != NULL) stack_base_ptr != NULL)
{ return true;
ereport(ERROR,
(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
errmsg("stack depth limit exceeded"),
errhint("Increase the configuration parameter \"max_stack_depth\" (currently %dkB), "
"after ensuring the platform's stack depth limit is adequate.",
max_stack_depth)));
}
/* /*
* On IA64 there is a separate "register" stack that requires its own * On IA64 there is a separate "register" stack that requires its own
...@@ -3137,15 +3147,10 @@ check_stack_depth(void) ...@@ -3137,15 +3147,10 @@ check_stack_depth(void)
if (stack_depth > max_stack_depth_bytes && if (stack_depth > max_stack_depth_bytes &&
register_stack_base_ptr != NULL) register_stack_base_ptr != NULL)
{ return true;
ereport(ERROR,
(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
errmsg("stack depth limit exceeded"),
errhint("Increase the configuration parameter \"max_stack_depth\" (currently %dkB), "
"after ensuring the platform's stack depth limit is adequate.",
max_stack_depth)));
}
#endif /* IA64 */ #endif /* IA64 */
return false;
} }
/* GUC check hook for max_stack_depth */ /* GUC check hook for max_stack_depth */
......
...@@ -268,6 +268,7 @@ typedef char *pg_stack_base_t; ...@@ -268,6 +268,7 @@ typedef char *pg_stack_base_t;
extern pg_stack_base_t set_stack_base(void); extern pg_stack_base_t set_stack_base(void);
extern void restore_stack_base(pg_stack_base_t base); extern void restore_stack_base(pg_stack_base_t base);
extern void check_stack_depth(void); extern void check_stack_depth(void);
extern bool stack_is_too_deep(void);
/* in tcop/utility.c */ /* in tcop/utility.c */
extern void PreventCommandIfReadOnly(const char *cmdname); extern void PreventCommandIfReadOnly(const char *cmdname);
......
...@@ -449,11 +449,15 @@ struct fns ...@@ -449,11 +449,15 @@ struct fns
{ {
void FUNCPTR(free, (regex_t *)); void FUNCPTR(free, (regex_t *));
int FUNCPTR(cancel_requested, (void)); int FUNCPTR(cancel_requested, (void));
int FUNCPTR(stack_too_deep, (void));
}; };
#define CANCEL_REQUESTED(re) \ #define CANCEL_REQUESTED(re) \
((*((struct fns *) (re)->re_fns)->cancel_requested) ()) ((*((struct fns *) (re)->re_fns)->cancel_requested) ())
/*
 * STACK_TOO_DEEP - invoke the stack_too_deep callback attached to "re"
 *
 * Casts (re)->re_fns to struct fns * and calls its stack_too_deep member,
 * yielding that function's int result.  The callers visible in the regex
 * code treat a nonzero result as a reason to fail with REG_ETOOBIG.
 * Deliberately written in the same form as CANCEL_REQUESTED().
 */
#define STACK_TOO_DEEP(re) \
((*((struct fns *) (re)->re_fns)->stack_too_deep) ())
/* /*
* the insides of a regex_t, hidden behind a void * * the insides of a regex_t, hidden behind a void *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment