Commit f42ea835 authored by Tom Lane

Fix use-after-free issue in regexp engine.

Commit cebc1d34 taught parseqatom() to optimize cases where a branch
contains only one, "messy", atom by getting rid of excess subRE nodes.
The way we really should do that is to keep the subRE built for the
"messy" child atom; but to avoid changing parseqatom's nominal API,
I made it delete that node after copying its fields to the outer subRE
made by parsebranch().  It seems that that actually worked at the time;
but it became dangerous after ea1268f6, because that later commit
allowed the lower invocation of parse() to return a subRE that was also
pointed to by some v->subs[] entry.  This meant we could wind up with a
dangling pointer in v->subs[], allowing a later backref to misbehave,
but only if that subRE struct had been reused in between.  So the damage
seems confined to cases like '((...))...(...\2'.

To fix, do what I should have done before and modify parseqatom's API
to make it possible for it to remove the caller's subRE instead of the
callee's.  That's safer because we know that subRE isn't complete yet,
so nothing else can be holding a pointer to it.

Per report from Mark Dilger.  Back-patch to v14 where the problematic
patches came in.

Discussion: https://postgr.es/m/0203588E-E609-43AF-9F4F-902854231EE7@enterprisedb.com
parent 51b95fb2
...@@ -43,7 +43,7 @@ static int freev(struct vars *, int); ...@@ -43,7 +43,7 @@ static int freev(struct vars *, int);
static void makesearch(struct vars *, struct nfa *); static void makesearch(struct vars *, struct nfa *);
static struct subre *parse(struct vars *, int, int, struct state *, struct state *); static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); static struct subre *parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
static void nonword(struct vars *, int, struct state *, struct state *); static void nonword(struct vars *, int, struct state *, struct state *);
static void word(struct vars *, int, struct state *, struct state *); static void word(struct vars *, int, struct state *, struct state *);
static void charclass(struct vars *, enum char_classes, static void charclass(struct vars *, enum char_classes,
...@@ -756,7 +756,7 @@ parsebranch(struct vars *v, ...@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
seencontent = 1; seencontent = 1;
/* NB, recursion in parseqatom() may swallow rest of branch */ /* NB, recursion in parseqatom() may swallow rest of branch */
parseqatom(v, stopper, type, lp, right, t); t = parseqatom(v, stopper, type, lp, right, t);
NOERRN(); NOERRN();
} }
...@@ -777,8 +777,12 @@ parsebranch(struct vars *v, ...@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
* The bookkeeping near the end cooperates very closely with parsebranch(); * The bookkeeping near the end cooperates very closely with parsebranch();
* in particular, it contains a recursion that can involve parsing the rest * in particular, it contains a recursion that can involve parsing the rest
* of the branch, making this function's name somewhat inaccurate. * of the branch, making this function's name somewhat inaccurate.
*
* Usually, the return value is just "top", but in some cases where we
* have parsed the rest of the branch, we may deem "top" redundant and
* free it, returning some child subre instead.
*/ */
static void static struct subre *
parseqatom(struct vars *v, parseqatom(struct vars *v,
int stopper, /* EOS or ')' */ int stopper, /* EOS or ')' */
int type, /* LACON (lookaround subRE) or PLAIN */ int type, /* LACON (lookaround subRE) or PLAIN */
...@@ -818,84 +822,84 @@ parseqatom(struct vars *v, ...@@ -818,84 +822,84 @@ parseqatom(struct vars *v,
if (v->cflags & REG_NLANCH) if (v->cflags & REG_NLANCH)
ARCV(BEHIND, v->nlcolor); ARCV(BEHIND, v->nlcolor);
NEXT(); NEXT();
return; return top;
break; break;
case '$': case '$':
ARCV('$', 1); ARCV('$', 1);
if (v->cflags & REG_NLANCH) if (v->cflags & REG_NLANCH)
ARCV(AHEAD, v->nlcolor); ARCV(AHEAD, v->nlcolor);
NEXT(); NEXT();
return; return top;
break; break;
case SBEGIN: case SBEGIN:
ARCV('^', 1); /* BOL */ ARCV('^', 1); /* BOL */
ARCV('^', 0); /* or BOS */ ARCV('^', 0); /* or BOS */
NEXT(); NEXT();
return; return top;
break; break;
case SEND: case SEND:
ARCV('$', 1); /* EOL */ ARCV('$', 1); /* EOL */
ARCV('$', 0); /* or EOS */ ARCV('$', 0); /* or EOS */
NEXT(); NEXT();
return; return top;
break; break;
case '<': case '<':
wordchrs(v); wordchrs(v);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
nonword(v, BEHIND, lp, s); nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp); word(v, AHEAD, s, rp);
NEXT(); NEXT();
return; return top;
break; break;
case '>': case '>':
wordchrs(v); wordchrs(v);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
word(v, BEHIND, lp, s); word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp); nonword(v, AHEAD, s, rp);
NEXT(); NEXT();
return; return top;
break; break;
case WBDRY: case WBDRY:
wordchrs(v); wordchrs(v);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
nonword(v, BEHIND, lp, s); nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp); word(v, AHEAD, s, rp);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
word(v, BEHIND, lp, s); word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp); nonword(v, AHEAD, s, rp);
NEXT(); NEXT();
return; return top;
break; break;
case NWBDRY: case NWBDRY:
wordchrs(v); wordchrs(v);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
word(v, BEHIND, lp, s); word(v, BEHIND, lp, s);
word(v, AHEAD, s, rp); word(v, AHEAD, s, rp);
s = newstate(v->nfa); s = newstate(v->nfa);
NOERR(); NOERRN();
nonword(v, BEHIND, lp, s); nonword(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp); nonword(v, AHEAD, s, rp);
NEXT(); NEXT();
return; return top;
break; break;
case LACON: /* lookaround constraint */ case LACON: /* lookaround constraint */
latype = v->nextvalue; latype = v->nextvalue;
NEXT(); NEXT();
s = newstate(v->nfa); s = newstate(v->nfa);
s2 = newstate(v->nfa); s2 = newstate(v->nfa);
NOERR(); NOERRN();
t = parse(v, ')', LACON, s, s2); t = parse(v, ')', LACON, s, s2);
freesubre(v, t); /* internal structure irrelevant */ freesubre(v, t); /* internal structure irrelevant */
NOERR(); NOERRN();
assert(SEE(')')); assert(SEE(')'));
NEXT(); NEXT();
processlacon(v, s, s2, latype, lp, rp); processlacon(v, s, s2, latype, lp, rp);
return; return top;
break; break;
/* then errors, to get them out of the way */ /* then errors, to get them out of the way */
case '*': case '*':
...@@ -903,18 +907,18 @@ parseqatom(struct vars *v, ...@@ -903,18 +907,18 @@ parseqatom(struct vars *v,
case '?': case '?':
case '{': case '{':
ERR(REG_BADRPT); ERR(REG_BADRPT);
return; return top;
break; break;
default: default:
ERR(REG_ASSERT); ERR(REG_ASSERT);
return; return top;
break; break;
/* then plain characters, and minor variants on that theme */ /* then plain characters, and minor variants on that theme */
case ')': /* unbalanced paren */ case ')': /* unbalanced paren */
if ((v->cflags & REG_ADVANCED) != REG_EXTENDED) if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
{ {
ERR(REG_EPAREN); ERR(REG_EPAREN);
return; return top;
} }
/* legal in EREs due to specification botch */ /* legal in EREs due to specification botch */
NOTE(REG_UPBOTCH); NOTE(REG_UPBOTCH);
...@@ -923,7 +927,7 @@ parseqatom(struct vars *v, ...@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
case PLAIN: case PLAIN:
onechr(v, v->nextvalue, lp, rp); onechr(v, v->nextvalue, lp, rp);
okcolors(v->nfa, v->cm); okcolors(v->nfa, v->cm);
NOERR(); NOERRN();
NEXT(); NEXT();
break; break;
case '[': case '[':
...@@ -972,14 +976,14 @@ parseqatom(struct vars *v, ...@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
*/ */
s = newstate(v->nfa); s = newstate(v->nfa);
s2 = newstate(v->nfa); s2 = newstate(v->nfa);
NOERR(); NOERRN();
EMPTYARC(lp, s); EMPTYARC(lp, s);
EMPTYARC(s2, rp); EMPTYARC(s2, rp);
NOERR(); NOERRN();
atom = parse(v, ')', type, s, s2); atom = parse(v, ')', type, s, s2);
assert(SEE(')') || ISERR()); assert(SEE(')') || ISERR());
NEXT(); NEXT();
NOERR(); NOERRN();
if (cap) if (cap)
{ {
assert(v->subs[subno] == NULL); assert(v->subs[subno] == NULL);
...@@ -994,7 +998,7 @@ parseqatom(struct vars *v, ...@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
{ {
/* generate no-op wrapper node to handle "((x))" */ /* generate no-op wrapper node to handle "((x))" */
t = subre(v, '(', atom->flags | CAP, lp, rp); t = subre(v, '(', atom->flags | CAP, lp, rp);
NOERR(); NOERRN();
t->capno = subno; t->capno = subno;
t->child = atom; t->child = atom;
atom = t; atom = t;
...@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v, ...@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
INSIST(type != LACON, REG_ESUBREG); INSIST(type != LACON, REG_ESUBREG);
INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
NOERR(); NOERRN();
assert(v->nextvalue > 0); assert(v->nextvalue > 0);
atom = subre(v, 'b', BACKR, lp, rp); atom = subre(v, 'b', BACKR, lp, rp);
NOERR(); NOERRN();
subno = v->nextvalue; subno = v->nextvalue;
atom->backno = subno; atom->backno = subno;
EMPTYARC(lp, rp); /* temporarily, so there's something */ EMPTYARC(lp, rp); /* temporarily, so there's something */
...@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v, ...@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
if (m > n) if (m > n)
{ {
ERR(REG_BADBR); ERR(REG_BADBR);
return; return top;
} }
/* {m,n} exercises preference, even if it's {m,m} */ /* {m,n} exercises preference, even if it's {m,m} */
qprefer = (v->nextvalue) ? LONGER : SHORTER; qprefer = (v->nextvalue) ? LONGER : SHORTER;
...@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v, ...@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
if (!SEE('}')) if (!SEE('}'))
{ /* catches errors too */ { /* catches errors too */
ERR(REG_BADBR); ERR(REG_BADBR);
return; return top;
} }
NEXT(); NEXT();
break; break;
...@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v, ...@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
v->subs[subno] = NULL; v->subs[subno] = NULL;
delsub(v->nfa, lp, rp); delsub(v->nfa, lp, rp);
EMPTYARC(lp, rp); EMPTYARC(lp, rp);
return; return top;
} }
/* if not a messy case, avoid hard part */ /* if not a messy case, avoid hard part */
...@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v, ...@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
if (atom != NULL) if (atom != NULL)
freesubre(v, atom); freesubre(v, atom);
top->flags = f; top->flags = f;
return; return top;
} }
/* /*
...@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v, ...@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
if (atom == NULL) if (atom == NULL)
{ {
atom = subre(v, '=', 0, lp, rp); atom = subre(v, '=', 0, lp, rp);
NOERR(); NOERRN();
} }
/*---------- /*----------
...@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v, ...@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
*/ */
s = newstate(v->nfa); /* first, new endpoints for the atom */ s = newstate(v->nfa); /* first, new endpoints for the atom */
s2 = newstate(v->nfa); s2 = newstate(v->nfa);
NOERR(); NOERRN();
moveouts(v->nfa, lp, s); moveouts(v->nfa, lp, s);
moveins(v->nfa, rp, s2); moveins(v->nfa, rp, s2);
NOERR(); NOERRN();
atom->begin = s; atom->begin = s;
atom->end = s2; atom->end = s2;
s = newstate(v->nfa); /* set up starting state */ s = newstate(v->nfa); /* set up starting state */
NOERR(); NOERRN();
EMPTYARC(lp, s); EMPTYARC(lp, s);
NOERR(); NOERRN();
/* break remaining subRE into x{...} and what follows */ /* break remaining subRE into x{...} and what follows */
t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
NOERR(); NOERRN();
t->child = atom; t->child = atom;
atomp = &t->child; atomp = &t->child;
...@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v, ...@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
*/ */
assert(top->op == '=' && top->child == NULL); assert(top->op == '=' && top->child == NULL);
top->child = subre(v, '=', top->flags, top->begin, lp); top->child = subre(v, '=', top->flags, top->begin, lp);
NOERR(); NOERRN();
top->op = '.'; top->op = '.';
top->child->sibling = t; top->child->sibling = t;
/* top->flags will get updated later */ /* top->flags will get updated later */
...@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v, ...@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
*/ */
dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
atom->begin, atom->end); atom->begin, atom->end);
NOERR(); NOERRN();
/* The backref node's NFA should not enforce any constraints */ /* The backref node's NFA should not enforce any constraints */
removeconstraints(v->nfa, atom->begin, atom->end); removeconstraints(v->nfa, atom->begin, atom->end);
NOERR(); NOERRN();
} }
/* /*
...@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v, ...@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
repeat(v, atom->begin, atom->end, m, n); repeat(v, atom->begin, atom->end, m, n);
f = COMBINE(qprefer, atom->flags); f = COMBINE(qprefer, atom->flags);
t = subre(v, '=', f, atom->begin, atom->end); t = subre(v, '=', f, atom->begin, atom->end);
NOERR(); NOERRN();
freesubre(v, atom); freesubre(v, atom);
*atomp = t; *atomp = t;
/* rest of branch can be strung starting from t->end */ /* rest of branch can be strung starting from t->end */
...@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v, ...@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1); repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
f = COMBINE(qprefer, atom->flags); f = COMBINE(qprefer, atom->flags);
t = subre(v, '.', f, s, atom->end); /* prefix and atom */ t = subre(v, '.', f, s, atom->end); /* prefix and atom */
NOERR(); NOERRN();
t->child = subre(v, '=', PREF(f), s, atom->begin); t->child = subre(v, '=', PREF(f), s, atom->begin);
NOERR(); NOERRN();
t->child->sibling = atom; t->child->sibling = atom;
*atomp = t; *atomp = t;
/* rest of branch can be strung starting from atom->end */ /* rest of branch can be strung starting from atom->end */
...@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v, ...@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
{ {
/* general case: need an iteration node */ /* general case: need an iteration node */
s2 = newstate(v->nfa); s2 = newstate(v->nfa);
NOERR(); NOERRN();
moveouts(v->nfa, atom->end, s2); moveouts(v->nfa, atom->end, s2);
NOERR(); NOERRN();
dupnfa(v->nfa, atom->begin, atom->end, s, s2); dupnfa(v->nfa, atom->begin, atom->end, s, s2);
repeat(v, s, s2, m, n); repeat(v, s, s2, m, n);
f = COMBINE(qprefer, atom->flags); f = COMBINE(qprefer, atom->flags);
t = subre(v, '*', f, s, s2); t = subre(v, '*', f, s, s2);
NOERR(); NOERRN();
t->min = (short) m; t->min = (short) m;
t->max = (short) n; t->max = (short) n;
t->child = atom; t->child = atom;
...@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v, ...@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
{ {
/* parse all the rest of the branch, and insert in t->child->sibling */ /* parse all the rest of the branch, and insert in t->child->sibling */
t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1); t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1);
NOERR(); NOERRN();
assert(SEE('|') || SEE(stopper) || SEE(EOS)); assert(SEE('|') || SEE(stopper) || SEE(EOS));
/* here's the promised update of the flags */ /* here's the promised update of the flags */
...@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v, ...@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
* *
* If the messy atom was the first thing in the branch, then * If the messy atom was the first thing in the branch, then
* top->child is vacuous and we can get rid of one level of * top->child is vacuous and we can get rid of one level of
* concatenation. Since the caller is holding a pointer to the top * concatenation.
* node, we can't remove that node; but we're allowed to change its
* properties.
*/ */
assert(top->child->op == '='); assert(top->child->op == '=');
if (top->child->begin == top->child->end) if (top->child->begin == top->child->end)
...@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v, ...@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
{ {
assert(!MESSY(top->child->flags)); assert(!MESSY(top->child->flags));
t = top->child->sibling; t = top->child->sibling;
freesubre(v, top->child); top->child->sibling = NULL;
top->op = t->op; freesubre(v, top);
top->flags = t->flags; top = t;
top->latype = t->latype;
top->id = t->id;
top->capno = t->capno;
top->backno = t->backno;
top->min = t->min;
top->max = t->max;
top->child = t->child;
top->begin = t->begin;
top->end = t->end;
freesrnode(v, t);
} }
} }
return top;
} }
/* /*
...@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */ ...@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */
if (!NULLCNFA(sr->cnfa)) if (!NULLCNFA(sr->cnfa))
freecnfa(&sr->cnfa); freecnfa(&sr->cnfa);
sr->flags = 0; sr->flags = 0; /* in particular, not INUSE */
sr->child = sr->sibling = NULL;
sr->begin = sr->end = NULL;
if (v != NULL && v->treechain != NULL) if (v != NULL && v->treechain != NULL)
{ {
......
...@@ -3468,6 +3468,14 @@ select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M' ...@@ -3468,6 +3468,14 @@ select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'
{" TO foo",foo,o,NULL} {" TO foo",foo,o,NULL}
(2 rows) (2 rows)
-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {}
select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
test_regex
--------------------------------------------
{3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
{x,x,x,NULL}
(2 rows)
-- doing 22 "multicharacter collating elements" -- doing 22 "multicharacter collating elements"
-- # again ugh -- # again ugh
-- MCCEs are not implemented in Postgres, so we skip all these tests -- MCCEs are not implemented in Postgres, so we skip all these tests
......
...@@ -1009,6 +1009,8 @@ select * from test_regex('(.*).*', 'abc', 'N'); ...@@ -1009,6 +1009,8 @@ select * from test_regex('(.*).*', 'abc', 'N');
select * from test_regex('(a*)*', 'bc', 'N'); select * from test_regex('(a*)*', 'bc', 'N');
-- expectMatch 21.35 M { TO (([a-z0-9._]+|"([^"]+|"")+")+)} {asd TO foo} { TO foo} foo o {} -- expectMatch 21.35 M { TO (([a-z0-9._]+|"([^"]+|"")+")+)} {asd TO foo} { TO foo} foo o {}
select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'); select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M');
-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {}
select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
-- doing 22 "multicharacter collating elements" -- doing 22 "multicharacter collating elements"
-- # again ugh -- # again ugh
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment