Commit ee3a81f0 authored by Tom Lane

Change regexp engine's ccondissect/crevdissect routines to perform DFA
matching before recursing instead of after.  The DFA match eliminates
unworkable midpoint choices a lot faster than the recursive check, in most
cases, so doing it first can speed things up; particularly in pathological
cases such as recently exhibited by Michael Glaesemann.

In addition, apply some cosmetic changes that were applied upstream (in the
Tcl project) at the same time, in order to sync with upstream version 1.15
of regexec.c.

Upstream apparently intends to backpatch this, so I will too.  The
pathological behavior could be unpleasant if encountered in the field,
which seems to justify any risk of introducing new bugs.

Tom Lane, reviewed by Donal K. Fellows of Tcl project
parent c85c9414
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.27 2005/10/15 02:49:24 momjian Exp $ * $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.28 2010/02/01 02:45:29 tgl Exp $
* *
*/ */
...@@ -141,6 +141,7 @@ static int dissect(struct vars *, struct subre *, chr *, chr *); ...@@ -141,6 +141,7 @@ static int dissect(struct vars *, struct subre *, chr *, chr *);
static int condissect(struct vars *, struct subre *, chr *, chr *); static int condissect(struct vars *, struct subre *, chr *, chr *);
static int altdissect(struct vars *, struct subre *, chr *, chr *); static int altdissect(struct vars *, struct subre *, chr *, chr *);
static int cdissect(struct vars *, struct subre *, chr *, chr *); static int cdissect(struct vars *, struct subre *, chr *, chr *);
static int ccaptdissect(struct vars *, struct subre *, chr *, chr *);
static int ccondissect(struct vars *, struct subre *, chr *, chr *); static int ccondissect(struct vars *, struct subre *, chr *, chr *);
static int crevdissect(struct vars *, struct subre *, chr *, chr *); static int crevdissect(struct vars *, struct subre *, chr *, chr *);
static int cbrdissect(struct vars *, struct subre *, chr *, chr *); static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
...@@ -560,27 +561,21 @@ dissect(struct vars * v, ...@@ -560,27 +561,21 @@ dissect(struct vars * v,
case '=': /* terminal node */ case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL); assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */ return REG_OKAY; /* no action, parent did the work */
break;
case '|': /* alternation */ case '|': /* alternation */
assert(t->left != NULL); assert(t->left != NULL);
return altdissect(v, t, begin, end); return altdissect(v, t, begin, end);
break;
case 'b': /* back ref -- shouldn't be calling us! */ case 'b': /* back ref -- shouldn't be calling us! */
return REG_ASSERT; return REG_ASSERT;
break;
case '.': /* concatenation */ case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL); assert(t->left != NULL && t->right != NULL);
return condissect(v, t, begin, end); return condissect(v, t, begin, end);
break;
case '(': /* capturing */ case '(': /* capturing */
assert(t->left != NULL && t->right == NULL); assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0); assert(t->subno > 0);
subset(v, t, begin, end); subset(v, t, begin, end);
return dissect(v, t->left, begin, end); return dissect(v, t->left, begin, end);
break;
default: default:
return REG_ASSERT; return REG_ASSERT;
break;
} }
} }
...@@ -710,8 +705,6 @@ cdissect(struct vars * v, ...@@ -710,8 +705,6 @@ cdissect(struct vars * v,
chr *begin, /* beginning of relevant substring */ chr *begin, /* beginning of relevant substring */
chr *end) /* end of same */ chr *end) /* end of same */
{ {
int er;
assert(t != NULL); assert(t != NULL);
MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
...@@ -720,33 +713,42 @@ cdissect(struct vars * v, ...@@ -720,33 +713,42 @@ cdissect(struct vars * v,
case '=': /* terminal node */ case '=': /* terminal node */
assert(t->left == NULL && t->right == NULL); assert(t->left == NULL && t->right == NULL);
return REG_OKAY; /* no action, parent did the work */ return REG_OKAY; /* no action, parent did the work */
break;
case '|': /* alternation */ case '|': /* alternation */
assert(t->left != NULL); assert(t->left != NULL);
return caltdissect(v, t, begin, end); return caltdissect(v, t, begin, end);
break;
case 'b': /* back ref -- shouldn't be calling us! */ case 'b': /* back ref -- shouldn't be calling us! */
assert(t->left == NULL && t->right == NULL); assert(t->left == NULL && t->right == NULL);
return cbrdissect(v, t, begin, end); return cbrdissect(v, t, begin, end);
break;
case '.': /* concatenation */ case '.': /* concatenation */
assert(t->left != NULL && t->right != NULL); assert(t->left != NULL && t->right != NULL);
return ccondissect(v, t, begin, end); return ccondissect(v, t, begin, end);
break;
case '(': /* capturing */ case '(': /* capturing */
assert(t->left != NULL && t->right == NULL); assert(t->left != NULL && t->right == NULL);
assert(t->subno > 0); return ccaptdissect(v, t, begin, end);
er = cdissect(v, t->left, begin, end);
if (er == REG_OKAY)
subset(v, t, begin, end);
return er;
break;
default: default:
return REG_ASSERT; return REG_ASSERT;
break;
} }
} }
/*
 * ccaptdissect - capture subexpression matches (with complications)
 *
 * Dissects the child node t->left over the substring [begin, end) using
 * cdissect().  Only when the child reports REG_OKAY is subset() invoked
 * for this node over the same span (presumably recording the match for
 * subexpression number t->subno -- confirm against subset()).  Any other
 * result from the child is handed back to the caller unchanged and
 * subset() is not called.
 */
static int						/* regexec return code */
ccaptdissect(struct vars * v,
			 struct subre * t,
			 chr *begin,		/* beginning of relevant substring */
			 chr *end)			/* end of same */
{
	int			status;

	/* a capturing node must carry a positive subexpression number */
	assert(t->subno > 0);

	/* the child has to dissect successfully before anything is recorded */
	status = cdissect(v, t->left, begin, end);
	if (status != REG_OKAY)
		return status;

	subset(v, t, begin, end);
	return REG_OKAY;
}
/* /*
* ccondissect - concatenation subexpression matches (with complications) * ccondissect - concatenation subexpression matches (with complications)
* The retry memory stores the offset of the trial midpoint from begin, * The retry memory stores the offset of the trial midpoint from begin,
...@@ -804,17 +806,27 @@ ccondissect(struct vars * v, ...@@ -804,17 +806,27 @@ ccondissect(struct vars * v,
for (;;) for (;;)
{ {
/* try this midpoint on for size */ /* try this midpoint on for size */
er = cdissect(v, t->left, begin, mid); if (longest(v, d2, mid, end, (int *) NULL) == end)
if (er == REG_OKAY &&
longest(v, d2, mid, end, (int *) NULL) == end &&
(er = cdissect(v, t->right, mid, end)) ==
REG_OKAY)
break; /* NOTE BREAK OUT */
if (er != REG_OKAY && er != REG_NOMATCH)
{ {
freedfa(d); er = cdissect(v, t->left, begin, mid);
freedfa(d2); if (er == REG_OKAY)
return er; {
er = cdissect(v, t->right, mid, end);
if (er == REG_OKAY)
{
/* satisfaction */
MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return REG_OKAY;
}
}
if (er != REG_OKAY && er != REG_NOMATCH)
{
freedfa(d);
freedfa(d2);
return er;
}
} }
/* that midpoint didn't work, find a new one */ /* that midpoint didn't work, find a new one */
...@@ -841,11 +853,8 @@ ccondissect(struct vars * v, ...@@ -841,11 +853,8 @@ ccondissect(struct vars * v,
zapmem(v, t->right); zapmem(v, t->right);
} }
/* satisfaction */ /* can't get here */
MDEBUG(("successful\n")); return REG_ASSERT;
freedfa(d);
freedfa(d2);
return REG_OKAY;
} }
/* /*
...@@ -904,17 +913,27 @@ crevdissect(struct vars * v, ...@@ -904,17 +913,27 @@ crevdissect(struct vars * v,
for (;;) for (;;)
{ {
/* try this midpoint on for size */ /* try this midpoint on for size */
er = cdissect(v, t->left, begin, mid); if (longest(v, d2, mid, end, (int *) NULL) == end)
if (er == REG_OKAY &&
longest(v, d2, mid, end, (int *) NULL) == end &&
(er = cdissect(v, t->right, mid, end)) ==
REG_OKAY)
break; /* NOTE BREAK OUT */
if (er != REG_OKAY && er != REG_NOMATCH)
{ {
freedfa(d); er = cdissect(v, t->left, begin, mid);
freedfa(d2); if (er == REG_OKAY)
return er; {
er = cdissect(v, t->right, mid, end);
if (er == REG_OKAY)
{
/* satisfaction */
MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return REG_OKAY;
}
}
if (er != REG_OKAY && er != REG_NOMATCH)
{
freedfa(d);
freedfa(d2);
return er;
}
} }
/* that midpoint didn't work, find a new one */ /* that midpoint didn't work, find a new one */
...@@ -941,11 +960,8 @@ crevdissect(struct vars * v, ...@@ -941,11 +960,8 @@ crevdissect(struct vars * v,
zapmem(v, t->right); zapmem(v, t->right);
} }
/* satisfaction */ /* can't get here */
MDEBUG(("successful\n")); return REG_ASSERT;
freedfa(d);
freedfa(d2);
return REG_OKAY;
} }
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment