Commit 0fc1af17 authored by Tom Lane

Improve memory management in regex compiler.

The previous logic here created a separate pool of arcs for each
state, so that the out-arcs of each state were physically stored
within it.  Perhaps this choice was driven by trying to not include
a "from" pointer within each arc; but Spencer gave up on that idea
long ago, and it's hard to see what the value is now.  The approach
turns out to be fairly disastrous in terms of memory consumption,
though.  In the first place, NFAs built by this engine seem to have
about 4 arcs per state on average, with a majority having only one
or two out-arcs.  So pre-allocating 10 out-arcs for each state is
already cause for a factor of two or more bloat.  Worse, the NFA
optimization phase moves arcs around with abandon.  In a large NFA,
some of the states will have hundreds of out-arcs, so towards the
end of the optimization phase we have a significant number of states
whose arc pools have room for hundreds of arcs each, even though only
a few of those arcs are in use.  We have seen real-world regexes in
which this effect bloats the memory requirement by 25X or even more.

Hence, get rid of the per-state arc pools in favor of a single arc
pool for the whole NFA, with variable-sized allocation batches
instead of always asking for 10 at a time.  While we're at it,
let's batch the allocations of state structs too, to further reduce
the malloc traffic.

This incidentally allows moveouts() to be optimized in a similar
way to moveins(): when moving an arc to another state, it's now
valid to just re-link the same arc struct into a different outchain,
where before the code invariants required us to make a physically
new arc and then free the old one.

These changes reduce the regex compiler's typical space consumption
for average-size regexes by about a factor of two, and much more for
large or complicated regexes.  In a large test set of real-world
regexes, we formerly had half a dozen cases that failed with "regular
expression too complex" due to exceeding the REG_MAX_COMPILE_SPACE
limit (about 150MB); we would have had to raise that limit to
something close to 400MB to make them work with the old code.  Now,
none of those cases need more than 13MB to compile.  Furthermore,
the test set is about 10% faster overall due to less malloc traffic.

Discussion: https://postgr.es/m/168861.1614298592@sss.pgh.pa.us
parent b3a9e989
...@@ -57,9 +57,15 @@ newnfa(struct vars *v, ...@@ -57,9 +57,15 @@ newnfa(struct vars *v,
return NULL; return NULL;
} }
/* Make the NFA minimally valid, so freenfa() will behave sanely */
nfa->states = NULL; nfa->states = NULL;
nfa->slast = NULL; nfa->slast = NULL;
nfa->free = NULL; nfa->freestates = NULL;
nfa->freearcs = NULL;
nfa->lastsb = NULL;
nfa->lastab = NULL;
nfa->lastsbused = 0;
nfa->lastabused = 0;
nfa->nstates = 0; nfa->nstates = 0;
nfa->cm = cm; nfa->cm = cm;
nfa->v = v; nfa->v = v;
...@@ -68,9 +74,10 @@ newnfa(struct vars *v, ...@@ -68,9 +74,10 @@ newnfa(struct vars *v,
nfa->flags = 0; nfa->flags = 0;
nfa->minmatchall = nfa->maxmatchall = -1; nfa->minmatchall = nfa->maxmatchall = -1;
nfa->parent = parent; /* Precedes newfstate so parent is valid. */ nfa->parent = parent; /* Precedes newfstate so parent is valid. */
/* Create required infrastructure */
nfa->post = newfstate(nfa, '@'); /* number 0 */ nfa->post = newfstate(nfa, '@'); /* number 0 */
nfa->pre = newfstate(nfa, '>'); /* number 1 */ nfa->pre = newfstate(nfa, '>'); /* number 1 */
nfa->init = newstate(nfa); /* may become invalid later */ nfa->init = newstate(nfa); /* may become invalid later */
nfa->final = newstate(nfa); nfa->final = newstate(nfa);
if (ISERR()) if (ISERR())
...@@ -99,23 +106,27 @@ newnfa(struct vars *v, ...@@ -99,23 +106,27 @@ newnfa(struct vars *v,
static void static void
freenfa(struct nfa *nfa) freenfa(struct nfa *nfa)
{ {
struct state *s; struct statebatch *sb;
struct statebatch *sbnext;
struct arcbatch *ab;
struct arcbatch *abnext;
while ((s = nfa->states) != NULL) for (sb = nfa->lastsb; sb != NULL; sb = sbnext)
{ {
s->nins = s->nouts = 0; /* don't worry about arcs */ sbnext = sb->next;
freestate(nfa, s); nfa->v->spaceused -= STATEBATCHSIZE(sb->nstates);
FREE(sb);
} }
while ((s = nfa->free) != NULL) nfa->lastsb = NULL;
for (ab = nfa->lastab; ab != NULL; ab = abnext)
{ {
nfa->free = s->next; abnext = ab->next;
destroystate(nfa, s); nfa->v->spaceused -= ARCBATCHSIZE(ab->narcs);
FREE(ab);
} }
nfa->lastab = NULL;
nfa->slast = NULL;
nfa->nstates = -1; nfa->nstates = -1;
nfa->pre = NULL;
nfa->post = NULL;
FREE(nfa); FREE(nfa);
} }
...@@ -138,28 +149,43 @@ newstate(struct nfa *nfa) ...@@ -138,28 +149,43 @@ newstate(struct nfa *nfa)
return NULL; return NULL;
} }
if (nfa->free != NULL) /* first, recycle anything that's on the freelist */
if (nfa->freestates != NULL)
{
s = nfa->freestates;
nfa->freestates = s->next;
}
/* otherwise, is there anything left in the last statebatch? */
else if (nfa->lastsb != NULL && nfa->lastsbused < nfa->lastsb->nstates)
{ {
s = nfa->free; s = &nfa->lastsb->s[nfa->lastsbused++];
nfa->free = s->next;
} }
/* otherwise, need to allocate a new statebatch */
else else
{ {
struct statebatch *newSb;
size_t nstates;
if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
{ {
NERR(REG_ETOOBIG); NERR(REG_ETOOBIG);
return NULL; return NULL;
} }
s = (struct state *) MALLOC(sizeof(struct state)); nstates = (nfa->lastsb != NULL) ? nfa->lastsb->nstates * 2 : FIRSTSBSIZE;
if (s == NULL) if (nstates > MAXSBSIZE)
nstates = MAXSBSIZE;
newSb = (struct statebatch *) MALLOC(STATEBATCHSIZE(nstates));
if (newSb == NULL)
{ {
NERR(REG_ESPACE); NERR(REG_ESPACE);
return NULL; return NULL;
} }
nfa->v->spaceused += sizeof(struct state); nfa->v->spaceused += STATEBATCHSIZE(nstates);
s->oas.next = NULL; newSb->nstates = nstates;
s->free = NULL; newSb->next = nfa->lastsb;
s->noas = 0; nfa->lastsb = newSb;
nfa->lastsbused = 1;
s = &newSb->s[0];
} }
assert(nfa->nstates >= 0); assert(nfa->nstates >= 0);
...@@ -240,32 +266,8 @@ freestate(struct nfa *nfa, ...@@ -240,32 +266,8 @@ freestate(struct nfa *nfa,
nfa->states = s->next; nfa->states = s->next;
} }
s->prev = NULL; s->prev = NULL;
s->next = nfa->free; /* don't delete it, put it on the free list */ s->next = nfa->freestates; /* don't delete it, put it on the free list */
nfa->free = s; nfa->freestates = s;
}
/*
* destroystate - really get rid of an already-freed state
*/
static void
destroystate(struct nfa *nfa,
struct state *s)
{
struct arcbatch *ab;
struct arcbatch *abnext;
assert(s->no == FREESTATE);
for (ab = s->oas.next; ab != NULL; ab = abnext)
{
abnext = ab->next;
FREE(ab);
nfa->v->spaceused -= sizeof(struct arcbatch);
}
s->ins = NULL;
s->outs = NULL;
s->next = NULL;
FREE(s);
nfa->v->spaceused -= sizeof(struct state);
} }
/* /*
...@@ -334,8 +336,7 @@ createarc(struct nfa *nfa, ...@@ -334,8 +336,7 @@ createarc(struct nfa *nfa,
{ {
struct arc *a; struct arc *a;
/* the arc is physically allocated within its from-state */ a = allocarc(nfa);
a = allocarc(nfa, from);
if (NISERR()) if (NISERR())
return; return;
assert(a != NULL); assert(a != NULL);
...@@ -369,55 +370,52 @@ createarc(struct nfa *nfa, ...@@ -369,55 +370,52 @@ createarc(struct nfa *nfa,
} }
/* /*
* allocarc - allocate a new out-arc within a state * allocarc - allocate a new arc within an NFA
*/ */
static struct arc * /* NULL for failure */ static struct arc * /* NULL for failure */
allocarc(struct nfa *nfa, allocarc(struct nfa *nfa)
struct state *s)
{ {
struct arc *a; struct arc *a;
/* shortcut */ /* first, recycle anything that's on the freelist */
if (s->free == NULL && s->noas < ABSIZE) if (nfa->freearcs != NULL)
{ {
a = &s->oas.a[s->noas]; a = nfa->freearcs;
s->noas++; nfa->freearcs = a->freechain;
return a;
} }
/* otherwise, is there anything left in the last arcbatch? */
/* if none at hand, get more */ else if (nfa->lastab != NULL && nfa->lastabused < nfa->lastab->narcs)
if (s->free == NULL) {
a = &nfa->lastab->a[nfa->lastabused++];
}
/* otherwise, need to allocate a new arcbatch */
else
{ {
struct arcbatch *newAb; struct arcbatch *newAb;
int i; size_t narcs;
if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE) if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
{ {
NERR(REG_ETOOBIG); NERR(REG_ETOOBIG);
return NULL; return NULL;
} }
newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); narcs = (nfa->lastab != NULL) ? nfa->lastab->narcs * 2 : FIRSTABSIZE;
if (narcs > MAXABSIZE)
narcs = MAXABSIZE;
newAb = (struct arcbatch *) MALLOC(ARCBATCHSIZE(narcs));
if (newAb == NULL) if (newAb == NULL)
{ {
NERR(REG_ESPACE); NERR(REG_ESPACE);
return NULL; return NULL;
} }
nfa->v->spaceused += sizeof(struct arcbatch); nfa->v->spaceused += ARCBATCHSIZE(narcs);
newAb->next = s->oas.next; newAb->narcs = narcs;
s->oas.next = newAb; newAb->next = nfa->lastab;
nfa->lastab = newAb;
for (i = 0; i < ABSIZE; i++) nfa->lastabused = 1;
{ a = &newAb->a[0];
newAb->a[i].type = 0;
newAb->a[i].freechain = &newAb->a[i + 1];
}
newAb->a[ABSIZE - 1].freechain = NULL;
s->free = &newAb->a[0];
} }
assert(s->free != NULL);
a = s->free;
s->free = a->freechain;
return a; return a;
} }
...@@ -478,7 +476,7 @@ freearc(struct nfa *nfa, ...@@ -478,7 +476,7 @@ freearc(struct nfa *nfa,
} }
to->nins--; to->nins--;
/* clean up and place on from-state's free list */ /* clean up and place on NFA's free list */
victim->type = 0; victim->type = 0;
victim->from = NULL; /* precautions... */ victim->from = NULL; /* precautions... */
victim->to = NULL; victim->to = NULL;
...@@ -486,17 +484,58 @@ freearc(struct nfa *nfa, ...@@ -486,17 +484,58 @@ freearc(struct nfa *nfa,
victim->inchainRev = NULL; victim->inchainRev = NULL;
victim->outchain = NULL; victim->outchain = NULL;
victim->outchainRev = NULL; victim->outchainRev = NULL;
victim->freechain = from->free; victim->freechain = nfa->freearcs;
from->free = victim; nfa->freearcs = victim;
} }
/* /*
* changearctarget - flip an arc to have a different to state * changearcsource - flip an arc to have a different from state
* *
* Caller must have verified that there is no pre-existing duplicate arc. * Caller must have verified that there is no pre-existing duplicate arc.
*/
static void
changearcsource(struct arc *a, struct state *newfrom)
{
	struct state *src = a->from;	/* state the arc currently leaves */
	struct arc *before;				/* arc's neighbor toward the chain head */
	struct arc *after;				/* arc's neighbor toward the chain tail */

	assert(src != newfrom);
	assert(src != NULL);

	before = a->outchainRev;
	after = a->outchain;

	/* Unhook the arc from the out-chain of its current source state. */
	if (before != NULL)
	{
		/* interior or tail arc: bypass it via its predecessor */
		assert(before->outchain == a);
		before->outchain = after;
	}
	else
	{
		/* arc was the chain head: the state's head pointer moves on */
		assert(src->outs == a);
		src->outs = after;
	}
	if (after != NULL)
	{
		assert(after->outchainRev == a);
		after->outchainRev = before;
	}
	src->nouts--;

	/*
	 * Re-home the arc: it becomes the new head of newfrom's out-chain.  The
	 * in-chain links are untouched, since the arc's target does not change.
	 */
	a->from = newfrom;
	a->outchainRev = NULL;
	a->outchain = newfrom->outs;
	if (a->outchain != NULL)
		a->outchain->outchainRev = a;
	newfrom->outs = a;
	newfrom->nouts++;
}
/*
* changearctarget - flip an arc to have a different to state
* *
* Note that because we store arcs in their from state, we can't easily have * Caller must have verified that there is no pre-existing duplicate arc.
* a similar changearcsource function.
*/ */
static void static void
changearctarget(struct arc *a, struct state *newto) changearctarget(struct arc *a, struct state *newto)
...@@ -1009,6 +1048,8 @@ mergeins(struct nfa *nfa, ...@@ -1009,6 +1048,8 @@ mergeins(struct nfa *nfa,
/* /*
* moveouts - move all out arcs of a state to another state * moveouts - move all out arcs of a state to another state
*
* See comments for moveins()
*/ */
static void static void
moveouts(struct nfa *nfa, moveouts(struct nfa *nfa,
...@@ -1031,9 +1072,9 @@ moveouts(struct nfa *nfa, ...@@ -1031,9 +1072,9 @@ moveouts(struct nfa *nfa,
else else
{ {
/* /*
* With many arcs, use a sort-merge approach. Note that createarc() * With many arcs, use a sort-merge approach. Note changearcsource()
* will put new arcs onto the front of newState's chain, so it does * will put the arc onto the front of newState's chain, so it does not
* not break our walk through the sorted part of the chain. * break our walk through the sorted part of the chain.
*/ */
struct arc *oa; struct arc *oa;
struct arc *na; struct arc *na;
...@@ -1063,8 +1104,12 @@ moveouts(struct nfa *nfa, ...@@ -1063,8 +1104,12 @@ moveouts(struct nfa *nfa,
case -1: case -1:
/* newState does not have anything matching oa */ /* newState does not have anything matching oa */
oa = oa->outchain; oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to);
freearc(nfa, a); /*
* Rather than doing createarc+freearc, we can just unlink
* and relink the existing arc struct.
*/
changearcsource(a, newState);
break; break;
case 0: case 0:
/* match, advance in both lists */ /* match, advance in both lists */
...@@ -1087,8 +1132,7 @@ moveouts(struct nfa *nfa, ...@@ -1087,8 +1132,7 @@ moveouts(struct nfa *nfa,
struct arc *a = oa; struct arc *a = oa;
oa = oa->outchain; oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to); changearcsource(a, newState);
freearc(nfa, a);
} }
} }
...@@ -3413,7 +3457,6 @@ dumparc(struct arc *a, ...@@ -3413,7 +3457,6 @@ dumparc(struct arc *a,
FILE *f) FILE *f)
{ {
struct arc *aa; struct arc *aa;
struct arcbatch *ab;
fprintf(f, "\t"); fprintf(f, "\t");
switch (a->type) switch (a->type)
...@@ -3451,16 +3494,11 @@ dumparc(struct arc *a, ...@@ -3451,16 +3494,11 @@ dumparc(struct arc *a,
} }
if (a->from != s) if (a->from != s)
fprintf(f, "?%d?", a->from->no); fprintf(f, "?%d?", a->from->no);
for (ab = &a->from->oas; ab != NULL; ab = ab->next) for (aa = a->from->outs; aa != NULL; aa = aa->outchain)
{ if (aa == a)
for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
if (aa == a)
break; /* NOTE BREAK OUT */
if (aa < &ab->a[ABSIZE]) /* propagate break */
break; /* NOTE BREAK OUT */ break; /* NOTE BREAK OUT */
} if (aa == NULL)
if (ab == NULL) fprintf(f, "?!?"); /* missing from out-chain */
fprintf(f, "?!?"); /* not in allocated space */
fprintf(f, "->"); fprintf(f, "->");
if (a->to == NULL) if (a->to == NULL)
{ {
......
...@@ -127,11 +127,11 @@ static struct state *newstate(struct nfa *); ...@@ -127,11 +127,11 @@ static struct state *newstate(struct nfa *);
static struct state *newfstate(struct nfa *, int flag); static struct state *newfstate(struct nfa *, int flag);
static void dropstate(struct nfa *, struct state *); static void dropstate(struct nfa *, struct state *);
static void freestate(struct nfa *, struct state *); static void freestate(struct nfa *, struct state *);
static void destroystate(struct nfa *, struct state *);
static void newarc(struct nfa *, int, color, struct state *, struct state *); static void newarc(struct nfa *, int, color, struct state *, struct state *);
static void createarc(struct nfa *, int, color, struct state *, struct state *); static void createarc(struct nfa *, int, color, struct state *, struct state *);
static struct arc *allocarc(struct nfa *, struct state *); static struct arc *allocarc(struct nfa *);
static void freearc(struct nfa *, struct arc *); static void freearc(struct nfa *, struct arc *);
static void changearcsource(struct arc *, struct state *);
static void changearctarget(struct arc *, struct state *); static void changearctarget(struct arc *, struct state *);
static int hasnonemptyout(struct state *); static int hasnonemptyout(struct state *);
static struct arc *findarc(struct state *, int, color); static struct arc *findarc(struct state *, int, color);
......
...@@ -284,9 +284,6 @@ struct cvec ...@@ -284,9 +284,6 @@ struct cvec
/* /*
* definitions for NFA internal representation * definitions for NFA internal representation
*
* Having a "from" pointer within each arc may seem redundant, but it
* saves a lot of hassle.
*/ */
struct state; struct state;
...@@ -294,7 +291,7 @@ struct arc ...@@ -294,7 +291,7 @@ struct arc
{ {
int type; /* 0 if free, else an NFA arc type code */ int type; /* 0 if free, else an NFA arc type code */
color co; /* color the arc matches (possibly RAINBOW) */ color co; /* color the arc matches (possibly RAINBOW) */
struct state *from; /* where it's from (and contained within) */ struct state *from; /* where it's from */
struct state *to; /* where it's to */ struct state *to; /* where it's to */
struct arc *outchain; /* link in *from's outs chain or free chain */ struct arc *outchain; /* link in *from's outs chain or free chain */
struct arc *outchainRev; /* back-link in *from's outs chain */ struct arc *outchainRev; /* back-link in *from's outs chain */
...@@ -308,28 +305,41 @@ struct arc ...@@ -308,28 +305,41 @@ struct arc
struct arcbatch struct arcbatch
{ /* for bulk allocation of arcs */ { /* for bulk allocation of arcs */
struct arcbatch *next; struct arcbatch *next; /* chain link */
#define ABSIZE 10 size_t narcs; /* number of arcs allocated in this arcbatch */
struct arc a[ABSIZE]; struct arc a[FLEXIBLE_ARRAY_MEMBER];
}; };
#define ARCBATCHSIZE(n) ((n) * sizeof(struct arc) + offsetof(struct arcbatch, a))
/* first batch will have FIRSTABSIZE arcs; then double it until MAXABSIZE */
#define FIRSTABSIZE 64
#define MAXABSIZE 1024
struct state struct state
{ {
int no; int no; /* state number, zero and up; or FREESTATE */
#define FREESTATE (-1) #define FREESTATE (-1)
char flag; /* marks special states */ char flag; /* marks special states */
int nins; /* number of inarcs */ int nins; /* number of inarcs */
struct arc *ins; /* chain of inarcs */
int nouts; /* number of outarcs */ int nouts; /* number of outarcs */
struct arc *ins; /* chain of inarcs */
struct arc *outs; /* chain of outarcs */ struct arc *outs; /* chain of outarcs */
struct arc *free; /* chain of free arcs */
struct state *tmp; /* temporary for traversal algorithms */ struct state *tmp; /* temporary for traversal algorithms */
struct state *next; /* chain for traversing all */ struct state *next; /* chain for traversing all live states */
struct state *prev; /* back chain */ /* the "next" field is also used to chain free states together */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ struct state *prev; /* back-link in chain of all live states */
int noas; /* number of arcs used in first arcbatch */
}; };
/*
 * A statebatch is one malloc'd chunk holding an array of state structs.
 * Batches are chained through "next"; newstate() pushes each freshly
 * allocated batch onto the front of the NFA's lastsb chain, and freenfa()
 * walks that chain to release every batch.
 */
struct statebatch
{ /* for bulk allocation of states */
struct statebatch *next; /* chain link */
size_t nstates; /* number of states allocated in this batch */
/* the states themselves; array length is nstates (see STATEBATCHSIZE) */
struct state s[FLEXIBLE_ARRAY_MEMBER];
};
#define STATEBATCHSIZE(n) ((n) * sizeof(struct state) + offsetof(struct statebatch, s))
/* first batch will have FIRSTSBSIZE states; then double it until MAXSBSIZE */
#define FIRSTSBSIZE 32
#define MAXSBSIZE 1024
struct nfa struct nfa
{ {
struct state *pre; /* pre-initial state */ struct state *pre; /* pre-initial state */
...@@ -337,9 +347,14 @@ struct nfa ...@@ -337,9 +347,14 @@ struct nfa
struct state *final; /* final state */ struct state *final; /* final state */
struct state *post; /* post-final state */ struct state *post; /* post-final state */
int nstates; /* for numbering states */ int nstates; /* for numbering states */
struct state *states; /* state-chain header */ struct state *states; /* chain of live states */
struct state *slast; /* tail of the chain */ struct state *slast; /* tail of the chain */
struct state *free; /* free list */ struct state *freestates; /* chain of free states */
struct arc *freearcs; /* chain of free arcs */
struct statebatch *lastsb; /* chain of statebatches */
struct arcbatch *lastab; /* chain of arcbatches */
size_t lastsbused; /* number of states consumed from *lastsb */
size_t lastabused; /* number of arcs consumed from *lastab */
struct colormap *cm; /* the color map */ struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */ color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */
...@@ -387,7 +402,7 @@ struct cnfa ...@@ -387,7 +402,7 @@ struct cnfa
{ {
int nstates; /* number of states */ int nstates; /* number of states */
int ncolors; /* number of colors (max color in use + 1) */ int ncolors; /* number of colors (max color in use + 1) */
int flags; int flags; /* bitmask of the following flags: */
#define HASLACONS 01 /* uses lookaround constraints */ #define HASLACONS 01 /* uses lookaround constraints */
#define MATCHALL 02 /* matches all strings of a range of lengths */ #define MATCHALL 02 /* matches all strings of a range of lengths */
int pre; /* setup state number */ int pre; /* setup state number */
...@@ -422,10 +437,12 @@ struct cnfa ...@@ -422,10 +437,12 @@ struct cnfa
* transient data is generally not large enough to notice compared to those. * transient data is generally not large enough to notice compared to those.
* Note that we do not charge anything for the final output data structures * Note that we do not charge anything for the final output data structures
* (the compacted NFA and the colormap). * (the compacted NFA and the colormap).
* The scaling here is based on an empirical measurement that very large
* NFAs tend to have about 4 arcs/state.
*/ */
#ifndef REG_MAX_COMPILE_SPACE #ifndef REG_MAX_COMPILE_SPACE
#define REG_MAX_COMPILE_SPACE \ #define REG_MAX_COMPILE_SPACE \
(100000 * sizeof(struct state) + 100000 * sizeof(struct arcbatch)) (500000 * (sizeof(struct state) + 4 * sizeof(struct arc)))
#endif #endif
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment