Commit df1e965e authored by Tom Lane

Sync our regex code with upstream changes since last time we did this, which
was Tcl 8.4.8.  The main changes are to remove the never-fully-implemented
code for multi-character collating elements, and to const-ify some stuff a
bit more fully.  In combination with the recent security patch, this commit
brings us into line with Tcl 8.5.0.

Note that I didn't make any effort to duplicate a lot of cosmetic changes
that they made to bring their copy into line with their own style
guidelines, such as adding braces around single-line IF bodies.  Most of
those we either had done already (such as ANSI-fication of function headers)
or there is no point because pgindent would undo the change anyway.
parent 423abf4d
......@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.8 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
*
*
* Note that there are some incestuous relationships between this code and
......@@ -222,7 +222,6 @@ static color /* COLORLESS for error */
newcolor(struct colormap * cm)
{
struct colordesc *cd;
struct colordesc *new;
size_t n;
if (CISERR())
......@@ -245,24 +244,25 @@ newcolor(struct colormap * cm)
else
{
/* oops, must allocate more */
struct colordesc *newCd;
n = cm->ncds * 2;
if (cm->cd == cm->cdspace)
{
new = (struct colordesc *) MALLOC(n *
sizeof(struct colordesc));
if (new != NULL)
memcpy(VS(new), VS(cm->cdspace), cm->ncds *
newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
if (newCd != NULL)
memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
sizeof(struct colordesc));
}
else
new = (struct colordesc *) REALLOC(cm->cd,
n * sizeof(struct colordesc));
if (new == NULL)
newCd = (struct colordesc *)
REALLOC(cm->cd, n * sizeof(struct colordesc));
if (newCd == NULL)
{
CERR(REG_ESPACE);
return COLORLESS;
}
cm->cd = new;
cm->cd = newCd;
cm->ncds = n;
assert(cm->max < cm->ncds - 1);
cm->max++;
......@@ -634,21 +634,6 @@ uncolorchain(struct colormap * cm,
a->colorchainRev = NULL;
}
/*
* singleton - is this character in its own color?
*/
static int /* predicate */
singleton(struct colormap * cm,
chr c)
{
color co; /* color of c */
co = GETCOLOR(cm, c);
if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
return 1;
return 0;
}
/*
* rainbow - add arcs of all full colors (but one) between specified states
*/
......
......@@ -28,33 +28,31 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.5 2005/10/15 02:49:24 momjian Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
*
*/
/*
* Notes:
* Only (selected) functions in _this_ file should treat chr* as non-constant.
*/
/*
* newcvec - allocate a new cvec
*/
static struct cvec *
newcvec(int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */
int nmcces) /* ... and this many MCCEs */
int nranges) /* ... and this many ranges */
{
size_t n;
size_t nc;
struct cvec *cv;
size_t nc = (size_t) nchrs + (size_t) nranges * 2;
size_t n = sizeof(struct cvec) + nc * sizeof(chr);
struct cvec *cv = (struct cvec *) MALLOC(n);
nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
+ nc * sizeof(chr);
cv = (struct cvec *) MALLOC(n);
if (cv == NULL)
return NULL;
cv->chrspace = nchrs;
cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
cv->mccespace = nmcces;
cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
cv->chrs = (chr *) (((char *) cv) + sizeof(struct cvec));
cv->ranges = cv->chrs + nchrs;
cv->rangespace = nranges;
return clearcvec(cv);
}
......@@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */
static struct cvec *
clearcvec(struct cvec * cv)
{
int i;
assert(cv != NULL);
cv->nchrs = 0;
assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
cv->nmcces = 0;
cv->nmccechrs = 0;
cv->nranges = 0;
for (i = 0; i < cv->mccespace; i++)
cv->mcces[i] = NULL;
return cv;
}
......@@ -87,7 +77,6 @@ static void
addchr(struct cvec * cv, /* character vector */
chr c) /* character to add */
{
assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
cv->chrs[cv->nchrs++] = (chr) c;
}
......@@ -105,73 +94,21 @@ addrange(struct cvec * cv, /* character vector */
cv->nranges++;
}
/*
* addmcce - add an MCCE to a cvec
*/
static void
addmcce(struct cvec * cv, /* character vector */
chr *startp, /* beginning of text */
chr *endp) /* just past end of text */
{
int len;
int i;
chr *s;
chr *d;
if (startp == NULL && endp == NULL)
return;
len = endp - startp;
assert(len > 0);
assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
assert(cv->nmcces < cv->mccespace);
d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
cv->mcces[cv->nmcces++] = d;
for (s = startp, i = len; i > 0; s++, i--)
*d++ = *s;
*d++ = 0; /* endmarker */
assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
cv->nmccechrs += len + 1;
}
/*
* haschr - does a cvec contain this chr?
*/
static int /* predicate */
haschr(struct cvec * cv, /* character vector */
chr c) /* character to test for */
{
int i;
chr *p;
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{
if (*p == c)
return 1;
}
for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
{
if ((*p <= c) && (c <= *(p + 1)))
return 1;
}
return 0;
}
/*
* getcvec - get a cvec, remembering it as v->cv
*/
static struct cvec *
getcvec(struct vars * v, /* context */
int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */
int nmcces) /* ... and this many MCCEs */
int nranges) /* ... and this many ranges */
{
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace)
nranges <= v->cv->rangespace)
return clearcvec(v->cv);
if (v->cv != NULL)
freecvec(v->cv);
v->cv = newcvec(nchrs, nranges, nmcces);
v->cv = newcvec(nchrs, nranges);
if (v->cv == NULL)
ERR(REG_ESPACE);
......
......@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.7 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.8 2008/02/14 17:33:37 tgl Exp $
*
*/
......@@ -201,8 +201,8 @@ prefixes(struct vars * v)
*/
static void
lexnest(struct vars * v,
chr *beginp, /* start of interpolation */
chr *endp) /* one past end of interpolation */
const chr *beginp, /* start of interpolation */
const chr *endp) /* one past end of interpolation */
{
assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now;
......@@ -214,47 +214,47 @@ lexnest(struct vars * v,
/*
* string constants to interpolate as expansions of things like \d
*/
static chr backd[] = { /* \d */
static const chr backd[] = { /* \d */
CHR('['), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
static chr backD[] = { /* \D */
static const chr backD[] = { /* \D */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
static chr brbackd[] = { /* \d within brackets */
static const chr brbackd[] = { /* \d within brackets */
CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']')
};
static chr backs[] = { /* \s */
static const chr backs[] = { /* \s */
CHR('['), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
static chr backS[] = { /* \S */
static const chr backS[] = { /* \S */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
static chr brbacks[] = { /* \s within brackets */
static const chr brbacks[] = { /* \s within brackets */
CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']')
};
static chr backw[] = { /* \w */
static const chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr backW[] = { /* \W */
static const chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr brbackw[] = { /* \w within brackets */
static const chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_')
......@@ -722,7 +722,7 @@ lexescape(struct vars * v)
static chr esc[] = {
CHR('E'), CHR('S'), CHR('C')
};
chr *save;
const chr *save;
assert(v->cflags & REG_ADVF);
......@@ -1080,7 +1080,7 @@ brenext(struct vars * v,
static void
skip(struct vars * v)
{
chr *start = v->now;
const chr *start = v->now;
assert(v->cflags & REG_EXPANDED);
......@@ -1119,8 +1119,8 @@ newline(void)
*/
static chr
chrnamed(struct vars * v,
chr *startp, /* start of name */
chr *endp, /* just past end of name */
const chr *startp, /* start of name */
const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */
{
celt c;
......
......@@ -47,15 +47,15 @@
* permission to use and distribute the software in accordance with the
* terms specified in this license.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.8 2005/11/22 18:17:19 momjian Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
*/
/* ASCII character-name table */
static struct cname
static const struct cname
{
char *name;
char code;
const char *name;
const char code;
} cnames[] =
{
......@@ -423,46 +423,15 @@ pg_wc_tolower(pg_wchar c)
}
/*
* nmcces - how many distinct MCCEs are there?
*/
static int
nmcces(struct vars * v)
{
/*
* No multi-character collating elements defined at the moment.
*/
return 0;
}
/*
* nleaders - how many chrs can be first chrs of MCCEs?
*/
static int
nleaders(struct vars * v)
{
return 0;
}
/*
* allmcces - return a cvec with all the MCCEs of the locale
*/
static struct cvec *
allmcces(struct vars * v, /* context */
struct cvec * cv) /* this is supposed to have enough room */
{
return clearcvec(cv);
}
/*
* element - map collating-element name to celt
*/
static celt
element(struct vars * v, /* context */
chr *startp, /* points to start of name */
chr *endp) /* points just past end of name */
const chr *startp, /* points to start of name */
const chr *endp) /* points just past end of name */
{
struct cname *cn;
const struct cname *cn;
size_t len;
/* generic: one-chr names stand for themselves */
......@@ -513,7 +482,7 @@ range(struct vars * v, /* context */
if (!cases)
{ /* easy version */
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
NOERRN();
addrange(cv, a, b);
return cv;
......@@ -527,7 +496,7 @@ range(struct vars * v, /* context */
nchrs = (b - a + 1) * 2 + 4;
cv = getcvec(v, nchrs, 0, 0);
cv = getcvec(v, nchrs, 0);
NOERRN();
for (c = a; c <= b; c++)
......@@ -550,7 +519,6 @@ range(struct vars * v, /* context */
static int /* predicate */
before(celt x, celt y)
{
/* trivial because no MCCEs */
if (x < y)
return 1;
return 0;
......@@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */
/* crude fake equivalence class for testing */
if ((v->cflags & REG_FAKE) && c == 'x')
{
cv = getcvec(v, 4, 0, 0);
cv = getcvec(v, 4, 0);
addchr(cv, (chr) 'x');
addchr(cv, (chr) 'y');
if (cases)
......@@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */
/* otherwise, none */
if (cases)
return allcases(v, c);
cv = getcvec(v, 1, 0, 0);
cv = getcvec(v, 1, 0);
assert(cv != NULL);
addchr(cv, (chr) c);
return cv;
......@@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */
*/
static struct cvec *
cclass(struct vars * v, /* context */
chr *startp, /* where the name starts */
chr *endp, /* just past the end of the name */
const chr *startp, /* where the name starts */
const chr *endp, /* just past the end of the name */
int cases) /* case-independent? */
{
size_t len;
struct cvec *cv = NULL;
char **namePtr;
const char **namePtr;
int i,
index;
......@@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */
* The following arrays define the valid character class names.
*/
static char *classNames[] = {
static const char *classNames[] = {
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", NULL
};
......@@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */
switch ((enum classes) index)
{
case CC_PRINT:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALNUM:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALPHA:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ASCII:
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, 0, 0x7f);
break;
case CC_BLANK:
cv = getcvec(v, 2, 0, 0);
cv = getcvec(v, 2, 0);
addchr(cv, '\t');
addchr(cv, ' ');
break;
case CC_CNTRL:
cv = getcvec(v, 0, 2, 0);
cv = getcvec(v, 0, 2);
addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
cv = getcvec(v, 0, 1, 0);
cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, (chr) '0', (chr) '9');
break;
case CC_PUNCT:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_XDIGIT:
cv = getcvec(v, 0, 3, 0);
cv = getcvec(v, 0, 3);
if (cv)
{
addrange(cv, '0', '9');
......@@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_SPACE:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_LOWER:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_UPPER:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_GRAPH:
cv = getcvec(v, UCHAR_MAX, 0, 0);
cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
......@@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */
lc = pg_wc_tolower((chr) c);
uc = pg_wc_toupper((chr) c);
cv = getcvec(v, 2, 0, 0);
cv = getcvec(v, 2, 0);
addchr(cv, lc);
if (lc != uc)
addchr(cv, uc);
......
......@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.5 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
*
*
* One or two things that technically ought to be in here
......@@ -349,8 +349,6 @@ newarc(struct nfa * nfa,
if (COLORED(a) && nfa->parent == NULL)
colorchain(nfa->cm, a);
return;
}
/*
......@@ -361,8 +359,6 @@ allocarc(struct nfa * nfa,
struct state * s)
{
struct arc *a;
struct arcbatch *new;
int i;
/* shortcut */
if (s->free == NULL && s->noas < ABSIZE)
......@@ -375,22 +371,25 @@ allocarc(struct nfa * nfa,
/* if none at hand, get more */
if (s->free == NULL)
{
new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (new == NULL)
struct arcbatch *newAb;
int i;
newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (newAb == NULL)
{
NERR(REG_ESPACE);
return NULL;
}
new->next = s->oas.next;
s->oas.next = new;
newAb->next = s->oas.next;
s->oas.next = newAb;
for (i = 0; i < ABSIZE; i++)
{
new->a[i].type = 0;
new->a[i].freechain = &new->a[i + 1];
newAb->a[i].type = 0;
newAb->a[i].freechain = &newAb->a[i + 1];
}
new->a[ABSIZE - 1].freechain = NULL;
s->free = &new->a[0];
newAb->a[ABSIZE - 1].freechain = NULL;
s->free = &newAb->a[0];
}
assert(s->free != NULL);
......@@ -495,20 +494,20 @@ cparc(struct nfa * nfa,
*/
static void
moveins(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
while ((a = old->ins) != NULL)
while ((a = oldState->ins) != NULL)
{
cparc(nfa, a, a->from, new);
cparc(nfa, a, a->from, newState);
freearc(nfa, a);
}
assert(old->nins == 0);
assert(old->ins == NULL);
assert(oldState->nins == 0);
assert(oldState->ins == NULL);
}
/*
......@@ -516,15 +515,15 @@ moveins(struct nfa * nfa,
*/
static void
copyins(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
for (a = old->ins; a != NULL; a = a->inchain)
cparc(nfa, a, a->from, new);
for (a = oldState->ins; a != NULL; a = a->inchain)
cparc(nfa, a, a->from, newState);
}
/*
......@@ -532,16 +531,16 @@ copyins(struct nfa * nfa,
*/
static void
moveouts(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
while ((a = old->outs) != NULL)
while ((a = oldState->outs) != NULL)
{
cparc(nfa, a, new, a->to);
cparc(nfa, a, newState, a->to);
freearc(nfa, a);
}
}
......@@ -551,15 +550,15 @@ moveouts(struct nfa * nfa,
*/
static void
copyouts(struct nfa * nfa,
struct state * old,
struct state * new)
struct state * oldState,
struct state * newState)
{
struct arc *a;
assert(old != new);
assert(oldState != newState);
for (a = old->outs; a != NULL; a = a->outchain)
cparc(nfa, a, new, a->to);
for (a = oldState->outs; a != NULL; a = a->outchain)
cparc(nfa, a, newState, a->to);
}
/*
......
......@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.45 2007/10/06 16:05:54 tgl Exp $
* $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.46 2008/02/14 17:33:37 tgl Exp $
*
*/
......@@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int);
static void bracket(struct vars *, struct state *, struct state *);
static void cbracket(struct vars *, struct state *, struct state *);
static void brackpart(struct vars *, struct state *, struct state *);
static chr *scanplain(struct vars *);
static void leaders(struct vars *, struct cvec *);
static const chr *scanplain(struct vars *);
static void onechr(struct vars *, chr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
static celt nextleader(struct vars *, chr, chr);
static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
......@@ -74,12 +72,12 @@ static void rfree(regex_t *);
static void dump(regex_t *, FILE *);
static void dumpst(struct subre *, FILE *, int);
static void stdump(struct subre *, FILE *, int);
static char *stid(struct subre *, char *, size_t);
static const char *stid(struct subre *, char *, size_t);
#endif
/* === regc_lex.c === */
static void lexstart(struct vars *);
static void prefixes(struct vars *);
static void lexnest(struct vars *, chr *, chr *);
static void lexnest(struct vars *, const chr *, const chr *);
static void lexword(struct vars *);
static int next(struct vars *);
static int lexescape(struct vars *);
......@@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int);
static int brenext(struct vars *, chr);
static void skip(struct vars *);
static chr newline(void);
static chr chrnamed(struct vars *, chr *, chr *, chr);
static chr chrnamed(struct vars *, const chr *, const chr *, chr);
/* === regc_color.c === */
static void initcm(struct vars *, struct colormap *);
......@@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *);
static int singleton(struct colormap *, chr c);
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
......@@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
static struct cvec *newcvec(int, int, int);
static struct cvec *newcvec(int, int);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, chr);
static void addrange(struct cvec *, chr, chr);
static void addmcce(struct cvec *, chr *, chr *);
static int haschr(struct cvec *, chr);
static struct cvec *getcvec(struct vars *, int, int, int);
static struct cvec *getcvec(struct vars *, int, int);
static void freecvec(struct cvec *);
/* === regc_locale.c === */
......@@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c);
static int pg_wc_isspace(pg_wchar c);
static pg_wchar pg_wc_toupper(pg_wchar c);
static pg_wchar pg_wc_tolower(pg_wchar c);
static int nmcces(struct vars *);
static int nleaders(struct vars *);
static struct cvec *allmcces(struct vars *, struct cvec *);
static celt element(struct vars *, chr *, chr *);
static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
static struct cvec *eclass(struct vars *, celt, int);
static struct cvec *cclass(struct vars *, chr *, chr *, int);
static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t);
static int casecmp(const chr *, const chr *, size_t);
......@@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t);
struct vars
{
regex_t *re;
chr *now; /* scan pointer into string */
chr *stop; /* end of string */
chr *savenow; /* saved now and stop for "subroutine call" */
chr *savestop;
const chr *now; /* scan pointer into string */
const chr *stop; /* end of string */
const chr *savenow; /* saved now and stop for "subroutine call" */
const chr *savestop;
int err; /* error code (0 if none) */
int cflags; /* copy of compile flags */
int lasttype; /* type of previous token */
......@@ -230,10 +222,6 @@ struct vars
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
struct cvec *mcces; /* collating-element information */
#define ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c)))
struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
struct state *mccepend; /* in nfa, end of MCCE prototypes */
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
......@@ -275,9 +263,8 @@ struct vars
#define PREFER 'P' /* length preference */
/* is an arc colored, and hence on a color chain? */
#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \
(a)->type == BEHIND)
#define COLORED(a) \
((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
/* static function list */
......@@ -322,7 +309,7 @@ pg_regcomp(regex_t *re,
/* initial setup (after which freev() is callable) */
v->re = re;
v->now = (chr *) string;
v->now = string;
v->stop = v->now + len;
v->savenow = v->savestop = NULL;
v->err = 0;
......@@ -341,7 +328,6 @@ pg_regcomp(regex_t *re,
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
v->mcces = NULL;
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
......@@ -363,19 +349,9 @@ pg_regcomp(regex_t *re,
ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
CNOERR();
v->cv = newcvec(100, 20, 10);
v->cv = newcvec(100, 20);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
i = nmcces(v);
if (i > 0)
{
v->mcces = newcvec(nleaders(v), 0, i);
CNOERR();
v->mcces = allmcces(v, v->mcces);
leaders(v, v->mcces);
addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */
}
CNOERR();
/* parsing */
lexstart(v); /* also handles prefixes */
......@@ -525,8 +501,6 @@ freev(struct vars * v,
freecvec(v->cv);
if (v->cv2 != NULL)
freecvec(v->cv2);
if (v->mcces != NULL)
freecvec(v->mcces);
if (v->lacons != NULL)
freelacons(v->lacons, v->nlacons);
ERR(err); /* nop if err==0 */
......@@ -583,15 +557,14 @@ makesearch(struct vars * v,
for (b = s->ins; b != NULL; b = b->inchain)
if (b->from != pre)
break;
if (b != NULL)
{ /* must be split */
if (s->tmp == NULL)
{ /* if not already in the list */
/* (fixes bugs 505048, 230589, */
/* 840258, 504785) */
s->tmp = slist;
slist = s;
}
if (b != NULL && s->tmp == NULL)
{
/*
* Must be split if not already in the list (fixes bugs 505048,
* 230589, 840258, 504785).
*/
s->tmp = slist;
slist = s;
}
}
......@@ -1338,13 +1311,6 @@ cbracket(struct vars * v,
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
struct state *s;
struct arc *a; /* arc from lp */
struct arc *ba; /* arc from left, from bracket() */
struct arc *pa; /* MCCE-prototype arc */
color co;
chr *p;
int i;
NOERR();
bracket(v, left, right);
......@@ -1354,65 +1320,13 @@ cbracket(struct vars * v,
assert(lp->nouts == 0); /* all outarcs will be ours */
/* easy part of complementing */
/*
* Easy part of complementing, and all there is to do since the MCCE code
* was removed.
*/
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
if (v->mcces == NULL)
{ /* no MCCEs -- we're done */
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
return;
}
/* but complementing gets messy in the presence of MCCEs... */
NOTE(REG_ULOCALE);
for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
ba = findarc(left, PLAIN, co);
if (ba == NULL)
{
assert(a != NULL);
freearc(v->nfa, a);
}
else
assert(a == NULL);
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
if (ba == NULL)
{ /* easy case, need all of them */
cloneouts(v->nfa, pa->to, s, rp, PLAIN);
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
}
else
{ /* must be selective */
if (findarc(ba->to, '$', 1) == NULL)
{
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
s, rp);
}
for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
if (findarc(ba->to, PLAIN, pa->co) == NULL)
newarc(v->nfa, PLAIN, pa->co, s, rp);
if (s->nouts == 0) /* limit of selectivity: none */
dropstate(v->nfa, s); /* frees arc too */
}
NOERR();
}
delsub(v->nfa, left, right);
assert(left->nouts == 0);
freestate(v->nfa, left);
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
}
......@@ -1428,8 +1342,8 @@ brackpart(struct vars * v,
celt startc;
celt endc;
struct cvec *cv;
chr *startp;
chr *endp;
const chr *startp;
const chr *endp;
chr c[1];
/* parse something, get rid of special cases, take shortcuts */
......@@ -1442,8 +1356,8 @@ brackpart(struct vars * v,
case PLAIN:
c[0] = v->nextvalue;
NEXT();
/* shortcut for ordinary chr (not range, not MCCE leader) */
if (!SEE(RANGE) && !ISCELEADER(v, c[0]))
/* shortcut for ordinary chr (not range) */
if (!SEE(RANGE))
{
onechr(v, c[0], lp, rp);
return;
......@@ -1533,10 +1447,10 @@ brackpart(struct vars * v,
* Certain bits of trickery in lex.c know that this code does not try
* to look past the final bracket of the [. etc.
*/
static chr * /* just after end of sequence */
static const chr * /* just after end of sequence */
scanplain(struct vars * v)
{
chr *endp;
const chr *endp;
assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
NEXT();
......@@ -1554,52 +1468,6 @@ scanplain(struct vars * v)
return endp;
}
/*
* leaders - process a cvec of collating elements to also include leaders
* Also gives all characters involved their own colors, which is almost
* certainly necessary, and sets up little disconnected subNFA.
*/
static void
leaders(struct vars * v,
struct cvec * cv)
{
int mcce;
chr *p;
chr leader;
struct state *s;
struct arc *a;
v->mccepbegin = newstate(v->nfa);
v->mccepend = newstate(v->nfa);
NOERR();
for (mcce = 0; mcce < cv->nmcces; mcce++)
{
p = cv->mcces[mcce];
leader = *p;
if (!haschr(cv, leader))
{
addchr(cv, leader);
s = newstate(v->nfa);
newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
v->mccepbegin, s);
okcolors(v->nfa, v->cm);
}
else
{
a = findarc(v->mccepbegin, PLAIN,
GETCOLOR(v->cm, leader));
assert(a != NULL);
s = a->to;
assert(s != v->mccepend);
}
p++;
assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */
newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
okcolors(v->nfa, v->cm);
}
}
/*
* onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case.
......@@ -1622,7 +1490,6 @@ onechr(struct vars * v,
/*
* dovec - fill in arcs for each element of a cvec
* This one has to handle the messy cases, like MCCEs and MCCE leaders.
*/
static void
dovec(struct vars * v,
......@@ -1633,47 +1500,14 @@ dovec(struct vars * v,
chr ch,
from,
to;
celt ce;
chr *p;
const chr *p;
int i;
color co;
struct cvec *leads;
struct arc *a;
struct arc *pa; /* arc in prototype */
struct state *s;
struct state *ps; /* state in prototype */
/* need a place to store leaders, if any */
if (nmcces(v) > 0)
{
assert(v->mcces != NULL);
if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs)
{
if (v->cv2 != NULL)
free(v->cv2);
v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
NOERR();
leads = v->cv2;
}
else
leads = clearcvec(v->cv2);
}
else
leads = NULL;
/* first, get the ordinary characters out of the way */
/* ordinary characters */
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{
ch = *p;
if (!ISCELEADER(v, ch))
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
else
{
assert(singleton(v->cm, ch));
assert(leads != NULL);
if (!haschr(leads, ch))
addchr(leads, ch);
}
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
}
/* and the ranges */
......@@ -1681,103 +1515,9 @@ dovec(struct vars * v,
{
from = *p;
to = *(p + 1);
while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)
{
if (from < ce)
subrange(v, from, ce - 1, lp, rp);
assert(singleton(v->cm, ce));
assert(leads != NULL);
if (!haschr(leads, ce))
addchr(leads, ce);
from = ce + 1;
}
if (from <= to)
subrange(v, from, to, lp, rp);
}
if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
return;
/* deal with the MCCE leaders */
NOTE(REG_ULOCALE);
for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
ps = pa->to;
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
NOERR();
}
/* and the MCCEs */
for (i = 0; i < cv->nmcces; i++)
{
p = cv->mcces[i];
assert(singleton(v->cm, *p));
if (!singleton(v->cm, *p))
{
ERR(REG_ASSERT);
return;
}
ch = *p++;
co = GETCOLOR(v->cm, ch);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
assert(*p != 0); /* at least two chars */
assert(singleton(v->cm, *p));
ch = *p++;
co = GETCOLOR(v->cm, ch);
assert(*p == 0); /* and only two, for now */
newarc(v->nfa, PLAIN, co, s, rp);
NOERR();
}
}
/*
 * nextleader - find next MCCE leader within range
 *
 * Walks the leader characters stored in v->mcces and reports the smallest
 * one that lies in the closed interval [from, to].  The result is NOCELT
 * when v->mcces is NULL or when no stored leader falls inside the interval.
 */
static celt						/* NOCELT means none */
nextleader(struct vars * v,
		   chr from,
		   chr to)
{
	celt		lowest = NOCELT;	/* smallest in-range leader seen so far */
	chr		   *cur;
	int			remaining;

	/* no leader table at all: nothing can match */
	if (v->mcces == NULL)
		return lowest;

	cur = v->mcces->chrs;
	remaining = v->mcces->nchrs;
	while (remaining > 0)
	{
		chr			c = *cur;

		/* only leaders inside [from, to] are candidates */
		if (from <= c && c <= to)
		{
			/* first candidate, or a smaller one than we had */
			if (lowest == NOCELT || c < lowest)
				lowest = c;
		}

		cur++;
		remaining--;
	}

	return lowest;
}
/*
......@@ -1825,9 +1565,8 @@ subre(struct vars * v,
struct state * begin,
struct state * end)
{
struct subre *ret;
struct subre *ret = v->treefree;
ret = v->treefree;
if (ret != NULL)
v->treefree = ret->left;
else
......@@ -1906,14 +1645,13 @@ static void
optst(struct vars * v,
struct subre * t)
{
if (t == NULL)
return;
/* recurse through children */
if (t->left != NULL)
optst(v, t->left);
if (t->right != NULL)
optst(v, t->right);
/*
* DGP (2007-11-13): I assume it was the programmer's intent to eventually
* come back and add code to optimize subRE trees, but the routine coded
* just spends effort traversing the tree and doing nothing. We can do
* nothing with less effort.
*/
return;
}
/*
......@@ -2207,8 +1945,8 @@ stdump(struct subre * t,
{
fprintf(f, "\n");
dumpcnfa(&t->cnfa, f);
fprintf(f, "\n");
}
fprintf(f, "\n");
if (t->left != NULL)
stdump(t->left, f, nfapresent);
if (t->right != NULL)
......@@ -2218,7 +1956,7 @@ stdump(struct subre * t,
/*
* stid - identify a subtree node for dumping
*/
static char * /* points to buf or constant string */
static const char * /* points to buf or constant string */
stid(struct subre * t,
char *buf,
size_t bufsize)
......
......@@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.27 2003/11/29 19:51:55 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.28 2008/02/14 17:33:37 tgl Exp $
*
*/
......@@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***";
static struct rerr
{
int code;
char *name;
char *explain;
const char *name;
const char *explain;
} rerrs[] =
{
......@@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
size_t errbuf_size) /* available space in errbuf, can be 0 */
{
struct rerr *r;
char *msg;
const char *msg;
char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */
size_t len;
int icode;
......
......@@ -25,7 +25,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.6 2007/10/06 16:01:51 tgl Exp $
* $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/
/* headers if any */
......@@ -47,9 +47,9 @@
/* internal character type and related */
typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */
typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
typedef int celt; /* type to hold chr, or NOCELT */
#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
#define NOCELT (-1) /* celt value which is not valid chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */
......
......@@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.6 2008/01/03 20:47:55 tgl Exp $
* $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/
......@@ -181,7 +181,7 @@ union tree
#define tcolor colors.ccolor
#define tptr ptrs.pptr
/* internal per-color structure for the color machinery */
/* internal per-color descriptor structure for the color machinery */
struct colordesc
{
uchr nchrs; /* number of chars of this color */
......@@ -228,11 +228,11 @@ struct colormap
#endif
/*
* Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble.
*/
/* Representation of a set of characters. */
struct cvec
{
int nchrs; /* number of chrs */
......@@ -241,17 +241,9 @@ struct cvec
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
int nmcces; /* number of MCCEs */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
/* both batches of chrs are on the end */
};
/* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */
/*
* definitions for NFA internal representation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment