Commit df1e965e authored by Tom Lane

Sync our regex code with upstream changes since last time we did this, which
was Tcl 8.4.8.  The main changes are to remove the never-fully-implemented
code for multi-character collating elements, and to const-ify some stuff a
bit more fully.  In combination with the recent security patch, this commit
brings us into line with Tcl 8.5.0.

Note that I didn't make any effort to duplicate a lot of cosmetic changes
that they made to bring their copy into line with their own style
guidelines, such as adding braces around single-line IF bodies.  Most of
those we either had done already (such as ANSI-fication of function headers)
or there is no point because pgindent would undo the change anyway.
parent 423abf4d
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.8 2008/01/03 20:47:55 tgl Exp $ * $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
* *
* *
* Note that there are some incestuous relationships between this code and * Note that there are some incestuous relationships between this code and
...@@ -222,7 +222,6 @@ static color /* COLORLESS for error */ ...@@ -222,7 +222,6 @@ static color /* COLORLESS for error */
newcolor(struct colormap * cm) newcolor(struct colormap * cm)
{ {
struct colordesc *cd; struct colordesc *cd;
struct colordesc *new;
size_t n; size_t n;
if (CISERR()) if (CISERR())
...@@ -245,24 +244,25 @@ newcolor(struct colormap * cm) ...@@ -245,24 +244,25 @@ newcolor(struct colormap * cm)
else else
{ {
/* oops, must allocate more */ /* oops, must allocate more */
struct colordesc *newCd;
n = cm->ncds * 2; n = cm->ncds * 2;
if (cm->cd == cm->cdspace) if (cm->cd == cm->cdspace)
{ {
new = (struct colordesc *) MALLOC(n * newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
sizeof(struct colordesc)); if (newCd != NULL)
if (new != NULL) memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
memcpy(VS(new), VS(cm->cdspace), cm->ncds *
sizeof(struct colordesc)); sizeof(struct colordesc));
} }
else else
new = (struct colordesc *) REALLOC(cm->cd, newCd = (struct colordesc *)
n * sizeof(struct colordesc)); REALLOC(cm->cd, n * sizeof(struct colordesc));
if (new == NULL) if (newCd == NULL)
{ {
CERR(REG_ESPACE); CERR(REG_ESPACE);
return COLORLESS; return COLORLESS;
} }
cm->cd = new; cm->cd = newCd;
cm->ncds = n; cm->ncds = n;
assert(cm->max < cm->ncds - 1); assert(cm->max < cm->ncds - 1);
cm->max++; cm->max++;
...@@ -634,21 +634,6 @@ uncolorchain(struct colormap * cm, ...@@ -634,21 +634,6 @@ uncolorchain(struct colormap * cm,
a->colorchainRev = NULL; a->colorchainRev = NULL;
} }
/*
 * singleton - does this character have a color all to itself?
 *
 * Looks up the color of c and returns 1 only when that color's descriptor
 * records exactly one chr (nchrs == 1) and its sub field is NOSUB;
 * returns 0 in every other case.
 */
static int						/* predicate */
singleton(struct colormap * cm,
		  chr c)
{
	const color own = GETCOLOR(cm, c);	/* color currently assigned to c */

	if (cm->cd[own].nchrs != 1)
		return 0;
	if (cm->cd[own].sub != NOSUB)
		return 0;
	return 1;
}
/* /*
* rainbow - add arcs of all full colors (but one) between specified states * rainbow - add arcs of all full colors (but one) between specified states
*/ */
......
...@@ -28,33 +28,31 @@ ...@@ -28,33 +28,31 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.5 2005/10/15 02:49:24 momjian Exp $ * $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
* *
*/ */
/*
* Notes:
* Only (selected) functions in _this_ file should treat chr* as non-constant.
*/
/* /*
* newcvec - allocate a new cvec * newcvec - allocate a new cvec
*/ */
static struct cvec * static struct cvec *
newcvec(int nchrs, /* to hold this many chrs... */ newcvec(int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */ int nranges) /* ... and this many ranges */
int nmcces) /* ... and this many MCCEs */
{ {
size_t n; size_t nc = (size_t) nchrs + (size_t) nranges * 2;
size_t nc; size_t n = sizeof(struct cvec) + nc * sizeof(chr);
struct cvec *cv; struct cvec *cv = (struct cvec *) MALLOC(n);
nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
+ nc * sizeof(chr);
cv = (struct cvec *) MALLOC(n);
if (cv == NULL) if (cv == NULL)
return NULL; return NULL;
cv->chrspace = nchrs; cv->chrspace = nchrs;
cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ cv->chrs = (chr *) (((char *) cv) + sizeof(struct cvec));
cv->mccespace = nmcces; cv->ranges = cv->chrs + nchrs;
cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
cv->rangespace = nranges; cv->rangespace = nranges;
return clearcvec(cv); return clearcvec(cv);
} }
...@@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */ ...@@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */
static struct cvec * static struct cvec *
clearcvec(struct cvec * cv) clearcvec(struct cvec * cv)
{ {
int i;
assert(cv != NULL); assert(cv != NULL);
cv->nchrs = 0; cv->nchrs = 0;
assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
cv->nmcces = 0;
cv->nmccechrs = 0;
cv->nranges = 0; cv->nranges = 0;
for (i = 0; i < cv->mccespace; i++)
cv->mcces[i] = NULL;
return cv; return cv;
} }
...@@ -87,7 +77,6 @@ static void ...@@ -87,7 +77,6 @@ static void
addchr(struct cvec * cv, /* character vector */ addchr(struct cvec * cv, /* character vector */
chr c) /* character to add */ chr c) /* character to add */
{ {
assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
cv->chrs[cv->nchrs++] = (chr) c; cv->chrs[cv->nchrs++] = (chr) c;
} }
...@@ -105,73 +94,21 @@ addrange(struct cvec * cv, /* character vector */ ...@@ -105,73 +94,21 @@ addrange(struct cvec * cv, /* character vector */
cv->nranges++; cv->nranges++;
} }
/*
 * addmcce - append one MCCE to a cvec
 *
 * The text [startp, endp) is copied, followed by a zero endmarker, into
 * the top of the cvec's chr space, directly below any MCCE text stored
 * earlier, and a pointer to the copy is recorded in cv->mcces[].
 * Passing NULL for both startp and endp is a no-op.
 */
static void
addmcce(struct cvec * cv,		/* character vector */
		chr *startp,			/* beginning of text */
		chr *endp)				/* just past end of text */
{
	int			nchr;			/* number of chrs in the MCCE text */
	int			k;
	chr		   *slot;			/* where the copy starts */

	/* a pair of NULLs means there is nothing to add */
	if (startp == NULL && endp == NULL)
		return;

	nchr = endp - startp;
	assert(nchr > 0);
	assert(cv->nchrs + nchr < cv->chrspace - cv->nmccechrs);
	assert(cv->nmcces < cv->mccespace);

	/* claim nchr + 1 slots just below the MCCE text already present */
	slot = &cv->chrs[cv->chrspace - cv->nmccechrs - nchr - 1];
	cv->mcces[cv->nmcces] = slot;
	cv->nmcces++;

	for (k = 0; k < nchr; k++)
		slot[k] = startp[k];
	slot[nchr] = 0;				/* endmarker */

	assert(&slot[nchr + 1] == &cv->chrs[cv->chrspace - cv->nmccechrs]);
	cv->nmccechrs += nchr + 1;
}
/*
 * haschr - test whether a cvec contains a given chr
 *
 * The individually listed chrs are checked first, then the ranges, which
 * are stored as consecutive (low, high) pairs and matched with both ends
 * inclusive.  Returns 1 on the first hit, 0 if nothing matches.
 */
static int						/* predicate */
haschr(struct cvec * cv,		/* character vector */
	   chr c)					/* character to test for */
{
	int			k;

	/* single characters */
	for (k = 0; k < cv->nchrs; k++)
	{
		if (cv->chrs[k] == c)
			return 1;
	}

	/* ranges: entry k occupies ranges[2k] and ranges[2k + 1] */
	for (k = 0; k < cv->nranges; k++)
	{
		chr		   *pair = cv->ranges + 2 * k;

		if (pair[0] <= c && c <= pair[1])
			return 1;
	}

	return 0;
}
/* /*
* getcvec - get a cvec, remembering it as v->cv * getcvec - get a cvec, remembering it as v->cv
*/ */
static struct cvec * static struct cvec *
getcvec(struct vars * v, /* context */ getcvec(struct vars * v, /* context */
int nchrs, /* to hold this many chrs... */ int nchrs, /* to hold this many chrs... */
int nranges, /* ... and this many ranges... */ int nranges) /* ... and this many ranges */
int nmcces) /* ... and this many MCCEs */
{ {
if (v->cv != NULL && nchrs <= v->cv->chrspace && if (v->cv != NULL && nchrs <= v->cv->chrspace &&
nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) nranges <= v->cv->rangespace)
return clearcvec(v->cv); return clearcvec(v->cv);
if (v->cv != NULL) if (v->cv != NULL)
freecvec(v->cv); freecvec(v->cv);
v->cv = newcvec(nchrs, nranges, nmcces); v->cv = newcvec(nchrs, nranges);
if (v->cv == NULL) if (v->cv == NULL)
ERR(REG_ESPACE); ERR(REG_ESPACE);
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.7 2008/01/03 20:47:55 tgl Exp $ * $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.8 2008/02/14 17:33:37 tgl Exp $
* *
*/ */
...@@ -201,8 +201,8 @@ prefixes(struct vars * v) ...@@ -201,8 +201,8 @@ prefixes(struct vars * v)
*/ */
static void static void
lexnest(struct vars * v, lexnest(struct vars * v,
chr *beginp, /* start of interpolation */ const chr *beginp, /* start of interpolation */
chr *endp) /* one past end of interpolation */ const chr *endp) /* one past end of interpolation */
{ {
assert(v->savenow == NULL); /* only one level of nesting */ assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now; v->savenow = v->now;
...@@ -214,47 +214,47 @@ lexnest(struct vars * v, ...@@ -214,47 +214,47 @@ lexnest(struct vars * v,
/* /*
* string constants to interpolate as expansions of things like \d * string constants to interpolate as expansions of things like \d
*/ */
static chr backd[] = { /* \d */ static const chr backd[] = { /* \d */
CHR('['), CHR('['), CHR(':'), CHR('['), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']') CHR(':'), CHR(']'), CHR(']')
}; };
static chr backD[] = { /* \D */ static const chr backD[] = { /* \D */
CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']') CHR(':'), CHR(']'), CHR(']')
}; };
static chr brbackd[] = { /* \d within brackets */ static const chr brbackd[] = { /* \d within brackets */
CHR('['), CHR(':'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']') CHR(':'), CHR(']')
}; };
static chr backs[] = { /* \s */ static const chr backs[] = { /* \s */
CHR('['), CHR('['), CHR(':'), CHR('['), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']') CHR(':'), CHR(']'), CHR(']')
}; };
static chr backS[] = { /* \S */ static const chr backS[] = { /* \S */
CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']') CHR(':'), CHR(']'), CHR(']')
}; };
static chr brbacks[] = { /* \s within brackets */ static const chr brbacks[] = { /* \s within brackets */
CHR('['), CHR(':'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']') CHR(':'), CHR(']')
}; };
static chr backw[] = { /* \w */ static const chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'), CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']') CHR(':'), CHR(']'), CHR('_'), CHR(']')
}; };
static chr backW[] = { /* \W */ static const chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']') CHR(':'), CHR(']'), CHR('_'), CHR(']')
}; };
static chr brbackw[] = { /* \w within brackets */ static const chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_') CHR(':'), CHR(']'), CHR('_')
...@@ -722,7 +722,7 @@ lexescape(struct vars * v) ...@@ -722,7 +722,7 @@ lexescape(struct vars * v)
static chr esc[] = { static chr esc[] = {
CHR('E'), CHR('S'), CHR('C') CHR('E'), CHR('S'), CHR('C')
}; };
chr *save; const chr *save;
assert(v->cflags & REG_ADVF); assert(v->cflags & REG_ADVF);
...@@ -1080,7 +1080,7 @@ brenext(struct vars * v, ...@@ -1080,7 +1080,7 @@ brenext(struct vars * v,
static void static void
skip(struct vars * v) skip(struct vars * v)
{ {
chr *start = v->now; const chr *start = v->now;
assert(v->cflags & REG_EXPANDED); assert(v->cflags & REG_EXPANDED);
...@@ -1119,8 +1119,8 @@ newline(void) ...@@ -1119,8 +1119,8 @@ newline(void)
*/ */
static chr static chr
chrnamed(struct vars * v, chrnamed(struct vars * v,
chr *startp, /* start of name */ const chr *startp, /* start of name */
chr *endp, /* just past end of name */ const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */ chr lastresort) /* what to return if name lookup fails */
{ {
celt c; celt c;
......
...@@ -47,15 +47,15 @@ ...@@ -47,15 +47,15 @@
* permission to use and distribute the software in accordance with the * permission to use and distribute the software in accordance with the
* terms specified in this license. * terms specified in this license.
* *
* $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.8 2005/11/22 18:17:19 momjian Exp $ * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $
*/ */
/* ASCII character-name table */ /* ASCII character-name table */
static struct cname static const struct cname
{ {
char *name; const char *name;
char code; const char code;
} cnames[] = } cnames[] =
{ {
...@@ -423,46 +423,15 @@ pg_wc_tolower(pg_wchar c) ...@@ -423,46 +423,15 @@ pg_wc_tolower(pg_wchar c)
} }
/*
 * nmcces - report the number of distinct MCCEs
 *
 * No multi-character collating elements are defined at the moment, so
 * the answer is always zero; the context argument is not consulted.
 */
static int
nmcces(struct vars * v)
{
	return 0;
}
/*
 * nleaders - report how many chrs can start an MCCE
 *
 * Always returns zero; the context argument is not consulted.
 */
static int
nleaders(struct vars * v)
{
	return 0;
}
/*
 * allmcces - produce a cvec holding all the MCCEs of the locale
 *
 * Nothing is ever added: the supplied cvec is simply passed through
 * clearcvec() and that call's result is handed back to the caller.
 */
static struct cvec *
allmcces(struct vars * v,		/* context */
		 struct cvec * cv)		/* this is supposed to have enough room */
{
	struct cvec *emptied = clearcvec(cv);

	return emptied;
}
/* /*
* element - map collating-element name to celt * element - map collating-element name to celt
*/ */
static celt static celt
element(struct vars * v, /* context */ element(struct vars * v, /* context */
chr *startp, /* points to start of name */ const chr *startp, /* points to start of name */
chr *endp) /* points just past end of name */ const chr *endp) /* points just past end of name */
{ {
struct cname *cn; const struct cname *cn;
size_t len; size_t len;
/* generic: one-chr names stand for themselves */ /* generic: one-chr names stand for themselves */
...@@ -513,7 +482,7 @@ range(struct vars * v, /* context */ ...@@ -513,7 +482,7 @@ range(struct vars * v, /* context */
if (!cases) if (!cases)
{ /* easy version */ { /* easy version */
cv = getcvec(v, 0, 1, 0); cv = getcvec(v, 0, 1);
NOERRN(); NOERRN();
addrange(cv, a, b); addrange(cv, a, b);
return cv; return cv;
...@@ -527,7 +496,7 @@ range(struct vars * v, /* context */ ...@@ -527,7 +496,7 @@ range(struct vars * v, /* context */
nchrs = (b - a + 1) * 2 + 4; nchrs = (b - a + 1) * 2 + 4;
cv = getcvec(v, nchrs, 0, 0); cv = getcvec(v, nchrs, 0);
NOERRN(); NOERRN();
for (c = a; c <= b; c++) for (c = a; c <= b; c++)
...@@ -550,7 +519,6 @@ range(struct vars * v, /* context */ ...@@ -550,7 +519,6 @@ range(struct vars * v, /* context */
static int /* predicate */ static int /* predicate */
before(celt x, celt y) before(celt x, celt y)
{ {
/* trivial because no MCCEs */
if (x < y) if (x < y)
return 1; return 1;
return 0; return 0;
...@@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */ ...@@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */
/* crude fake equivalence class for testing */ /* crude fake equivalence class for testing */
if ((v->cflags & REG_FAKE) && c == 'x') if ((v->cflags & REG_FAKE) && c == 'x')
{ {
cv = getcvec(v, 4, 0, 0); cv = getcvec(v, 4, 0);
addchr(cv, (chr) 'x'); addchr(cv, (chr) 'x');
addchr(cv, (chr) 'y'); addchr(cv, (chr) 'y');
if (cases) if (cases)
...@@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */ ...@@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */
/* otherwise, none */ /* otherwise, none */
if (cases) if (cases)
return allcases(v, c); return allcases(v, c);
cv = getcvec(v, 1, 0, 0); cv = getcvec(v, 1, 0);
assert(cv != NULL); assert(cv != NULL);
addchr(cv, (chr) c); addchr(cv, (chr) c);
return cv; return cv;
...@@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */ ...@@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */
*/ */
static struct cvec * static struct cvec *
cclass(struct vars * v, /* context */ cclass(struct vars * v, /* context */
chr *startp, /* where the name starts */ const chr *startp, /* where the name starts */
chr *endp, /* just past the end of the name */ const chr *endp, /* just past the end of the name */
int cases) /* case-independent? */ int cases) /* case-independent? */
{ {
size_t len; size_t len;
struct cvec *cv = NULL; struct cvec *cv = NULL;
char **namePtr; const char **namePtr;
int i, int i,
index; index;
...@@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */ ...@@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */
* The following arrays define the valid character class names. * The following arrays define the valid character class names.
*/ */
static char *classNames[] = { static const char *classNames[] = {
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", NULL "lower", "print", "punct", "space", "upper", "xdigit", NULL
}; };
...@@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */ ...@@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */
switch ((enum classes) index) switch ((enum classes) index)
{ {
case CC_PRINT: case CC_PRINT:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */ ...@@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_ALNUM: case CC_ALNUM:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */ ...@@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_ALPHA: case CC_ALPHA:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */ ...@@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_ASCII: case CC_ASCII:
cv = getcvec(v, 0, 1, 0); cv = getcvec(v, 0, 1);
if (cv) if (cv)
addrange(cv, 0, 0x7f); addrange(cv, 0, 0x7f);
break; break;
case CC_BLANK: case CC_BLANK:
cv = getcvec(v, 2, 0, 0); cv = getcvec(v, 2, 0);
addchr(cv, '\t'); addchr(cv, '\t');
addchr(cv, ' '); addchr(cv, ' ');
break; break;
case CC_CNTRL: case CC_CNTRL:
cv = getcvec(v, 0, 2, 0); cv = getcvec(v, 0, 2);
addrange(cv, 0x0, 0x1f); addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f); addrange(cv, 0x7f, 0x9f);
break; break;
case CC_DIGIT: case CC_DIGIT:
cv = getcvec(v, 0, 1, 0); cv = getcvec(v, 0, 1);
if (cv) if (cv)
addrange(cv, (chr) '0', (chr) '9'); addrange(cv, (chr) '0', (chr) '9');
break; break;
case CC_PUNCT: case CC_PUNCT:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */ ...@@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_XDIGIT: case CC_XDIGIT:
cv = getcvec(v, 0, 3, 0); cv = getcvec(v, 0, 3);
if (cv) if (cv)
{ {
addrange(cv, '0', '9'); addrange(cv, '0', '9');
...@@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */ ...@@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_SPACE: case CC_SPACE:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */ ...@@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_LOWER: case CC_LOWER:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */ ...@@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_UPPER: case CC_UPPER:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */ ...@@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */
} }
break; break;
case CC_GRAPH: case CC_GRAPH:
cv = getcvec(v, UCHAR_MAX, 0, 0); cv = getcvec(v, UCHAR_MAX, 0);
if (cv) if (cv)
{ {
for (i = 0; i <= UCHAR_MAX; i++) for (i = 0; i <= UCHAR_MAX; i++)
...@@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */ ...@@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */
lc = pg_wc_tolower((chr) c); lc = pg_wc_tolower((chr) c);
uc = pg_wc_toupper((chr) c); uc = pg_wc_toupper((chr) c);
cv = getcvec(v, 2, 0, 0); cv = getcvec(v, 2, 0);
addchr(cv, lc); addchr(cv, lc);
if (lc != uc) if (lc != uc)
addchr(cv, uc); addchr(cv, uc);
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.5 2008/01/03 20:47:55 tgl Exp $ * $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.6 2008/02/14 17:33:37 tgl Exp $
* *
* *
* One or two things that technically ought to be in here * One or two things that technically ought to be in here
...@@ -349,8 +349,6 @@ newarc(struct nfa * nfa, ...@@ -349,8 +349,6 @@ newarc(struct nfa * nfa,
if (COLORED(a) && nfa->parent == NULL) if (COLORED(a) && nfa->parent == NULL)
colorchain(nfa->cm, a); colorchain(nfa->cm, a);
return;
} }
/* /*
...@@ -361,8 +359,6 @@ allocarc(struct nfa * nfa, ...@@ -361,8 +359,6 @@ allocarc(struct nfa * nfa,
struct state * s) struct state * s)
{ {
struct arc *a; struct arc *a;
struct arcbatch *new;
int i;
/* shortcut */ /* shortcut */
if (s->free == NULL && s->noas < ABSIZE) if (s->free == NULL && s->noas < ABSIZE)
...@@ -375,22 +371,25 @@ allocarc(struct nfa * nfa, ...@@ -375,22 +371,25 @@ allocarc(struct nfa * nfa,
/* if none at hand, get more */ /* if none at hand, get more */
if (s->free == NULL) if (s->free == NULL)
{ {
new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); struct arcbatch *newAb;
if (new == NULL) int i;
newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
if (newAb == NULL)
{ {
NERR(REG_ESPACE); NERR(REG_ESPACE);
return NULL; return NULL;
} }
new->next = s->oas.next; newAb->next = s->oas.next;
s->oas.next = new; s->oas.next = newAb;
for (i = 0; i < ABSIZE; i++) for (i = 0; i < ABSIZE; i++)
{ {
new->a[i].type = 0; newAb->a[i].type = 0;
new->a[i].freechain = &new->a[i + 1]; newAb->a[i].freechain = &newAb->a[i + 1];
} }
new->a[ABSIZE - 1].freechain = NULL; newAb->a[ABSIZE - 1].freechain = NULL;
s->free = &new->a[0]; s->free = &newAb->a[0];
} }
assert(s->free != NULL); assert(s->free != NULL);
...@@ -495,20 +494,20 @@ cparc(struct nfa * nfa, ...@@ -495,20 +494,20 @@ cparc(struct nfa * nfa,
*/ */
static void static void
moveins(struct nfa * nfa, moveins(struct nfa * nfa,
struct state * old, struct state * oldState,
struct state * new) struct state * newState)
{ {
struct arc *a; struct arc *a;
assert(old != new); assert(oldState != newState);
while ((a = old->ins) != NULL) while ((a = oldState->ins) != NULL)
{ {
cparc(nfa, a, a->from, new); cparc(nfa, a, a->from, newState);
freearc(nfa, a); freearc(nfa, a);
} }
assert(old->nins == 0); assert(oldState->nins == 0);
assert(old->ins == NULL); assert(oldState->ins == NULL);
} }
/* /*
...@@ -516,15 +515,15 @@ moveins(struct nfa * nfa, ...@@ -516,15 +515,15 @@ moveins(struct nfa * nfa,
*/ */
static void static void
copyins(struct nfa * nfa, copyins(struct nfa * nfa,
struct state * old, struct state * oldState,
struct state * new) struct state * newState)
{ {
struct arc *a; struct arc *a;
assert(old != new); assert(oldState != newState);
for (a = old->ins; a != NULL; a = a->inchain) for (a = oldState->ins; a != NULL; a = a->inchain)
cparc(nfa, a, a->from, new); cparc(nfa, a, a->from, newState);
} }
/* /*
...@@ -532,16 +531,16 @@ copyins(struct nfa * nfa, ...@@ -532,16 +531,16 @@ copyins(struct nfa * nfa,
*/ */
static void static void
moveouts(struct nfa * nfa, moveouts(struct nfa * nfa,
struct state * old, struct state * oldState,
struct state * new) struct state * newState)
{ {
struct arc *a; struct arc *a;
assert(old != new); assert(oldState != newState);
while ((a = old->outs) != NULL) while ((a = oldState->outs) != NULL)
{ {
cparc(nfa, a, new, a->to); cparc(nfa, a, newState, a->to);
freearc(nfa, a); freearc(nfa, a);
} }
} }
...@@ -551,15 +550,15 @@ moveouts(struct nfa * nfa, ...@@ -551,15 +550,15 @@ moveouts(struct nfa * nfa,
*/ */
static void static void
copyouts(struct nfa * nfa, copyouts(struct nfa * nfa,
struct state * old, struct state * oldState,
struct state * new) struct state * newState)
{ {
struct arc *a; struct arc *a;
assert(old != new); assert(oldState != newState);
for (a = old->outs; a != NULL; a = a->outchain) for (a = oldState->outs; a != NULL; a = a->outchain)
cparc(nfa, a, new, a->to); cparc(nfa, a, newState, a->to);
} }
/* /*
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.45 2007/10/06 16:05:54 tgl Exp $ * $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.46 2008/02/14 17:33:37 tgl Exp $
* *
*/ */
...@@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int); ...@@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int);
static void bracket(struct vars *, struct state *, struct state *); static void bracket(struct vars *, struct state *, struct state *);
static void cbracket(struct vars *, struct state *, struct state *); static void cbracket(struct vars *, struct state *, struct state *);
static void brackpart(struct vars *, struct state *, struct state *); static void brackpart(struct vars *, struct state *, struct state *);
static chr *scanplain(struct vars *); static const chr *scanplain(struct vars *);
static void leaders(struct vars *, struct cvec *);
static void onechr(struct vars *, chr, struct state *, struct state *); static void onechr(struct vars *, chr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *); static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
static celt nextleader(struct vars *, chr, chr);
static void wordchrs(struct vars *); static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *); static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *); static void freesubre(struct vars *, struct subre *);
...@@ -74,12 +72,12 @@ static void rfree(regex_t *); ...@@ -74,12 +72,12 @@ static void rfree(regex_t *);
static void dump(regex_t *, FILE *); static void dump(regex_t *, FILE *);
static void dumpst(struct subre *, FILE *, int); static void dumpst(struct subre *, FILE *, int);
static void stdump(struct subre *, FILE *, int); static void stdump(struct subre *, FILE *, int);
static char *stid(struct subre *, char *, size_t); static const char *stid(struct subre *, char *, size_t);
#endif #endif
/* === regc_lex.c === */ /* === regc_lex.c === */
static void lexstart(struct vars *); static void lexstart(struct vars *);
static void prefixes(struct vars *); static void prefixes(struct vars *);
static void lexnest(struct vars *, chr *, chr *); static void lexnest(struct vars *, const chr *, const chr *);
static void lexword(struct vars *); static void lexword(struct vars *);
static int next(struct vars *); static int next(struct vars *);
static int lexescape(struct vars *); static int lexescape(struct vars *);
...@@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int); ...@@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int);
static int brenext(struct vars *, chr); static int brenext(struct vars *, chr);
static void skip(struct vars *); static void skip(struct vars *);
static chr newline(void); static chr newline(void);
static chr chrnamed(struct vars *, chr *, chr *, chr); static chr chrnamed(struct vars *, const chr *, const chr *, chr);
/* === regc_color.c === */ /* === regc_color.c === */
static void initcm(struct vars *, struct colormap *); static void initcm(struct vars *, struct colormap *);
...@@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *); ...@@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *); static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *); static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *); static void uncolorchain(struct colormap *, struct arc *);
static int singleton(struct colormap *, chr c);
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
...@@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *); ...@@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
#endif #endif
/* === regc_cvec.c === */ /* === regc_cvec.c === */
static struct cvec *newcvec(int, int, int); static struct cvec *newcvec(int, int);
static struct cvec *clearcvec(struct cvec *); static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, chr); static void addchr(struct cvec *, chr);
static void addrange(struct cvec *, chr, chr); static void addrange(struct cvec *, chr, chr);
static void addmcce(struct cvec *, chr *, chr *); static struct cvec *getcvec(struct vars *, int, int);
static int haschr(struct cvec *, chr);
static struct cvec *getcvec(struct vars *, int, int, int);
static void freecvec(struct cvec *); static void freecvec(struct cvec *);
/* === regc_locale.c === */ /* === regc_locale.c === */
...@@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c); ...@@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c);
static int pg_wc_isspace(pg_wchar c); static int pg_wc_isspace(pg_wchar c);
static pg_wchar pg_wc_toupper(pg_wchar c); static pg_wchar pg_wc_toupper(pg_wchar c);
static pg_wchar pg_wc_tolower(pg_wchar c); static pg_wchar pg_wc_tolower(pg_wchar c);
static int nmcces(struct vars *); static celt element(struct vars *, const chr *, const chr *);
static int nleaders(struct vars *);
static struct cvec *allmcces(struct vars *, struct cvec *);
static celt element(struct vars *, chr *, chr *);
static struct cvec *range(struct vars *, celt, celt, int); static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt); static int before(celt, celt);
static struct cvec *eclass(struct vars *, celt, int); static struct cvec *eclass(struct vars *, celt, int);
static struct cvec *cclass(struct vars *, chr *, chr *, int); static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr); static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t); static int cmp(const chr *, const chr *, size_t);
static int casecmp(const chr *, const chr *, size_t); static int casecmp(const chr *, const chr *, size_t);
...@@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t); ...@@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t);
struct vars struct vars
{ {
regex_t *re; regex_t *re;
chr *now; /* scan pointer into string */ const chr *now; /* scan pointer into string */
chr *stop; /* end of string */ const chr *stop; /* end of string */
chr *savenow; /* saved now and stop for "subroutine call" */ const chr *savenow; /* saved now and stop for "subroutine call" */
chr *savestop; const chr *savestop;
int err; /* error code (0 if none) */ int err; /* error code (0 if none) */
int cflags; /* copy of compile flags */ int cflags; /* copy of compile flags */
int lasttype; /* type of previous token */ int lasttype; /* type of previous token */
...@@ -230,10 +222,6 @@ struct vars ...@@ -230,10 +222,6 @@ struct vars
int ntree; /* number of tree nodes */ int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */ struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */ struct cvec *cv2; /* utility cvec */
struct cvec *mcces; /* collating-element information */
#define ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c)))
struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
struct state *mccepend; /* in nfa, end of MCCE prototypes */
struct subre *lacons; /* lookahead-constraint vector */ struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */ int nlacons; /* size of lacons */
}; };
...@@ -275,9 +263,8 @@ struct vars ...@@ -275,9 +263,8 @@ struct vars
#define PREFER 'P' /* length preference */ #define PREFER 'P' /* length preference */
/* is an arc colored, and hence on a color chain? */ /* is an arc colored, and hence on a color chain? */
#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ #define COLORED(a) \
(a)->type == BEHIND) ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
/* static function list */ /* static function list */
...@@ -322,7 +309,7 @@ pg_regcomp(regex_t *re, ...@@ -322,7 +309,7 @@ pg_regcomp(regex_t *re,
/* initial setup (after which freev() is callable) */ /* initial setup (after which freev() is callable) */
v->re = re; v->re = re;
v->now = (chr *) string; v->now = string;
v->stop = v->now + len; v->stop = v->now + len;
v->savenow = v->savestop = NULL; v->savenow = v->savestop = NULL;
v->err = 0; v->err = 0;
...@@ -341,7 +328,6 @@ pg_regcomp(regex_t *re, ...@@ -341,7 +328,6 @@ pg_regcomp(regex_t *re,
v->treefree = NULL; v->treefree = NULL;
v->cv = NULL; v->cv = NULL;
v->cv2 = NULL; v->cv2 = NULL;
v->mcces = NULL;
v->lacons = NULL; v->lacons = NULL;
v->nlacons = 0; v->nlacons = 0;
re->re_magic = REMAGIC; re->re_magic = REMAGIC;
...@@ -363,19 +349,9 @@ pg_regcomp(regex_t *re, ...@@ -363,19 +349,9 @@ pg_regcomp(regex_t *re,
ZAPCNFA(g->search); ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, (struct nfa *) NULL); v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
CNOERR(); CNOERR();
v->cv = newcvec(100, 20, 10); v->cv = newcvec(100, 20);
if (v->cv == NULL) if (v->cv == NULL)
return freev(v, REG_ESPACE); return freev(v, REG_ESPACE);
i = nmcces(v);
if (i > 0)
{
v->mcces = newcvec(nleaders(v), 0, i);
CNOERR();
v->mcces = allmcces(v, v->mcces);
leaders(v, v->mcces);
addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */
}
CNOERR();
/* parsing */ /* parsing */
lexstart(v); /* also handles prefixes */ lexstart(v); /* also handles prefixes */
...@@ -525,8 +501,6 @@ freev(struct vars * v, ...@@ -525,8 +501,6 @@ freev(struct vars * v,
freecvec(v->cv); freecvec(v->cv);
if (v->cv2 != NULL) if (v->cv2 != NULL)
freecvec(v->cv2); freecvec(v->cv2);
if (v->mcces != NULL)
freecvec(v->mcces);
if (v->lacons != NULL) if (v->lacons != NULL)
freelacons(v->lacons, v->nlacons); freelacons(v->lacons, v->nlacons);
ERR(err); /* nop if err==0 */ ERR(err); /* nop if err==0 */
...@@ -583,17 +557,16 @@ makesearch(struct vars * v, ...@@ -583,17 +557,16 @@ makesearch(struct vars * v,
for (b = s->ins; b != NULL; b = b->inchain) for (b = s->ins; b != NULL; b = b->inchain)
if (b->from != pre) if (b->from != pre)
break; break;
if (b != NULL) if (b != NULL && s->tmp == NULL)
{ /* must be split */ {
if (s->tmp == NULL) /*
{ /* if not already in the list */ * Must be split if not already in the list (fixes bugs 505048,
/* (fixes bugs 505048, 230589, */ * 230589, 840258, 504785).
/* 840258, 504785) */ */
s->tmp = slist; s->tmp = slist;
slist = s; slist = s;
} }
} }
}
/* do the splits */ /* do the splits */
for (s = slist; s != NULL; s = s2) for (s = slist; s != NULL; s = s2)
...@@ -1338,13 +1311,6 @@ cbracket(struct vars * v, ...@@ -1338,13 +1311,6 @@ cbracket(struct vars * v,
{ {
struct state *left = newstate(v->nfa); struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa); struct state *right = newstate(v->nfa);
struct state *s;
struct arc *a; /* arc from lp */
struct arc *ba; /* arc from left, from bracket() */
struct arc *pa; /* MCCE-prototype arc */
color co;
chr *p;
int i;
NOERR(); NOERR();
bracket(v, left, right); bracket(v, left, right);
...@@ -1354,67 +1320,15 @@ cbracket(struct vars * v, ...@@ -1354,67 +1320,15 @@ cbracket(struct vars * v,
assert(lp->nouts == 0); /* all outarcs will be ours */ assert(lp->nouts == 0); /* all outarcs will be ours */
/* easy part of complementing */ /*
* Easy part of complementing, and all there is to do since the MCCE code
* was removed.
*/
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR(); NOERR();
if (v->mcces == NULL)
{ /* no MCCEs -- we're done */
dropstate(v->nfa, left); dropstate(v->nfa, left);
assert(right->nins == 0); assert(right->nins == 0);
freestate(v->nfa, right); freestate(v->nfa, right);
return;
}
/* but complementing gets messy in the presence of MCCEs... */
NOTE(REG_ULOCALE);
for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
ba = findarc(left, PLAIN, co);
if (ba == NULL)
{
assert(a != NULL);
freearc(v->nfa, a);
}
else
assert(a == NULL);
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
if (ba == NULL)
{ /* easy case, need all of them */
cloneouts(v->nfa, pa->to, s, rp, PLAIN);
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
}
else
{ /* must be selective */
if (findarc(ba->to, '$', 1) == NULL)
{
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
s, rp);
}
for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
if (findarc(ba->to, PLAIN, pa->co) == NULL)
newarc(v->nfa, PLAIN, pa->co, s, rp);
if (s->nouts == 0) /* limit of selectivity: none */
dropstate(v->nfa, s); /* frees arc too */
}
NOERR();
}
delsub(v->nfa, left, right);
assert(left->nouts == 0);
freestate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
} }
/* /*
...@@ -1428,8 +1342,8 @@ brackpart(struct vars * v, ...@@ -1428,8 +1342,8 @@ brackpart(struct vars * v,
celt startc; celt startc;
celt endc; celt endc;
struct cvec *cv; struct cvec *cv;
chr *startp; const chr *startp;
chr *endp; const chr *endp;
chr c[1]; chr c[1];
/* parse something, get rid of special cases, take shortcuts */ /* parse something, get rid of special cases, take shortcuts */
...@@ -1442,8 +1356,8 @@ brackpart(struct vars * v, ...@@ -1442,8 +1356,8 @@ brackpart(struct vars * v,
case PLAIN: case PLAIN:
c[0] = v->nextvalue; c[0] = v->nextvalue;
NEXT(); NEXT();
/* shortcut for ordinary chr (not range, not MCCE leader) */ /* shortcut for ordinary chr (not range) */
if (!SEE(RANGE) && !ISCELEADER(v, c[0])) if (!SEE(RANGE))
{ {
onechr(v, c[0], lp, rp); onechr(v, c[0], lp, rp);
return; return;
...@@ -1533,10 +1447,10 @@ brackpart(struct vars * v, ...@@ -1533,10 +1447,10 @@ brackpart(struct vars * v,
* Certain bits of trickery in lex.c know that this code does not try * Certain bits of trickery in lex.c know that this code does not try
* to look past the final bracket of the [. etc. * to look past the final bracket of the [. etc.
*/ */
static chr * /* just after end of sequence */ static const chr * /* just after end of sequence */
scanplain(struct vars * v) scanplain(struct vars * v)
{ {
chr *endp; const chr *endp;
assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
NEXT(); NEXT();
...@@ -1554,52 +1468,6 @@ scanplain(struct vars * v) ...@@ -1554,52 +1468,6 @@ scanplain(struct vars * v)
return endp; return endp;
} }
/*
* leaders - process a cvec of collating elements to also include leaders
* Also gives all characters involved their own colors, which is almost
* certainly necessary, and sets up little disconnected subNFA.
*/
static void
leaders(struct vars * v,
struct cvec * cv)
{
int mcce;
chr *p;
chr leader;
struct state *s;
struct arc *a;
v->mccepbegin = newstate(v->nfa);
v->mccepend = newstate(v->nfa);
NOERR();
for (mcce = 0; mcce < cv->nmcces; mcce++)
{
p = cv->mcces[mcce];
leader = *p;
if (!haschr(cv, leader))
{
addchr(cv, leader);
s = newstate(v->nfa);
newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
v->mccepbegin, s);
okcolors(v->nfa, v->cm);
}
else
{
a = findarc(v->mccepbegin, PLAIN,
GETCOLOR(v->cm, leader));
assert(a != NULL);
s = a->to;
assert(s != v->mccepend);
}
p++;
assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */
newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
okcolors(v->nfa, v->cm);
}
}
/* /*
* onechr - fill in arcs for a plain character, and possible case complements * onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case. * This is mostly a shortcut for efficient handling of the common case.
...@@ -1622,7 +1490,6 @@ onechr(struct vars * v, ...@@ -1622,7 +1490,6 @@ onechr(struct vars * v,
/* /*
* dovec - fill in arcs for each element of a cvec * dovec - fill in arcs for each element of a cvec
* This one has to handle the messy cases, like MCCEs and MCCE leaders.
*/ */
static void static void
dovec(struct vars * v, dovec(struct vars * v,
...@@ -1633,47 +1500,14 @@ dovec(struct vars * v, ...@@ -1633,47 +1500,14 @@ dovec(struct vars * v,
chr ch, chr ch,
from, from,
to; to;
celt ce; const chr *p;
chr *p;
int i; int i;
color co;
struct cvec *leads;
struct arc *a;
struct arc *pa; /* arc in prototype */
struct state *s;
struct state *ps; /* state in prototype */
/* need a place to store leaders, if any */
if (nmcces(v) > 0)
{
assert(v->mcces != NULL);
if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs)
{
if (v->cv2 != NULL)
free(v->cv2);
v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
NOERR();
leads = v->cv2;
}
else
leads = clearcvec(v->cv2);
}
else
leads = NULL;
/* first, get the ordinary characters out of the way */ /* ordinary characters */
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{ {
ch = *p; ch = *p;
if (!ISCELEADER(v, ch))
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
else
{
assert(singleton(v->cm, ch));
assert(leads != NULL);
if (!haschr(leads, ch))
addchr(leads, ch);
}
} }
/* and the ranges */ /* and the ranges */
...@@ -1681,103 +1515,9 @@ dovec(struct vars * v, ...@@ -1681,103 +1515,9 @@ dovec(struct vars * v,
{ {
from = *p; from = *p;
to = *(p + 1); to = *(p + 1);
while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)
{
if (from < ce)
subrange(v, from, ce - 1, lp, rp);
assert(singleton(v->cm, ce));
assert(leads != NULL);
if (!haschr(leads, ce))
addchr(leads, ce);
from = ce + 1;
}
if (from <= to) if (from <= to)
subrange(v, from, to, lp, rp); subrange(v, from, to, lp, rp);
} }
if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
return;
/* deal with the MCCE leaders */
NOTE(REG_ULOCALE);
for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)
{
co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
ps = pa->to;
newarc(v->nfa, '$', 1, s, rp);
newarc(v->nfa, '$', 0, s, rp);
colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
NOERR();
}
/* and the MCCEs */
for (i = 0; i < cv->nmcces; i++)
{
p = cv->mcces[i];
assert(singleton(v->cm, *p));
if (!singleton(v->cm, *p))
{
ERR(REG_ASSERT);
return;
}
ch = *p++;
co = GETCOLOR(v->cm, ch);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
else
{
s = newstate(v->nfa);
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
assert(*p != 0); /* at least two chars */
assert(singleton(v->cm, *p));
ch = *p++;
co = GETCOLOR(v->cm, ch);
assert(*p == 0); /* and only two, for now */
newarc(v->nfa, PLAIN, co, s, rp);
NOERR();
}
}
/*
* nextleader - find next MCCE leader within range
*/
static celt /* NOCELT means none */
nextleader(struct vars * v,
chr from,
chr to)
{
int i;
chr *p;
chr ch;
celt it = NOCELT;
if (v->mcces == NULL)
return it;
for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++)
{
ch = *p;
if (from <= ch && ch <= to)
if (it == NOCELT || ch < it)
it = ch;
}
return it;
} }
/* /*
...@@ -1825,9 +1565,8 @@ subre(struct vars * v, ...@@ -1825,9 +1565,8 @@ subre(struct vars * v,
struct state * begin, struct state * begin,
struct state * end) struct state * end)
{ {
struct subre *ret; struct subre *ret = v->treefree;
ret = v->treefree;
if (ret != NULL) if (ret != NULL)
v->treefree = ret->left; v->treefree = ret->left;
else else
...@@ -1906,14 +1645,13 @@ static void ...@@ -1906,14 +1645,13 @@ static void
optst(struct vars * v, optst(struct vars * v,
struct subre * t) struct subre * t)
{ {
if (t == NULL) /*
* DGP (2007-11-13): I assume it was the programmer's intent to eventually
* come back and add code to optimize subRE trees, but the routine coded
* just spends effort traversing the tree and doing nothing. We can do
* nothing with less effort.
*/
return; return;
/* recurse through children */
if (t->left != NULL)
optst(v, t->left);
if (t->right != NULL)
optst(v, t->right);
} }
/* /*
...@@ -2207,8 +1945,8 @@ stdump(struct subre * t, ...@@ -2207,8 +1945,8 @@ stdump(struct subre * t,
{ {
fprintf(f, "\n"); fprintf(f, "\n");
dumpcnfa(&t->cnfa, f); dumpcnfa(&t->cnfa, f);
fprintf(f, "\n");
} }
fprintf(f, "\n");
if (t->left != NULL) if (t->left != NULL)
stdump(t->left, f, nfapresent); stdump(t->left, f, nfapresent);
if (t->right != NULL) if (t->right != NULL)
...@@ -2218,7 +1956,7 @@ stdump(struct subre * t, ...@@ -2218,7 +1956,7 @@ stdump(struct subre * t,
/* /*
* stid - identify a subtree node for dumping * stid - identify a subtree node for dumping
*/ */
static char * /* points to buf or constant string */ static const char * /* points to buf or constant string */
stid(struct subre * t, stid(struct subre * t,
char *buf, char *buf,
size_t bufsize) size_t bufsize)
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.27 2003/11/29 19:51:55 pgsql Exp $ * $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.28 2008/02/14 17:33:37 tgl Exp $
* *
*/ */
...@@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***"; ...@@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***";
static struct rerr static struct rerr
{ {
int code; int code;
char *name; const char *name;
char *explain; const char *explain;
} rerrs[] = } rerrs[] =
{ {
...@@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ ...@@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
size_t errbuf_size) /* available space in errbuf, can be 0 */ size_t errbuf_size) /* available space in errbuf, can be 0 */
{ {
struct rerr *r; struct rerr *r;
char *msg; const char *msg;
char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */ char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */
size_t len; size_t len;
int icode; int icode;
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.6 2007/10/06 16:01:51 tgl Exp $ * $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/ */
/* headers if any */ /* headers if any */
...@@ -47,9 +47,9 @@ ...@@ -47,9 +47,9 @@
/* internal character type and related */ /* internal character type and related */
typedef pg_wchar chr; /* the type itself */ typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */ typedef unsigned uchr; /* unsigned type that will hold a chr */
typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ typedef int celt; /* type to hold chr, or NOCELT */
#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ #define NOCELT (-1) /* celt value which is not valid chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */ #define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ #define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */ #define CHRBITS 32 /* bits in a chr; must not use sizeof */
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.6 2008/01/03 20:47:55 tgl Exp $ * $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.7 2008/02/14 17:33:37 tgl Exp $
*/ */
...@@ -181,7 +181,7 @@ union tree ...@@ -181,7 +181,7 @@ union tree
#define tcolor colors.ccolor #define tcolor colors.ccolor
#define tptr ptrs.pptr #define tptr ptrs.pptr
/* internal per-color structure for the color machinery */ /* internal per-color descriptor structure for the color machinery */
struct colordesc struct colordesc
{ {
uchr nchrs; /* number of chars of this color */ uchr nchrs; /* number of chars of this color */
...@@ -228,11 +228,11 @@ struct colormap ...@@ -228,11 +228,11 @@ struct colormap
#endif #endif
/* /*
* Interface definitions for locale-interface functions in locale.c. * Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble.
*/ */
/* Representation of a set of characters. */
struct cvec struct cvec
{ {
int nchrs; /* number of chrs */ int nchrs; /* number of chrs */
...@@ -241,17 +241,9 @@ struct cvec ...@@ -241,17 +241,9 @@ struct cvec
int nranges; /* number of ranges (chr pairs) */ int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */ int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */ chr *ranges; /* pointer to vector of chr pairs */
int nmcces; /* number of MCCEs */ /* both batches of chrs are on the end */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
}; };
/* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */
/* /*
* definitions for NFA internal representation * definitions for NFA internal representation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment