Commit f7a839bc authored by Tom Lane's avatar Tom Lane

Clean up portability problems in regexp package: change all routine

definitions from K&R to ANSI C style, and fix broken assumption that
int and long are the same datatype.  This repairs problems observed
on Alpha with regexps having between 32 and 63 states.
parent f4e4c729
......@@ -80,37 +80,26 @@ struct match
pg_wchar *endp; /* end of string -- virtual NUL here */
pg_wchar *coldp; /* can be no match starting before here */
pg_wchar **lastpos; /* [nplus+1] */
STATEVARS;
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
states tmp; /* temporary */
states empty; /* empty set of states */
};
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C"
{
#endif
static int matcher(struct re_guts *g, pg_wchar *string, size_t nmatch,
regmatch_t *pmatch, int eflags);
static pg_wchar *dissect(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst);
static pg_wchar *backref(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst, sopno lev);
static pg_wchar *fast(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst);
static pg_wchar *slow(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst);
static states step(struct re_guts *g, sopno start,
sopno stop, states bef, int ch, states aft);
/* === engine.c === */
static int
matcher(struct re_guts * g, pg_wchar * string, size_t nmatch,
regmatch_t *pmatch, int eflags);
static pg_wchar *
dissect(struct match * m, pg_wchar * start, pg_wchar * stop,
sopno startst, sopno stopst);
static pg_wchar *
backref(struct match * m, pg_wchar * start, pg_wchar * stop,
sopno startst, sopno stopst, sopno lev);
static pg_wchar *
fast(struct match * m, pg_wchar * start, pg_wchar * stop,
sopno startst, sopno stopst);
static pg_wchar *
slow(struct match * m, pg_wchar * start, pg_wchar * stop, sopno startst, sopno stopst);
static states
step(struct re_guts * g, sopno start,
sopno stop, states bef, int ch, states aft);
#define BOL (OUT+1)
#define EOL (BOL+1)
#define BOLEOL (BOL+2)
......@@ -128,24 +117,13 @@ extern "C"
#endif
#ifdef REDEBUG
static void
print(struct match * m, pg_wchar * caption, states st, int ch, FILE *d);
#endif
#ifdef REDEBUG
static void
at(struct match * m, pg_wchar * title, pg_wchar * start, pg_wchar * stop,
sopno startst, sopno stopst);
#endif
#ifdef REDEBUG
static pg_wchar *
p_char(int ch);
#endif
#ifdef __cplusplus
}
static void print(struct match *m, pg_wchar *caption, states st, int ch,
FILE *d);
static void at(struct match *m, pg_wchar *title, pg_wchar *start,
pg_wchar *stop, sopno startst, sopno stopst);
static pg_wchar *pchar(int ch);
static int pg_isprint(int c);
#endif
/* ========= end header generated by ./mkh ========= */
#ifdef REDEBUG
#define SP(t, s, c) print(m, t, s, c, stdout)
......@@ -158,17 +136,11 @@ extern "C"
#endif
/*
- matcher - the actual matching engine
== static int matcher(struct re_guts *g, pg_wchar *string, \
== size_t nmatch, regmatch_t *pmatch, int eflags);
* matcher - the actual matching engine
*/
static int /* 0 success, REG_NOMATCH failure */
matcher(g, string, nmatch, pmatch, eflags)
struct re_guts *g;
pg_wchar *string;
size_t nmatch;
regmatch_t *pmatch;
int eflags;
matcher(struct re_guts *g, pg_wchar *string, size_t nmatch,
regmatch_t *pmatch, int eflags)
{
pg_wchar *endp;
int i;
......@@ -206,10 +178,11 @@ int eflags;
for (dp = start; dp < stop; dp++)
if (*dp == g->must[0] && stop - dp >= g->mlen &&
#ifdef MULTIBYTE
memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0)
memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0
#else
memcmp(dp, g->must, (size_t) g->mlen) == 0)
memcmp(dp, g->must, (size_t) g->mlen) == 0
#endif
)
break;
if (dp == stop) /* we didn't find g->must */
return REG_NOMATCH;
......@@ -349,17 +322,11 @@ int eflags;
}
/*
- dissect - figure out what matched what, no back references
== static char *dissect(struct match *m, char *start, \
== char *stop, sopno startst, sopno stopst);
* dissect - figure out what matched what, no back references
*/
static pg_wchar * /* == stop (success) always */
dissect(m, start, stop, startst, stopst)
struct match *m;
pg_wchar *start;
pg_wchar *stop;
sopno startst;
sopno stopst;
dissect(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst)
{
int i;
sopno ss; /* start sop of current subRE */
......@@ -549,18 +516,13 @@ sopno stopst;
}
/*
- backref - figure out what matched what, figuring in back references
== static char *backref(struct match *m, char *start, \
== char *stop, sopno startst, sopno stopst, sopno lev);
* backref - figure out what matched what, figuring in back references
*
* lev is PLUS nesting level
*/
static pg_wchar * /* == stop (success) or NULL (failure) */
backref(m, start, stop, startst, stopst, lev)
struct match *m;
pg_wchar *start;
pg_wchar *stop;
sopno startst;
sopno stopst;
sopno lev; /* PLUS nesting level */
backref(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst, sopno lev)
{
int i;
sopno ss; /* start sop of current subRE */
......@@ -763,17 +725,11 @@ sopno lev; /* PLUS nesting level */
}
/*
- fast - step through the string at top speed
== static char *fast(struct match *m, char *start, \
== char *stop, sopno startst, sopno stopst);
* fast - step through the string at top speed
*/
static pg_wchar * /* where tentative match ended, or NULL */
fast(m, start, stop, startst, stopst)
struct match *m;
pg_wchar *start;
pg_wchar *stop;
sopno startst;
sopno stopst;
fast(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst)
{
states st = m->st;
states fresh = m->fresh;
......@@ -858,17 +814,11 @@ sopno stopst;
}
/*
- slow - step through the string more deliberately
== static char *slow(struct match *m, char *start, \
== char *stop, sopno startst, sopno stopst);
* slow - step through the string more deliberately
*/
static pg_wchar * /* where it ended */
slow(m, start, stop, startst, stopst)
struct match *m;
pg_wchar *start;
pg_wchar *stop;
sopno startst;
sopno stopst;
slow(struct match *m, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst)
{
states st = m->st;
states empty = m->empty;
......@@ -948,27 +898,15 @@ sopno stopst;
/*
- step - map set of states reachable before char to set reachable after
== static states step(struct re_guts *g, sopno start, sopno stop, \
== states bef, int ch, states aft);
== #define BOL (OUT+1)
== #define EOL (BOL+1)
== #define BOLEOL (BOL+2)
== #define NOTHING (BOL+3)
== #define BOW (BOL+4)
== #define EOW (BOL+5)
== #define CODEMAX (BOL+5) // highest code used
== #define NONCHAR(c) ((c) > CHAR_MAX)
== #define NNONCHAR (CODEMAX-CHAR_MAX)
* step - map set of states reachable before char to set reachable after
*/
static states
step(g, start, stop, bef, ch, aft)
struct re_guts *g;
sopno start; /* start state within strip */
sopno stop; /* state after stop state within strip */
states bef; /* states reachable before */
int ch; /* character or NONCHAR code */
states aft; /* states already known reachable after */
step(struct re_guts *g,
sopno start, /* start state within strip */
sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */
int ch, /* character or NONCHAR code */
states aft) /* states already known reachable after */
{
cset *cs;
sop s;
......@@ -1082,19 +1020,11 @@ states aft; /* states already known reachable after */
#ifdef REDEBUG
/*
- print - print a set of states
== #ifdef REDEBUG
== static void print(struct match *m, char *caption, states st, \
== int ch, FILE *d);
== #endif
* print - print a set of states
*/
static void
print(m, caption, st, ch, d)
struct match *m;
pg_wchar *caption;
states st;
int ch;
FILE *d;
print(struct match *m, pg_wchar *caption, states st,
int ch, FILE *d)
{
struct re_guts *g = m->g;
int i;
......@@ -1116,20 +1046,11 @@ FILE *d;
}
/*
- at - print current situation
== #ifdef REDEBUG
== static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \
== sopno startst, sopno stopst);
== #endif
* at - print current situation
*/
static void
at(m, title, start, stop, startst, stopst)
struct match *m;
pg_wchar *title;
pg_wchar *start;
pg_wchar *stop;
sopno startst;
sopno stopst;
at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop,
sopno startst, sopno stopst)
{
if (!(m->eflags & REG_TRACE))
return;
......@@ -1140,19 +1061,26 @@ sopno stopst;
}
#ifndef PCHARDONE
#define PCHARDONE /* never again */
#define PCHARDONE /* only do this once */
/*
- pchar - make a character printable
== #ifdef REDEBUG
== static char *pchar(int ch);
== #endif
* pchar - make a character printable
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
* duplicate here avoids having a debugging-capable regexec.o tied to
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
/*
 * Returns a pointer to a static buffer holding a printable rendering of ch:
 * the character itself when pg_isprint() accepts it or it is a space,
 * otherwise a backslash followed by its octal value.  The buffer is
 * overwritten by the next call.
 */
static pg_wchar *				/* -> representation */
pchar(int ch)
{
	static pg_wchar pbuf[10];

	/*
	 * The text is written as plain chars into pbuf.  Bound the write by the
	 * buffer size: "\\%o" applied to a negative or very large 32-bit int
	 * produces a backslash plus 11 octal digits plus the terminator, which
	 * is more than ten one-byte elements can hold.
	 */
	if (pg_isprint(ch) || ch == ' ')
		snprintf((char *) pbuf, sizeof(pbuf), "%c", ch);
	else
		snprintf((char *) pbuf, sizeof(pbuf), "\\%o", (unsigned int) ch);
	return pbuf;
}
static int
pg_isprint(int c)
......@@ -1164,19 +1092,6 @@ pg_isprint(int c)
#endif
}
/*
 * Returns a pointer to a static buffer holding a printable rendering of ch:
 * the character itself when pg_isprint() accepts it or it is a space,
 * otherwise a backslash followed by its octal value.  The buffer is
 * overwritten by the next call.
 */
static pg_wchar *				/* -> representation */
pchar(int ch)
{
	static pg_wchar pbuf[10];

	/*
	 * Prototype-style definition (old-style parameter lists are obsolete).
	 * The write is bounded by the buffer size because "\\%o" applied to a
	 * negative or very large 32-bit int needs 13 bytes including the
	 * terminator, more than ten one-byte elements can hold.
	 */
	if (pg_isprint(ch) || ch == ' ')
		snprintf((char *) pbuf, sizeof(pbuf), "%c", ch);
	else
		snprintf((char *) pbuf, sizeof(pbuf), "\\%o", (unsigned int) ch);
	return pbuf;
}
#endif
#endif
......
This diff is collapsed.
......@@ -37,59 +37,19 @@
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */
#include "postgres.h"
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <assert.h>
#include "regex/regex.h"
#include "regex/utils.h"
#include "regex/regex2.h"
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C"
{
#endif
/* === regerror.c === */
static char *regatoi(const regex_t *preg, char *localbuf);
static char *regatoi(const regex_t *preg, char *localbuf);
#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
/*
= #define REG_NOMATCH 1
= #define REG_BADPAT 2
= #define REG_ECOLLATE 3
= #define REG_ECTYPE 4
= #define REG_EESCAPE 5
= #define REG_ESUBREG 6
= #define REG_EBRACK 7
= #define REG_EPAREN 8
= #define REG_EBRACE 9
= #define REG_BADBR 10
= #define REG_ERANGE 11
= #define REG_ESPACE 12
= #define REG_BADRPT 13
= #define REG_EMPTY 14
= #define REG_ASSERT 15
= #define REG_INVARG 16
= #define REG_ATOI 255 // convert name to number (!)
= #define REG_ITOA 0400 // convert number to name (!)
*/
static struct rerr
{
int code;
......@@ -152,16 +112,12 @@ static struct rerr
};
/*
- regerror - the interface to error numbers
= extern size_t regerror(int, const regex_t *, char *, size_t);
* regerror - the interface to error numbers
*/
/* ARGSUSED */
size_t
pg95_regerror(errcode, preg, errbuf, errbuf_size)
int errcode;
const regex_t *preg;
char *errbuf;
size_t errbuf_size;
pg95_regerror(int errcode, const regex_t *preg,
char *errbuf, size_t errbuf_size)
{
struct rerr *r;
size_t len;
......@@ -206,13 +162,10 @@ size_t errbuf_size;
}
/*
- regatoi - internal routine to implement REG_ATOI
== static char *regatoi(const regex_t *preg, char *localbuf);
* regatoi - internal routine to implement REG_ATOI
*/
static char *
regatoi(preg, localbuf)
const regex_t *preg;
char *localbuf;
regatoi(const regex_t *preg, char *localbuf)
{
struct rerr *r;
......
......@@ -37,11 +37,6 @@
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */
#include "postgres.h"
/*
......@@ -52,9 +47,6 @@ static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
* representations for state sets.
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <assert.h>
......@@ -69,24 +61,24 @@ static int nope = 0; /* for use in asserts; shuts lint up */
#define states long
#define states1 states /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~(1 << (n)))
#define SET1(v, n) ((v) |= 1 << (n))
#define ISSET(v, n) ((v) & (1 << (n)))
/*
 * Clear, set and test bit n of the set v.  The mask is built from an
 * unsigned long so that selecting the top bit of a long-sized set does not
 * left-shift a signed 1 into the sign bit, which is undefined behavior.
 * ISSET yields a nonzero value when the bit is set and zero otherwise.
 */
#define SET0(v, n)	((v) &= ~(1UL << (n)))
#define SET1(v, n)	((v) |= (1UL << (n)))
#define ISSET(v, n)	((v) & (1UL << (n)))
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
#define STATEVARS int dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
#define onestate int
#define INIT(o, n) ((o) = (unsigned)1 << (n))
#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) ((v) & (o))
/*
 * A onestate is a single-bit mask.  INIT makes it the mask for bit n, INC
 * moves it to the next higher bit, and ISSTATEIN tests it against a set.
 * The shifts are carried out on unsigned long so that reaching or leaving
 * the top bit of a long never shifts a signed value into or out of the sign
 * bit (undefined behavior).  INC evaluates its argument twice; pass a plain
 * variable.
 */
#define onestate	long
#define INIT(o, n)	((o) = (onestate) (1UL << (n)))
#define INC(o)		((o) = (onestate) ((unsigned long) (o) << 1))
#define ISSTATEIN(v, o) ((v) & (o))
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
/*
 * These rely on a variable named "here" (a single-bit mask) being in scope.
 * FWD/BACK: if the bit selected by here is set in src, set the bit n
 * positions higher/lower in dst.  ISSETBACK: test the bit n positions below
 * here in v.
 *
 * The operands are converted to unsigned long before shifting.  Shifting a
 * long whose top bit is set to the right is an arithmetic shift on common
 * compilers and would copy the sign bit into the lower bits, marking bits
 * that were never set; shifting it left is undefined.  A cast to plain
 * unsigned would instead discard the upper half of a long on platforms where
 * long is wider than int.
 */
#define FWD(dst, src, n)	((dst) |= ((unsigned long) (src) & (unsigned long) (here)) << (n))
#define BACK(dst, src, n)	((dst) |= ((unsigned long) (src) & (unsigned long) (here)) >> (n))
#define ISSETBACK(v, n) ((v) & ((unsigned long) (here) >> (n)))
/* function names */
#define SNAMES /* engine.c looks after details */
......@@ -129,7 +121,7 @@ static int nope = 0; /* for use in asserts; shuts lint up */
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
#define onestate int
#define INIT(o, n) ((o) = (n))
#define INC(o) ((o)++)
#define INC(o) ((o)++)
#define ISSTATEIN(v, o) ((v)[o])
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
......@@ -142,27 +134,14 @@ static int nope = 0; /* for use in asserts; shuts lint up */
#include "engine.c"
/*
- regexec - interface for matching
= extern int regexec(const regex_t *, const char *, size_t, \
= regmatch_t [], int);
= #define REG_NOTBOL 00001
= #define REG_NOTEOL 00002
= #define REG_STARTEND 00004
= #define REG_TRACE 00400 // tracing of execution
= #define REG_LARGE 01000 // force large representation
= #define REG_BACKR 02000 // force use of backref code
* regexec - interface for matching
*
* We put this here so we can exploit knowledge of the state representation
* when choosing which matcher to call. Also, by this point the matchers
* have been prototyped.
* when choosing which matcher to call.
*/
int /* 0 success, REG_NOMATCH failure */
pg95_regexec(preg, string, nmatch, pmatch, eflags)
const regex_t *preg;
const char *string;
size_t nmatch;
regmatch_t *pmatch;
int eflags;
pg95_regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t *pmatch, int eflags)
{
struct re_guts *g = preg->re_g;
......
......@@ -37,28 +37,19 @@
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
#endif /* LIBC_SCCS and not lint */
#include "postgres.h"
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "regex/regex.h"
#include "regex/utils.h"
#include "regex/regex2.h"
/*
- regfree - free everything
= extern void regfree(regex_t *);
* regfree - free everything
*/
void
pg95_regfree(preg)
regex_t *preg;
pg95_regfree(regex_t *preg)
{
struct re_guts *g;
......
......@@ -94,6 +94,6 @@ static struct cclass
""
},
{
NULL, 0, ""
NULL, NULL, ""
}
};
......@@ -102,10 +102,12 @@ typedef struct
#define REG_LARGE 01000 /* force large representation */
#define REG_BACKR 02000 /* force use of backref code */
int pg95_regcomp(regex_t *, const char *, int);
size_t pg95_regerror(int, const regex_t *, char *, size_t);
int pg95_regexec(const regex_t *,
const char *, size_t, regmatch_t[], int);
void pg95_regfree(regex_t *);
extern int pg95_regcomp(regex_t *preg, const char *pattern, int cflags);
extern size_t pg95_regerror(int errcode, const regex_t *preg,
char *errbuf, size_t errbuf_size);
extern int pg95_regexec(const regex_t *preg, const char *string,
size_t nmatch,
regmatch_t *pmatch, int eflags);
extern void pg95_regfree(regex_t *preg);
#endif /* !_REGEX_H_ */
......@@ -39,22 +39,6 @@
#include <limits.h>
/*
* First, the stuff that ends up in the outside-world include file
*/
/*
typedef off_t regoff_t;
typedef struct {
int re_magic;
size_t re_nsub; // number of parenthesized subexpressions
const char *re_endp; // end pointer for REG_PEND
struct re_guts *re_g; // none of your business :-)
} regex_t;
typedef struct {
regoff_t rm_so; // start of match
regoff_t rm_eo; // end of match
} regmatch_t;
*/
/*
* internals of regex_t
*/
......@@ -82,8 +66,8 @@
typedef unsigned long sop; /* strip operator */
typedef long sopno;
#define OPRMASK 0xf8000000
#define OPDMASK 0x07ffffff
#define OPRMASK ((sop) 0xf8000000)
#define OPDMASK ((sop) 0x07ffffff)
#define OPSHIFT ((unsigned)27)
#define OP(n) ((n)&OPRMASK)
#define OPND(n) ((n)&OPDMASK)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment