Commit 27af9143 authored by Tom Lane

Create the beginnings of internals documentation for the regex code.

Create src/backend/regex/README to hold an implementation overview of
the regex package, and fill it in with some preliminary notes about
the code's DFA/NFA processing and colormap management.  Much more to
do there of course.

Also, improve some code comments around the colormap and cvec code.
No functional changes except to add one missing assert.
parent 2f582f76
This diff is collapsed.
......@@ -77,6 +77,7 @@ static void
addchr(struct cvec * cv, /* character vector */
chr c) /* character to add */
{
/* this function never enlarges chrs[], so the caller must have left room */
assert(cv->nchrs < cv->chrspace);
/* store c in the next free slot and bump the count of stored chrs */
cv->chrs[cv->nchrs++] = (chr) c;
}
......@@ -95,17 +96,27 @@ addrange(struct cvec * cv, /* character vector */
}
/*
* getcvec - get a cvec, remembering it as v->cv
* getcvec - get a transient cvec, initialized to empty
*
* The returned cvec is valid only until the next call of getcvec, which
* typically will recycle the space. Callers should *not* free the cvec
* explicitly; it will be cleaned up when the struct vars is destroyed.
*
* This is typically used while interpreting bracket expressions. In that
* usage the cvec is only needed momentarily until we build arcs from it,
* so transientness is a convenient behavior.
*/
static struct cvec *
getcvec(struct vars * v, /* context */
int nchrs, /* to hold this many chrs... */
int nranges) /* ... and this many ranges */
{
/* recycle existing transient cvec if large enough */
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
nranges <= v->cv->rangespace)
return clearcvec(v->cv);
/* nope, make a new one */
if (v->cv != NULL)
freecvec(v->cv);
v->cv = newcvec(nchrs, nranges);
......
......@@ -356,6 +356,7 @@ pg_regcomp(regex_t *re,
ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
CNOERR();
/* set up a reasonably-sized transient cvec for getcvec usage */
v->cv = newcvec(100, 20);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
......
......@@ -181,34 +181,52 @@ union tree
#define tcolor colors.ccolor
#define tptr ptrs.pptr
/* internal per-color descriptor structure for the color machinery */
/*
* Per-color data structure for the compile-time color machinery
*
* If "sub" is not NOSUB then it is the number of the color's current
* subcolor, i.e. we are in process of dividing this color (character
* equivalence class) into two colors. See src/backend/regex/README for
* discussion of subcolors.
*
* Currently-unused colors have the FREECOL bit set and are linked into a
* freelist using their "sub" fields, but only if their color numbers are
* less than colormap.max. Any array entries beyond "max" are just garbage.
*/
struct colordesc
{
uchr nchrs; /* number of chars of this color */
color sub; /* open subcolor (if any); free chain ptr */
#define NOSUB COLORLESS
struct arc *arcs; /* color chain */
int flags;
color sub; /* open subcolor, if any; or free-chain ptr */
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
struct arc *arcs; /* chain of all arcs of this color */
int flags; /* bit values defined next */
#define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */
#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) /* nonzero iff cd has FREECOL set */
union tree *block; /* block of solid color, if any */
};
/* the color map itself */
/*
* The color map itself
*
* Only the "tree" part is used at execution time, and that only via the
* GETCOLOR() macro. Possibly that should be separated from the compile-time
* data.
*/
struct colormap
{
int magic; /* presumably holds CMMAGIC as a validity stamp -- TODO confirm */
#define CMMAGIC 0x876
struct vars *v; /* for compile error reporting */
size_t ncds; /* number of colordescs */
size_t max; /* highest in use */
size_t ncds; /* allocated length of colordescs array */
size_t max; /* highest color number currently in use */
color free; /* beginning of free chain (if non-0) */
struct colordesc *cd;
struct colordesc *cd; /* pointer to array of colordescs */
#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) /* address just past cd[max] */
/* If we need up to NINLINECDS, we store them here to save a malloc */
#define NINLINECDS ((size_t)10)
struct colordesc cdspace[NINLINECDS]; /* inline array of NINLINECDS colordescs */
union tree tree[NBYTS]; /* tree top, plus fill blocks */
union tree tree[NBYTS]; /* tree top, plus lower-level fill blocks */
};
/* optimization magic to do fast chr->color mapping */
......@@ -229,19 +247,25 @@ struct colormap
/*
* Interface definitions for locale-interface functions in locale.c.
* Interface definitions for locale-interface functions in regc_locale.c.
*/
/* Representation of a set of characters. */
/*
* Representation of a set of characters. chrs[] represents individual
* code points, ranges[] represents ranges in the form min..max inclusive.
*
* Note that in cvecs gotten from newcvec() and intended to be freed by
* freecvec(), both arrays of chrs are after the end of the struct, not
* separately malloc'd; so chrspace and rangespace are effectively immutable.
*/
struct cvec
{
int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */
int chrspace; /* number of chrs allocated in chrs[] */
chr *chrs; /* pointer to vector of chrs */
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
int rangespace; /* number of ranges allocated in ranges[] */
chr *ranges; /* pointer to vector of chr pairs */
/* both batches of chrs are on the end */
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment