for (s = nfa->states; s != NULL; s = s->next)
{
nstates++;
- narcs += 1 + s->nouts + 1;
- /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
+ narcs += s->nouts + 1; /* need one extra for endmarker */
}
+ cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
- if (cnfa->states == NULL || cnfa->arcs == NULL)
+ if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
{
+ if (cnfa->stflags != NULL)
+ FREE(cnfa->stflags);
if (cnfa->states != NULL)
FREE(cnfa->states);
if (cnfa->arcs != NULL)
for (s = nfa->states; s != NULL; s = s->next)
{
assert((size_t) s->no < nstates);
+ cnfa->stflags[s->no] = 0;
cnfa->states[s->no] = ca;
- ca->co = 0; /* clear and skip flags "arc" */
- ca++;
first = ca;
for (a = s->outs; a != NULL; a = a->outchain)
switch (a->type)
/* mark no-progress states */
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
- cnfa->states[a->to->no]->co = 1;
- cnfa->states[nfa->pre->no]->co = 1;
+ cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
+ cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
}
/*
{
assert(cnfa->nstates != 0); /* not empty already */
cnfa->nstates = 0;
+ FREE(cnfa->stflags);
FREE(cnfa->states);
FREE(cnfa->arcs);
}
fprintf(f, ", haslacons");
fprintf(f, "\n");
for (st = 0; st < cnfa->nstates; st++)
- dumpcstate(st, cnfa->states[st], cnfa, f);
+ dumpcstate(st, cnfa, f);
fflush(f);
}
#endif
*/
static void
dumpcstate(int st,
- struct carc * ca,
struct cnfa * cnfa,
FILE *f)
{
- int i;
+ struct carc * ca;
int pos;
- fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
+ fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
pos = 1;
- for (i = 1; ca[i].co != COLORLESS; i++)
+ for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
{
- if (ca[i].co < cnfa->ncolors)
- fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
+ if (ca->co < cnfa->ncolors)
+ fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
else
- fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
- ca[i].to);
+ fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
if (pos == 5)
{
fprintf(f, "\n");
else
pos++;
}
- if (i == 1 || pos != 1)
+ if (ca == cnfa->states[st] || pos != 1)
fprintf(f, "\n");
fflush(f);
}
static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *);
-static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+static void dumpcstate(int, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
static struct cvec *newcvec(int, int);
gotstate = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(css->states, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
+ for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
if (ca->co == co)
{
BSET(d->work, ca->to);
gotstate = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (!cnfa->states[ca->to]->co)
+ if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
- ca++)
+ for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
{
- if (ca->co <= cnfa->ncolors)
+ if (ca->co < cnfa->ncolors)
continue; /* NOTE CONTINUE */
sawlacons = 1;
if (ISBSET(d->work, ca->to))
dolacons = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (!cnfa->states[ca->to]->co)
+ if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d :> %d\n", i, ca->to));
}
struct arc
{
- int type;
-#define ARCFREE '\0'
+ int type; /* 0 if free, else an NFA arc type code */
color co;
struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */
- struct arc *outchain; /* *from's outs chain or free chain */
+ struct arc *outchain; /* link in *from's outs chain or free chain */
#define freechain outchain
- struct arc *inchain; /* *to's ins chain */
- struct arc *colorchain; /* color's arc chain */
+ struct arc *inchain; /* link in *to's ins chain */
+ struct arc *colorchain; /* link in color's arc chain */
struct arc *colorchainRev; /* back-link in color's arc chain */
};
/*
* definitions for compacted NFA
+ *
+ * The main space savings in a compacted NFA is from making the arcs as small
+ * as possible. We store only the transition color and next-state number for
+ * each arc. The list of out arcs for each state is an array beginning at
+ * cnfa.states[statenumber], and terminated by a dummy carc struct with
+ * co == COLORLESS.
+ *
+ * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
+ * Plain arcs just store the transition color number as "co". LACON arcs
+ * store the lookahead constraint number plus cnfa.ncolors as "co". LACON
+ * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
*/
struct carc
{
color co; /* COLORLESS is list terminator */
- int to; /* state number */
+ int to; /* next-state number */
};
struct cnfa
{
int nstates; /* number of states */
- int ncolors; /* number of colors */
+ int ncolors; /* number of colors (max color in use + 1) */
int flags;
-#define HASLACONS 01 /* uses lookahead constraints */
+#define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ char *stflags; /* vector of per-state flags bytes */
+#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */
struct carc **states; /* vector of pointers to outarc lists */
+ /* states[n] are pointers into a single malloc'd array of arcs */
struct carc *arcs; /* the area for the lists */
};