]> granicus.if.org Git - postgresql/blob - src/backend/regex/regexec.c
e3bc41fa5e042c1ddb283c3ef921416962fa8c35
[postgresql] / src / backend / regex / regexec.c
1 /*
2  * re_*exec and friends - match REs
3  *
4  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved.
5  *
6  * Development of this software was funded, in part, by Cray Research Inc.,
7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8  * Corporation, none of whom are responsible for the results.  The author
9  * thanks all of them.
10  *
11  * Redistribution and use in source and binary forms -- with or without
12  * modification -- are permitted for any purpose, provided that
13  * redistributions in source form retain this entire copyright notice and
14  * indicate the origin and nature of any modifications.
15  *
16  * I'd appreciate being given credit for this package in the documentation
17  * of software which uses it, but that is not a requirement.
18  *
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.25 2005/07/10 04:54:30 momjian Exp $
31  *
32  */
33
34 #include "regex/regguts.h"
35
36
37
38 /* lazy-DFA representation */
39 struct arcp
40 {                                                               /* "pointer" to an outarc */
41         struct sset *ss;
42         color           co;
43 };
44
45 struct sset
46 {                                                               /* state set */
47         unsigned   *states;                     /* pointer to bitvector */
48         unsigned        hash;                   /* hash of bitvector */
49 #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51                 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52         int                     flags;
53 #define  STARTER         01                     /* the initial state set */
54 #define  POSTSTATE       02                     /* includes the goal state */
55 #define  LOCKED          04                     /* locked in cache */
56 #define  NOPROGRESS  010                /* zero-progress state set */
57         struct arcp ins;                        /* chain of inarcs pointing here */
58         chr                *lastseen;           /* last entered on arrival here */
59         struct sset **outs;                     /* outarc vector indexed by color */
60         struct arcp *inchain;           /* chain-pointer vector for outarcs */
61 };
62
63 struct dfa
64 {
65         int                     nssets;                 /* size of cache */
66         int                     nssused;                /* how many entries occupied yet */
67         int                     nstates;                /* number of states */
68         int                     ncolors;                /* length of outarc and inchain vectors */
69         int                     wordsper;               /* length of state-set bitvectors */
70         struct sset *ssets;                     /* state-set cache */
71         unsigned   *statesarea;         /* bitvector storage */
72         unsigned   *work;                       /* pointer to work area within statesarea */
73         struct sset **outsarea;         /* outarc-vector storage */
74         struct arcp *incarea;           /* inchain storage */
75         struct cnfa *cnfa;
76         struct colormap *cm;
77         chr                *lastpost;           /* location of last cache-flushed success */
78         chr                *lastnopr;           /* location of last cache-flushed
79                                                                  * NOPROGRESS */
80         struct sset *search;            /* replacement-search-pointer memory */
81         int                     cptsmalloced;   /* were the areas individually malloced? */
82         char       *mallocarea;         /* self, or master malloced area, or NULL */
83 };
84
85 #define WORK    1                               /* number of work bitvectors needed */
86
87 /* setup for non-malloc allocation for small cases */
88 #define FEWSTATES       20                      /* must be less than UBITS */
89 #define FEWCOLORS       15
90 struct smalldfa
91 {
92         struct dfa      dfa;
93         struct sset ssets[FEWSTATES * 2];
94         unsigned        statesarea[FEWSTATES * 2 + WORK];
95         struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
96         struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
97 };
98
99 #define DOMALLOC        ((struct smalldfa *)NULL)       /* force malloc */
100
101
102
103 /* internal variables, bundled for easy passing around */
104 struct vars
105 {
106         regex_t    *re;
107         struct guts *g;
108         int                     eflags;                 /* copies of arguments */
109         size_t          nmatch;
110         regmatch_t *pmatch;
111         rm_detail_t *details;
112         chr                *start;                      /* start of string */
113         chr                *search_start;       /* search start of string */
114         chr                *stop;                       /* just past end of string */
115         int                     err;                    /* error code if any (0 none) */
116         regoff_t   *mem;                        /* memory vector for backtracking */
117         struct smalldfa dfa1;
118         struct smalldfa dfa2;
119 };
120
121 #define VISERR(vv)      ((vv)->err != 0)        /* have we seen an error yet? */
122 #define ISERR() VISERR(v)
123 #define VERR(vv,e)      (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
124 #define ERR(e)  VERR(v, e)              /* record an error */
125 #define NOERR() {if (ISERR()) return v->err;}   /* if error seen, return
126                                                                                                  * it */
127 #define OFF(p)  ((p) - v->start)
128 #define LOFF(p) ((long)OFF(p))
129
130
131
132 /*
133  * forward declarations
134  */
135 /* === regexec.c === */
136 static int      find(struct vars *, struct cnfa *, struct colormap *);
137 static int      cfind(struct vars *, struct cnfa *, struct colormap *);
138 static int      cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
139 static void zapsubs(regmatch_t *, size_t);
140 static void zapmem(struct vars *, struct subre *);
141 static void subset(struct vars *, struct subre *, chr *, chr *);
142 static int      dissect(struct vars *, struct subre *, chr *, chr *);
143 static int      condissect(struct vars *, struct subre *, chr *, chr *);
144 static int      altdissect(struct vars *, struct subre *, chr *, chr *);
145 static int      cdissect(struct vars *, struct subre *, chr *, chr *);
146 static int      ccondissect(struct vars *, struct subre *, chr *, chr *);
147 static int      crevdissect(struct vars *, struct subre *, chr *, chr *);
148 static int      cbrdissect(struct vars *, struct subre *, chr *, chr *);
149 static int      caltdissect(struct vars *, struct subre *, chr *, chr *);
150
151 /* === rege_dfa.c === */
152 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
153 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
154 static chr *lastcold(struct vars *, struct dfa *);
155 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
156 static void freedfa(struct dfa *);
157 static unsigned hash(unsigned *, int);
158 static struct sset *initialize(struct vars *, struct dfa *, chr *);
159 static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
160 static int      lacon(struct vars *, struct cnfa *, chr *, pcolor);
161 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
162 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
163
164
165 /*
166  * pg_regexec - match regular expression
167  */
168 int
169 pg_regexec(regex_t *re,
170                    const chr *string,
171                    size_t len,
172                    size_t search_start,
173                    rm_detail_t *details,
174                    size_t nmatch,
175                    regmatch_t pmatch[],
176                    int flags)
177 {
178         struct vars var;
179         register struct vars *v = &var;
180         int                     st;
181         size_t          n;
182         int                     backref;
183
184 #define  LOCALMAT        20
185         regmatch_t      mat[LOCALMAT];
186
187 #define  LOCALMEM        40
188         regoff_t        mem[LOCALMEM];
189
190         /* sanity checks */
191         if (re == NULL || string == NULL || re->re_magic != REMAGIC)
192                 return REG_INVARG;
193         if (re->re_csize != sizeof(chr))
194                 return REG_MIXED;
195
196         /* setup */
197         v->re = re;
198         v->g = (struct guts *) re->re_guts;
199         if ((v->g->cflags & REG_EXPECT) && details == NULL)
200                 return REG_INVARG;
201         if (v->g->info & REG_UIMPOSSIBLE)
202                 return REG_NOMATCH;
203         backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
204         v->eflags = flags;
205         if (v->g->cflags & REG_NOSUB)
206                 nmatch = 0;                             /* override client */
207         v->nmatch = nmatch;
208         if (backref)
209         {
210                 /* need work area */
211                 if (v->g->nsub + 1 <= LOCALMAT)
212                         v->pmatch = mat;
213                 else
214                         v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
215                                                                                           sizeof(regmatch_t));
216                 if (v->pmatch == NULL)
217                         return REG_ESPACE;
218                 v->nmatch = v->g->nsub + 1;
219         }
220         else
221                 v->pmatch = pmatch;
222         v->details = details;
223         v->start = (chr *) string;
224         v->search_start = (chr *) string + search_start;
225         v->stop = (chr *) string + len;
226         v->err = 0;
227         if (backref)
228         {
229                 /* need retry memory */
230                 assert(v->g->ntree >= 0);
231                 n = (size_t) v->g->ntree;
232                 if (n <= LOCALMEM)
233                         v->mem = mem;
234                 else
235                         v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t));
236                 if (v->mem == NULL)
237                 {
238                         if (v->pmatch != pmatch && v->pmatch != mat)
239                                 FREE(v->pmatch);
240                         return REG_ESPACE;
241                 }
242         }
243         else
244                 v->mem = NULL;
245
246         /* do it */
247         assert(v->g->tree != NULL);
248         if (backref)
249                 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
250         else
251                 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
252
253         /* copy (portion of) match vector over if necessary */
254         if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
255         {
256                 zapsubs(pmatch, nmatch);
257                 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
258                 memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
259         }
260
261         /* clean up */
262         if (v->pmatch != pmatch && v->pmatch != mat)
263                 FREE(v->pmatch);
264         if (v->mem != NULL && v->mem != mem)
265                 FREE(v->mem);
266         return st;
267 }
268
269 /*
270  * find - find a match for the main NFA (no-complications case)
271  */
272 static int
273 find(struct vars * v,
274          struct cnfa * cnfa,
275          struct colormap * cm)
276 {
277         struct dfa *s;
278         struct dfa *d;
279         chr                *begin;
280         chr                *end = NULL;
281         chr                *cold;
282         chr                *open;                       /* open and close of range of possible
283                                                                  * starts */
284         chr                *close;
285         int                     hitend;
286         int                     shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
287
288         /* first, a shot with the search RE */
289         s = newdfa(v, &v->g->search, cm, &v->dfa1);
290         assert(!(ISERR() && s != NULL));
291         NOERR();
292         MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
293         cold = NULL;
294         close = shortest(v, s, v->search_start, v->search_start, v->stop,
295                                          &cold, (int *) NULL);
296         freedfa(s);
297         NOERR();
298         if (v->g->cflags & REG_EXPECT)
299         {
300                 assert(v->details != NULL);
301                 if (cold != NULL)
302                         v->details->rm_extend.rm_so = OFF(cold);
303                 else
304                         v->details->rm_extend.rm_so = OFF(v->stop);
305                 v->details->rm_extend.rm_eo = OFF(v->stop);             /* unknown */
306         }
307         if (close == NULL)                      /* not found */
308                 return REG_NOMATCH;
309         if (v->nmatch == 0)                     /* found, don't need exact location */
310                 return REG_OKAY;
311
312         /* find starting point and match */
313         assert(cold != NULL);
314         open = cold;
315         cold = NULL;
316         MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
317         d = newdfa(v, cnfa, cm, &v->dfa1);
318         assert(!(ISERR() && d != NULL));
319         NOERR();
320         for (begin = open; begin <= close; begin++)
321         {
322                 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
323                 if (shorter)
324                         end = shortest(v, d, begin, begin, v->stop,
325                                                    (chr **) NULL, &hitend);
326                 else
327                         end = longest(v, d, begin, v->stop, &hitend);
328                 NOERR();
329                 if (hitend && cold == NULL)
330                         cold = begin;
331                 if (end != NULL)
332                         break;                          /* NOTE BREAK OUT */
333         }
334         assert(end != NULL);            /* search RE succeeded so loop should */
335         freedfa(d);
336
337         /* and pin down details */
338         assert(v->nmatch > 0);
339         v->pmatch[0].rm_so = OFF(begin);
340         v->pmatch[0].rm_eo = OFF(end);
341         if (v->g->cflags & REG_EXPECT)
342         {
343                 if (cold != NULL)
344                         v->details->rm_extend.rm_so = OFF(cold);
345                 else
346                         v->details->rm_extend.rm_so = OFF(v->stop);
347                 v->details->rm_extend.rm_eo = OFF(v->stop);             /* unknown */
348         }
349         if (v->nmatch == 1)                     /* no need for submatches */
350                 return REG_OKAY;
351
352         /* submatches */
353         zapsubs(v->pmatch, v->nmatch);
354         return dissect(v, v->g->tree, begin, end);
355 }
356
357 /*
358  * cfind - find a match for the main NFA (with complications)
359  */
360 static int
361 cfind(struct vars * v,
362           struct cnfa * cnfa,
363           struct colormap * cm)
364 {
365         struct dfa *s;
366         struct dfa *d;
367         chr                *cold;
368         int                     ret;
369
370         s = newdfa(v, &v->g->search, cm, &v->dfa1);
371         NOERR();
372         d = newdfa(v, cnfa, cm, &v->dfa2);
373         if (ISERR())
374         {
375                 assert(d == NULL);
376                 freedfa(s);
377                 return v->err;
378         }
379
380         ret = cfindloop(v, cnfa, cm, d, s, &cold);
381
382         freedfa(d);
383         freedfa(s);
384         NOERR();
385         if (v->g->cflags & REG_EXPECT)
386         {
387                 assert(v->details != NULL);
388                 if (cold != NULL)
389                         v->details->rm_extend.rm_so = OFF(cold);
390                 else
391                         v->details->rm_extend.rm_so = OFF(v->stop);
392                 v->details->rm_extend.rm_eo = OFF(v->stop);             /* unknown */
393         }
394         return ret;
395 }
396
397 /*
398  * cfindloop - the heart of cfind
399  */
400 static int
401 cfindloop(struct vars * v,
402                   struct cnfa * cnfa,
403                   struct colormap * cm,
404                   struct dfa * d,
405                   struct dfa * s,
406                   chr **coldp)                  /* where to put coldstart pointer */
407 {
408         chr                *begin;
409         chr                *end;
410         chr                *cold;
411         chr                *open;                       /* open and close of range of possible
412                                                                  * starts */
413         chr                *close;
414         chr                *estart;
415         chr                *estop;
416         int                     er;
417         int                     shorter = v->g->tree->flags & SHORTER;
418         int                     hitend;
419
420         assert(d != NULL && s != NULL);
421         cold = NULL;
422         close = v->search_start;
423         do
424         {
425                 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
426                 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
427                 if (close == NULL)
428                         break;                          /* NOTE BREAK */
429                 assert(cold != NULL);
430                 open = cold;
431                 cold = NULL;
432                 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
433                 for (begin = open; begin <= close; begin++)
434                 {
435                         MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
436                         estart = begin;
437                         estop = v->stop;
438                         for (;;)
439                         {
440                                 if (shorter)
441                                         end = shortest(v, d, begin, estart,
442                                                                    estop, (chr **) NULL, &hitend);
443                                 else
444                                         end = longest(v, d, begin, estop,
445                                                                   &hitend);
446                                 if (hitend && cold == NULL)
447                                         cold = begin;
448                                 if (end == NULL)
449                                         break;          /* NOTE BREAK OUT */
450                                 MDEBUG(("tentative end %ld\n", LOFF(end)));
451                                 zapsubs(v->pmatch, v->nmatch);
452                                 zapmem(v, v->g->tree);
453                                 er = cdissect(v, v->g->tree, begin, end);
454                                 if (er == REG_OKAY)
455                                 {
456                                         if (v->nmatch > 0)
457                                         {
458                                                 v->pmatch[0].rm_so = OFF(begin);
459                                                 v->pmatch[0].rm_eo = OFF(end);
460                                         }
461                                         *coldp = cold;
462                                         return REG_OKAY;
463                                 }
464                                 if (er != REG_NOMATCH)
465                                 {
466                                         ERR(er);
467                                         return er;
468                                 }
469                                 if ((shorter) ? end == estop : end == begin)
470                                 {
471                                         /* no point in trying again */
472                                         *coldp = cold;
473                                         return REG_NOMATCH;
474                                 }
475                                 /* go around and try again */
476                                 if (shorter)
477                                         estart = end + 1;
478                                 else
479                                         estop = end - 1;
480                         }
481                 }
482         } while (close < v->stop);
483
484         *coldp = cold;
485         return REG_NOMATCH;
486 }
487
488 /*
489  * zapsubs - initialize the subexpression matches to "no match"
490  */
491 static void
492 zapsubs(regmatch_t *p,
493                 size_t n)
494 {
495         size_t          i;
496
497         for (i = n - 1; i > 0; i--)
498         {
499                 p[i].rm_so = -1;
500                 p[i].rm_eo = -1;
501         }
502 }
503
504 /*
505  * zapmem - initialize the retry memory of a subtree to zeros
506  */
507 static void
508 zapmem(struct vars * v,
509            struct subre * t)
510 {
511         if (t == NULL)
512                 return;
513
514         assert(v->mem != NULL);
515         v->mem[t->retry] = 0;
516         if (t->op == '(')
517         {
518                 assert(t->subno > 0);
519                 v->pmatch[t->subno].rm_so = -1;
520                 v->pmatch[t->subno].rm_eo = -1;
521         }
522
523         if (t->left != NULL)
524                 zapmem(v, t->left);
525         if (t->right != NULL)
526                 zapmem(v, t->right);
527 }
528
529 /*
530  * subset - set any subexpression relevant to a successful subre
531  */
532 static void
533 subset(struct vars * v,
534            struct subre * sub,
535            chr *begin,
536            chr *end)
537 {
538         int                     n = sub->subno;
539
540         assert(n > 0);
541         if ((size_t) n >= v->nmatch)
542                 return;
543
544         MDEBUG(("setting %d\n", n));
545         v->pmatch[n].rm_so = OFF(begin);
546         v->pmatch[n].rm_eo = OFF(end);
547 }
548
549 /*
550  * dissect - determine subexpression matches (uncomplicated case)
551  */
552 static int                                              /* regexec return code */
553 dissect(struct vars * v,
554                 struct subre * t,
555                 chr *begin,                             /* beginning of relevant substring */
556                 chr *end)                               /* end of same */
557 {
558         assert(t != NULL);
559         MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
560
561         switch (t->op)
562         {
563                 case '=':                               /* terminal node */
564                         assert(t->left == NULL && t->right == NULL);
565                         return REG_OKAY;        /* no action, parent did the work */
566                         break;
567                 case '|':                               /* alternation */
568                         assert(t->left != NULL);
569                         return altdissect(v, t, begin, end);
570                         break;
571                 case 'b':                               /* back ref -- shouldn't be calling us! */
572                         return REG_ASSERT;
573                         break;
574                 case '.':                               /* concatenation */
575                         assert(t->left != NULL && t->right != NULL);
576                         return condissect(v, t, begin, end);
577                         break;
578                 case '(':                               /* capturing */
579                         assert(t->left != NULL && t->right == NULL);
580                         assert(t->subno > 0);
581                         subset(v, t, begin, end);
582                         return dissect(v, t->left, begin, end);
583                         break;
584                 default:
585                         return REG_ASSERT;
586                         break;
587         }
588 }
589
590 /*
591  * condissect - determine concatenation subexpression matches (uncomplicated)
592  */
593 static int                                              /* regexec return code */
594 condissect(struct vars * v,
595                    struct subre * t,
596                    chr *begin,                  /* beginning of relevant substring */
597                    chr *end)                    /* end of same */
598 {
599         struct dfa *d;
600         struct dfa *d2;
601         chr                *mid;
602         int                     i;
603         int                     shorter = (t->left->flags & SHORTER) ? 1 : 0;
604         chr                *stop = (shorter) ? end : begin;
605
606         assert(t->op == '.');
607         assert(t->left != NULL && t->left->cnfa.nstates > 0);
608         assert(t->right != NULL && t->right->cnfa.nstates > 0);
609
610         d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
611         NOERR();
612         d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
613         if (ISERR())
614         {
615                 assert(d2 == NULL);
616                 freedfa(d);
617                 return v->err;
618         }
619
620         /* pick a tentative midpoint */
621         if (shorter)
622                 mid = shortest(v, d, begin, begin, end, (chr **) NULL,
623                                            (int *) NULL);
624         else
625                 mid = longest(v, d, begin, end, (int *) NULL);
626         if (mid == NULL)
627         {
628                 freedfa(d);
629                 freedfa(d2);
630                 return REG_ASSERT;
631         }
632         MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
633
634         /* iterate until satisfaction or failure */
635         while (longest(v, d2, mid, end, (int *) NULL) != end)
636         {
637                 /* that midpoint didn't work, find a new one */
638                 if (mid == stop)
639                 {
640                         /* all possibilities exhausted! */
641                         MDEBUG(("no midpoint!\n"));
642                         freedfa(d);
643                         freedfa(d2);
644                         return REG_ASSERT;
645                 }
646                 if (shorter)
647                         mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL,
648                                                    (int *) NULL);
649                 else
650                         mid = longest(v, d, begin, mid - 1, (int *) NULL);
651                 if (mid == NULL)
652                 {
653                         /* failed to find a new one! */
654                         MDEBUG(("failed midpoint!\n"));
655                         freedfa(d);
656                         freedfa(d2);
657                         return REG_ASSERT;
658                 }
659                 MDEBUG(("new midpoint %ld\n", LOFF(mid)));
660         }
661
662         /* satisfaction */
663         MDEBUG(("successful\n"));
664         freedfa(d);
665         freedfa(d2);
666         i = dissect(v, t->left, begin, mid);
667         if (i != REG_OKAY)
668                 return i;
669         return dissect(v, t->right, mid, end);
670 }
671
672 /*
673  * altdissect - determine alternative subexpression matches (uncomplicated)
674  */
675 static int                                              /* regexec return code */
676 altdissect(struct vars * v,
677                    struct subre * t,
678                    chr *begin,                  /* beginning of relevant substring */
679                    chr *end)                    /* end of same */
680 {
681         struct dfa *d;
682         int                     i;
683
684         assert(t != NULL);
685         assert(t->op == '|');
686
687         for (i = 0; t != NULL; t = t->right, i++)
688         {
689                 MDEBUG(("trying %dth\n", i));
690                 assert(t->left != NULL && t->left->cnfa.nstates > 0);
691                 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
692                 if (ISERR())
693                         return v->err;
694                 if (longest(v, d, begin, end, (int *) NULL) == end)
695                 {
696                         MDEBUG(("success\n"));
697                         freedfa(d);
698                         return dissect(v, t->left, begin, end);
699                 }
700                 freedfa(d);
701         }
702         return REG_ASSERT;                      /* none of them matched?!? */
703 }
704
705 /*
706  * cdissect - determine subexpression matches (with complications)
707  * The retry memory stores the offset of the trial midpoint from begin,
708  * plus 1 so that 0 uniquely means "clean slate".
709  */
710 static int                                              /* regexec return code */
711 cdissect(struct vars * v,
712                  struct subre * t,
713                  chr *begin,                    /* beginning of relevant substring */
714                  chr *end)                              /* end of same */
715 {
716         int                     er;
717
718         assert(t != NULL);
719         MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
720
721         switch (t->op)
722         {
723                 case '=':                               /* terminal node */
724                         assert(t->left == NULL && t->right == NULL);
725                         return REG_OKAY;        /* no action, parent did the work */
726                         break;
727                 case '|':                               /* alternation */
728                         assert(t->left != NULL);
729                         return caltdissect(v, t, begin, end);
730                         break;
731                 case 'b':                               /* back ref -- shouldn't be calling us! */
732                         assert(t->left == NULL && t->right == NULL);
733                         return cbrdissect(v, t, begin, end);
734                         break;
735                 case '.':                               /* concatenation */
736                         assert(t->left != NULL && t->right != NULL);
737                         return ccondissect(v, t, begin, end);
738                         break;
739                 case '(':                               /* capturing */
740                         assert(t->left != NULL && t->right == NULL);
741                         assert(t->subno > 0);
742                         er = cdissect(v, t->left, begin, end);
743                         if (er == REG_OKAY)
744                                 subset(v, t, begin, end);
745                         return er;
746                         break;
747                 default:
748                         return REG_ASSERT;
749                         break;
750         }
751 }
752
753 /*
754  * ccondissect - concatenation subexpression matches (with complications)
755  * The retry memory stores the offset of the trial midpoint from begin,
756  * plus 1 so that 0 uniquely means "clean slate".
757  */
758 static int                                              /* regexec return code */
759 ccondissect(struct vars * v,
760                         struct subre * t,
761                         chr *begin,                     /* beginning of relevant substring */
762                         chr *end)                       /* end of same */
763 {
764         struct dfa *d;
765         struct dfa *d2;
766         chr                *mid;
767         int                     er;
768
769         assert(t->op == '.');
770         assert(t->left != NULL && t->left->cnfa.nstates > 0);
771         assert(t->right != NULL && t->right->cnfa.nstates > 0);
772
773         if (t->left->flags & SHORTER)           /* reverse scan */
774                 return crevdissect(v, t, begin, end);
775
776         d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
777         if (ISERR())
778                 return v->err;
779         d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
780         if (ISERR())
781         {
782                 freedfa(d);
783                 return v->err;
784         }
785         MDEBUG(("cconcat %d\n", t->retry));
786
787         /* pick a tentative midpoint */
788         if (v->mem[t->retry] == 0)
789         {
790                 mid = longest(v, d, begin, end, (int *) NULL);
791                 if (mid == NULL)
792                 {
793                         freedfa(d);
794                         freedfa(d2);
795                         return REG_NOMATCH;
796                 }
797                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
798                 v->mem[t->retry] = (mid - begin) + 1;
799         }
800         else
801         {
802                 mid = begin + (v->mem[t->retry] - 1);
803                 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
804         }
805
806         /* iterate until satisfaction or failure */
807         for (;;)
808         {
809                 /* try this midpoint on for size */
810                 er = cdissect(v, t->left, begin, mid);
811                 if (er == REG_OKAY &&
812                         longest(v, d2, mid, end, (int *) NULL) == end &&
813                         (er = cdissect(v, t->right, mid, end)) ==
814                         REG_OKAY)
815                         break;                          /* NOTE BREAK OUT */
816                 if (er != REG_OKAY && er != REG_NOMATCH)
817                 {
818                         freedfa(d);
819                         freedfa(d2);
820                         return er;
821                 }
822
823                 /* that midpoint didn't work, find a new one */
824                 if (mid == begin)
825                 {
826                         /* all possibilities exhausted */
827                         MDEBUG(("%d no midpoint\n", t->retry));
828                         freedfa(d);
829                         freedfa(d2);
830                         return REG_NOMATCH;
831                 }
832                 mid = longest(v, d, begin, mid - 1, (int *) NULL);
833                 if (mid == NULL)
834                 {
835                         /* failed to find a new one */
836                         MDEBUG(("%d failed midpoint\n", t->retry));
837                         freedfa(d);
838                         freedfa(d2);
839                         return REG_NOMATCH;
840                 }
841                 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
842                 v->mem[t->retry] = (mid - begin) + 1;
843                 zapmem(v, t->left);
844                 zapmem(v, t->right);
845         }
846
847         /* satisfaction */
848         MDEBUG(("successful\n"));
849         freedfa(d);
850         freedfa(d2);
851         return REG_OKAY;
852 }
853
854 /*
855  * crevdissect - determine backref shortest-first subexpression matches
856  * The retry memory stores the offset of the trial midpoint from begin,
857  * plus 1 so that 0 uniquely means "clean slate".
858  */
859 static int                                              /* regexec return code */
860 crevdissect(struct vars * v,
861                         struct subre * t,
862                         chr *begin,                     /* beginning of relevant substring */
863                         chr *end)                       /* end of same */
864 {
865         struct dfa *d;
866         struct dfa *d2;
867         chr                *mid;
868         int                     er;
869
870         assert(t->op == '.');
871         assert(t->left != NULL && t->left->cnfa.nstates > 0);
872         assert(t->right != NULL && t->right->cnfa.nstates > 0);
873         assert(t->left->flags & SHORTER);
874
875         /* concatenation -- need to split the substring between parts */
876         d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
877         if (ISERR())
878                 return v->err;
879         d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
880         if (ISERR())
881         {
882                 freedfa(d);
883                 return v->err;
884         }
885         MDEBUG(("crev %d\n", t->retry));
886
887         /* pick a tentative midpoint */
888         if (v->mem[t->retry] == 0)
889         {
890                 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
891                 if (mid == NULL)
892                 {
893                         freedfa(d);
894                         freedfa(d2);
895                         return REG_NOMATCH;
896                 }
897                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
898                 v->mem[t->retry] = (mid - begin) + 1;
899         }
900         else
901         {
902                 mid = begin + (v->mem[t->retry] - 1);
903                 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
904         }
905
906         /* iterate until satisfaction or failure */
907         for (;;)
908         {
909                 /* try this midpoint on for size */
910                 er = cdissect(v, t->left, begin, mid);
911                 if (er == REG_OKAY &&
912                         longest(v, d2, mid, end, (int *) NULL) == end &&
913                         (er = cdissect(v, t->right, mid, end)) ==
914                         REG_OKAY)
915                         break;                          /* NOTE BREAK OUT */
916                 if (er != REG_OKAY && er != REG_NOMATCH)
917                 {
918                         freedfa(d);
919                         freedfa(d2);
920                         return er;
921                 }
922
923                 /* that midpoint didn't work, find a new one */
924                 if (mid == end)
925                 {
926                         /* all possibilities exhausted */
927                         MDEBUG(("%d no midpoint\n", t->retry));
928                         freedfa(d);
929                         freedfa(d2);
930                         return REG_NOMATCH;
931                 }
932                 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
933                 if (mid == NULL)
934                 {
935                         /* failed to find a new one */
936                         MDEBUG(("%d failed midpoint\n", t->retry));
937                         freedfa(d);
938                         freedfa(d2);
939                         return REG_NOMATCH;
940                 }
941                 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
942                 v->mem[t->retry] = (mid - begin) + 1;
943                 zapmem(v, t->left);
944                 zapmem(v, t->right);
945         }
946
947         /* satisfaction */
948         MDEBUG(("successful\n"));
949         freedfa(d);
950         freedfa(d2);
951         return REG_OKAY;
952 }
953
954 /*
955  * cbrdissect - determine backref subexpression matches
956  */
957 static int                                              /* regexec return code */
958 cbrdissect(struct vars * v,
959                    struct subre * t,
960                    chr *begin,                  /* beginning of relevant substring */
961                    chr *end)                    /* end of same */
962 {
963         int                     i;
964         int                     n = t->subno;
965         size_t          len;
966         chr                *paren;
967         chr                *p;
968         chr                *stop;
969         int                     min = t->min;
970         int                     max = t->max;
971
972         assert(t != NULL);
973         assert(t->op == 'b');
974         assert(n >= 0);
975         assert((size_t) n < v->nmatch);
976
977         MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
978
979         if (v->pmatch[n].rm_so == -1)
980                 return REG_NOMATCH;
981         paren = v->start + v->pmatch[n].rm_so;
982         len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
983
984         /* no room to maneuver -- retries are pointless */
985         if (v->mem[t->retry])
986                 return REG_NOMATCH;
987         v->mem[t->retry] = 1;
988
989         /* special-case zero-length string */
990         if (len == 0)
991         {
992                 if (begin == end)
993                         return REG_OKAY;
994                 return REG_NOMATCH;
995         }
996
997         /* and too-short string */
998         assert(end >= begin);
999         if ((size_t) (end - begin) < len)
1000                 return REG_NOMATCH;
1001         stop = end - len;
1002
1003         /* count occurrences */
1004         i = 0;
1005         for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
1006         {
1007                 if ((*v->g->compare) (paren, p, len) != 0)
1008                         break;
1009                 i++;
1010         }
1011         MDEBUG(("cbackref found %d\n", i));
1012
1013         /* and sort it out */
1014         if (p != end)                           /* didn't consume all of it */
1015                 return REG_NOMATCH;
1016         if (min <= i && (i <= max || max == INFINITY))
1017                 return REG_OKAY;
1018         return REG_NOMATCH;                     /* out of range */
1019 }
1020
1021 /*
1022  * caltdissect - determine alternative subexpression matches (w. complications)
1023  */
1024 static int                                              /* regexec return code */
1025 caltdissect(struct vars * v,
1026                         struct subre * t,
1027                         chr *begin,                     /* beginning of relevant substring */
1028                         chr *end)                       /* end of same */
1029 {
1030         struct dfa *d;
1031         int                     er;
1032
1033 #define  UNTRIED 0                              /* not yet tried at all */
1034 #define  TRYING  1                              /* top matched, trying submatches */
1035 #define  TRIED   2                              /* top didn't match or submatches
1036                                                                  * exhausted */
1037
1038         if (t == NULL)
1039                 return REG_NOMATCH;
1040         assert(t->op == '|');
1041         if (v->mem[t->retry] == TRIED)
1042                 return caltdissect(v, t->right, begin, end);
1043
1044         MDEBUG(("calt n%d\n", t->retry));
1045         assert(t->left != NULL);
1046
1047         if (v->mem[t->retry] == UNTRIED)
1048         {
1049                 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1050                 if (ISERR())
1051                         return v->err;
1052                 if (longest(v, d, begin, end, (int *) NULL) != end)
1053                 {
1054                         freedfa(d);
1055                         v->mem[t->retry] = TRIED;
1056                         return caltdissect(v, t->right, begin, end);
1057                 }
1058                 freedfa(d);
1059                 MDEBUG(("calt matched\n"));
1060                 v->mem[t->retry] = TRYING;
1061         }
1062
1063         er = cdissect(v, t->left, begin, end);
1064         if (er != REG_NOMATCH)
1065                 return er;
1066
1067         v->mem[t->retry] = TRIED;
1068         return caltdissect(v, t->right, begin, end);
1069 }
1070
1071
1072
1073 #include "rege_dfa.c"