granicus.if.org Git - re2c/commitdiff
In greedy regexps first alternative must correspond to consuming path.
author Ulya Trofimovich <skvadrik@gmail.com>
Mon, 13 Feb 2017 15:01:12 +0000 (15:01 +0000)
committer Ulya Trofimovich <skvadrik@gmail.com>
Mon, 13 Feb 2017 15:01:12 +0000 (15:01 +0000)
By convention, the first alternative has higher priority.
So, for example, the following must be true:
    r*     = rr* | <empty>
    r{n,m} = r{n} (r{m - n} | r{m - n - 1} | ... | r{1} | <empty>)
    r{n,}  = r{n} (rr* | <empty>)

For now we don't care about priorities: this is a preparatory step
before the transition to greedy leftmost semantics for tags.

re2c/bootstrap/src/parse/lex.cc
re2c/bootstrap/src/parse/parser.cc
re2c/src/ir/nfa/regexps2nfa.cc
re2c/src/ir/regexp/regexp.cc
re2c/src/parse/parser.ypp

index a0d62ceaa28a9dc1b4296d81cc3471493f21ca45..d25052895a91ece7ff5f02021780f65064742ca8 100644 (file)
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.16 on Mon Jan 23 16:17:28 2017 */
+/* Generated by re2c 0.16 on Mon Feb 13 14:29:09 2017 */
 #line 1 "../src/parse/lex.re"
 #include "src/util/c99_stdint.h"
 #include <stddef.h>
index 4eb4a38e56a57c22106cc0435f28df2f9f65f45a..a8b1e5f711ee786274bc3a3d2416ae2094b602dc 100644 (file)
@@ -1632,8 +1632,8 @@ yyreduce:
                        switch((yyvsp[0].op))
                        {
                        case '*':
-                               (yyval.regexp) = RegExp::make_alt(RegExp::make_nil(),
-                                       RegExp::make_iter((yyvsp[-1].regexp)));
+                               (yyval.regexp) = RegExp::make_alt(RegExp::make_iter((yyvsp[-1].regexp)),
+                                       RegExp::make_nil());
                                break;
                        case '+':
                                (yyval.regexp) = RegExp::make_iter((yyvsp[-1].regexp));
index 62fd9b16c0b79d53aeacd01e2b8156b42b2ec06c..cab4290a8f5a6f37675043e102f9586625a16f7e 100644 (file)
@@ -76,7 +76,7 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &dist,
                        // see note [Kleene star is expressed in terms of plus]
                        nfa_state_t *q = &nfa.states[nfa.size++];
                        s = regexp2nfa(nfa, nrule, dist, base, false, re->iter, q);
-                       q->make_alt(nrule, t, s);
+                       q->make_alt(nrule, s, t);
 
                        dist = VARDIST;
                        break;
index cde534ae0bcc0baca1a33f22f4d2625f8a04ff83..530f1c47ba943d0827e98879672356c3bbd178cd 100644 (file)
@@ -26,45 +26,14 @@ const RegExp *doAlt(const RegExp *re1, const RegExp *re2)
        return RegExp::make_alt(re1, re2);
 }
 
-static const RegExp *merge(const RegExp *sym1, const RegExp *sym2)
-{
-       if (!sym1) {
-               return sym2;
-       }
-       if (!sym2) {
-               return sym1;
-       }
-       return RegExp::make_sym(Range::add(sym1->sym, sym2->sym));
-}
-
-static const RegExp *lift_sym(const RegExp *&re)
-{
-       if (!re) {
-               return NULL;
-       }
-       if (re->type == RegExp::SYM) {
-               const RegExp *sym = re;
-               re = NULL;
-               return sym;
-       }
-       if (re->type == RegExp::ALT) {
-               // second alternative cannot be SYM by construction
-               const RegExp *alt1 = re->alt.re1;
-               if (alt1 && alt1->type == RegExp::SYM) {
-                       re = re->alt.re2;
-                       return alt1;
-               }
-       }
-       return NULL;
-}
-
 const RegExp *mkAlt(const RegExp *re1, const RegExp *re2)
 {
-       const RegExp *sym1 = lift_sym(re1);
-       const RegExp *sym2 = lift_sym(re2);
-       return doAlt(
-               merge(sym1, sym2),
-               doAlt(re1, re2));
+       if (!re1) return re2;
+       if (!re2) return re1;
+       if (re1->type == RegExp::SYM && re2->type == RegExp::SYM) {
+               return RegExp::make_sym(Range::add(re1->sym, re2->sym));
+       }
+       return RegExp::make_alt(re1, re2);
 }
 
 const RegExp *doCat(const RegExp *re1, const RegExp *re2)
@@ -185,8 +154,7 @@ const RegExp *repeat_from_to(const RegExp *re, uint32_t n, uint32_t m)
        const RegExp *r1 = repeat(re, n);
        const RegExp *r2 = NULL;
        for (uint32_t i = n; i < m; ++i) {
-               r2 = mkAlt(RegExp::make_nil(),
-                       doCat(re, r2));
+               r2 = mkAlt(doCat(re, r2), RegExp::make_nil());
        }
        return doCat(r1, r2);
 }
@@ -196,7 +164,7 @@ const RegExp *repeat_from(const RegExp *re, uint32_t n)
 {
        // see note [Kleene star is expressed in terms of plus]
        return doCat(repeat(re, n),
-               RegExp::make_alt(RegExp::make_nil(), RegExp::make_iter(re)));
+               RegExp::make_alt(RegExp::make_iter(re), RegExp::make_nil()));
 }
 
 } // namespace re2c
index 92a6dc0491394b495bcdfcef071627f8a57661db..e53107a7256a396e0e9bf7d159de9322c994a754 100644 (file)
@@ -344,8 +344,8 @@ factor:
                        switch($2)
                        {
                        case '*':
-                               $$ = RegExp::make_alt(RegExp::make_nil(),
-                                       RegExp::make_iter($1));
+                               $$ = RegExp::make_alt(RegExp::make_iter($1),
+                                       RegExp::make_nil());
                                break;
                        case '+':
                                $$ = RegExp::make_iter($1);