granicus.if.org Git - re2c/commitdiff
Don't force mutations of immutable regexp AST.
author Ulya Trofimovich <skvadrik@gmail.com>
Fri, 6 May 2016 10:29:57 +0000 (11:29 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Fri, 6 May 2016 10:45:04 +0000 (11:45 +0100)
Regexp AST should stay immutable as it may be shared between
different conditions. This means we have to store tag indices
somewhere else.

17 files changed:
re2c/bootstrap/src/parse/lex.cc
re2c/bootstrap/src/parse/parser.cc
re2c/src/ir/ctx.h
re2c/src/ir/nfa/make_tags.cc
re2c/src/ir/nfa/nfa.cc
re2c/src/ir/nfa/nfa.h
re2c/src/ir/nfa/nullable.cc
re2c/src/ir/nfa/regexps2nfa.cc
re2c/src/ir/nfa/sizeof_regexps.cc
re2c/src/ir/regexp/encoding/range_suffix.cc
re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
re2c/src/ir/regexp/regexp.cc
re2c/src/ir/regexp/regexp.h
re2c/src/ir/regexp/split_charset.cc
re2c/src/parse/lex.re
re2c/src/parse/parser.ypp

index eace75f806a460ee3a10484079594ee54d2f1902..b1c176676647b8c948b7dfa1d929646087384375 100644 (file)
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.16 on Thu May  5 17:05:17 2016 */
+/* Generated by re2c 0.16 on Fri May  6 10:34:02 2016 */
 #line 1 "../src/parse/lex.re"
 #include "src/util/c99_stdint.h"
 #include <stddef.h>
@@ -1224,7 +1224,7 @@ yy198:
                                                        const uint32_t c = static_cast<uint8_t>(*s);
                                                        r = doCat(r, casing ? ichr(c) : schr(c));
                                                }
-                                               yylval.regexp = r ? r : RegExp::nil();
+                                               yylval.regexp = r ? r : RegExp::make_nil();
                                                return TOKEN_REGEXP;
                                        }
                                }
@@ -1342,7 +1342,7 @@ yy212:
                        fatal("tags are only allowed with '-T, --tags' option");
                }
                const std::string *name = new std::string(tok + 1, tok_len() - 1);
-               yylval.regexp = RegExp::ctx(name);
+               yylval.regexp = RegExp::make_tag(name);
                return TOKEN_REGEXP;
        }
 #line 1349 "src/parse/lex.cc"
@@ -2972,7 +2972,7 @@ const RegExp *Scanner::lex_str(char quote, bool casing)
        for (bool end;;) {
                const uint32_t c = lex_str_chr(quote, end);
                if (end) {
-                       return r ? r : RegExp::nil();
+                       return r ? r : RegExp::make_nil();
                }
                r = doCat(r, casing ? ichr(c) : schr(c));
        }
index ed9d06bcadd8ebfb407d4409e8713adc0d7074c7..fb5f17dd20a748f5a3f28d7d20d734055828e993 100644 (file)
@@ -558,8 +558,8 @@ static const yytype_uint16 yyrline[] =
        0,   157,   157,   159,   160,   161,   166,   173,   178,   181,
      185,   185,   188,   197,   208,   212,   218,   224,   231,   240,
      248,   258,   269,   275,   281,   284,   291,   297,   307,   310,
-     317,   321,   326,   330,   337,   341,   348,   352,   359,   363,
-     378,   397,   401,   405,   409,   416,   426,   430
+     317,   321,   327,   331,   338,   342,   349,   353,   360,   364,
+     379,   398,   402,   406,   410,   417,   427,   431
 };
 #endif
 
@@ -1641,7 +1641,7 @@ yyreduce:
                if (specNone) {
                        in->fatal("code to handle illegal condition already defined");
                }
-               specNone = new RegExpRule(RegExp::nil());
+               specNone = new RegExpRule(RegExp::make_nil());
                specNone->info = new RuleInfo((yyvsp[(3) - (3)].code)->loc, (yyvsp[(3) - (3)].code), (yyvsp[(2) - (3)].str));
                delete (yyvsp[(2) - (3)].str);
        ;}
@@ -1655,7 +1655,7 @@ yyreduce:
                        in->fatal("code to handle illegal condition already defined");
                }
                Loc loc(in->get_fname(), in->get_cline());
-               specNone = new RegExpRule(RegExp::nil());
+               specNone = new RegExpRule(RegExp::make_nil());
                specNone->info = new RuleInfo(loc, NULL, (yyvsp[(3) - (3)].str));
                delete (yyvsp[(3) - (3)].str);
        ;}
@@ -1733,7 +1733,8 @@ yyreduce:
   case 31:
 
     {
-               (yyval.rule) = new RegExpRule(RegExp::cat((yyvsp[(1) - (3)].regexp), RegExp::cat(RegExp::ctx(NULL), (yyvsp[(3) - (3)].regexp))));
+               (yyval.rule) = new RegExpRule(RegExp::make_cat((yyvsp[(1) - (3)].regexp),
+                       RegExp::make_cat(RegExp::make_tag(NULL), (yyvsp[(3) - (3)].regexp))));
        ;}
     break;
 
@@ -1775,7 +1776,7 @@ yyreduce:
   case 37:
 
     {
-                       (yyval.regexp) = RegExp::cat((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp));
+                       (yyval.regexp) = RegExp::make_cat((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp));
                ;}
     break;
 
@@ -1792,13 +1793,13 @@ yyreduce:
                        switch((yyvsp[(2) - (2)].op))
                        {
                        case '*':
-                               (yyval.regexp) = RegExp::iter((yyvsp[(1) - (2)].regexp));
+                               (yyval.regexp) = RegExp::make_iter((yyvsp[(1) - (2)].regexp));
                                break;
                        case '+':
-                               (yyval.regexp) = RegExp::cat(RegExp::iter((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp));
+                               (yyval.regexp) = RegExp::make_cat(RegExp::make_iter((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp));
                                break;
                        case '?':
-                               (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), RegExp::nil());
+                               (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), RegExp::make_nil());
                                break;
                        }
                ;}
@@ -1819,7 +1820,7 @@ yyreduce:
                        {
                                (yyval.regexp) = repeat_from_to ((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].extop).min, (yyvsp[(2) - (2)].extop).max);
                        }
-                       (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : RegExp::nil();
+                       (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : RegExp::make_nil();
                ;}
     break;
 
index 06b92bdbf14a4e7ea7f37078d5982eab4ed3125e..9449e047118368fb8ce1892016196fc1b51c3e60 100644 (file)
@@ -1,11 +1,14 @@
 #ifndef _RE2C_IR_CTX_
 #define _RE2C_IR_CTX_
 
+#include <limits>
 #include <string>
 
 namespace re2c
 {
 
+static const size_t NO_TAG = std::numeric_limits<size_t>::max();
+
 struct CtxVar
 {
        size_t rule;
index b96be8e7d7b62356cd05dadcac5c4a5536657464..e82e0913d8fb788e1e5b839df742d5b9a91a6748 100644 (file)
@@ -10,9 +10,11 @@ static const size_t VARDIST = std::numeric_limits<size_t>::max();
 
 static void make_tags_var(size_t nrule,
        std::vector<CtxVar> &vartags,
-       const RegExp *re, size_t &dist)
+       std::vector<size_t> &tagidxs,
+       const RegExp *re,
+       size_t &dist)
 {
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL: break;
                case RegExp::SYM:
                        if (dist != VARDIST) {
@@ -21,48 +23,53 @@ static void make_tags_var(size_t nrule,
                        break;
                case RegExp::ALT: {
                        size_t d1 = dist, d2 = dist;
-                       make_tags_var(nrule, vartags, re->pld.alt.re1, d1);
-                       make_tags_var(nrule, vartags, re->pld.alt.re2, d2);
+                       make_tags_var(nrule, vartags, tagidxs, re->alt.re1, d1);
+                       make_tags_var(nrule, vartags, tagidxs, re->alt.re2, d2);
                        dist = (d1 == d2) ? d1 : VARDIST;
                        break;
                }
                case RegExp::CAT:
-                       make_tags_var(nrule, vartags, re->pld.cat.re2, dist);
-                       make_tags_var(nrule, vartags, re->pld.cat.re1, dist);
+                       make_tags_var(nrule, vartags, tagidxs, re->cat.re2, dist);
+                       make_tags_var(nrule, vartags, tagidxs, re->cat.re1, dist);
                        break;
                case RegExp::ITER:
                        dist = VARDIST;
-                       make_tags_var(nrule, vartags, re->pld.iter.re, dist);
+                       make_tags_var(nrule, vartags, tagidxs, re->iter, dist);
                        break;
                case RegExp::TAG:
-                       (size_t&)re->pld.ctx.idx = vartags.size();
-                       vartags.push_back(CtxVar(re->pld.ctx.name, nrule));
+                       tagidxs.push_back(vartags.size());
+                       vartags.push_back(CtxVar(re->tag, nrule));
                        break;
        }
 }
 
 static void make_tags_var_fix(size_t nrule,
-       std::vector<CtxVar> &vartags, std::vector<CtxFix> &fixtags,
-       const RegExp *re, size_t &dist, size_t &base)
+       std::vector<CtxVar> &vartags,
+       std::vector<CtxFix> &fixtags,
+       std::vector<size_t> &tagidxs,
+       const RegExp *re,
+       size_t &dist,
+       size_t &base)
 {
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL:
                case RegExp::SYM:
                case RegExp::ALT:
                case RegExp::ITER:
-                       make_tags_var(nrule, vartags, re, dist);
+                       make_tags_var(nrule, vartags, tagidxs, re, dist);
                        break;
                case RegExp::CAT:
-                       make_tags_var_fix(nrule, vartags, fixtags, re->pld.cat.re2, dist, base);
-                       make_tags_var_fix(nrule, vartags, fixtags, re->pld.cat.re1, dist, base);
+                       make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re2, dist, base);
+                       make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re1, dist, base);
                        break;
                case RegExp::TAG: {
-                       const std::string *name = re->pld.ctx.name;
+                       const std::string *name = re->tag;
                        if (dist == VARDIST) {
-                               base = (size_t&)re->pld.ctx.idx = vartags.size();
+                               tagidxs.push_back(base = vartags.size());
                                vartags.push_back(CtxVar(name, nrule));
                                dist = 0;
                        } else {
+                               tagidxs.push_back(NO_TAG);
                                fixtags.push_back(CtxFix(name, nrule, base, dist));
                        }
                        if (name == NULL) {
@@ -73,8 +80,31 @@ static void make_tags_var_fix(size_t nrule,
        }
 }
 
+/* note [fixed and variable tags]
+ *
+ * If distance between two tags is constant (fixed for all
+ * strings that match the given regular expression), then
+ * lexer needs to track only one of the two tags: the other
+ * tag can be statically calculated from the first one.
+ *
+ * However, this optimization can only be applied to tags
+ * that appear in top-level concatenation, because these
+ * are the only tags that are guaranteed to be initialized.
+ *
+ * One may observe that the same argument can be applied to
+ * subregexps: tags on top-level concatenation of a subregexp
+ * are either initialized all at once, or none of them is
+ * initialized. It may therefore seem that we can fix
+ * same-level tags on each other. However, fixed tags do not
+ * preserve default value: if the tag they are fixed on
+ * remains uninitialized, lexer will still statically
+ * calculate fixed tag value based on initialized value
+ * (and spoil default value expected by the programmer).
+ */
 void make_tags(const std::vector<const RegExpRule*> &rs,
-       std::vector<CtxVar> &vartags, std::vector<CtxFix> &fixtags)
+       std::vector<CtxVar> &vartags,
+       std::vector<CtxFix> &fixtags,
+       std::vector<size_t> &tagidxs)
 {
        const size_t nrs = rs.size();
        for (size_t i = 0; i < nrs; ++i) {
@@ -86,7 +116,7 @@ void make_tags(const std::vector<const RegExpRule*> &rs,
                if (!opts->contexts && opts->input_api.type() == InputAPI::CUSTOM) {
                        dist = VARDIST;
                }
-               make_tags_var_fix(i, vartags, fixtags, rs[i]->re, dist, base);
+               make_tags_var_fix(i, vartags, fixtags, tagidxs, rs[i]->re, dist, base);
        }
 
 }
index 85a92bc35774ddaefb36c2a296fb67d06a061444..ed8c9e846aadf6d5e4414b9d27f63a5cfc34169a 100644 (file)
@@ -11,8 +11,11 @@ nfa_t::nfa_t(const std::vector<const RegExpRule*> &regexps)
        , fixtags(*new std::vector<CtxFix>)
        , root(NULL)
 {
-       make_tags(regexps, vartags, fixtags);
-       regexps2nfa(regexps, *this);
+       std::vector<size_t> tagidxs;
+       make_tags(regexps, vartags, fixtags, tagidxs);
+
+       regexps2nfa(regexps, *this, tagidxs.begin());
+
        init_rules(rules, regexps, vartags, fixtags);
 }
 
index ec247a279dd33b3867a8605a138408a65615409f..729b355e570338d6c9ea8868b9012a52420d7922 100644 (file)
@@ -91,10 +91,15 @@ struct nfa_t
        FORBID_COPY(nfa_t);
 };
 
+typedef std::vector<size_t>::const_iterator tagidx_t;
+
 size_t sizeof_regexps(const std::vector<const RegExpRule*> &regexps);
 void make_tags(const std::vector<const RegExpRule*> &rs,
-       std::vector<CtxVar> &vartags, std::vector<CtxFix> &fixtags);
-void regexps2nfa(const std::vector<const RegExpRule*> &rs, nfa_t &nfa);
+       std::vector<CtxVar> &vartags,
+       std::vector<CtxFix> &fixtags,
+       std::vector<size_t> &tagidxs);
+void regexps2nfa(const std::vector<const RegExpRule*> &rs,
+       nfa_t &nfa, tagidx_t tagidx);
 bool nullable_rule(const RegExpRule *rule);
 void init_rules(std::valarray<Rule> &rules,
        const std::vector<const RegExpRule*> &regexps,
index a1e886816a9cacfac65e548bcf61eb5a7987cb03..a2fbf822255f33c12e72621d76492eba107905e1 100644 (file)
@@ -7,23 +7,23 @@ static bool nullable(const RegExp *re, bool &trail)
        if (trail) {
                return true;
        }
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL:
                case RegExp::ITER:
                        return true;
                case RegExp::TAG:
-                       if (re->pld.ctx.name == NULL) {
+                       if (re->tag == NULL) {
                                trail = true;
                        }
                        return true;
                case RegExp::SYM:
                        return false;
                case RegExp::ALT:
-                       return nullable(re->pld.alt.re1, trail)
-                               || nullable(re->pld.alt.re2, trail);
+                       return nullable(re->alt.re1, trail)
+                               || nullable(re->alt.re2, trail);
                case RegExp::CAT:
-                       return nullable(re->pld.cat.re1, trail)
-                               && nullable(re->pld.cat.re2, trail);
+                       return nullable(re->cat.re1, trail)
+                               && nullable(re->cat.re2, trail);
                default:
                        assert(false);
        }
index e74f4a11d285e346826e7964770c964d05c61933..a7fff690c6762104ab8ada18f48887b0b67efb7f 100644 (file)
@@ -2,34 +2,35 @@
 
 namespace re2c {
 
-static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, const RegExp *re, nfa_state_t *t)
+static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule,
+       tagidx_t &tagidx, const RegExp *re, nfa_state_t *t)
 {
        nfa_state_t *s = NULL;
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL:
                        s = t;
                        break;
                case RegExp::SYM:
                        s = &nfa.states[nfa.size++];
-                       s->ran(nrule, t, re->pld.sym.range);
+                       s->ran(nrule, t, re->sym);
                        break;
                case RegExp::ALT:
                        s = &nfa.states[nfa.size++];
                        s->alt(nrule,
-                               regexp2nfa(nfa, nrule, re->pld.alt.re1, t),
-                               regexp2nfa(nfa, nrule, re->pld.alt.re2, t));
+                               regexp2nfa(nfa, nrule, tagidx, re->alt.re1, t),
+                               regexp2nfa(nfa, nrule, tagidx, re->alt.re2, t));
                        break;
                case RegExp::CAT:
-                       s = regexp2nfa(nfa, nrule, re->pld.cat.re2, t);
-                       s = regexp2nfa(nfa, nrule, re->pld.cat.re1, s);
+                       s = regexp2nfa(nfa, nrule, tagidx, re->cat.re2, t);
+                       s = regexp2nfa(nfa, nrule, tagidx, re->cat.re1, s);
                        break;
                case RegExp::ITER:
                        s = &nfa.states[nfa.size++];
-                       s->alt(nrule, t, regexp2nfa(nfa, nrule, re->pld.iter.re, s));
+                       s->alt(nrule, t, regexp2nfa(nfa, nrule, tagidx, re->iter, s));
                        break;
                case RegExp::TAG: {
-                       const size_t idx = re->pld.ctx.idx;
-                       if (idx != ~0u) {
+                       const size_t idx = *tagidx++;
+                       if (idx != NO_TAG) {
                                s = &nfa.states[nfa.size++];
                                s->ctx(nrule, t, idx);
                        } else {
@@ -41,14 +42,16 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, const RegExp *re, nfa_s
        return s;
 }
 
-static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, const RegExpRule *rule)
+static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule,
+       tagidx_t &tagidx, const RegExpRule *rule)
 {
        nfa_state_t *s = &nfa.states[nfa.size++];
        s->fin(nrule);
-       return regexp2nfa(nfa, nrule, rule->re, s);
+       return regexp2nfa(nfa, nrule, tagidx, rule->re, s);
 }
 
-void regexps2nfa(const std::vector<const RegExpRule*> &rs, nfa_t &nfa)
+void regexps2nfa(const std::vector<const RegExpRule*> &rs,
+       nfa_t &nfa, tagidx_t tagidx)
 {
        const size_t nrs = rs.size();
 
@@ -56,10 +59,10 @@ void regexps2nfa(const std::vector<const RegExpRule*> &rs, nfa_t &nfa)
                return;
        }
 
-       nfa_state_t *s = regexp2nfa_rule(nfa, 0, rs[0]);
+       nfa_state_t *s = regexp2nfa_rule(nfa, 0, tagidx, rs[0]);
        for (size_t i = 1; i < nrs; ++i) {
                nfa_state_t *t = &nfa.states[nfa.size++];
-               t->alt(i, s, regexp2nfa_rule(nfa, i, rs[i]));
+               t->alt(i, s, regexp2nfa_rule(nfa, i, tagidx, rs[i]));
                s = t;
        }
        nfa.root = s;
index eba02b0e48d250a64575d388138ca63373b1033c..1dde2514532a29b5aa99f93fe99414e2742cd77c 100644 (file)
@@ -4,20 +4,20 @@ namespace re2c {
 
 static size_t sizeof_regexp(const RegExp *re)
 {
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL:
                        return 0;
                case RegExp::SYM:
                        return 1;
                case RegExp::ALT:
-                       return sizeof_regexp(re->pld.alt.re1)
-                               + sizeof_regexp(re->pld.alt.re2)
+                       return sizeof_regexp(re->alt.re1)
+                               + sizeof_regexp(re->alt.re2)
                                + 1;
                case RegExp::CAT:
-                       return sizeof_regexp(re->pld.cat.re1)
-                               + sizeof_regexp(re->pld.cat.re2);
+                       return sizeof_regexp(re->cat.re1)
+                               + sizeof_regexp(re->cat.re2);
                case RegExp::ITER:
-                       return sizeof_regexp(re->pld.iter.re)
+                       return sizeof_regexp(re->iter)
                                + 1;
                case RegExp::TAG:
                        return 1;
index 304f188b36efa75235e33e23bf4be637ab3be5ef..70241e360cfddaeec90210dd3807db30064f4ced 100644 (file)
@@ -10,9 +10,8 @@ free_list<RangeSuffix *> RangeSuffix::freeList;
 
 const RegExp * to_regexp (RangeSuffix * p)
 {
-       return p
-               ? emit (p, NULL)
-               : RegExp::sym(NULL);
+       return p ? emit(p, NULL)
+               : RegExp::make_sym(NULL);
 }
 
 /*
@@ -20,14 +19,12 @@ const RegExp * to_regexp (RangeSuffix * p)
  */
 const RegExp * emit(RangeSuffix * p, const RegExp * re)
 {
-       if (p == NULL)
+       if (p == NULL) {
                return re;
-       else
-       {
-               const RegExp * regexp = NULL;
-               for (; p != NULL; p = p->next)
-               {
-                       const RegExp * re1 = doCat(RegExp::sym(Range::ran (p->l, p->h + 1)), re);
+       } else {
+               const RegExp *regexp = NULL;
+               for (; p != NULL; p = p->next) {
+                       const RegExp *re1 = doCat(RegExp::make_sym(Range::ran(p->l, p->h + 1)), re);
                        regexp = doAlt(regexp, emit(p->child, re1));
                }
                return regexp;
index c8b97fcaccbdebfdc10c98a19174ebc82a01cd50..43b46122ca17a957bebe1c5ac7ac9b20d647e1e6 100644 (file)
@@ -10,13 +10,13 @@ namespace re2c {
 
 const RegExp * UTF16Symbol(utf16::rune r)
 {
-       if (r <= utf16::MAX_1WORD_RUNE)
-               return RegExp::sym(Range::sym (r));
-       else
-       {
+       if (r <= utf16::MAX_1WORD_RUNE) {
+               return RegExp::make_sym(Range::sym(r));
+       } else {
                const uint32_t ld = utf16::lead_surr(r);
                const uint32_t tr = utf16::trail_surr(r);
-               return RegExp::cat(RegExp::sym(Range::sym (ld)), RegExp::sym(Range::sym (tr)));
+               return RegExp::make_cat(RegExp::make_sym(Range::sym(ld)),
+                       RegExp::make_sym(Range::sym(tr)));
        }
 }
 
index e2e8c82a8b56b1a2dcf47cdb2cdd4381cc9e5098..7d4ab93e1c41b878b58b7e0d25e9f6f26d0da261 100644 (file)
@@ -12,9 +12,10 @@ const RegExp * UTF8Symbol(utf8::rune r)
 {
        uint32_t chars[utf8::MAX_RUNE_LENGTH];
        const uint32_t chars_count = utf8::rune_to_bytes(chars, r);
-       const RegExp * re = RegExp::sym(Range::sym (chars[0]));
-       for (uint32_t i = 1; i < chars_count; ++i)
-               re = RegExp::cat(re, RegExp::sym(Range::sym (chars[i])));
+       const RegExp *re = RegExp::make_sym(Range::sym(chars[0]));
+       for (uint32_t i = 1; i < chars_count; ++i) {
+               re = RegExp::make_cat(re, RegExp::make_sym(Range::sym(chars[i])));
+       }
        return re;
 }
 
index 1ebb17119c7817133aa33f649284464791a5ef05..cab9fc3c5a64a01e6cb323296843fbeec6d55073 100644 (file)
@@ -1,3 +1,4 @@
+#include <limits>
 #include <stddef.h>
 
 #include "src/conf/opt.h"
@@ -26,7 +27,7 @@ const RegExp *doAlt(const RegExp *re1, const RegExp *re2)
        if (!re2) {
                return re1;
        }
-       return RegExp::alt(re1, re2);
+       return RegExp::make_alt(re1, re2);
 }
 
 static const RegExp *merge(const RegExp *sym1, const RegExp *sym2)
@@ -37,9 +38,7 @@ static const RegExp *merge(const RegExp *sym1, const RegExp *sym2)
        if (!sym2) {
                return sym1;
        }
-       return RegExp::sym(Range::add(
-               sym1->pld.sym.range,
-               sym2->pld.sym.range));
+       return RegExp::make_sym(Range::add(sym1->sym, sym2->sym));
 }
 
 static const RegExp *lift_sym(const RegExp *&re)
@@ -47,16 +46,16 @@ static const RegExp *lift_sym(const RegExp *&re)
        if (!re) {
                return NULL;
        }
-       if (re->tag == RegExp::SYM) {
+       if (re->type == RegExp::SYM) {
                const RegExp *sym = re;
                re = NULL;
                return sym;
        }
-       if (re->tag == RegExp::ALT) {
+       if (re->type == RegExp::ALT) {
                // second alternative cannot be SYM by construction
-               const RegExp *alt1 = re->pld.alt.re1;
-               if (alt1 && alt1->tag == RegExp::SYM) {
-                       re = re->pld.alt.re2;
+               const RegExp *alt1 = re->alt.re1;
+               if (alt1 && alt1->type == RegExp::SYM) {
+                       re = re->alt.re2;
                        return alt1;
                }
        }
@@ -80,7 +79,7 @@ const RegExp *doCat(const RegExp *re1, const RegExp *re2)
        if (!re2) {
                return re1;
        }
-       return RegExp::cat(re1, re2);
+       return RegExp::make_cat(re1, re2);
 }
 
 const RegExp *Scanner::schr(uint32_t c) const
@@ -91,7 +90,7 @@ const RegExp *Scanner::schr(uint32_t c) const
        switch (opts->encoding.type ()) {
                case Enc::UTF16: return UTF16Symbol(c);
                case Enc::UTF8:  return UTF8Symbol(c);
-               default:         return RegExp::sym(Range::sym(c));
+               default:         return RegExp::make_sym(Range::sym(c));
        }
 }
 
@@ -112,7 +111,7 @@ const RegExp *Scanner::cls(const Range *r) const
                switch (opts->empty_class_policy) {
                        case EMPTY_CLASS_MATCH_EMPTY:
                                warn.empty_class(get_line());
-                               return RegExp::nil();
+                               return RegExp::make_nil();
                        case EMPTY_CLASS_MATCH_NONE:
                                warn.empty_class(get_line());
                                break;
@@ -125,18 +124,16 @@ const RegExp *Scanner::cls(const Range *r) const
        switch (opts->encoding.type()) {
                case Enc::UTF16: return UTF16Range(r);
                case Enc::UTF8:  return UTF8Range(r);
-               default:         return RegExp::sym(r);
+               default:         return RegExp::make_sym(r);
        }
 }
 
 const RegExp *Scanner::mkDiff(const RegExp *re1, const RegExp *re2) const
 {
        if (re1 && re2
-               && re1->tag == RegExp::SYM
-               && re2->tag == RegExp::SYM) {
-               return cls(Range::sub(
-                       re1->pld.sym.range,
-                       re2->pld.sym.range));
+               && re1->type == RegExp::SYM
+               && re2->type == RegExp::SYM) {
+               return cls(Range::sub(re1->sym, re2->sym));
        }
        fatal("can only difference char sets");
        return NULL;
@@ -148,8 +145,7 @@ const RegExp *Scanner::mkDot() const
        if (!opts->encoding.encode(c)) {
                fatalf("Bad code point: '0x%X'", c);
        }
-       return cls(Range::sub(
-               opts->encoding.fullRange(),
+       return cls(Range::sub(opts->encoding.fullRange(),
                Range::sym(c)));
 }
 
@@ -165,7 +161,7 @@ const RegExp *Scanner::mkDot() const
  */
 const RegExp *Scanner::mkDefault() const
 {
-       return RegExp::sym(Range::ran(0,
+       return RegExp::make_sym(Range::ran(0,
                opts->encoding.nCodeUnits()));
 }
 
@@ -194,8 +190,7 @@ const RegExp *repeat_from_to(const RegExp *re, uint32_t n, uint32_t m)
        const RegExp *r1 = repeat(re, n);
        const RegExp *r2 = NULL;
        for (uint32_t i = n; i < m; ++i) {
-               r2 = mkAlt(
-                       RegExp::nil(),
+               r2 = mkAlt(RegExp::make_nil(),
                        doCat(re, r2));
        }
        return doCat(r1, r2);
@@ -204,9 +199,8 @@ const RegExp *repeat_from_to(const RegExp *re, uint32_t n, uint32_t m)
 // see note [counted repetition expansion]
 const RegExp *repeat_from(const RegExp *re, uint32_t n)
 {
-       return doCat(
-               repeat(re, n),
-               RegExp::iter(re));
+       return doCat(repeat(re, n),
+               RegExp::make_iter(re));
 }
 
 } // namespace re2c
index 1ce227867475be69841f1e3886afb2f1a9445de4..9440a74369406a9fe17978b9b3bc3a2001466cdb 100644 (file)
@@ -20,21 +20,13 @@ typedef std::vector<uint32_t> charset_t;
 
 struct RegExp
 {
-       enum tag_t
-       {
-               NIL,
-               SYM,
-               ALT,
-               CAT,
-               ITER,
-               TAG
-       };
-       union payload_t
+       static free_list<RegExp*> flist;
+       static const size_t NO_TAG;
+
+       enum type_t {NIL, SYM, ALT, CAT, ITER, TAG} type;
+       union
        {
-               struct
-               {
-                       const Range *range;
-               } sym;
+               const Range *sym;
                struct
                {
                        const RegExp *re1;
@@ -45,57 +37,44 @@ struct RegExp
                        const RegExp *re1;
                        const RegExp *re2;
                } cat;
-               struct
-               {
-                       const RegExp *re;
-               } iter;
-               struct
-               {
-                       const std::string *name;
-                       size_t idx;
-               } ctx;
+               const RegExp *iter;
+               const std::string *tag;
        };
 
-       static free_list<RegExp*> flist;
-
-       tag_t tag;
-       payload_t pld;
-
-       static const RegExp *nil()
+       static const RegExp *make_nil()
        {
                return new RegExp(NIL);
        }
-       static const RegExp *sym(const Range *r)
+       static const RegExp *make_sym(const Range *r)
        {
                RegExp *re = new RegExp(SYM);
-               re->pld.sym.range = r;
+               re->sym = r;
                return re;
        }
-       static const RegExp *alt(const RegExp *r1, const RegExp *r2)
+       static const RegExp *make_alt(const RegExp *r1, const RegExp *r2)
        {
                RegExp *re = new RegExp(ALT);
-               re->pld.alt.re1 = r1;
-               re->pld.alt.re2 = r2;
+               re->alt.re1 = r1;
+               re->alt.re2 = r2;
                return re;
        }
-       static const RegExp *cat(const RegExp *r1, const RegExp *r2)
+       static const RegExp *make_cat(const RegExp *r1, const RegExp *r2)
        {
                RegExp *re = new RegExp(CAT);
-               re->pld.cat.re1 = r1;
-               re->pld.cat.re2 = r2;
+               re->cat.re1 = r1;
+               re->cat.re2 = r2;
                return re;
        }
-       static const RegExp *iter(const RegExp *r)
+       static const RegExp *make_iter(const RegExp *r)
        {
                RegExp *re = new RegExp(ITER);
-               re->pld.iter.re = r;
+               re->iter = r;
                return re;
        }
-       static const RegExp *ctx(const std::string *n)
+       static const RegExp *make_tag(const std::string *t)
        {
                RegExp *re = new RegExp(TAG);
-               re->pld.ctx.name = n;
-               re->pld.ctx.idx = ~0u;
+               re->tag = t;
                return re;
        }
        inline ~RegExp()
@@ -104,7 +83,7 @@ struct RegExp
        }
 
 private:
-       inline RegExp(tag_t t) : tag(t), pld()
+       inline RegExp(type_t t) : type(t)
        {
                flist.insert(this);
        }
@@ -117,9 +96,7 @@ struct RegExpRule
        const RegExp *re;
        RuleInfo *info;
 
-       RegExpRule(const RegExp* r)
-               : re(r)
-               , info(NULL)
+       explicit RegExpRule(const RegExp *r): re(r), info(NULL)
        {
                flist.insert(this);
        }
@@ -128,7 +105,6 @@ struct RegExpRule
                delete info;
                flist.erase(this);
        }
-
        FORBID_COPY(RegExpRule);
 };
 
index 687c503356018e70e29a576ed61d28c82692fe2b..f900cc2a01676b674a9cafe5926d2c3cd7d5f153 100644 (file)
@@ -8,26 +8,26 @@ namespace re2c {
 
 static void split(const RegExp* re, std::set<uint32_t> &cs)
 {
-       switch (re->tag) {
+       switch (re->type) {
                case RegExp::NIL:
                case RegExp::TAG:
                        break;
                case RegExp::SYM:
-                       for (const Range *r = re->pld.sym.range; r; r = r->next()) {
+                       for (const Range *r = re->sym; r; r = r->next()) {
                                cs.insert(r->lower());
                                cs.insert(r->upper());
                        }
                        break;
                case RegExp::ALT:
-                       split(re->pld.alt.re1, cs);
-                       split(re->pld.alt.re2, cs);
+                       split(re->alt.re1, cs);
+                       split(re->alt.re2, cs);
                        break;
                case RegExp::CAT:
-                       split(re->pld.cat.re1, cs);
-                       split(re->pld.cat.re2, cs);
+                       split(re->cat.re1, cs);
+                       split(re->cat.re2, cs);
                        break;
                case RegExp::ITER:
-                       split(re->pld.iter.re, cs);
+                       split(re->iter, cs);
                        break;
        }
 }
index 0968c024a32cbe974d50dcd8c170cb7e9656097f..8d925f72e016c3fb03976d60077414d55c073685 100644 (file)
@@ -277,7 +277,7 @@ start:
                        fatal("tags are only allowed with '-T, --tags' option");
                }
                const std::string *name = new std::string(tok + 1, tok_len() - 1);
-               yylval.regexp = RegExp::ctx(name);
+               yylval.regexp = RegExp::make_tag(name);
                return TOKEN_REGEXP;
        }
 
@@ -368,7 +368,7 @@ start:
                                                        const uint32_t c = static_cast<uint8_t>(*s);
                                                        r = doCat(r, casing ? ichr(c) : schr(c));
                                                }
-                                               yylval.regexp = r ? r : RegExp::nil();
+                                               yylval.regexp = r ? r : RegExp::make_nil();
                                                return TOKEN_REGEXP;
                                        }
                                }
@@ -667,7 +667,7 @@ const RegExp *Scanner::lex_str(char quote, bool casing)
        for (bool end;;) {
                const uint32_t c = lex_str_chr(quote, end);
                if (end) {
-                       return r ? r : RegExp::nil();
+                       return r ? r : RegExp::make_nil();
                }
                r = doCat(r, casing ? ichr(c) : schr(c));
        }
index 3b8e7342a77f1dda206230a3907a2c20fc586116..2d278a07f8ddeb5b1c34bbcf896f9c05da081d6c 100644 (file)
@@ -250,7 +250,7 @@ rule
                if (specNone) {
                        in->fatal("code to handle illegal condition already defined");
                }
-               specNone = new RegExpRule(RegExp::nil());
+               specNone = new RegExpRule(RegExp::make_nil());
                specNone->info = new RuleInfo($3->loc, $3, $2);
                delete $2;
        }
@@ -261,7 +261,7 @@ rule
                        in->fatal("code to handle illegal condition already defined");
                }
                Loc loc(in->get_fname(), in->get_cline());
-               specNone = new RegExpRule(RegExp::nil());
+               specNone = new RegExpRule(RegExp::make_nil());
                specNone->info = new RuleInfo(loc, NULL, $3);
                delete $3;
        }
@@ -319,7 +319,8 @@ trailexpr
        }
 
        | expr '/' expr {
-               $$ = new RegExpRule(RegExp::cat($1, RegExp::cat(RegExp::ctx(NULL), $3)));
+               $$ = new RegExpRule(RegExp::make_cat($1,
+                       RegExp::make_cat(RegExp::make_tag(NULL), $3)));
        };
 
 expr:
@@ -351,7 +352,7 @@ term:
                }
        |       term factor
                {
-                       $$ = RegExp::cat($1, $2);
+                       $$ = RegExp::make_cat($1, $2);
                }
 ;
 
@@ -365,13 +366,13 @@ factor:
                        switch($2)
                        {
                        case '*':
-                               $$ = RegExp::iter($1);
+                               $$ = RegExp::make_iter($1);
                                break;
                        case '+':
-                               $$ = RegExp::cat(RegExp::iter($1), $1);
+                               $$ = RegExp::make_cat(RegExp::make_iter($1), $1);
                                break;
                        case '?':
-                               $$ = mkAlt($1, RegExp::nil());
+                               $$ = mkAlt($1, RegExp::make_nil());
                                break;
                        }
                }
@@ -389,7 +390,7 @@ factor:
                        {
                                $$ = repeat_from_to ($1, $2.min, $2.max);
                        }
-                       $$ = $$ ? $$ : RegExp::nil();
+                       $$ = $$ ? $$ : RegExp::make_nil();
                }
 ;