granicus.if.org Git - re2c/commitdiff
Operations on character classes: fixed subtraction, simplified addition.
author: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 22 Jun 2015 12:50:17 +0000 (13:50 +0100)
committer: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 22 Jun 2015 12:50:17 +0000 (13:50 +0100)
Subtraction was broken by commit f56196d29f6c29b37e3e95a6777714c237e1c71c:
"Simplified implementation of range union and difference."

re2c/src/ir/regexp/encoding/enc.cc
re2c/src/ir/regexp/encoding/range_suffix.cc
re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
re2c/src/ir/regexp/regexp.cc
re2c/src/main.cc
re2c/src/util/range.cc
re2c/src/util/range.h

index fcc68302d2bec6b12fbd1eccbcf374a7726be34f..b0d48e8badd456a17d434a89bfe0904e0c8b3ae1 100644 (file)
@@ -132,16 +132,16 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
                case ASCII:
                        l &= 0xFF;
                        h &= 0xFF;
-                       r = new Range(l, h + 1);
+                       r = Range::ran (l, h + 1);
                        break;
                case EBCDIC:
                {
                        const uint32_t el = asc2ebc[l & 0xFF];
-                       r = new Range(el, el + 1);
+                       r = Range::sym (el);
                        for (uint32_t c = l + 1; c <= h; ++c)
                        {
                                const uint32_t ec = asc2ebc[c & 0xFF];
-                               r = range_union (r, new Range(ec, ec + 1));
+                               r = Range::add (r, Range::sym (ec));
                        }
                        break;
                }
@@ -149,7 +149,7 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
                case UTF16:
                case UTF32:
                case UTF8:
-                       r = new Range(l, h + 1);
+                       r = Range::ran (l, h + 1);
                        if (l <= SURR_MAX && h >= SURR_MIN)
                        {
                                switch (policy)
@@ -159,10 +159,10 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
                                                break;
                                        case POLICY_SUBSTITUTE:
                                        {
-                                               Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
-                                               Range * error = new Range(UNICODE_ERROR, UNICODE_ERROR + 1);
-                                               r = range_diff (r, surrs);
-                                               r = range_union (r, error);
+                                               Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+                                               Range * error = Range::sym (UNICODE_ERROR);
+                                               r = Range::sub (r, surrs);
+                                               r = Range::add (r, error);
                                                break;
                                        }
                                        case POLICY_IGNORE:
@@ -186,11 +186,11 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
  */
 Range * Enc::fullRange() const
 {
-       Range * r = new Range(0, nCodePoints());
+       Range * r = Range::ran (0, nCodePoints());
        if (policy != POLICY_IGNORE)
        {
-               Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
-               r = range_diff (r, surrs);
+               Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+               r = Range::sub (r, surrs);
        }
        return r;
 }
index 878984913b3256c5092c49c226d18d0bf79998e6..09617379fae44eac89ccc2c11f919d05f78da048 100644 (file)
@@ -27,7 +27,7 @@ RegExp * emit(RangeSuffix * p, RegExp * re)
                RegExp * regexp = NULL;
                for (; p != NULL; p = p->next)
                {
-                       RegExp * re1 = doCat(new MatchOp(new Range(p->l, p->h + 1)), re);
+                       RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re);
                        regexp = doAlt(regexp, emit(p->child, re1));
                }
                return regexp;
index 49522657d302e6a0105ef21af7b45baccc73b566..b8fef48a3dde3f8d28475d164a08bde4a127d0fb 100644 (file)
@@ -9,12 +9,12 @@ namespace re2c {
 RegExp * UTF16Symbol(utf16::rune r)
 {
        if (r <= utf16::MAX_1WORD_RUNE)
-               return new MatchOp(new Range(r, r + 1));
+               return new MatchOp(Range::sym (r));
        else
        {
                const uint16_t ld = utf16::lead_surr(r);
                const uint16_t tr = utf16::trail_surr(r);
-               return new CatOp(new MatchOp(new Range(ld, ld + 1)), new MatchOp(new Range(tr, tr + 1)));
+               return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr)));
        }
 }
 
index 084755fa25ac2f900636706483f1538776389ae6..e6bd1a20a87d6399182cc2d4d6eb5bf3a90374b4 100644 (file)
@@ -10,9 +10,9 @@ RegExp * UTF8Symbol(utf8::rune r)
 {
        uint8_t chars[utf8::MAX_RUNE_LENGTH];
        const int chars_count = utf8::rune_to_bytes(chars, r);
-       RegExp * re = new MatchOp(new Range(chars[0], chars[0] + 1));
+       RegExp * re = new MatchOp(Range::sym (chars[0]));
        for (int i = 1; i < chars_count; ++i)
-               re = new CatOp(re, new MatchOp(new Range(chars[i], chars[i] + 1)));
+               re = new CatOp(re, new MatchOp(Range::sym (chars[i])));
        return re;
 }
 
index 6e1cdc995127cdcf508575a3a4931c3165b29c46..55981b306ef2e582f1d8a705a6bcca97beb62f07 100644 (file)
@@ -84,7 +84,7 @@ MatchOp * merge (MatchOp * m1, MatchOp * m2)
        {
                return m1;
        }
-       MatchOp * m = new MatchOp (range_union (m1->match, m2->match));
+       MatchOp * m = new MatchOp (Range::add (m1->match, m2->match));
        if (m1->ins_access == RegExp::PRIVATE
                || m2->ins_access == RegExp::PRIVATE)
        {
@@ -143,7 +143,7 @@ RegExp * Scanner::matchSymbol(uint32_t c) const
        else if (encoding.is(Enc::UTF8))
                return UTF8Symbol(c);
        else
-               return new MatchOp(new Range(c, c + 1));
+               return new MatchOp (Range::sym (c));
 }
 
 RegExp * Scanner::strToRE (SubStr & s) const
@@ -202,7 +202,7 @@ Range * Scanner::mkRange(SubStr &s) const
 {
        Range *r = getRange(s);
        while (s.len > 0)
-               r = range_union (r, getRange(s));
+               r = Range::add (r, getRange(s));
 
        return r;
 }
@@ -252,7 +252,7 @@ RegExp * Scanner::invToRE (SubStr & s) const
 
        Range * r = s.len == 0
                ? full
-               : range_diff (full, mkRange (s));
+               : Range::sub (full, mkRange (s));
 
        return matchSymbolRange(r);
 }
@@ -265,7 +265,7 @@ RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const
        {
                fatal("can only difference char sets");
        }
-       Range * r = range_diff (m1->match, m2->match);
+       Range * r = Range::sub (m1->match, m2->match);
 
        return matchSymbolRange (r);
 }
@@ -276,8 +276,8 @@ RegExp * Scanner::mkDot() const
        uint32_t c = '\n';
        if (!encoding.encode(c))
                fatalf("Bad code point: '0x%X'", c);
-       Range * ran = new Range(c, c + 1);
-       Range * inv = range_diff (full, ran);
+       Range * ran = Range::sym (c);
+       Range * inv = Range::sub (full, ran);
 
        return matchSymbolRange(inv);
 }
@@ -294,7 +294,7 @@ RegExp * Scanner::mkDot() const
  */
 RegExp * Scanner::mkDefault() const
 {
-       Range * def = new Range(0, encoding.nCodeUnits());
+       Range * def = Range::ran (0, encoding.nCodeUnits());
        return new MatchOp(def);
 }
 
index f4a22634139cabff228885359df7906fc6b77a5a..37c44accae30cfff5c82d12a9f1bf8a240df58cc 100644 (file)
@@ -70,7 +70,6 @@ uint32_t last_fill_index = 0;
 CodeNames mapCodeName;
 
 free_list<RegExp*> RegExp::vFreeList;
-free_list<Range*>  Range::vFreeList;
 
 using namespace std;
 
index 92a5bead8755b7d5b43c03c63a549fca577fc5af..fa46ab332db8cdf438691590e7970c304559e8e5 100644 (file)
@@ -3,70 +3,95 @@
 namespace re2c
 {
 
-Range * range_union (Range * r1, Range * r2)
+free_list<Range*> Range::vFreeList;
+
+void Range::append_overlapping (Range * & head, Range * & tail, const Range * r)
 {
-       Range * r = NULL;
-       Range ** p = &r;
-       for (;;)
+       if (!head)
        {
-               if (!r1)
-               {
-                       * p = r2;
-                       break;
-               }
-               if (!r2)
-               {
-                       * p = r1;
-                       break;
-               }
-               if (r2->lb < r1->lb) // swap
+               head = Range::ran (r->lb, r->ub);
+               tail = head;
+       }
+       else if (tail->ub < r->lb)
+       {
+               tail->nx = Range::ran (r->lb, r->ub);
+               tail = tail->nx;
+       }
+       else if (tail->ub < r->ub)
+       {
+               tail->ub = r->ub;
+       }
+}
+
+Range * Range::add (const Range * r1, const Range * r2)
+{
+       Range * head = NULL;
+       Range * tail = NULL;
+       for (; r1 && r2;)
+       {
+               if (r1->lb < r2->lb)
                {
-                       Range * r1_old = r1;
-                       r1 = r2;
-                       r2 = r1_old;
+                       append_overlapping (head, tail, r1);
+                       r1 = r1->nx;
                }
-               uint32_t ub = r1->ub;
-               if (r2->lb < r1->ub)
+               else
                {
-                       for (; r2 && r2->lb < r1->ub; r2 = r2->nx)
-                       {
-                               if (r1->ub < r2->ub)
-                               {
-                                       ub = r2->ub;
-                               }
-                       }
+                       append_overlapping (head, tail, r2);
+                       r2 = r2->nx;
                }
-               * p = new Range (r1->lb, ub);
-               p = &(* p)->nx;
-               r1 = r1->nx;
        }
-       return r;
+       for (; r1; r1 = r1->nx)
+       {
+               append_overlapping (head, tail, r1);
+       }
+       for (; r2; r2 = r2->nx)
+       {
+               append_overlapping (head, tail, r2);
+       }
+       return head;
 }
 
-Range * range_diff (Range * r1, Range * r2)
+void Range::append (Range ** & ptail, uint32_t l, uint32_t u)
 {
-       Range * r = NULL;
-       Range ** p = &r;
-       for (; r1; r1 = r1->nx)
+       Range * & tail = * ptail;
+       tail = Range::ran (l, u);
+       ptail = &tail->nx;
+}
+
+Range * Range::sub (const Range * r1, const Range * r2)
+{
+       Range * head = NULL;
+       Range ** ptail = &head;
+       while (r1)
        {
-               for (; r2 && r2->ub <= r1->lb; r2 = r2->nx);
-               uint32_t lb = r1->lb;
-               for (; r2 && r2->lb < r1->ub; r2 = r2->nx)
+               if (!r2 || r2->lb >= r1->ub)
                {
-                       if (lb < r2->lb)
-                       {
-                               * p = new Range(lb, r2->lb);
-                               p = &(* p)->nx;
-                       }
-                       lb = r2->ub;
+                       append (ptail, r1->lb, r1->ub);
+                       r1 = r1->nx;
+               }
+               else if (r2->ub <= r1->lb)
+               {
+                       r2 = r2->nx;
                }
-               if (lb < r1->ub)
+               else
                {
-                       * p = new Range(lb, r1->ub);
-                       p = &(* p)->nx;
+                       if (r1->lb < r2->lb)
+                       {
+                               append (ptail, r1->lb, r2->lb);
+                       }
+                       while (r2 && r2->ub < r1->ub)
+                       {
+                               const uint32_t lb = r2->ub;
+                               r2 = r2->nx;
+                               const uint32_t ub = r2 && r2->lb < r1->ub
+                                       ? r2->lb
+                                       : r1->ub;
+                               append (ptail, lb, ub);
+                       }
+                       r1 = r1->nx;
                }
        }
-       return r;
+       return head;
 }
 
-} // end namespace re2c
+} // namespace re2c
index 48d79f384866d0788c206a8e0a5e3cf71aa4a92f..68d5d28c526147d9ff6f71bb47abdca3c7fc464c 100644 (file)
@@ -23,13 +23,13 @@ private:
        uint32_t ub;
 
 public:
-       Range (uint32_t l, uint32_t u)
-               : nx (NULL)
-               , lb (l)
-               , ub (u)
+       static Range * sym (uint32_t c)
        {
-               assert (lb < ub);
-               vFreeList.insert (this);
+               return new Range (NULL, c, c + 1);
+       }
+       static Range * ran (uint32_t l, uint32_t u)
+       {
+               return new Range (NULL, l, u);
        }
        ~Range ()
        {
@@ -38,12 +38,24 @@ public:
        Range * next () const { return nx; }
        uint32_t lower () const { return lb; }
        uint32_t upper () const { return ub; }
-       friend Range * range_union (Range * r1, Range * r2);
-       friend Range * range_diff (Range * r1, Range * r2);
+       static Range * add (const Range * r1, const Range * r2);
+       static Range * sub (const Range * r1, const Range * r2);
+
+private:
+       Range (Range * n, uint32_t l, uint32_t u)
+               : nx (n)
+               , lb (l)
+               , ub (u)
+       {
+               assert (lb < ub);
+               vFreeList.insert (this);
+       }
+       static void append_overlapping (Range * & head, Range * & tail, const Range * r);
+       static void append (Range ** & ptail, uint32_t l, uint32_t u);
 
        FORBID_COPY (Range);
 };
 
-} // end namespace re2c
+} // namespace re2c
 
 #endif // _RE2C_UTIL_RANGE_