]> granicus.if.org Git - re2c/commitdiff
Simplified implementation of range union and difference.
authorUlya Trofimovich <skvadrik@gmail.com>
Mon, 15 Jun 2015 13:09:08 +0000 (14:09 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Mon, 15 Jun 2015 13:09:08 +0000 (14:09 +0100)
re2c/src/ir/regexp/encoding/enc.cc
re2c/src/ir/regexp/regexp.cc
re2c/src/parse/scanner.cc
re2c/src/util/range.cc
re2c/src/util/range.h

index 9cd8e3b11592346f24b15b1b4fe60d2f80227b79..fcc68302d2bec6b12fbd1eccbcf374a7726be34f 100644 (file)
@@ -141,7 +141,7 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
                        for (uint32_t c = l + 1; c <= h; ++c)
                        {
                                const uint32_t ec = asc2ebc[c & 0xFF];
-                               r = doUnion(r, new Range(ec, ec + 1));
+                               r = range_union (r, new Range(ec, ec + 1));
                        }
                        break;
                }
@@ -161,8 +161,8 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
                                        {
                                                Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
                                                Range * error = new Range(UNICODE_ERROR, UNICODE_ERROR + 1);
-                                               r = doDiff(r, surrs);
-                                               r = doUnion(r, error);
+                                               r = range_diff (r, surrs);
+                                               r = range_union (r, error);
                                                break;
                                        }
                                        case POLICY_IGNORE:
@@ -190,7 +190,7 @@ Range * Enc::fullRange() const
        if (policy != POLICY_IGNORE)
        {
                Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
-               r = doDiff(r, surrs);
+               r = range_diff (r, surrs);
        }
        return r;
 }
index 67515cb218ab6c03b1760b35e9abe7e76d1c6674..3eade194cc1acaf2735c86dddcfc4a2b7ba9c771 100644 (file)
@@ -84,7 +84,7 @@ MatchOp * merge (MatchOp * m1, MatchOp * m2)
        {
                return m1;
        }
-       MatchOp * m = new MatchOp (doUnion (m1->match, m2->match));
+       MatchOp * m = new MatchOp (range_union (m1->match, m2->match));
        if (m1->ins_access == RegExp::PRIVATE
                || m2->ins_access == RegExp::PRIVATE)
        {
@@ -114,7 +114,7 @@ RegExp * mkDiff (RegExp * e1, RegExp * e2)
        {
                return NULL;
        }
-       Range * r = doDiff (m1->match, m2->match);
+       Range * r = range_diff (m1->match, m2->match);
        return r
                ? (RegExp *) new MatchOp(r)
                : (RegExp *) new NullOp;
@@ -216,7 +216,7 @@ Range * Scanner::mkRange(SubStr &s) const
 {
        Range *r = getRange(s);
        while (s.len > 0)
-               r = doUnion(r, getRange(s));
+               r = range_union (r, getRange(s));
 
        return r;
 }
@@ -251,7 +251,7 @@ RegExp * Scanner::invToRE (SubStr & s) const
 
        Range * r = s.len == 0
                ? full
-               : doDiff(full, mkRange (s));
+               : range_diff (full, mkRange (s));
 
        return matchSymbolRange(r);
 }
@@ -263,7 +263,7 @@ RegExp * Scanner::mkDot() const
        if (!encoding.encode(c))
                fatalf("Bad code point: '0x%X'", c);
        Range * ran = new Range(c, c + 1);
-       Range * inv = doDiff(full, ran);
+       Range * inv = range_diff (full, ran);
 
        return matchSymbolRange(inv);
 }
index cf4f85494a2f83ffdfb320e0dd4ec6d50db49d2f..d8985c9b56c035d8b0a584d48fb194817100860d 100644 (file)
@@ -1,3 +1,4 @@
+#include <iostream>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
index c08c1f8a12973537af656e39796ba2e3636a7310..bc3af9736efd5897aa47444189708565c54486b0 100644 (file)
 namespace re2c
 {
 
-Range *doUnion(Range *r1, Range *r2)
+Range * range_union (Range * r1, Range * r2)
 {
-       if (r1 == NULL)
-               return r2;
-       if (r2 == NULL)
-               return r1;
-
-       Range *r, **rP = &r;
-
+       Range * r = NULL;
+       Range ** p = &r;
        for (;;)
        {
-               Range *s;
-
-               if (r1->lb <= r2->lb)
+               if (!r1)
                {
-                       s = new Range(r1->lb, r1->ub);
+                       * p = r2;
+                       break;
                }
-               else
+               if (!r2)
                {
-                       s = new Range(r2->lb, r2->ub);
+                       * p = r1;
+                       break;
                }
-
-               *rP = s;
-               rP = &s->next;
-
-               for (;;)
+               if (r2->lb < r1->lb) // swap
                {
-                       if (r1->lb <= r2->lb)
+                       Range * r1_old = r1;
+                       r1 = r2;
+                       r2 = r1_old;
+               }
+               uint32_t ub = r1->ub;
+               if (r2->lb < r1->ub)
+               {
+                       for (; r2 && r2->lb < r1->ub; r2 = r2->next)
                        {
-                               if (r1->lb > s->ub)
-                                       break;
-
-                               if (r1->ub > s->ub)
-                                       s->ub = r1->ub;
-
-                               if (!(r1 = r1->next))
+                               if (r1->ub < r2->ub)
                                {
-                                       uint32_t ub = 0;
-
-                                       for (; r2 && r2->lb <= s->ub; r2 = r2->next)
-                                               ub = r2->ub;
-
-                                       if (ub > s->ub)
-                                               s->ub = ub;
-
-                                       *rP = r2;
-
-                                       return r;
-                               }
-                       }
-                       else
-                       {
-                               if (r2->lb > s->ub)
-                                       break;
-
-                               if (r2->ub > s->ub)
-                                       s->ub = r2->ub;
-
-                               if (!(r2 = r2->next))
-                               {
-                                       uint32_t ub = 0;
-
-                                       for (; r1 && r1->lb <= s->ub; r1 = r1->next)
-                                               ub = r1->ub;
-
-                                       if (ub > s->ub)
-                                               s->ub = ub;
-
-                                       *rP = r1;
-
-                                       return r;
+                                       ub = r2->ub;
                                }
                        }
                }
+               * p = new Range (r1->lb, ub);
+               p = &(* p)->next;
+               r1 = r1->next;
        }
-
-       *rP = NULL;
        return r;
 }
 
-Range *doDiff(Range *r1, Range *r2)
+Range * range_diff (Range * r1, Range * r2)
 {
-       Range *r, *s, **rP = &r;
-
+       Range * r = NULL;
+       Range ** p = &r;
        for (; r1; r1 = r1->next)
        {
+               for (; r2 && r2->ub <= r1->lb; r2 = r2->next);
                uint32_t lb = r1->lb;
-
-               for (; r2 && r2->ub <= r1->lb; r2 = r2->next)
-
-                       ;
                for (; r2 && r2->lb < r1->ub; r2 = r2->next)
                {
                        if (lb < r2->lb)
                        {
-                               *rP = s = new Range(lb, r2->lb);
-                               rP = &s->next;
+                               * p = new Range(lb, r2->lb);
+                               p = &(* p)->next;
                        }
-
-                       if ((lb = r2->ub) >= r1->ub)
-                               goto noMore;
+                       lb = r2->ub;
+               }
+               if (lb < r1->ub)
+               {
+                       * p = new Range(lb, r1->ub);
+                       p = &(* p)->next;
                }
-
-               *rP = s = new Range(lb, r1->ub);
-               rP = &s->next;
-
-noMore:
-               ;
        }
-
-       *rP = NULL;
        return r;
 }
 
index 2ce4bee1e54f667d30e8e700db2fd72042a895e1..639d7f068512452fcca62dfd4dae8f962d02950d 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _RE2C_UTIL_RANGE_
 #define _RE2C_UTIL_RANGE_
 
-#include <iostream>
+#include <stddef.h> // NULL
 
 #include "src/util/c99_stdint.h"
 #include "src/util/forbid_copy.h"
 namespace re2c
 {
 
-class Range
+struct Range
 {
-
-public:
-       Range           *next;
-       uint32_t        lb, ub; // [lb,ub)
-
        static free_list<Range*> vFreeList;
 
-public:
-       Range(uint32_t l, uint32_t u) : next(NULL), lb(l), ub(u)
+       Range * next;
+       // [lb,ub)
+       uint32_t lb;
+       uint32_t ub;
+
+       Range (uint32_t l, uint32_t u)
+               : next (NULL)
+               , lb (l)
+               , ub (u)
        {
-               vFreeList.insert(this);
+               vFreeList.insert (this);
        }
-
-       ~Range()
+       ~Range ()
        {
-               vFreeList.erase(this);
+               vFreeList.erase (this);
        }
 
        FORBID_COPY (Range);
 };
 
-Range *doUnion(Range *r1, Range *r2);
-Range *doDiff(Range *r1, Range *r2);
+Range * range_union (Range * r1, Range * r2);
+Range * range_diff (Range * r1, Range * r2);
 
 } // end namespace re2c