From f56196d29f6c29b37e3e95a6777714c237e1c71c Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 15 Jun 2015 14:09:08 +0100 Subject: [PATCH] Simplified implementation of range union and difference. --- re2c/src/ir/regexp/encoding/enc.cc | 8 +- re2c/src/ir/regexp/regexp.cc | 10 +-- re2c/src/parse/scanner.cc | 1 + re2c/src/util/range.cc | 118 +++++++++-------------------- re2c/src/util/range.h | 31 ++++---- 5 files changed, 61 insertions(+), 107 deletions(-) diff --git a/re2c/src/ir/regexp/encoding/enc.cc b/re2c/src/ir/regexp/encoding/enc.cc index 9cd8e3b1..fcc68302 100644 --- a/re2c/src/ir/regexp/encoding/enc.cc +++ b/re2c/src/ir/regexp/encoding/enc.cc @@ -141,7 +141,7 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const for (uint32_t c = l + 1; c <= h; ++c) { const uint32_t ec = asc2ebc[c & 0xFF]; - r = doUnion(r, new Range(ec, ec + 1)); + r = range_union (r, new Range(ec, ec + 1)); } break; } @@ -161,8 +161,8 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const { Range * surrs = new Range(SURR_MIN, SURR_MAX + 1); Range * error = new Range(UNICODE_ERROR, UNICODE_ERROR + 1); - r = doDiff(r, surrs); - r = doUnion(r, error); + r = range_diff (r, surrs); + r = range_union (r, error); break; } case POLICY_IGNORE: @@ -190,7 +190,7 @@ Range * Enc::fullRange() const if (policy != POLICY_IGNORE) { Range * surrs = new Range(SURR_MIN, SURR_MAX + 1); - r = doDiff(r, surrs); + r = range_diff (r, surrs); } return r; } diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index 67515cb2..3eade194 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -84,7 +84,7 @@ MatchOp * merge (MatchOp * m1, MatchOp * m2) { return m1; } - MatchOp * m = new MatchOp (doUnion (m1->match, m2->match)); + MatchOp * m = new MatchOp (range_union (m1->match, m2->match)); if (m1->ins_access == RegExp::PRIVATE || m2->ins_access == RegExp::PRIVATE) { @@ -114,7 +114,7 @@ RegExp * mkDiff (RegExp * e1, RegExp * e2) { return NULL; } - Range * r = doDiff (m1->match, m2->match); + Range * r = range_diff (m1->match, m2->match); return r ? (RegExp *) new MatchOp(r) : (RegExp *) new NullOp; @@ -216,7 +216,7 @@ Range * Scanner::mkRange(SubStr &s) const { Range *r = getRange(s); while (s.len > 0) - r = doUnion(r, getRange(s)); + r = range_union (r, getRange(s)); return r; } @@ -251,7 +251,7 @@ RegExp * Scanner::invToRE (SubStr & s) const Range * r = s.len == 0 ? full - : doDiff(full, mkRange (s)); + : range_diff (full, mkRange (s)); return matchSymbolRange(r); } @@ -263,7 +263,7 @@ RegExp * Scanner::mkDot() const if (!encoding.encode(c)) fatalf("Bad code point: '0x%X'", c); Range * ran = new Range(c, c + 1); - Range * inv = doDiff(full, ran); + Range * inv = range_diff (full, ran); return matchSymbolRange(inv); } diff --git a/re2c/src/parse/scanner.cc b/re2c/src/parse/scanner.cc index cf4f8549..d8985c9b 100644 --- a/re2c/src/parse/scanner.cc +++ b/re2c/src/parse/scanner.cc @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/re2c/src/util/range.cc b/re2c/src/util/range.cc index c08c1f8a..bc3af973 100644 --- a/re2c/src/util/range.cc +++ b/re2c/src/util/range.cc @@ -3,117 +3,69 @@ namespace re2c { -Range *doUnion(Range *r1, Range *r2) +Range * range_union (Range * r1, Range * r2) { - if (r1 == NULL) - return r2; - if (r2 == NULL) - return r1; - - Range *r, **rP = &r; - + Range * r = NULL; + Range ** p = &r; for (;;) { - Range *s; - - if (r1->lb <= r2->lb) + if (!r1) { - s = new Range(r1->lb, r1->ub); + * p = r2; + break; } - else + if (!r2) { - s = new Range(r2->lb, r2->ub); + * p = r1; + break; } - - *rP = s; - rP = &s->next; - - for (;;) + if (r2->lb < r1->lb) // swap { - if (r1->lb <= r2->lb) + Range * r1_old = r1; + r1 = r2; + r2 = r1_old; + } + uint32_t ub = r1->ub; + if (r2->lb < r1->ub) + { + for (; r2 && r2->lb < r1->ub; r2 = r2->next) { - if (r1->lb > s->ub) - break; - - if (r1->ub > s->ub) - s->ub = r1->ub; - - if (!(r1 = r1->next)) + if (r1->ub < r2->ub) { - uint32_t ub = 0; - - for (; r2 && r2->lb <= s->ub; r2 = r2->next) - ub = r2->ub; - - if (ub > s->ub) - s->ub = ub; - - *rP = r2; - - return r; - } - } - else - { - if (r2->lb > s->ub) - break; - - if (r2->ub > s->ub) - s->ub = r2->ub; - - if (!(r2 = r2->next)) - { - uint32_t ub = 0; - - for (; r1 && r1->lb <= s->ub; r1 = r1->next) - ub = r1->ub; - - if (ub > s->ub) - s->ub = ub; - - *rP = r1; - - return r; + ub = r2->ub; } } } + * p = new Range (r1->lb, ub); + p = &(* p)->next; + r1 = r1->next; } - - *rP = NULL; return r; } -Range *doDiff(Range *r1, Range *r2) +Range * range_diff (Range * r1, Range * r2) { - Range *r, *s, **rP = &r; - + Range * r = NULL; + Range ** p = &r; for (; r1; r1 = r1->next) { + for (; r2 && r2->ub <= r1->lb; r2 = r2->next); uint32_t lb = r1->lb; - - for (; r2 && r2->ub <= r1->lb; r2 = r2->next) - - ; for (; r2 && r2->lb < r1->ub; r2 = r2->next) { if (lb < r2->lb) { - *rP = s = new Range(lb, r2->lb); - rP = &s->next; + * p = new Range(lb, r2->lb); + p = &(* p)->next; } - - if ((lb = r2->ub) >= r1->ub) - goto noMore; + lb = r2->ub; + } + if (lb < r1->ub) + { + * p = new Range(lb, r1->ub); + p = &(* p)->next; } - - *rP = s = new Range(lb, r1->ub); - rP = &s->next; - -noMore: - ; } - - *rP = NULL; return r; } diff --git a/re2c/src/util/range.h b/re2c/src/util/range.h index 2ce4bee1..639d7f06 100644 --- a/re2c/src/util/range.h +++ b/re2c/src/util/range.h @@ -1,7 +1,7 @@ #ifndef _RE2C_UTIL_RANGE_ #define _RE2C_UTIL_RANGE_ -#include +#include // NULL #include "src/util/c99_stdint.h" #include "src/util/forbid_copy.h" @@ -10,31 +10,32 @@ namespace re2c { -class Range +struct Range { - -public: - Range *next; - uint32_t lb, ub; // [lb,ub) - static free_list vFreeList; -public: - Range(uint32_t l, uint32_t u) : next(NULL), lb(l), ub(u) + Range * next; + // [lb,ub) + uint32_t lb; + uint32_t ub; + + Range (uint32_t l, uint32_t u) + : next (NULL) + , lb (l) + , ub (u) { - vFreeList.insert(this); + vFreeList.insert (this); } - - ~Range() + ~Range () { - vFreeList.erase(this); + vFreeList.erase (this); } FORBID_COPY (Range); }; -Range *doUnion(Range *r1, Range *r2); -Range *doDiff(Range *r1, Range *r2); +Range * range_union (Range * r1, Range * r2); +Range * range_diff (Range * r1, Range * r2); } // end namespace re2c -- 2.40.0