From a4c87b8fc640a61d2e9802cbe203dc15cf2ecb47 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 15 Jun 2015 14:31:26 +0100 Subject: [PATCH] Now range internals are only visible to union/difference functions. Ranges must be constructed so that linked ranges don't overlap and are monotonically ordered. This is always true for one-link ranges created by the range constructor, and we construct larger ranges from them using union and difference functions (that maintain the invariant). --- re2c/src/ir/bytecode/calc_size.cc | 4 ++-- re2c/src/ir/bytecode/compile.cc | 4 ++-- re2c/src/ir/bytecode/split.cc | 4 ++-- .../ir/regexp/encoding/utf16/utf16_regexp.cc | 4 ++-- re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc | 4 ++-- re2c/src/util/range.cc | 16 ++++++++-------- re2c/src/util/range.h | 17 +++++++++++------ 7 files changed, 29 insertions(+), 24 deletions(-) diff --git a/re2c/src/ir/bytecode/calc_size.cc b/re2c/src/ir/bytecode/calc_size.cc index fc90e7e8..4daeb705 100644 --- a/re2c/src/ir/bytecode/calc_size.cc +++ b/re2c/src/ir/bytecode/calc_size.cc @@ -45,9 +45,9 @@ void CloseVOp::calcSize (Char * rep) void MatchOp::calcSize (Char * rep) { size = 1; - for (Range * r = match; r; r = r->next) + for (Range * r = match; r; r = r->next ()) { - for (uint32_t c = r->lb; c < r->ub; ++c) + for (uint32_t c = r->lower (); c < r->upper (); ++c) { if (rep[c] == c) { diff --git a/re2c/src/ir/bytecode/compile.cc b/re2c/src/ir/bytecode/compile.cc index 2f4609bf..f7f6aa93 100644 --- a/re2c/src/ir/bytecode/compile.cc +++ b/re2c/src/ir/bytecode/compile.cc @@ -176,9 +176,9 @@ uint32_t MatchOp::compile (Char * rep, Ins * i) i->i.link = &i[size]; Ins *j = &i[1]; uint32_t bump = size; - for (Range *r = match; r; r = r->next) + for (Range *r = match; r; r = r->next ()) { - for (uint32_t c = r->lb; c < r->ub; ++c) + for (uint32_t c = r->lower (); c < r->upper (); ++c) { if (rep[c] == c) { diff --git a/re2c/src/ir/bytecode/split.cc b/re2c/src/ir/bytecode/split.cc index 7e0bc6d1..5300d45d 100644 --- 
a/re2c/src/ir/bytecode/split.cc +++ b/re2c/src/ir/bytecode/split.cc @@ -32,9 +32,9 @@ void CloseVOp::split (CharSet & s) void MatchOp::split (CharSet & s) { - for (Range *r = match; r; r = r->next) + for (Range *r = match; r; r = r->next ()) { - for (uint32_t c = r->lb; c < r->ub; ++c) + for (uint32_t c = r->lower (); c < r->upper (); ++c) { CharPtn * x = s.rep[c]; CharPtn * a = x->nxt; diff --git a/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc index 2e7f69df..8b7e2bd5 100644 --- a/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc +++ b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc @@ -27,8 +27,8 @@ RegExp * UTF16Symbol(utf16::rune r) RegExp * UTF16Range(const Range * r) { RangeSuffix * root = NULL; - for (; r != NULL; r = r->next) - UTF16splitByRuneLength(root, r->lb, r->ub - 1); + for (; r != NULL; r = r->next ()) + UTF16splitByRuneLength(root, r->lower (), r->upper () - 1); return emit(root, NULL); } diff --git a/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc index 0e6001d3..1372211f 100644 --- a/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc +++ b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc @@ -25,8 +25,8 @@ RegExp * UTF8Symbol(utf8::rune r) RegExp * UTF8Range(const Range * r) { RangeSuffix * root = NULL; - for (; r != NULL; r = r->next) - UTF8splitByRuneLength(root, r->lb, r->ub - 1); + for (; r != NULL; r = r->next ()) + UTF8splitByRuneLength(root, r->lower (), r->upper () - 1); return emit(root, NULL); } diff --git a/re2c/src/util/range.cc b/re2c/src/util/range.cc index bc3af973..92a5bead 100644 --- a/re2c/src/util/range.cc +++ b/re2c/src/util/range.cc @@ -28,7 +28,7 @@ Range * range_union (Range * r1, Range * r2) uint32_t ub = r1->ub; if (r2->lb < r1->ub) { - for (; r2 && r2->lb < r1->ub; r2 = r2->next) + for (; r2 && r2->lb < r1->ub; r2 = r2->nx) { if (r1->ub < r2->ub) { @@ -37,8 +37,8 @@ Range * range_union (Range * r1, Range * r2) } } * p = 
new Range (r1->lb, ub); - p = &(* p)->next; - r1 = r1->next; + p = &(* p)->nx; + r1 = r1->nx; } return r; } @@ -47,23 +47,23 @@ Range * range_diff (Range * r1, Range * r2) { Range * r = NULL; Range ** p = &r; - for (; r1; r1 = r1->next) + for (; r1; r1 = r1->nx) { - for (; r2 && r2->ub <= r1->lb; r2 = r2->next); + for (; r2 && r2->ub <= r1->lb; r2 = r2->nx); uint32_t lb = r1->lb; - for (; r2 && r2->lb < r1->ub; r2 = r2->next) + for (; r2 && r2->lb < r1->ub; r2 = r2->nx) { if (lb < r2->lb) { * p = new Range(lb, r2->lb); - p = &(* p)->next; + p = &(* p)->nx; } lb = r2->ub; } if (lb < r1->ub) { * p = new Range(lb, r1->ub); - p = &(* p)->next; + p = &(* p)->nx; } } return r; diff --git a/re2c/src/util/range.h b/re2c/src/util/range.h index 639d7f06..b768cf23 100644 --- a/re2c/src/util/range.h +++ b/re2c/src/util/range.h @@ -10,17 +10,20 @@ namespace re2c { -struct Range +class Range { +public: static free_list vFreeList; - Range * next; +private: + Range * nx; // [lb,ub) uint32_t lb; uint32_t ub; +public: Range (uint32_t l, uint32_t u) - : next (NULL) + : nx (NULL) , lb (l) , ub (u) { @@ -30,13 +33,15 @@ struct Range { vFreeList.erase (this); } + Range * next () const { return nx; } + uint32_t lower () const { return lb; } + uint32_t upper () const { return ub; } + friend Range * range_union (Range * r1, Range * r2); + friend Range * range_diff (Range * r1, Range * r2); FORBID_COPY (Range); }; -Range * range_union (Range * r1, Range * r2); -Range * range_diff (Range * r1, Range * r2); - } // end namespace re2c #endif // _RE2C_UTIL_RANGE_ -- 2.40.0