case ASCII:
l &= 0xFF;
h &= 0xFF;
- r = new Range(l, h + 1);
+ r = Range::ran (l, h + 1);
break;
case EBCDIC:
{
const uint32_t el = asc2ebc[l & 0xFF];
- r = new Range(el, el + 1);
+ r = Range::sym (el);
for (uint32_t c = l + 1; c <= h; ++c)
{
const uint32_t ec = asc2ebc[c & 0xFF];
- r = range_union (r, new Range(ec, ec + 1));
+ r = Range::add (r, Range::sym (ec));
}
break;
}
case UTF16:
case UTF32:
case UTF8:
- r = new Range(l, h + 1);
+ r = Range::ran (l, h + 1);
if (l <= SURR_MAX && h >= SURR_MIN)
{
switch (policy)
break;
case POLICY_SUBSTITUTE:
{
- Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
- Range * error = new Range(UNICODE_ERROR, UNICODE_ERROR + 1);
- r = range_diff (r, surrs);
- r = range_union (r, error);
+ Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+ Range * error = Range::sym (UNICODE_ERROR);
+ r = Range::sub (r, surrs);
+ r = Range::add (r, error);
break;
}
case POLICY_IGNORE:
*/
Range * Enc::fullRange() const
{
- Range * r = new Range(0, nCodePoints());
+ Range * r = Range::ran (0, nCodePoints());
if (policy != POLICY_IGNORE)
{
- Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
- r = range_diff (r, surrs);
+ Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+ r = Range::sub (r, surrs);
}
return r;
}
RegExp * regexp = NULL;
for (; p != NULL; p = p->next)
{
- RegExp * re1 = doCat(new MatchOp(new Range(p->l, p->h + 1)), re);
+ RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re);
regexp = doAlt(regexp, emit(p->child, re1));
}
return regexp;
RegExp * UTF16Symbol(utf16::rune r)
{
if (r <= utf16::MAX_1WORD_RUNE)
- return new MatchOp(new Range(r, r + 1));
+ return new MatchOp(Range::sym (r));
else
{
const uint16_t ld = utf16::lead_surr(r);
const uint16_t tr = utf16::trail_surr(r);
- return new CatOp(new MatchOp(new Range(ld, ld + 1)), new MatchOp(new Range(tr, tr + 1)));
+ return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr)));
}
}
{
uint8_t chars[utf8::MAX_RUNE_LENGTH];
const int chars_count = utf8::rune_to_bytes(chars, r);
- RegExp * re = new MatchOp(new Range(chars[0], chars[0] + 1));
+ RegExp * re = new MatchOp(Range::sym (chars[0]));
for (int i = 1; i < chars_count; ++i)
- re = new CatOp(re, new MatchOp(new Range(chars[i], chars[i] + 1)));
+ re = new CatOp(re, new MatchOp(Range::sym (chars[i])));
return re;
}
{
return m1;
}
- MatchOp * m = new MatchOp (range_union (m1->match, m2->match));
+ MatchOp * m = new MatchOp (Range::add (m1->match, m2->match));
if (m1->ins_access == RegExp::PRIVATE
|| m2->ins_access == RegExp::PRIVATE)
{
else if (encoding.is(Enc::UTF8))
return UTF8Symbol(c);
else
- return new MatchOp(new Range(c, c + 1));
+ return new MatchOp (Range::sym (c));
}
RegExp * Scanner::strToRE (SubStr & s) const
{
Range *r = getRange(s);
while (s.len > 0)
- r = range_union (r, getRange(s));
+ r = Range::add (r, getRange(s));
return r;
}
Range * r = s.len == 0
? full
- : range_diff (full, mkRange (s));
+ : Range::sub (full, mkRange (s));
return matchSymbolRange(r);
}
{
fatal("can only difference char sets");
}
- Range * r = range_diff (m1->match, m2->match);
+ Range * r = Range::sub (m1->match, m2->match);
return matchSymbolRange (r);
}
uint32_t c = '\n';
if (!encoding.encode(c))
fatalf("Bad code point: '0x%X'", c);
- Range * ran = new Range(c, c + 1);
- Range * inv = range_diff (full, ran);
+ Range * ran = Range::sym (c);
+ Range * inv = Range::sub (full, ran);
return matchSymbolRange(inv);
}
*/
RegExp * Scanner::mkDefault() const
{
- Range * def = new Range(0, encoding.nCodeUnits());
+ Range * def = Range::ran (0, encoding.nCodeUnits());
return new MatchOp(def);
}
CodeNames mapCodeName;
free_list<RegExp*> RegExp::vFreeList;
-free_list<Range*> Range::vFreeList;
using namespace std;
namespace re2c
{
-Range * range_union (Range * r1, Range * r2)
+free_list<Range*> Range::vFreeList;
+
+void Range::append_overlapping (Range * & head, Range * & tail, const Range * r)
{
- Range * r = NULL;
- Range ** p = &r;
- for (;;)
+ if (!head)
{
- if (!r1)
- {
- * p = r2;
- break;
- }
- if (!r2)
- {
- * p = r1;
- break;
- }
- if (r2->lb < r1->lb) // swap
+ head = Range::ran (r->lb, r->ub);
+ tail = head;
+ }
+ else if (tail->ub < r->lb)
+ {
+ tail->nx = Range::ran (r->lb, r->ub);
+ tail = tail->nx;
+ }
+ else if (tail->ub < r->ub)
+ {
+ tail->ub = r->ub;
+ }
+}
+
+Range * Range::add (const Range * r1, const Range * r2)
+{
+ Range * head = NULL;
+ Range * tail = NULL;
+ for (; r1 && r2;)
+ {
+ if (r1->lb < r2->lb)
{
- Range * r1_old = r1;
- r1 = r2;
- r2 = r1_old;
+ append_overlapping (head, tail, r1);
+ r1 = r1->nx;
}
- uint32_t ub = r1->ub;
- if (r2->lb < r1->ub)
+ else
{
- for (; r2 && r2->lb < r1->ub; r2 = r2->nx)
- {
- if (r1->ub < r2->ub)
- {
- ub = r2->ub;
- }
- }
+ append_overlapping (head, tail, r2);
+ r2 = r2->nx;
}
- * p = new Range (r1->lb, ub);
- p = &(* p)->nx;
- r1 = r1->nx;
}
- return r;
+ for (; r1; r1 = r1->nx)
+ {
+ append_overlapping (head, tail, r1);
+ }
+ for (; r2; r2 = r2->nx)
+ {
+ append_overlapping (head, tail, r2);
+ }
+ return head;
}
-Range * range_diff (Range * r1, Range * r2)
+void Range::append (Range ** & ptail, uint32_t l, uint32_t u)
{
- Range * r = NULL;
- Range ** p = &r;
- for (; r1; r1 = r1->nx)
+ Range * & tail = * ptail;
+ tail = Range::ran (l, u);
+ ptail = &tail->nx;
+}
+
+Range * Range::sub (const Range * r1, const Range * r2)
+{
+ Range * head = NULL;
+ Range ** ptail = &head;
+ while (r1)
{
- for (; r2 && r2->ub <= r1->lb; r2 = r2->nx);
- uint32_t lb = r1->lb;
- for (; r2 && r2->lb < r1->ub; r2 = r2->nx)
+ if (!r2 || r2->lb >= r1->ub)
{
- if (lb < r2->lb)
- {
- * p = new Range(lb, r2->lb);
- p = &(* p)->nx;
- }
- lb = r2->ub;
+ append (ptail, r1->lb, r1->ub);
+ r1 = r1->nx;
+ }
+ else if (r2->ub <= r1->lb)
+ {
+ r2 = r2->nx;
}
- if (lb < r1->ub)
+ else
{
- * p = new Range(lb, r1->ub);
- p = &(* p)->nx;
+ if (r1->lb < r2->lb)
+ {
+ append (ptail, r1->lb, r2->lb);
+ }
+ while (r2 && r2->ub < r1->ub)
+ {
+ const uint32_t lb = r2->ub;
+ r2 = r2->nx;
+ const uint32_t ub = r2 && r2->lb < r1->ub
+ ? r2->lb
+ : r1->ub;
+ append (ptail, lb, ub);
+ }
+ r1 = r1->nx;
}
}
- return r;
+ return head;
}
-} // end namespace re2c
+} // namespace re2c
uint32_t ub;
public:
- Range (uint32_t l, uint32_t u)
- : nx (NULL)
- , lb (l)
- , ub (u)
+ static Range * sym (uint32_t c)
{
- assert (lb < ub);
- vFreeList.insert (this);
+ return new Range (NULL, c, c + 1);
+ }
+ static Range * ran (uint32_t l, uint32_t u)
+ {
+ return new Range (NULL, l, u);
}
~Range ()
{
Range * next () const { return nx; }
uint32_t lower () const { return lb; }
uint32_t upper () const { return ub; }
- friend Range * range_union (Range * r1, Range * r2);
- friend Range * range_diff (Range * r1, Range * r2);
+ static Range * add (const Range * r1, const Range * r2);
+ static Range * sub (const Range * r1, const Range * r2);
+
+private:
+ Range (Range * n, uint32_t l, uint32_t u)
+ : nx (n)
+ , lb (l)
+ , ub (u)
+ {
+ assert (lb < ub);
+ vFreeList.insert (this);
+ }
+ static void append_overlapping (Range * & head, Range * & tail, const Range * r);
+ static void append (Range ** & ptail, uint32_t l, uint32_t u);
FORBID_COPY (Range);
};
-} // end namespace re2c
+} // namespace re2c
#endif // _RE2C_UTIL_RANGE_