From f56196d29f6c29b37e3e95a6777714c237e1c71c Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Mon, 15 Jun 2015 14:09:08 +0100
Subject: [PATCH] Simplified implementation of range union and difference.

---
 re2c/src/ir/regexp/encoding/enc.cc |   8 +-
 re2c/src/ir/regexp/regexp.cc       |  10 +--
 re2c/src/parse/scanner.cc          |   1 +
 re2c/src/util/range.cc             | 118 +++++++++--------------------
 re2c/src/util/range.h              |  31 ++++----
 5 files changed, 61 insertions(+), 107 deletions(-)

diff --git a/re2c/src/ir/regexp/encoding/enc.cc b/re2c/src/ir/regexp/encoding/enc.cc
index 9cd8e3b1..fcc68302 100644
--- a/re2c/src/ir/regexp/encoding/enc.cc
+++ b/re2c/src/ir/regexp/encoding/enc.cc
@@ -141,7 +141,7 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
 			for (uint32_t c = l + 1; c <= h; ++c)
 			{
 				const uint32_t ec = asc2ebc[c & 0xFF];
-				r = doUnion(r, new Range(ec, ec + 1));
+				r = range_union (r, new Range(ec, ec + 1));
 			}
 			break;
 		}
@@ -161,8 +161,8 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const
 					{
 						Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
 						Range * error = new Range(UNICODE_ERROR, UNICODE_ERROR + 1);
-						r = doDiff(r, surrs);
-						r = doUnion(r, error);
+						r = range_diff (r, surrs);
+						r = range_union (r, error);
 						break;
 					}
 					case POLICY_IGNORE:
@@ -190,7 +190,7 @@ Range * Enc::fullRange() const
 	if (policy != POLICY_IGNORE)
 	{
 		Range * surrs = new Range(SURR_MIN, SURR_MAX + 1);
-		r = doDiff(r, surrs);
+		r = range_diff (r, surrs);
 	}
 	return r;
 }
diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc
index 67515cb2..3eade194 100644
--- a/re2c/src/ir/regexp/regexp.cc
+++ b/re2c/src/ir/regexp/regexp.cc
@@ -84,7 +84,7 @@ MatchOp * merge (MatchOp * m1, MatchOp * m2)
 	{
 		return m1;
 	}
-	MatchOp * m = new MatchOp (doUnion (m1->match, m2->match));
+	MatchOp * m = new MatchOp (range_union (m1->match, m2->match));
 	if (m1->ins_access == RegExp::PRIVATE
 		|| m2->ins_access == RegExp::PRIVATE)
 	{
@@ -114,7 +114,7 @@ RegExp * mkDiff (RegExp * e1, RegExp * e2)
 	{
 		return NULL;
 	}
-	Range * r = doDiff (m1->match, m2->match);
+	Range * r = range_diff (m1->match, m2->match);
 	return r
 		? (RegExp *) new MatchOp(r)
 		: (RegExp *) new NullOp;
@@ -216,7 +216,7 @@ Range * Scanner::mkRange(SubStr &s) const
 {
 	Range *r = getRange(s);
 	while (s.len > 0)
-		r = doUnion(r, getRange(s));
+		r = range_union (r, getRange(s));
 
 	return r;
 }
@@ -251,7 +251,7 @@ RegExp * Scanner::invToRE (SubStr & s) const
 
 	Range * r = s.len == 0
 		? full
-		: doDiff(full, mkRange (s));
+		: range_diff (full, mkRange (s));
 
 	return matchSymbolRange(r);
 }
@@ -263,7 +263,7 @@ RegExp * Scanner::mkDot() const
 	if (!encoding.encode(c))
 		fatalf("Bad code point: '0x%X'", c);
 	Range * ran = new Range(c, c + 1);
-	Range * inv = doDiff(full, ran);
+	Range * inv = range_diff (full, ran);
 
 	return matchSymbolRange(inv);
 }
diff --git a/re2c/src/parse/scanner.cc b/re2c/src/parse/scanner.cc
index cf4f8549..d8985c9b 100644
--- a/re2c/src/parse/scanner.cc
+++ b/re2c/src/parse/scanner.cc
@@ -1,3 +1,4 @@
+#include <iostream>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/re2c/src/util/range.cc b/re2c/src/util/range.cc
index c08c1f8a..bc3af973 100644
--- a/re2c/src/util/range.cc
+++ b/re2c/src/util/range.cc
@@ -3,117 +3,69 @@
 namespace re2c
 {
 
-Range *doUnion(Range *r1, Range *r2)
+Range * range_union (Range * r1, Range * r2)
 {
-	if (r1 == NULL)
-		return r2;
-	if (r2 == NULL)
-		return r1;
-
-	Range *r, **rP = &r;
-
+	Range * r = NULL;
+	Range ** p = &r;
 	for (;;)
 	{
-		Range *s;
-
-		if (r1->lb <= r2->lb)
+		if (!r1)
 		{
-			s = new Range(r1->lb, r1->ub);
+			* p = r2;
+			break;
 		}
-		else
+		if (!r2)
 		{
-			s = new Range(r2->lb, r2->ub);
+			* p = r1;
+			break;
 		}
-
-		*rP = s;
-		rP = &s->next;
-
-		for (;;)
+		if (r2->lb < r1->lb) // swap
 		{
-			if (r1->lb <= r2->lb)
+			Range * r1_old = r1;
+			r1 = r2;
+			r2 = r1_old;
+		}
+		uint32_t ub = r1->ub;
+		if (r2->lb < r1->ub)
+		{
+			for (; r2 && r2->lb < r1->ub; r2 = r2->next)
 			{
-				if (r1->lb > s->ub)
-					break;
-
-				if (r1->ub > s->ub)
-					s->ub = r1->ub;
-
-				if (!(r1 = r1->next))
+				if (r1->ub < r2->ub)
 				{
-					uint32_t ub = 0;
-
-					for (; r2 && r2->lb <= s->ub; r2 = r2->next)
-						ub = r2->ub;
-
-					if (ub > s->ub)
-						s->ub = ub;
-
-					*rP = r2;
-
-					return r;
-				}
-			}
-			else
-			{
-				if (r2->lb > s->ub)
-					break;
-
-				if (r2->ub > s->ub)
-					s->ub = r2->ub;
-
-				if (!(r2 = r2->next))
-				{
-					uint32_t ub = 0;
-
-					for (; r1 && r1->lb <= s->ub; r1 = r1->next)
-						ub = r1->ub;
-
-					if (ub > s->ub)
-						s->ub = ub;
-
-					*rP = r1;
-
-					return r;
+					ub = r2->ub;
 				}
 			}
 		}
+		* p = new Range (r1->lb, ub);
+		p = &(* p)->next;
+		r1 = r1->next;
 	}
-
-	*rP = NULL;
 	return r;
 }
 
-Range *doDiff(Range *r1, Range *r2)
+Range * range_diff (Range * r1, Range * r2)
 {
-	Range *r, *s, **rP = &r;
-
+	Range * r = NULL;
+	Range ** p = &r;
 	for (; r1; r1 = r1->next)
 	{
+		for (; r2 && r2->ub <= r1->lb; r2 = r2->next);
 		uint32_t lb = r1->lb;
-
-		for (; r2 && r2->ub <= r1->lb; r2 = r2->next)
-
-			;
 		for (; r2 && r2->lb < r1->ub; r2 = r2->next)
 		{
 			if (lb < r2->lb)
 			{
-				*rP = s = new Range(lb, r2->lb);
-				rP = &s->next;
+				* p = new Range(lb, r2->lb);
+				p = &(* p)->next;
 			}
-
-			if ((lb = r2->ub) >= r1->ub)
-				goto noMore;
+			lb = r2->ub;
+		}
+		if (lb < r1->ub)
+		{
+			* p = new Range(lb, r1->ub);
+			p = &(* p)->next;
 		}
-
-		*rP = s = new Range(lb, r1->ub);
-		rP = &s->next;
-
-noMore:
-		;
 	}
-
-	*rP = NULL;
 	return r;
 }
 
diff --git a/re2c/src/util/range.h b/re2c/src/util/range.h
index 2ce4bee1..639d7f06 100644
--- a/re2c/src/util/range.h
+++ b/re2c/src/util/range.h
@@ -1,7 +1,7 @@
 #ifndef _RE2C_UTIL_RANGE_
 #define _RE2C_UTIL_RANGE_
 
-#include <iostream>
+#include <stddef.h> // NULL
 
 #include "src/util/c99_stdint.h"
 #include "src/util/forbid_copy.h"
@@ -10,31 +10,32 @@
 namespace re2c
 {
 
-class Range
+struct Range
 {
-
-public:
-	Range		*next;
-	uint32_t	lb, ub;	// [lb,ub)
-
 	static free_list<Range*> vFreeList;
 
-public:
-	Range(uint32_t l, uint32_t u) : next(NULL), lb(l), ub(u)
+	Range * next;
+	// [lb,ub)
+	uint32_t lb;
+	uint32_t ub;
+
+	Range (uint32_t l, uint32_t u)
+		: next (NULL)
+		, lb (l)
+		, ub (u)
 	{
-		vFreeList.insert(this);
+		vFreeList.insert (this);
 	}
-
-	~Range()
+	~Range ()
 	{
-		vFreeList.erase(this);
+		vFreeList.erase (this);
 	}
 
 	FORBID_COPY (Range);
 };
 
-Range *doUnion(Range *r1, Range *r2);
-Range *doDiff(Range *r1, Range *r2);
+Range * range_union (Range * r1, Range * r2);
+Range * range_diff (Range * r1, Range * r2);
 
 } // end namespace re2c
 
-- 
2.40.0