From: Ulya Fokanova <skvadrik@gmail.com>
Date: Wed, 9 Apr 2014 22:35:37 +0000 (+0300)
Subject: Disallow setting multiple encodings.
X-Git-Tag: 0.13.7.1~9
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cfa2fe4cddd10f110494ff035ab5c541451194dd;p=re2c

Disallow setting multiple encodings.

Fail if someone tries to set a non-ASCII encoding
when another non-ASCII encoding is already set.
If an encoding has been set successfully, it is
guaranteed to be valid.
---

diff --git a/re2c/actions.cc b/re2c/actions.cc
index a9008b5c..d3d02478 100644
--- a/re2c/actions.cc
+++ b/re2c/actions.cc
@@ -757,7 +757,7 @@ Range * Scanner::getRange(SubStr &s) const
 	xlb = encoding.xlat(lb);
 	xub = encoding.xlat(ub);
 
-	if (encoding.isEBCDIC())
+	if (encoding.is(Enc::EBCDIC))
 	{
 		Range * r = new Range(xlb, xlb + 1);
 		for (uint c = lb + 1; c <= ub; ++c)
@@ -781,9 +781,9 @@ RegExp * Scanner::matchChar(uint c) const
 
 RegExp * Scanner::matchSymbol(uint c) const
 {
-	if (encoding.isUTF16())
+	if (encoding.is(Enc::UTF16))
 		return UTF16Symbol(c);
-	else if (encoding.isUTF8())
+	else if (encoding.is(Enc::UTF8))
 		return UTF8Symbol(c);
 	else
 		return matchChar(c);
@@ -858,9 +858,9 @@ Range * Scanner::mkRange(SubStr &s) const
 
 RegExp * Scanner::matchSymbolRange(Range * r) const
 {
-	if (encoding.isUTF16())
+	if (encoding.is(Enc::UTF16))
 		return UTF16Range(r);
-	else if (encoding.isUTF8())
+	else if (encoding.is(Enc::UTF8))
 		return UTF8Range(r);
 	else
 		return new MatchOp(r);
diff --git a/re2c/code.cc b/re2c/code.cc
index fe9305cb..40bf7b6f 100644
--- a/re2c/code.cc
+++ b/re2c/code.cc
@@ -908,7 +908,7 @@ static bool genCases(std::ostream &o, uint ind, uint lb, Span *s, bool &newLine,
 					o << indent(ind) << "case ";
 					prtChOrHex(o, lb);
 					o << ":";
-					if (dFlag && encoding.isEBCDIC() && lb < 256u && isprint(encoding.talx(lb)))
+					if (dFlag && encoding.is(Enc::EBCDIC) && lb < 256u && isprint(encoding.talx(lb)))
 					{
 						o << " /* " << std::string(1, encoding.talx(lb)) << " */";
 					}
@@ -2248,47 +2248,52 @@ void Scanner::config(const Str& cfg, int num)
 	else if (cfg.to_string() == "flags:e")
 	{
 		if (num != 0)
-			encoding.setEBCDIC();
+		{
+			if (!encoding.set(Enc::EBCDIC))
+				fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n");
+		}
 		else
-			encoding.unsetEBCDIC();
-		if (encoding.isBad())
-			fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n");
+			encoding.unset(Enc::EBCDIC);
 	}
 	else if (cfg.to_string() == "flags:u")
 	{
 		if (num != 0)
-			encoding.setUTF32();
+		{
+			if (!encoding.set(Enc::UTF32))
+				fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n");
+		}
 		else
-			encoding.unsetUTF32();
-		if (encoding.isBad())
-			fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n");
+			encoding.unset(Enc::UTF32);
 	}
 	else if (cfg.to_string() == "flags:w")
 	{
 		if (num != 0)
-			encoding.setUCS2();
+		{
+			if (!encoding.set(Enc::UCS2))
+				fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+		}
 		else
-			encoding.unsetUCS2();
-		if (encoding.isBad())
-			fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+			encoding.unset(Enc::UCS2);
 	}
 	else if (cfg.to_string() == "flags:x")
 	{
 		if (num != 0)
-			encoding.setUTF16();
+		{
+			if (!encoding.set(Enc::UTF16))
+				fatal("Cannot set '-x' switch: please reset '-e', '-w', '-u' and '-8' switches at first.\n");
+		}
 		else
-			encoding.unsetUTF16();
-		if (encoding.isBad())
-			fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+			encoding.unset(Enc::UTF16);
 	}
 	else if (cfg.to_string() == "flags:8")
 	{
 		if (num != 0)
-			encoding.setUTF8();
+		{
+			if (!encoding.set(Enc::UTF8))
+				fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n");
+		}
 		else
-			encoding.unsetUTF8();
-		if (encoding.isBad())
-			fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n");
+			encoding.unset(Enc::UTF8);
 	}
 	else
 	{
diff --git a/re2c/enc.cc b/re2c/enc.cc
index 8cfed960..6d85b412 100644
--- a/re2c/enc.cc
+++ b/re2c/enc.cc
@@ -2,8 +2,6 @@
 
 namespace re2c {
 
-const uint Enc::ERROR = ~0u;
-
 const uint Enc::asc2ebc[256] =
     { /* Based on ISO 8859/1 and Code Page 37 */
         0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
diff --git a/re2c/enc.h b/re2c/enc.h
index 95f05cb0..40372447 100644
--- a/re2c/enc.h
+++ b/re2c/enc.h
@@ -30,21 +30,22 @@ namespace re2c {
 
 class Enc
 {
+public:
 	// Supported encodings.
 	enum type_t
-		{ ASCII  = 0x00000000u
-		, EBCDIC = 0x00000001u
-		, UCS2   = 0x00000002u
-		, UTF16  = 0x00000004u
-		, UTF32  = 0x00000008u
-		, UTF8   = 0x00000010u
+		{ ASCII
+		, EBCDIC
+		, UCS2
+		, UTF16
+		, UTF32
+		, UTF8
 		};
 
-	static const uint ERROR;
 	static const uint asc2ebc[256];
 	static const uint ebc2asc[256];
 
-	uint type;
+private:
+	type_t type;
 
 public:
 	Enc()
@@ -58,25 +59,9 @@ public:
 	inline uint szCodePoint() const;
 	inline uint szCodeUnit() const;
 
-	inline void setEBCDIC()	{ type |= EBCDIC; }
-	inline void setUCS2()	{ type |= UCS2; }
-	inline void setUTF16()	{ type |= UTF16; }
-	inline void setUTF32()	{ type |= UTF32; }
-	inline void setUTF8()	{ type |= UTF8; }
-
-	inline void unsetEBCDIC()	{ type &= ~EBCDIC; }
-	inline void unsetUCS2()		{ type &= ~UCS2; }
-	inline void unsetUTF16()	{ type &= ~UTF16; }
-	inline void unsetUTF32()	{ type &= ~UTF32; }
-	inline void unsetUTF8()		{ type &= ~UTF8; }
-
-	inline bool isEBCDIC() const	{ return type & EBCDIC; }
-	inline bool isUCS2() const	{ return type & UCS2; }
-	inline bool isUTF16() const	{ return type & UTF16; }
-	inline bool isUTF32() const	{ return type & UTF32; }
-	inline bool isUTF8() const	{ return type & UTF8; }
-
-	inline bool isBad() const;
+	inline bool set(type_t t);
+	inline void unset(type_t);
+	inline bool is(type_t) const;
 
 	inline uint xlat(uint c) const;
 	inline uint talx(uint c) const;
@@ -92,8 +77,8 @@ inline uint Enc::nCodePoints() const
 		case UTF16:
 		case UTF32:
 		case UTF8:	return 0x110000;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
 }
 
 inline uint Enc::nCodeUnits() const
@@ -106,8 +91,8 @@ inline uint Enc::nCodeUnits() const
 		case UCS2:
 		case UTF16:	return 0x10000;
 		case UTF32:	return 0x110000;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
 }
 
 // returns *maximal* code point size for encoding
@@ -121,8 +106,8 @@ inline uint Enc::szCodePoint() const
 		case UTF16:
 		case UTF32:
 		case UTF8:	return 4;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
 }
 
 inline uint Enc::szCodeUnit() const
@@ -135,18 +120,32 @@ inline uint Enc::szCodeUnit() const
 		case UCS2:
 		case UTF16:	return 2;
 		case UTF32:	return 4;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
+}
+
+inline bool Enc::set(type_t t) // try to make 't' the current encoding; true on success
+{
+	if (type == t) // 't' is already the current encoding: nothing to change
+		return true;
+	else if (type != ASCII) // some other non-ASCII encoding is current: refuse, 'type' is left untouched
+		return false;
+	else
+	{
+		type = t; // current encoding is ASCII, so it may be replaced by 't'
+		return true;
+	}
+}
+
+inline void Enc::unset(type_t t) // fall back to ASCII, but only if 't' is the current encoding
+{
+	if (type == t) // any other current encoding is left as it is
+		type = ASCII;
 }
 
-// This test returns 'true' for all valid encoding types
-inline bool Enc::isBad() const
+inline bool Enc::is(type_t t) const
 {
-	// test if 'type' is a power of 2
-	// notice: ASCII mask is 0 => it's ok if either
-	// 1) only ASCII is set
-	// 2) both ASCII and some other encoding is set
-	return (type & (type - 1)) != 0;
+	return type == t;
 }
 
 inline uint Enc::xlat(uint c) const
@@ -159,8 +158,8 @@ inline uint Enc::xlat(uint c) const
 		case UTF16:
 		case UTF32:
 		case UTF8:	return c;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
 }
 
 inline uint Enc::talx(uint c) const
@@ -173,8 +172,8 @@ inline uint Enc::talx(uint c) const
 		case UTF16:
 		case UTF32:
 		case UTF8:	return c;
-		default:	return ERROR;
 	}
+	return ~0; // to silence gcc warning
 }
 
 } // namespace re2c
diff --git a/re2c/main.cc b/re2c/main.cc
index d5a122ec..4af5c0f1 100644
--- a/re2c/main.cc
+++ b/re2c/main.cc
@@ -246,7 +246,11 @@ int main(int argc, char *argv[])
 			break;
 
 			case 'e':
-			encoding.setEBCDIC();
+			if (!encoding.set(Enc::EBCDIC))
+			{
+				std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+				return 2;
+			}
 			break;
 
 			case 'd':
@@ -322,26 +326,42 @@ int main(int argc, char *argv[])
 				cout << vernum << endl;
 				return 2;
 			}
-			
+
 			case 'w':
 			sFlag = true;
-			encoding.setUCS2();
+			if (!encoding.set(Enc::UCS2))
+			{
+				std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+				return 2;
+			}
 			break;
 
 			case 'x':
 			sFlag = true;
-			encoding.setUTF16();
+			if (!encoding.set(Enc::UTF16))
+			{
+				std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+				return 2;
+			}
 			break;
 
 			case 'u':
 			sFlag = true;
-			encoding.setUTF32();
+			if (!encoding.set(Enc::UTF32))
+			{
+				std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+				return 2;
+			}
 			break;
 
 			case '8':
-			encoding.setUTF8();
+			if (!encoding.set(Enc::UTF8))
+			{
+				std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+				return 2;
+			}
 			break;
-	  
+
 			default:
 			case 'h':
 			case '?':
@@ -372,12 +392,6 @@ int main(int argc, char *argv[])
 		return 2;
 	}
 
-	if (encoding.isBad())
-	{
-		std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
-		return 2;
-	}
-
 	if (DFlag && (bFlag || dFlag || sFlag))
 	{
 		std::cerr << "re2c: error: Cannot combine -D with -b, -d  or -s switches\n";
diff --git a/re2c/print.cc b/re2c/print.cc
index 9f61bdc5..22aa7c7c 100644
--- a/re2c/print.cc
+++ b/re2c/print.cc
@@ -8,7 +8,7 @@ namespace re2c
 
 void prtChOrHex(std::ostream& o, uint c)
 {
-	if (!encoding.isEBCDIC() && (c < 256u) && (isprint(c) || isspace(c)))
+	if (!encoding.is(Enc::EBCDIC) && (c < 256u) && (isprint(c) || isspace(c)))
 	{
 		o << (DFlag ? '"' : '\'');
 		prtCh(o, c);
@@ -56,7 +56,7 @@ void prtHex(std::ostream& o, uint c)
 
 void prtCh(std::ostream& o, uint c)
 {
-	if (encoding.isEBCDIC())
+	if (encoding.is(Enc::EBCDIC))
 	{
 		prtHex(o, c);
 		return;