xlb = encoding.xlat(lb);
xub = encoding.xlat(ub);
- if (encoding.isEBCDIC())
+ if (encoding.is(Enc::EBCDIC))
{
Range * r = new Range(xlb, xlb + 1);
for (uint c = lb + 1; c <= ub; ++c)
RegExp * Scanner::matchSymbol(uint c) const
{
- if (encoding.isUTF16())
+ if (encoding.is(Enc::UTF16))
return UTF16Symbol(c);
- else if (encoding.isUTF8())
+ else if (encoding.is(Enc::UTF8))
return UTF8Symbol(c);
else
return matchChar(c);
RegExp * Scanner::matchSymbolRange(Range * r) const
{
- if (encoding.isUTF16())
+ if (encoding.is(Enc::UTF16))
return UTF16Range(r);
- else if (encoding.isUTF8())
+ else if (encoding.is(Enc::UTF8))
return UTF8Range(r);
else
return new MatchOp(r);
o << indent(ind) << "case ";
prtChOrHex(o, lb);
o << ":";
- if (dFlag && encoding.isEBCDIC() && lb < 256u && isprint(encoding.talx(lb)))
+ if (dFlag && encoding.is(Enc::EBCDIC) && lb < 256u && isprint(encoding.talx(lb)))
{
o << " /* " << std::string(1, encoding.talx(lb)) << " */";
}
else if (cfg.to_string() == "flags:e")
{
if (num != 0)
- encoding.setEBCDIC();
+ {
+ if (!encoding.set(Enc::EBCDIC))
+ fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n");
+ }
else
- encoding.unsetEBCDIC();
- if (encoding.isBad())
- fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n");
+ encoding.unset(Enc::EBCDIC);
}
else if (cfg.to_string() == "flags:u")
{
if (num != 0)
- encoding.setUTF32();
+ {
+ if (!encoding.set(Enc::UTF32))
+ fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n");
+ }
else
- encoding.unsetUTF32();
- if (encoding.isBad())
- fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n");
+ encoding.unset(Enc::UTF32);
}
else if (cfg.to_string() == "flags:w")
{
if (num != 0)
- encoding.setUCS2();
+ {
+ if (!encoding.set(Enc::UCS2))
+ fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+ }
else
- encoding.unsetUCS2();
- if (encoding.isBad())
- fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+ encoding.unset(Enc::UCS2);
}
else if (cfg.to_string() == "flags:x")
{
if (num != 0)
- encoding.setUTF16();
+ {
+ // Enc::set() refuses to switch when a different non-ASCII encoding is
+ // already active, so name the four *other* switches the user must reset.
+ if (!encoding.set(Enc::UTF16))
+ fatal("Cannot set '-x' switch: please reset '-e', '-w', '-u' and '-8' switches at first.\n");
+ }
else
- encoding.unsetUTF16();
- if (encoding.isBad())
- fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n");
+ encoding.unset(Enc::UTF16);
}
else if (cfg.to_string() == "flags:8")
{
if (num != 0)
- encoding.setUTF8();
+ {
+ if (!encoding.set(Enc::UTF8))
+ fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n");
+ }
else
- encoding.unsetUTF8();
- if (encoding.isBad())
- fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n");
+ encoding.unset(Enc::UTF8);
}
else
{
namespace re2c {
-const uint Enc::ERROR = ~0u;
-
const uint Enc::asc2ebc[256] =
{ /* Based on ISO 8859/1 and Code Page 37 */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
class Enc
{
+public:
// Supported encodings.
enum type_t
- { ASCII = 0x00000000u
- , EBCDIC = 0x00000001u
- , UCS2 = 0x00000002u
- , UTF16 = 0x00000004u
- , UTF32 = 0x00000008u
- , UTF8 = 0x00000010u
+ { ASCII
+ , EBCDIC
+ , UCS2
+ , UTF16
+ , UTF32
+ , UTF8
};
- static const uint ERROR;
static const uint asc2ebc[256];
static const uint ebc2asc[256];
- uint type;
+private:
+ type_t type;
public:
Enc()
inline uint szCodePoint() const;
inline uint szCodeUnit() const;
- inline void setEBCDIC() { type |= EBCDIC; }
- inline void setUCS2() { type |= UCS2; }
- inline void setUTF16() { type |= UTF16; }
- inline void setUTF32() { type |= UTF32; }
- inline void setUTF8() { type |= UTF8; }
-
- inline void unsetEBCDIC() { type &= ~EBCDIC; }
- inline void unsetUCS2() { type &= ~UCS2; }
- inline void unsetUTF16() { type &= ~UTF16; }
- inline void unsetUTF32() { type &= ~UTF32; }
- inline void unsetUTF8() { type &= ~UTF8; }
-
- inline bool isEBCDIC() const { return type & EBCDIC; }
- inline bool isUCS2() const { return type & UCS2; }
- inline bool isUTF16() const { return type & UTF16; }
- inline bool isUTF32() const { return type & UTF32; }
- inline bool isUTF8() const { return type & UTF8; }
-
- inline bool isBad() const;
+ inline bool set(type_t t);
+ inline void unset(type_t);
+ inline bool is(type_t) const;
inline uint xlat(uint c) const;
inline uint talx(uint c) const;
case UTF16:
case UTF32:
case UTF8: return 0x110000;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
}
inline uint Enc::nCodeUnits() const
case UCS2:
case UTF16: return 0x10000;
case UTF32: return 0x110000;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
}
// returns *maximal* code point size for encoding
case UTF16:
case UTF32:
case UTF8: return 4;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
}
inline uint Enc::szCodeUnit() const
case UCS2:
case UTF16: return 2;
case UTF32: return 4;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
+}
+
+// Select encoding 't'.
+// Returns true when 't' is already active or when the current encoding is
+// still ASCII (in which case 't' becomes the active one). Returns false and
+// leaves the encoding untouched when a different non-ASCII encoding is active.
+inline bool Enc::set(type_t t)
+{
+    if (type != t)
+    {
+        // A different non-ASCII encoding is already selected: refuse.
+        if (type != ASCII)
+            return false;
+        type = t;
+    }
+    return true;
+}
+
+// Deselect encoding 't': if 't' is the active encoding, revert to ASCII;
+// otherwise do nothing.
+inline void Enc::unset(type_t t)
+{
+    if (type != t)
+        return;
+    type = ASCII;
}
-// This test returns 'true' for all valid encoding types
-inline bool Enc::isBad() const
+// Reports whether 't' is the currently selected encoding.
+inline bool Enc::is(type_t t) const
{
- // test if 'type' is a power of 2
- // notice: ASCII mask is 0 => it's ok if either
- // 1) only ASCII is set
- // 2) both ASCII and some other encoding is set
- return (type & (type - 1)) != 0;
+ return type == t;
}
inline uint Enc::xlat(uint c) const
case UTF16:
case UTF32:
case UTF8: return c;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
}
inline uint Enc::talx(uint c) const
case UTF16:
case UTF32:
case UTF8: return c;
- default: return ERROR;
}
+ return ~0; // to silence gcc warning
}
} // namespace re2c
break;
case 'e':
- encoding.setEBCDIC();
+ if (!encoding.set(Enc::EBCDIC))
+ {
+ std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+ return 2;
+ }
break;
case 'd':
cout << vernum << endl;
return 2;
}
-
+
case 'w':
sFlag = true;
- encoding.setUCS2();
+ if (!encoding.set(Enc::UCS2))
+ {
+ std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+ return 2;
+ }
break;
case 'x':
sFlag = true;
- encoding.setUTF16();
+ if (!encoding.set(Enc::UTF16))
+ {
+ std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+ return 2;
+ }
break;
case 'u':
sFlag = true;
- encoding.setUTF32();
+ if (!encoding.set(Enc::UTF32))
+ {
+ std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+ return 2;
+ }
break;
case '8':
- encoding.setUTF8();
+ if (!encoding.set(Enc::UTF8))
+ {
+ std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
+ return 2;
+ }
break;
-
+
default:
case 'h':
case '?':
return 2;
}
- if (encoding.isBad())
- {
- std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n";
- return 2;
- }
-
if (DFlag && (bFlag || dFlag || sFlag))
{
std::cerr << "re2c: error: Cannot combine -D with -b, -d or -s switches\n";
void prtChOrHex(std::ostream& o, uint c)
{
- if (!encoding.isEBCDIC() && (c < 256u) && (isprint(c) || isspace(c)))
+ if (!encoding.is(Enc::EBCDIC) && (c < 256u) && (isprint(c) || isspace(c)))
{
o << (DFlag ? '"' : '\'');
prtCh(o, c);
void prtCh(std::ostream& o, uint c)
{
- if (encoding.isEBCDIC())
+ if (encoding.is(Enc::EBCDIC))
{
prtHex(o, c);
return;