From: Ulya Fokanova Date: Wed, 9 Apr 2014 22:35:37 +0000 (+0300) Subject: Disallow to set multiple encodings. X-Git-Tag: 0.13.7.1~9 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cfa2fe4cddd10f110494ff035ab5c541451194dd;p=re2c Disallow to set multiple encodings. Fail, if someone tries to set non-ASCII encoding when another non-ASCII encoding is already set. If encoding has been set successfully, it is guaranteed to be valid. --- diff --git a/re2c/actions.cc b/re2c/actions.cc index a9008b5c..d3d02478 100644 --- a/re2c/actions.cc +++ b/re2c/actions.cc @@ -757,7 +757,7 @@ Range * Scanner::getRange(SubStr &s) const xlb = encoding.xlat(lb); xub = encoding.xlat(ub); - if (encoding.isEBCDIC()) + if (encoding.is(Enc::EBCDIC)) { Range * r = new Range(xlb, xlb + 1); for (uint c = lb + 1; c <= ub; ++c) @@ -781,9 +781,9 @@ RegExp * Scanner::matchChar(uint c) const RegExp * Scanner::matchSymbol(uint c) const { - if (encoding.isUTF16()) + if (encoding.is(Enc::UTF16)) return UTF16Symbol(c); - else if (encoding.isUTF8()) + else if (encoding.is(Enc::UTF8)) return UTF8Symbol(c); else return matchChar(c); @@ -858,9 +858,9 @@ Range * Scanner::mkRange(SubStr &s) const RegExp * Scanner::matchSymbolRange(Range * r) const { - if (encoding.isUTF16()) + if (encoding.is(Enc::UTF16)) return UTF16Range(r); - else if (encoding.isUTF8()) + else if (encoding.is(Enc::UTF8)) return UTF8Range(r); else return new MatchOp(r); diff --git a/re2c/code.cc b/re2c/code.cc index fe9305cb..40bf7b6f 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -908,7 +908,7 @@ static bool genCases(std::ostream &o, uint ind, uint lb, Span *s, bool &newLine, o << indent(ind) << "case "; prtChOrHex(o, lb); o << ":"; - if (dFlag && encoding.isEBCDIC() && lb < 256u && isprint(encoding.talx(lb))) + if (dFlag && encoding.is(Enc::EBCDIC) && lb < 256u && isprint(encoding.talx(lb))) { o << " /* " << std::string(1, encoding.talx(lb)) << " */"; } @@ -2248,47 +2248,52 @@ void Scanner::config(const Str& cfg, int num) else if (cfg.to_string() == "flags:e") { if (num != 0) - encoding.setEBCDIC(); + { + if (!encoding.set(Enc::EBCDIC)) + fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n"); + } else - encoding.unsetEBCDIC(); - if (encoding.isBad()) - fatal("Cannot set '-e' switch: please reset '-w', '-x', '-u' and '-8' switches at first.\n"); + encoding.unset(Enc::EBCDIC); } else if (cfg.to_string() == "flags:u") { if (num != 0) - encoding.setUTF32(); + { + if (!encoding.set(Enc::UTF32)) + fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n"); + } else - encoding.unsetUTF32(); - if (encoding.isBad()) - fatal("Cannot set '-u' switch: please reset '-e', '-w', '-x' and '-8' switches at first.\n"); + encoding.unset(Enc::UTF32); } else if (cfg.to_string() == "flags:w") { if (num != 0) - encoding.setUCS2(); + { + if (!encoding.set(Enc::UCS2)) + fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n"); + } else - encoding.unsetUCS2(); - if (encoding.isBad()) - fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n"); + encoding.unset(Enc::UCS2); } else if (cfg.to_string() == "flags:x") { if (num != 0) - encoding.setUTF16(); + { + if (!encoding.set(Enc::UTF16)) + fatal("Cannot set '-x' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n"); + } else - encoding.unsetUTF16(); - if (encoding.isBad()) - fatal("Cannot set '-w' switch: please reset '-e', '-x', '-u' and '-8' switches at first.\n"); + encoding.unset(Enc::UTF16); } else if (cfg.to_string() == "flags:8") { if (num != 0) - encoding.setUTF8(); + { + if (!encoding.set(Enc::UTF8)) + fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n"); + } else - encoding.unsetUTF8(); - if (encoding.isBad()) - fatal("Cannot set '-8' switch: please reset '-e', '-w', '-x' and '-u' switches at first.\n"); + encoding.unset(Enc::UTF8); } else { diff --git a/re2c/enc.cc b/re2c/enc.cc index 8cfed960..6d85b412 100644 --- a/re2c/enc.cc +++ b/re2c/enc.cc @@ -2,8 +2,6 @@ namespace re2c { -const uint Enc::ERROR = ~0u; - const uint Enc::asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, diff --git a/re2c/enc.h b/re2c/enc.h index 95f05cb0..40372447 100644 --- a/re2c/enc.h +++ b/re2c/enc.h @@ -30,21 +30,22 @@ namespace re2c { class Enc { +public: // Supported encodings. enum type_t - { ASCII = 0x00000000u - , EBCDIC = 0x00000001u - , UCS2 = 0x00000002u - , UTF16 = 0x00000004u - , UTF32 = 0x00000008u - , UTF8 = 0x00000010u + { ASCII + , EBCDIC + , UCS2 + , UTF16 + , UTF32 + , UTF8 }; - static const uint ERROR; static const uint asc2ebc[256]; static const uint ebc2asc[256]; - uint type; +private: + type_t type; public: Enc() @@ -58,25 +59,9 @@ public: inline uint szCodePoint() const; inline uint szCodeUnit() const; - inline void setEBCDIC() { type |= EBCDIC; } - inline void setUCS2() { type |= UCS2; } - inline void setUTF16() { type |= UTF16; } - inline void setUTF32() { type |= UTF32; } - inline void setUTF8() { type |= UTF8; } - - inline void unsetEBCDIC() { type &= ~EBCDIC; } - inline void unsetUCS2() { type &= ~UCS2; } - inline void unsetUTF16() { type &= ~UTF16; } - inline void unsetUTF32() { type &= ~UTF32; } - inline void unsetUTF8() { type &= ~UTF8; } - - inline bool isEBCDIC() const { return type & EBCDIC; } - inline bool isUCS2() const { return type & UCS2; } - inline bool isUTF16() const { return type & UTF16; } - inline bool isUTF32() const { return type & UTF32; } - inline bool isUTF8() const { return type & UTF8; } - - inline bool isBad() const; + inline bool set(type_t t); + inline void unset(type_t); + inline bool is(type_t) const; inline uint xlat(uint c) const; inline uint talx(uint c) const; @@ -92,8 +77,8 @@ inline uint Enc::nCodePoints() const case UTF16: case UTF32: case UTF8: return 0x110000; - default: return ERROR; } + return ~0; // to silence gcc warning } inline uint Enc::nCodeUnits() const @@ -106,8 +91,8 @@ inline uint Enc::nCodeUnits() const case UCS2: case UTF16: return 0x10000; case UTF32: return 0x110000; - default: return ERROR; } + return ~0; // to silence gcc warning } // returns *maximal* code point size for encoding @@ -121,8 +106,8 @@ inline uint Enc::szCodePoint() const case UTF16: case UTF32: case UTF8: return 4; - default: return ERROR; } + return ~0; // to silence gcc warning } inline uint Enc::szCodeUnit() const @@ -135,18 +120,32 @@ inline uint Enc::szCodeUnit() const case UCS2: case UTF16: return 2; case UTF32: return 4; - default: return ERROR; } + return ~0; // to silence gcc warning +} + +inline bool Enc::set(type_t t) +{ + if (type == t) + return true; + else if (type != ASCII) + return false; + else + { + type = t; + return true; + } +} + +inline void Enc::unset(type_t t) +{ + if (type == t) + type = ASCII; } -// This test returns 'true' for all valid encoding types -inline bool Enc::isBad() const +inline bool Enc::is(type_t t) const { - // test if 'type' is a power of 2 - // notice: ASCII mask is 0 => it's ok if either - // 1) only ASCII is set - // 2) both ASCII and some other encoding is set - return (type & (type - 1)) != 0; + return type == t; } inline uint Enc::xlat(uint c) const @@ -159,8 +158,8 @@ inline uint Enc::xlat(uint c) const case UTF16: case UTF32: case UTF8: return c; - default: return ERROR; } + return ~0; // to silence gcc warning } inline uint Enc::talx(uint c) const @@ -173,8 +172,8 @@ inline uint Enc::talx(uint c) const case UTF16: case UTF32: case UTF8: return c; - default: return ERROR; } + return ~0; // to silence gcc warning } } // namespace re2c diff --git a/re2c/main.cc b/re2c/main.cc index d5a122ec..4af5c0f1 100644 --- a/re2c/main.cc +++ b/re2c/main.cc @@ -246,7 +246,11 @@ int main(int argc, char *argv[]) break; case 'e': - encoding.setEBCDIC(); + if (!encoding.set(Enc::EBCDIC)) + { + std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; + return 2; + } break; case 'd': @@ -322,26 +326,42 @@ int main(int argc, char *argv[]) cout << vernum << endl; return 2; } - + case 'w': sFlag = true; - encoding.setUCS2(); + if (!encoding.set(Enc::UCS2)) + { + std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; + return 2; + } break; case 'x': sFlag = true; - encoding.setUTF16(); + if (!encoding.set(Enc::UTF16)) + { + std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; + return 2; + } break; case 'u': sFlag = true; - encoding.setUTF32(); + if (!encoding.set(Enc::UTF32)) + { + std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; + return 2; + } break; case '8': - encoding.setUTF8(); + if (!encoding.set(Enc::UTF8)) + { + std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; + return 2; + } break; - + default: case 'h': case '?': @@ -372,12 +392,6 @@ int main(int argc, char *argv[]) return 2; } - if (encoding.isBad()) - { - std::cerr << "re2c: error: Only one of switches -e, -w, -x, -u and -8 must be set\n"; - return 2; - } - if (DFlag && (bFlag || dFlag || sFlag)) { std::cerr << "re2c: error: Cannot combine -D with -b, -d or -s switches\n"; diff --git a/re2c/print.cc b/re2c/print.cc index 9f61bdc5..22aa7c7c 100644 --- a/re2c/print.cc +++ b/re2c/print.cc @@ -8,7 +8,7 @@ namespace re2c void prtChOrHex(std::ostream& o, uint c) { - if (!encoding.isEBCDIC() && (c < 256u) && (isprint(c) || isspace(c))) + if (!encoding.is(Enc::EBCDIC) && (c < 256u) && (isprint(c) || isspace(c))) { o << (DFlag ? '"' : '\''); prtCh(o, c); @@ -56,7 +56,7 @@ void prtHex(std::ostream& o, uint c) void prtCh(std::ostream& o, uint c) { - if (encoding.isEBCDIC()) + if (encoding.is(Enc::EBCDIC)) { prtHex(o, c); return;