From af5b5438c41ae5556b97da925c45854e87fe65c5 Mon Sep 17 00:00:00 2001 From: helly Date: Sun, 5 Nov 2006 00:48:30 +0000 Subject: [PATCH] - Added -u switch to support unicode. --- re2c/CHANGELOG | 1 + re2c/actions.cc | 19 +++- re2c/dfa.cc | 24 ++++- re2c/globals.h | 1 + re2c/htdocs/index.html | 4 + re2c/main.cc | 27 ++++- re2c/test/cvsignore.u.c | 221 +++++++++++++++++++++++++++++++++++++++ re2c/test/cvsignore.u.re | 63 +++++++++++ 8 files changed, 350 insertions(+), 10 deletions(-) create mode 100755 re2c/test/cvsignore.u.c create mode 100755 re2c/test/cvsignore.u.re diff --git a/re2c/CHANGELOG b/re2c/CHANGELOG index 8d0c51ed..1bdb684b 100644 --- a/re2c/CHANGELOG +++ b/re2c/CHANGELOG @@ -1,5 +1,6 @@ Version 0.11.0 (????-??-??) --------------------------- +- Added -u switch to support unicode. Version 0.10.6 (2006-08-05) --------------------------- diff --git a/re2c/actions.cc b/re2c/actions.cc index 8db3efe6..e0bacb44 100644 --- a/re2c/actions.cc +++ b/re2c/actions.cc @@ -524,7 +524,7 @@ uint Scanner::unescape(SubStr &s) const static const char * oct = "01234567"; s.len--; - uint c; + uint c, ucb = 0; if ((c = *s.str++) != '\\' || s.len == 0) { @@ -587,10 +587,20 @@ uint Scanner::unescape(SubStr &s) const if (s.str[1] == '0') { l++; - if (s.str[2] == '0') + if (s.str[2] == '0' || (s.str[2] == '1' && uFlag)) { l++; - if (s.str[3] == '0') + if (uFlag) { + const char *u3 = strchr(hex, tolower(s.str[2])); + const char *u4 = strchr(hex, tolower(s.str[3])); + if (u3 && u4) + { + ucb = (uint)((u3 - hex) << 20) + + (uint)((u4 - hex) << 16); + l++; + } + } + else if (s.str[3] == '0') { l++; } @@ -641,7 +651,8 @@ uint Scanner::unescape(SubStr &s) const uint v = (uint)((p1 - hex) << 12) + (uint)((p2 - hex) << 8) + (uint)((p3 - hex) << 4) - + (uint)((p4 - hex)); + + (uint)((p4 - hex)) + + ucb; if (v >= nRealChars) { diff --git a/re2c/dfa.cc b/re2c/dfa.cc index dcd6ff6a..a1ea5d4b 100644 --- a/re2c/dfa.cc +++ b/re2c/dfa.cc @@ -28,7 +28,19 @@ void prtHex(std::ostream& o, uint c, bool useTalx) { int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]); - if (re2c::wFlag) + if (re2c::uFlag) + { + o << "0x" + << hexCh(oc >> 28) + << hexCh(oc >> 24) + << hexCh(oc >> 20) + << hexCh(oc >> 16) + << hexCh(oc >> 12) + << hexCh(oc >> 8) + << hexCh(oc >> 4) + << hexCh(oc); + } + else if (re2c::wFlag) { o << "0x" << hexCh(oc >> 12) @@ -92,6 +104,16 @@ void prtCh(std::ostream& o, uint c, bool useTalx) { o << (char) oc; } + else if (re2c::uFlag) + { + o << "0x" + << hexCh(oc >> 20) + << hexCh(oc >> 16) + << hexCh(oc >> 12) + << hexCh(oc >> 8) + << hexCh(oc >> 4) + << hexCh(oc); + } else if (re2c::wFlag) { o << "0x" diff --git a/re2c/globals.h b/re2c/globals.h index b8645c42..b4534ab8 100644 --- a/re2c/globals.h +++ b/re2c/globals.h @@ -21,6 +21,7 @@ extern bool fFlag; extern bool gFlag; extern bool iFlag; extern bool sFlag; +extern bool uFlag; extern bool wFlag; extern bool bSinglePass; diff --git a/re2c/htdocs/index.html b/re2c/htdocs/index.html index 6ddfc56f..2075f376 100755 --- a/re2c/htdocs/index.html +++ b/re2c/htdocs/index.html @@ -79,6 +79,10 @@ fixes which were incorporated.

Changelog

+

200?-??-??: 0.11.0

+

2006-08-05: 0.10.6