From: helly Date: Sun, 10 Jul 2005 13:48:03 +0000 (+0000) Subject: - Moved some actions into class Scanner to be able to issue fatal errors X-Git-Tag: 0.13.6~618 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5bb487e509a81596547b830417fb1749e7635479;p=re2c - Moved some actions into class Scanner to be able to issue fatal errors - Added hexadecimal character definitions - Check for consistency of octal character definitions - Update man page --- diff --git a/actions.cc b/actions.cc index 334da2b2..1a3f3954 100644 --- a/actions.cc +++ b/actions.cc @@ -313,7 +313,7 @@ void MatchOp::split(CharSet &s) s.fix->nxt = NULL; } -RegExp *mkDiff(RegExp *e1, RegExp *e2) +RegExp * mkDiff(RegExp *e1, RegExp *e2) { MatchOp *m1, *m2; @@ -505,7 +505,7 @@ void CloseVOp::split(CharSet &s) RegExp *expr(Scanner &); -uchar unescape(SubStr &s) +uchar Scanner::unescape(SubStr &s) const { s.len--; uchar c; @@ -538,6 +538,24 @@ uchar unescape(SubStr &s) case 'a': return xlat['\a']; + + case 'x': + { + static const char * hex = "0123456789abcdef"; + char *p1, *p2; + + if (s.len < 2 || !(p1 = strchr(hex, tolower(s.str[0]))) + || !(p2 = strchr(hex, tolower(s.str[1])))) + { + fatal("Illegal hexadecimal character code"); + } + s.len -= 2; + s.str += 2; + + uchar v = (uchar)((p1 - hex) << 4) + (uchar)(p2 - hex); + + return v; + } case '0': @@ -555,10 +573,19 @@ uchar unescape(SubStr &s) case '7': { - uchar v = c - '0'; + static const char * oct = "01234567"; + char *p0, *p1, *p2; - for (; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++) - v = v * 8 + (c - '0'); + if (s.len < 2 || !(p0 = strchr(oct, c)) || c > '3' + || !(p1 = strchr(oct, s.str[0])) + || !(p2 = strchr(oct, s.str[1]))) + { + fatal("Illegal octal character code"); + } + s.len -= 2; + s.str += 2; + + uchar v = (uchar)((p0 - oct) << 6) + (uchar)((p1 - oct) << 3) + (uchar)(p2 - oct); return v; } @@ -568,7 +595,7 @@ uchar unescape(SubStr &s) } } -Range *getRange(SubStr &s) +Range * 
Scanner::getRange(SubStr &s) const { uchar lb = unescape(s), ub; @@ -594,12 +621,12 @@ Range *getRange(SubStr &s) return new Range(lb, ub + 1); } -RegExp *matchChar(uint c) +RegExp * Scanner::matchChar(uint c) const { return new MatchOp(new Range(c, c + 1)); } -RegExp *strToRE(SubStr s) +RegExp * Scanner::strToRE(SubStr s) const { s.len -= 2; s.str += 1; @@ -615,7 +642,7 @@ RegExp *strToRE(SubStr s) return re; } -RegExp *strToCaseInsensitiveRE(SubStr s) +RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const { s.len -= 2; s.str += 1; @@ -657,7 +684,7 @@ RegExp *strToCaseInsensitiveRE(SubStr s) return re; } -RegExp *ranToRE(SubStr s) +RegExp * Scanner::ranToRE(SubStr s) const { s.len -= 2; s.str += 1; @@ -673,7 +700,7 @@ RegExp *ranToRE(SubStr s) return new MatchOp(r); } -RegExp *invToRE(SubStr s) +RegExp * Scanner::invToRE(SubStr s) const { s.len--; s.str++; @@ -694,7 +721,7 @@ RegExp *invToRE(SubStr s) return inv; } -RegExp *mkDot() +RegExp * Scanner::mkDot() const { RegExp * any = ranToRE(SubStr("[\\000-\\377]")); RegExp * ran = matchChar('\n'); diff --git a/bootstrap/scanner.cc b/bootstrap/scanner.cc index f8a3373d..a369a977 100644 --- a/bootstrap/scanner.cc +++ b/bootstrap/scanner.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.9.9.dev on Sun Jul 10 01:26:44 2005 */ +/* Generated by re2c 0.9.9.dev on Sun Jul 10 15:42:04 2005 */ #line 1 "scanner.re" /* $Id$ */ #include @@ -239,10 +239,10 @@ yy29: if(yych <= '"'){ if(yych <= 0x0A){ if(yych <= 0x08) goto yy53; - if(yych <= 0x09) goto yy47; - goto yy49; + if(yych <= 0x09) goto yy49; + goto yy51; } else { - if(yych == ' ') goto yy47; + if(yych == ' ') goto yy49; if(yych <= '!') goto yy53; goto yy37; } @@ -255,7 +255,7 @@ yy29: } else { if(yych <= '+') goto yy44; if(yych <= '-') goto yy53; - if(yych <= '.') goto yy51; + if(yych <= '.') goto yy47; goto yy33; } } @@ -348,29 +348,29 @@ yy46: return ID; } #line 350 "scanner.cc" yy47: ++YYCURSOR; - yych = *YYCURSOR; - goto yy56; + goto yy48; yy48: #line 190 "scanner.re" -{ 
goto scan; } -#line 357 "scanner.cc" +{ cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } +#line 359 "scanner.cc" yy49: ++YYCURSOR; - goto yy50; + yych = *YYCURSOR; + goto yy56; yy50: -#line 192 "scanner.re" -{ if(cursor == eof) RETURN(0); - pos = cursor; cline++; - goto scan; - } +#line 195 "scanner.re" +{ goto scan; } #line 366 "scanner.cc" yy51: ++YYCURSOR; goto yy52; yy52: #line 197 "scanner.re" -{ cur = cursor; - yylval.regexp = mkDot(); - return RANGE; - } +{ if(cursor == eof) RETURN(0); + pos = cursor; cline++; + goto scan; + } #line 375 "scanner.cc" yy53: ++YYCURSOR; goto yy54; @@ -386,7 +386,7 @@ yy55: ++YYCURSOR; goto yy56; yy56: if(yych == 0x09) goto yy55; if(yych == ' ') goto yy55; - goto yy48; + goto yy50; yy57: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; @@ -745,9 +745,10 @@ yy125: } -void Scanner::fatal(char *msg){ +void Scanner::fatal(char *msg) const +{ std::cerr << "line " << tline << ", column " << (tchar + 1) << ": " - << msg << std::endl; + << msg << std::endl; exit(1); } diff --git a/re.h b/re.h index 1bfc2c62..98707fba 100644 --- a/re.h +++ b/re.h @@ -292,11 +292,6 @@ public: extern void genCode(std::ostream&, RegExp*); extern RegExp *mkDiff(RegExp*, RegExp*); -extern RegExp *mkDot(); -extern RegExp *strToRE(SubStr); -extern RegExp *ranToRE(SubStr); -extern RegExp *invToRE(SubStr); -extern RegExp *strToCaseInsensitiveRE(SubStr s); } // end namespace re2c diff --git a/re2c.1.in b/re2c.1.in index f02b2f52..a37fa2c7 100644 --- a/re2c.1.in +++ b/re2c.1.in @@ -7,6 +7,12 @@ .ds rx regular expression .ds lx \fIl\fP-expression \"$Log$ +\"Revision 1.21 2005/07/10 13:48:03 helly +\"- Moved some actions into class Scanner to be able to issue fatal errors +\"- Added hexadecimal character definitions +\"- Check for consistency of octal character definitions +\"- Update man page +\" \"Revision 1.20 2005/05/03 15:46:46 helly \"- Add missing YYDEBUG() section \" @@ -310,6 +316,9 @@ a "character class" with a range in it; 
matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP', or a '\fCZ\fP'. .TP +\fC[^\fIclass\fP\fC]\fP +an inverted "character class". +.TP \fIr\fP\fC\e\fP\fIs\fP match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions which can be expressed as character classes. @@ -348,7 +357,19 @@ matches \fIr\fP at least \fIn\fP times. .TP \fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times. - +.TP +\fC.\fP +match any character except newline (\\n). +.TP +\fIdef\fP +matches named definition as specified by \fIdef\fP. +.LP +Character classes and string literals may contain octal or hexadecimal +character definitions and the following set of escape sequences (\fC\\n\fP, + \fC\\t\fP, \fC\\v\fP, \fC\\b\fP, \fC\\r\fP, \fC\\f\fP, \fC\\a\fP, \fC\\\\\fP). +An octal character is defined by a backslash followed by its three octal digits +and a hexadecimal character is defined by a backslash, a lowercase 'x' and its +two hexadecimal digits. .LP The regular expressions listed above are grouped according to precedence, from highest precedence at the top to lowest at the bottom. @@ -656,8 +677,6 @@ and act accordingly. .LP \*(re does not provide start conditions: use a separate scanner specification for each start condition (as illustrated in the above example). -.LP -No [^x]. Use difference instead. .SH BUGS .LP Only fixed length trailing context can be handled. 
diff --git a/scanner.h b/scanner.h index d252e2cf..66a6200c 100644 --- a/scanner.h +++ b/scanner.h @@ -4,6 +4,7 @@ #include #include "token.h" +#include "re.h" namespace re2c { @@ -25,9 +26,18 @@ public: Scanner(std::istream&); int echo(std::ostream&); int scan(); - void fatal(char*); + void fatal(char*) const; SubStr token(); uint line(); + + uchar unescape(SubStr &s) const; + Range * getRange(SubStr &s) const; + RegExp * matchChar(uint c) const; + RegExp * strToRE(SubStr s) const; + RegExp * strToCaseInsensitiveRE(SubStr s) const; + RegExp * ranToRE(SubStr s) const; + RegExp * invToRE(SubStr s) const; + RegExp * mkDot() const; }; inline SubStr Scanner::token() diff --git a/scanner.re b/scanner.re index 5d9fd3cc..15b10e2b 100644 --- a/scanner.re +++ b/scanner.re @@ -187,6 +187,11 @@ scan: yylval.symbol = Symbol::find(token()); return ID; } + "." { cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } + [ \t]+ { goto scan; } "\n" { if(cursor == eof) RETURN(0); @@ -194,11 +199,6 @@ scan: goto scan; } - "." 
{ cur = cursor; - yylval.regexp = mkDot(); - return RANGE; - } - any { std::cerr << "unexpected character: " << *tok << std::endl; goto scan; } @@ -237,9 +237,10 @@ comment: */ } -void Scanner::fatal(char *msg){ +void Scanner::fatal(char *msg) const +{ std::cerr << "line " << tline << ", column " << (tchar + 1) << ": " - << msg << std::endl; + << msg << std::endl; exit(1); } diff --git a/test/input9.c b/test/input9.c new file mode 100755 index 00000000..d370c313 --- /dev/null +++ b/test/input9.c @@ -0,0 +1,112 @@ +/* Generated by re2c */ +#line 1 "input9.re" + +#line 5 "" +{ + YYCTYPE yych; + goto yy0; + ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch(yych){ + case 0x00: goto yy2; + case 0x01: goto yy4; + case 0x02: goto yy16; + case 0x07: goto yy6; + case 0x08: goto yy8; + case ' ': goto yy18; + case '4': goto yy20; + case '@': goto yy10; + case 'S': goto yy12; + case 0xAB: goto yy22; + case 0xCD: goto yy24; + case 0xEE: goto yy14; + default: goto yy26; + } +yy2: ++YYCURSOR; + goto yy3; +yy3: +#line 5 "input9.re" +{ return 1; } +#line 33 "" +yy4: ++YYCURSOR; + goto yy5; +yy5: +#line 6 "input9.re" +{ return 2; } +#line 39 "" +yy6: ++YYCURSOR; + goto yy7; +yy7: +#line 7 "input9.re" +{ return 3; } +#line 45 "" +yy8: ++YYCURSOR; + goto yy9; +yy9: +#line 8 "input9.re" +{ return 4; } +#line 51 "" +yy10: ++YYCURSOR; + goto yy11; +yy11: +#line 9 "input9.re" +{ return 5; } +#line 57 "" +yy12: ++YYCURSOR; + goto yy13; +yy13: +#line 10 "input9.re" +{ return 6; } +#line 63 "" +yy14: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '7': goto yy27; + default: goto yy15; + } +yy15: +#line 18 "input9.re" +{ return 0; } +#line 72 "" +yy16: ++YYCURSOR; + goto yy17; +yy17: +#line 13 "input9.re" +{ return 8; } +#line 78 "" +yy18: ++YYCURSOR; + goto yy19; +yy19: +#line 14 "input9.re" +{ return 9; } +#line 84 "" +yy20: ++YYCURSOR; + goto yy21; +yy21: +#line 15 "input9.re" +{ return 10; } +#line 90 "" +yy22: ++YYCURSOR; + goto yy23; +yy23: 
+#line 16 "input9.re" +{ return 11; } +#line 96 "" +yy24: ++YYCURSOR; + goto yy25; +yy25: +#line 17 "input9.re" +{ return 12; } +#line 102 "" +yy26: yych = *++YYCURSOR; + goto yy15; +yy27: ++YYCURSOR; + goto yy28; +yy28: +#line 11 "input9.re" +{ return 7; } +#line 110 "" +} +#line 20 "input9.re" + diff --git a/test/input9.re b/test/input9.re new file mode 100755 index 00000000..77e6ee61 --- /dev/null +++ b/test/input9.re @@ -0,0 +1,20 @@ +/*!re2c + +any = [\000-\377]; + +"\000" { return 1; } +"\001" { return 2; } +"\007" { return 3; } +"\010" { return 4; } +"\100" { return 5; } +"\123" { return 6; } +"\3567" { return 7; } + +"\x02" { return 8; } +"\x20" { return 9; } +"\x34" { return 10; } +"\xab" { return 11; } +"\xCD" { return 12; } +any { return 0; } + +*/ diff --git a/test/push.re b/test/push.re index a76b7aec..38ca5abe 100755 --- a/test/push.re +++ b/test/push.re @@ -233,7 +233,7 @@ public: digit = [0-9]; integer = digit+; alpha = [A-Za-z_]; - any = [\000-\0377]; + any = [\000-\377]; space = [ \h\t\v\f\r]; "if" { SEND(kIf); }