s.fix->nxt = NULL;
}
-RegExp *mkDiff(RegExp *e1, RegExp *e2)
+RegExp * mkDiff(RegExp *e1, RegExp *e2)
{
MatchOp *m1, *m2;
RegExp *expr(Scanner &);
-uchar unescape(SubStr &s)
+uchar Scanner::unescape(SubStr &s) const
{
s.len--;
uchar c;
case 'a':
return xlat['\a'];
+
+ case 'x':
+ {
+ static const char * hex = "0123456789abcdef";
+ char *p1, *p2;
+
+ if (s.len < 2 || !(p1 = strchr(hex, tolower(s.str[0])))
+ || !(p2 = strchr(hex, tolower(s.str[1]))))
+ {
+ fatal("Illegal hexadecimal character code");
+ }
+ s.len -= 2;
+ s.str += 2;
+
+ uchar v = (uchar)((p1 - hex) << 4) + (uchar)(p2 - hex);
+
+ return v;
+ }
case '0':
case '7':
{
- uchar v = c - '0';
+ static const char * oct = "01234567";
+ char *p0, *p1, *p2;
- for (; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
- v = v * 8 + (c - '0');
+ if (s.len < 2 || !(p0 = strchr(oct, c)) || c > '3'
+ || !(p1 = strchr(oct, s.str[0]))
+ || !(p2 = strchr(oct, s.str[1])))
+ {
+ fatal("Illegal octal character code");
+ }
+ s.len -= 2;
+ s.str += 2;
+
+ uchar v = (uchar)((p0 - oct) << 6) + (uchar)((p1 - oct) << 3) + (uchar)(p2 - oct);
return v;
}
}
}
-Range *getRange(SubStr &s)
+Range * Scanner::getRange(SubStr &s) const
{
uchar lb = unescape(s), ub;
return new Range(lb, ub + 1);
}
-RegExp *matchChar(uint c)
+RegExp * Scanner::matchChar(uint c) const
{
return new MatchOp(new Range(c, c + 1));
}
-RegExp *strToRE(SubStr s)
+RegExp * Scanner::strToRE(SubStr s) const
{
s.len -= 2;
s.str += 1;
return re;
}
-RegExp *strToCaseInsensitiveRE(SubStr s)
+RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const
{
s.len -= 2;
s.str += 1;
return re;
}
-RegExp *ranToRE(SubStr s)
+RegExp * Scanner::ranToRE(SubStr s) const
{
s.len -= 2;
s.str += 1;
return new MatchOp(r);
}
-RegExp *invToRE(SubStr s)
+RegExp * Scanner::invToRE(SubStr s) const
{
s.len--;
s.str++;
return inv;
}
-RegExp *mkDot()
+RegExp * Scanner::mkDot() const
{
RegExp * any = ranToRE(SubStr("[\\000-\\377]"));
RegExp * ran = matchChar('\n');
-/* Generated by re2c 0.9.9.dev on Sun Jul 10 01:26:44 2005 */
+/* Generated by re2c 0.9.9.dev on Sun Jul 10 15:42:04 2005 */
#line 1 "scanner.re"
/* $Id$ */
#include <stdlib.h>
if(yych <= '"'){
if(yych <= 0x0A){
if(yych <= 0x08) goto yy53;
- if(yych <= 0x09) goto yy47;
- goto yy49;
+ if(yych <= 0x09) goto yy49;
+ goto yy51;
} else {
- if(yych == ' ') goto yy47;
+ if(yych == ' ') goto yy49;
if(yych <= '!') goto yy53;
goto yy37;
}
} else {
if(yych <= '+') goto yy44;
if(yych <= '-') goto yy53;
- if(yych <= '.') goto yy51;
+ if(yych <= '.') goto yy47;
goto yy33;
}
}
return ID; }
#line 350 "scanner.cc"
yy47: ++YYCURSOR;
- yych = *YYCURSOR;
- goto yy56;
+ goto yy48;
yy48:
#line 190 "scanner.re"
-{ goto scan; }
-#line 357 "scanner.cc"
+{ cur = cursor;
+ yylval.regexp = mkDot();
+ return RANGE;
+ }
+#line 359 "scanner.cc"
yy49: ++YYCURSOR;
- goto yy50;
+ yych = *YYCURSOR;
+ goto yy56;
yy50:
-#line 192 "scanner.re"
-{ if(cursor == eof) RETURN(0);
- pos = cursor; cline++;
- goto scan;
- }
+#line 195 "scanner.re"
+{ goto scan; }
#line 366 "scanner.cc"
yy51: ++YYCURSOR;
goto yy52;
yy52:
#line 197 "scanner.re"
-{ cur = cursor;
- yylval.regexp = mkDot();
- return RANGE;
- }
+{ if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
#line 375 "scanner.cc"
yy53: ++YYCURSOR;
goto yy54;
goto yy56;
yy56: if(yych == 0x09) goto yy55;
if(yych == ' ') goto yy55;
- goto yy48;
+ goto yy50;
yy57: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
}
-void Scanner::fatal(char *msg){
+void Scanner::fatal(char *msg) const
+{
std::cerr << "line " << tline << ", column " << (tchar + 1) << ": "
- << msg << std::endl;
+ << msg << std::endl;
exit(1);
}
extern void genCode(std::ostream&, RegExp*);
extern RegExp *mkDiff(RegExp*, RegExp*);
-extern RegExp *mkDot();
-extern RegExp *strToRE(SubStr);
-extern RegExp *ranToRE(SubStr);
-extern RegExp *invToRE(SubStr);
-extern RegExp *strToCaseInsensitiveRE(SubStr s);
} // end namespace re2c
.ds rx regular expression
.ds lx \fIl\fP-expression
\"$Log$
+\"Revision 1.21 2005/07/10 13:48:03 helly
+\"- Moved some actions into class Scanner to be able to issue fatal errors
+\"- Added hexadecimal character definitions
+\"- Check for consistency of octal character definitions
+\"- Update man page
+\"
\"Revision 1.20 2005/05/03 15:46:46 helly
\"- Add missing YYDEBUG() section
\"
matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
or a '\fCZ\fP'.
.TP
+\fC[^\fIclass\fP\fC]\fP
+an inverted "character class".
+.TP
\fIr\fP\fC\e\fP\fIs\fP
match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
which can be expressed as character classes.
.TP
\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP
matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times.
-
+.TP
+\fC.\fP
+match any character except newline (\\n).
+.TP
+\fIdef\fP
+matches the named definition specified by \fIdef\fP.
+.LP
+Character classes and string literals may contain octal or hexadecimal
+character definitions and the following set of escape sequences (\fC\\n\fP,
+ \fC\\t\fP, \fC\\v\fP, \fC\\b\fP, \fC\\r\fP, \fC\\f\fP, \fC\\a\fP, \fC\\\\\fP).
+An octal character is defined by a backslash followed by its three octal digits
+and a hexadecimal character is defined by a backslash, a lowercase 'x' and its
+two hexadecimal digits.
.LP
The regular expressions listed above are grouped according to
precedence, from highest precedence at the top to lowest at the bottom.
.LP
\*(re does not provide start conditions: use a separate scanner
specification for each start condition (as illustrated in the above example).
-.LP
-No [^x]. Use difference instead.
.SH BUGS
.LP
Only fixed length trailing context can be handled.
#include <iosfwd>
#include "token.h"
+#include "re.h"
namespace re2c
{
Scanner(std::istream&);
int echo(std::ostream&);
int scan();
- void fatal(char*);
+ void fatal(char*) const;
SubStr token();
uint line();
+
+ uchar unescape(SubStr &s) const;
+ Range * getRange(SubStr &s) const;
+ RegExp * matchChar(uint c) const;
+ RegExp * strToRE(SubStr s) const;
+ RegExp * strToCaseInsensitiveRE(SubStr s) const;
+ RegExp * ranToRE(SubStr s) const;
+ RegExp * invToRE(SubStr s) const;
+ RegExp * mkDot() const;
};
inline SubStr Scanner::token()
yylval.symbol = Symbol::find(token());
return ID; }
+ "." { cur = cursor;
+ yylval.regexp = mkDot();
+ return RANGE;
+ }
+
[ \t]+ { goto scan; }
"\n" { if(cursor == eof) RETURN(0);
goto scan;
}
- "." { cur = cursor;
- yylval.regexp = mkDot();
- return RANGE;
- }
-
any { std::cerr << "unexpected character: " << *tok << std::endl;
goto scan;
}
*/
}
-void Scanner::fatal(char *msg){
+void Scanner::fatal(char *msg) const
+{
std::cerr << "line " << tline << ", column " << (tchar + 1) << ": "
- << msg << std::endl;
+ << msg << std::endl;
exit(1);
}
--- /dev/null
+/* Generated by re2c */
+#line 1 "input9.re"
+
+#line 5 "<stdout>"
+{
+ YYCTYPE yych;
+ goto yy0;
+ ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ switch(yych){
+ case 0x00: goto yy2;
+ case 0x01: goto yy4;
+ case 0x02: goto yy16;
+ case 0x07: goto yy6;
+ case 0x08: goto yy8;
+ case ' ': goto yy18;
+ case '4': goto yy20;
+ case '@': goto yy10;
+ case 'S': goto yy12;
+ case 0xAB: goto yy22;
+ case 0xCD: goto yy24;
+ case 0xEE: goto yy14;
+ default: goto yy26;
+ }
+yy2: ++YYCURSOR;
+ goto yy3;
+yy3:
+#line 5 "input9.re"
+{ return 1; }
+#line 33 "<stdout>"
+yy4: ++YYCURSOR;
+ goto yy5;
+yy5:
+#line 6 "input9.re"
+{ return 2; }
+#line 39 "<stdout>"
+yy6: ++YYCURSOR;
+ goto yy7;
+yy7:
+#line 7 "input9.re"
+{ return 3; }
+#line 45 "<stdout>"
+yy8: ++YYCURSOR;
+ goto yy9;
+yy9:
+#line 8 "input9.re"
+{ return 4; }
+#line 51 "<stdout>"
+yy10: ++YYCURSOR;
+ goto yy11;
+yy11:
+#line 9 "input9.re"
+{ return 5; }
+#line 57 "<stdout>"
+yy12: ++YYCURSOR;
+ goto yy13;
+yy13:
+#line 10 "input9.re"
+{ return 6; }
+#line 63 "<stdout>"
+yy14: ++YYCURSOR;
+ switch((yych = *YYCURSOR)) {
+ case '7': goto yy27;
+ default: goto yy15;
+ }
+yy15:
+#line 18 "input9.re"
+{ return 0; }
+#line 72 "<stdout>"
+yy16: ++YYCURSOR;
+ goto yy17;
+yy17:
+#line 13 "input9.re"
+{ return 8; }
+#line 78 "<stdout>"
+yy18: ++YYCURSOR;
+ goto yy19;
+yy19:
+#line 14 "input9.re"
+{ return 9; }
+#line 84 "<stdout>"
+yy20: ++YYCURSOR;
+ goto yy21;
+yy21:
+#line 15 "input9.re"
+{ return 10; }
+#line 90 "<stdout>"
+yy22: ++YYCURSOR;
+ goto yy23;
+yy23:
+#line 16 "input9.re"
+{ return 11; }
+#line 96 "<stdout>"
+yy24: ++YYCURSOR;
+ goto yy25;
+yy25:
+#line 17 "input9.re"
+{ return 12; }
+#line 102 "<stdout>"
+yy26: yych = *++YYCURSOR;
+ goto yy15;
+yy27: ++YYCURSOR;
+ goto yy28;
+yy28:
+#line 11 "input9.re"
+{ return 7; }
+#line 110 "<stdout>"
+}
+#line 20 "input9.re"
+
--- /dev/null
+/*!re2c
+
+any = [\000-\377];
+
+"\000" { return 1; }
+"\001" { return 2; }
+"\007" { return 3; }
+"\010" { return 4; }
+"\100" { return 5; }
+"\123" { return 6; }
+"\3567" { return 7; }
+
+"\x02" { return 8; }
+"\x20" { return 9; }
+"\x34" { return 10; }
+"\xab" { return 11; }
+"\xCD" { return 12; }
+any { return 0; }
+
+*/
digit = [0-9];
integer = digit+;
alpha = [A-Za-z_];
- any = [\000-\0377];
+ any = [\000-\377];
space = [ \h\t\v\f\r];
"if" { SEND(kIf); }