Version 0.9.13 (????-??-??)
---------------------------
+- Added support for DOS line endings.
+- Added experimental Unicode support.
+- Applied #1307467 Unicode patch for 0.9.7.
Version 0.9.12 (2005-12-28)
---------------------------
RegExp *expr(Scanner &);
-uchar Scanner::unescape(SubStr &s) const
+uint Scanner::unescape(SubStr &s) const
{
s.len--;
- uchar c;
+ uint c;
if ((c = *s.str++) != '\\' || s.len == 0)
- return xlat[c];
+ {
+ return xlat(c);
+ }
s.len--;
switch (c = *s.str++)
{
+ case 'n': return xlat('\n');
+ case 't': return xlat('\t');
+ case 'v': return xlat('\v');
+ case 'b': return xlat('\b');
+ case 'r': return xlat('\r');
+ case 'f': return xlat('\f');
+ case 'a': return xlat('\a');
- case 'n':
- return xlat['\n'];
-
- case 't':
- return xlat['\t'];
-
- case 'v':
- return xlat['\v'];
-
- case 'b':
- return xlat['\b'];
-
- case 'r':
- return xlat['\r'];
-
- case 'f':
- return xlat['\f'];
-
- case 'a':
- return xlat['\a'];
-
case 'x':
{
static const char * hex = "0123456789abcdef";
s.len -= 2;
s.str += 2;
- uchar v = (uchar)((p1 - hex) << 4) + (uchar)(p2 - hex);
+ uint v = (uint)((p1 - hex) << 4)
+ + (uint)((p2 - hex));
return v;
}
- case '0':
+ case 'X':
+ {
+ static const char * hex = "0123456789abcdef";
+ char *p1, *p2, *p3, *p4;
- case '1':
+ if (s.len < 4 || !(p1 = strchr(hex, tolower(s.str[0])))
+ || !(p2 = strchr(hex, tolower(s.str[1])))
+ || !(p3 = strchr(hex, tolower(s.str[2])))
+ || !(p4 = strchr(hex, tolower(s.str[3]))))
+ {
+ fatal("Illegal hexadecimal character code");
+ }
+ s.len -= 4;
+ s.str += 4;
+
+ uint v = (uint)((p1 - hex) << 12)
+ + (uint)((p2 - hex) << 8)
+ + (uint)((p3 - hex) << 4)
+ + (uint)((p4 - hex));
- case '2':
+ return v;
+ }
+ case '0':
+ case '1':
+ case '2':
case '3':
-
case '4':
-
case '5':
-
case '6':
-
case '7':
{
static const char * oct = "01234567";
s.len -= 2;
s.str += 2;
- uchar v = (uchar)((p0 - oct) << 6) + (uchar)((p1 - oct) << 3) + (uchar)(p2 - oct);
+ uint v = (uint)((p0 - oct) << 6) + (uint)((p1 - oct) << 3) + (uint)(p2 - oct);
return v;
}
default:
- return xlat[c];
+ return xlat(c);
}
}
ub = tmp;
}
- xlb = xlat[lb];
- xub = xlat[ub];
+ xlb = xlat(lb);
+ xub = xlat(ub);
for(c = lb; c <= ub; c++)
{
- if (!(xlb <= xlat[c] && xlat[c] <= ub))
+ if (!(xlb <= xlat(c) && xlat(c) <= ub))
{
/* range doesn't work */
Range * r = new Range(xlb, xlb + 1);
for (c = lb + 1; c <= ub; c++)
{
- r = doUnion(r, new Range(xlat[c], xlat[c] + 1));
+ r = doUnion(r, new Range(xlat(c), xlat(c) + 1));
}
return r;
}
if (s.len == 0)
return new NullOp;
- uchar c = unescape(s);
+ uint c = unescape(s);
RegExp *re, *reL, *reU;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
{
- reL = matchChar(xlat[tolower(c)]);
- reU = matchChar(xlat[toupper(c)]);
+ reL = matchChar(xlat(tolower(c)));
+ reU = matchChar(xlat(toupper(c)));
re = mkAlt(reL, reU);
}
else
while (s.len > 0)
{
- uchar c = unescape(s);
+ uint c = unescape(s);
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
{
- reL = matchChar(xlat[tolower(c)]);
- reU = matchChar(xlat[toupper(c)]);
+ reL = matchChar(xlat(tolower(c)));
+ reU = matchChar(xlat(toupper(c)));
re = new CatOp(re, mkAlt(reL, reU));
}
else
RegExp * Scanner::mkDot() const
{
RegExp * any = ranToRE(SubStr("[\\000-\\377]"));
- RegExp * ran = matchChar(xlat['\n']);
+ RegExp * ran = matchChar(xlat('\n'));
RegExp * inv = mkDiff(any, ran);
delete ran;
uint j;
memset(&cs, 0, sizeof(cs));
- for (j = 0; j < nChars; ++j)
+ for (j = 0; j < nRealChars; ++j)
{
cs.rep[j] = &cs.ptn[0];
cs.ptn[j].nxt = &cs.ptn[j + 1];
*/
Char rep[nChars];
- for (j = 0; j < nChars; ++j)
+ for (j = 0; j < nRealChars; ++j)
{
if (!cs.rep[j]->nxt)
cs.rep[j]->nxt = &cs.ptn[j];
}
}
- DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
+ DFA *dfa = new DFA(ins, re->size, 0, nRealChars, rep);
dfa->emit(o);
delete dfa;
delete [] ins;
-/* Generated by re2c 0.9.12.dev on Wed Dec 28 00:53:17 2005 */
+/* Generated by re2c 0.9.13.dev on Wed Dec 28 18:30:39 2005 */
#line 1 "scanner.re"
/* $Id$ */
#include <stdlib.h>
Scanner::Scanner(std::istream& i) : in(i),
bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL),
- top(NULL), eof(NULL), tchar(0), tline(0), cline(1) {
+ top(NULL), eof(NULL), tchar(0), tline(0), cline(1)
+{
;
}
-char *Scanner::fill(char *cursor){
- if(!eof){
- uint cnt = tok - bot;
- if(cnt){
- memcpy(bot, tok, lim - tok);
- tok = bot;
- ptr -= cnt;
- cursor -= cnt;
- pos -= cnt;
- lim -= cnt;
- }
- if((top - lim) < BSIZE){
- char *buf = new char[(lim - bot) + BSIZE];
- memcpy(buf, tok, lim - tok);
- tok = buf;
- ptr = &buf[ptr - bot];
- cursor = &buf[cursor - bot];
- pos = &buf[pos - bot];
- lim = &buf[lim - bot];
- top = &lim[BSIZE];
- delete [] bot;
- bot = buf;
- }
- if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE){
- eof = &lim[cnt]; *eof++ = '\0';
+char *Scanner::fill(char *cursor)
+{
+ if(!eof)
+ {
+ uint cnt = tok - bot;
+ if(cnt)
+ {
+ memcpy(bot, tok, lim - tok);
+ tok = bot;
+ ptr -= cnt;
+ cursor -= cnt;
+ pos -= cnt;
+ lim -= cnt;
+ }
+ if((top - lim) < BSIZE)
+ {
+ char *buf = new char[(lim - bot) + BSIZE];
+ memcpy(buf, tok, lim - tok);
+ tok = buf;
+ ptr = &buf[ptr - bot];
+ cursor = &buf[cursor - bot];
+ pos = &buf[pos - bot];
+ lim = &buf[lim - bot];
+ top = &lim[BSIZE];
+ delete [] bot;
+ bot = buf;
+ }
+ if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE)
+ {
+ eof = &lim[cnt]; *eof++ = '\0';
+ }
+ lim += cnt;
}
- lim += cnt;
- }
- return cursor;
+ return cursor;
}
-#line 77 "scanner.re"
+#line 83 "scanner.re"
int Scanner::echo(std::ostream &out){
tok = cursor;
echo:
-#line 85 "scanner.cc"
+#line 91 "scanner.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
if(yych == '*') goto yy12;
goto yy3;
yy3:
-#line 123 "scanner.re"
+#line 129 "scanner.re"
{
goto echo;
}
-#line 112 "scanner.cc"
+#line 118 "scanner.cc"
yy4: yych = *++YYCURSOR;
if(yych == '/') goto yy10;
goto yy3;
yy5: ++YYCURSOR;
goto yy6;
yy6:
-#line 112 "scanner.re"
+#line 118 "scanner.re"
{
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
tok = pos = cursor; cline++; oline++;
goto echo;
}
-#line 125 "scanner.cc"
+#line 131 "scanner.cc"
yy7: ++YYCURSOR;
goto yy8;
yy8:
-#line 117 "scanner.re"
+#line 123 "scanner.re"
{
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0
if(cursor == eof) {
RETURN(0);
}
}
-#line 136 "scanner.cc"
+#line 142 "scanner.cc"
yy9: yych = *++YYCURSOR;
goto yy3;
yy10: ++YYCURSOR;
goto yy11;
yy11:
-#line 103 "scanner.re"
+#line 109 "scanner.re"
{
if (ignore_eoc) {
ignore_eoc = false;
tok = pos = cursor;
goto echo;
}
-#line 152 "scanner.cc"
+#line 158 "scanner.cc"
yy12: yych = *++YYCURSOR;
if(yych == '!') goto yy14;
goto yy13;
yy19: ++YYCURSOR;
goto yy20;
yy20:
-#line 92 "scanner.re"
+#line 98 "scanner.re"
{
out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok));
tok = cursor;
RETURN(1);
}
-#line 185 "scanner.cc"
+#line 191 "scanner.cc"
yy21: yych = *++YYCURSOR;
if(yych != 'x') goto yy13;
goto yy22;
yy27: ++YYCURSOR;
goto yy28;
yy28:
-#line 97 "scanner.re"
+#line 103 "scanner.re"
{
out << "#define YYMAXFILL " << maxFill << std::endl;
tok = pos = cursor;
ignore_eoc = true;
goto echo;
}
-#line 214 "scanner.cc"
+#line 220 "scanner.cc"
}
-#line 126 "scanner.re"
+#line 132 "scanner.re"
}
tline = cline;
tok = cursor;
-#line 230 "scanner.cc"
+#line 236 "scanner.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
yy29:
if((YYLIMIT - YYCURSOR) < 4) YYFILL(4);
yych = *YYCURSOR;
- if(yych <= '/'){
- if(yych <= '"'){
- if(yych <= 0x0A){
- if(yych <= 0x08) goto yy53;
+ if(yych <= '.'){
+ if(yych <= '!'){
+ if(yych <= 0x0C){
+ if(yych <= 0x08) goto yy55;
if(yych <= 0x09) goto yy49;
- goto yy51;
+ if(yych <= 0x0A) goto yy53;
+ goto yy55;
} else {
+ if(yych <= 0x0D) goto yy51;
if(yych == ' ') goto yy49;
- if(yych <= '!') goto yy53;
- goto yy37;
+ goto yy55;
}
} else {
- if(yych <= '*'){
- if(yych <= '&') goto yy53;
+ if(yych <= ')'){
+ if(yych <= '"') goto yy37;
+ if(yych <= '&') goto yy55;
if(yych <= '\'') goto yy39;
- if(yych <= ')') goto yy43;
- goto yy35;
+ goto yy43;
} else {
+ if(yych <= '*') goto yy35;
if(yych <= '+') goto yy44;
- if(yych <= '-') goto yy53;
- if(yych <= '.') goto yy47;
- goto yy33;
+ if(yych <= '-') goto yy55;
+ goto yy47;
}
}
} else {
if(yych <= '@'){
if(yych <= '<'){
+ if(yych <= '/') goto yy33;
if(yych == ';') goto yy43;
- goto yy53;
+ goto yy55;
} else {
if(yych <= '=') goto yy43;
if(yych == '?') goto yy44;
- goto yy53;
+ goto yy55;
}
} else {
if(yych <= '`'){
if(yych <= 'Z') goto yy45;
if(yych <= '[') goto yy41;
if(yych <= '\\') goto yy43;
- goto yy53;
+ goto yy55;
} else {
if(yych <= 'z') goto yy45;
if(yych <= '{') goto yy31;
if(yych <= '|') goto yy43;
- goto yy53;
+ goto yy55;
}
}
}
yy31: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
if(yych <= '/'){
- if(yych == ',') goto yy87;
+ if(yych == ',') goto yy90;
goto yy32;
} else {
- if(yych <= '0') goto yy84;
- if(yych <= '9') goto yy85;
+ if(yych <= '0') goto yy87;
+ if(yych <= '9') goto yy88;
goto yy32;
}
yy32:
-#line 139 "scanner.re"
+#line 145 "scanner.re"
{ depth = 1;
goto code;
}
-#line 302 "scanner.cc"
+#line 310 "scanner.cc"
yy33: ++YYCURSOR;
- if((yych = *YYCURSOR) == '*') goto yy82;
+ if((yych = *YYCURSOR) == '*') goto yy85;
goto yy34;
yy34:
-#line 169 "scanner.re"
+#line 175 "scanner.re"
{ RETURN(*tok); }
-#line 309 "scanner.cc"
+#line 317 "scanner.cc"
yy35: ++YYCURSOR;
- if((yych = *YYCURSOR) == '/') goto yy80;
+ if((yych = *YYCURSOR) == '/') goto yy83;
goto yy36;
yy36:
-#line 171 "scanner.re"
+#line 177 "scanner.re"
{ yylval.op = *tok;
RETURN(CLOSE); }
-#line 317 "scanner.cc"
+#line 325 "scanner.cc"
yy37: yyaccept = 1;
yych = *(YYMARKER = ++YYCURSOR);
- if(yych != 0x0A) goto yy76;
+ if(yych != 0x0A) goto yy79;
goto yy38;
yy38:
-#line 156 "scanner.re"
+#line 162 "scanner.re"
{ fatal("unterminated string constant (missing \")"); }
-#line 325 "scanner.cc"
+#line 333 "scanner.cc"
yy39: yyaccept = 2;
yych = *(YYMARKER = ++YYCURSOR);
- if(yych != 0x0A) goto yy71;
+ if(yych != 0x0A) goto yy74;
goto yy40;
yy40:
-#line 157 "scanner.re"
+#line 163 "scanner.re"
{ fatal("unterminated string constant (missing ')"); }
-#line 333 "scanner.cc"
+#line 341 "scanner.cc"
yy41: yyaccept = 3;
yych = *(YYMARKER = ++YYCURSOR);
if(yych == 0x0A) goto yy42;
- if(yych == '^') goto yy62;
- goto yy60;
+ if(yych == '^') goto yy65;
+ goto yy63;
yy42:
-#line 167 "scanner.re"
+#line 173 "scanner.re"
{ fatal("unterminated range (missing ])"); }
-#line 342 "scanner.cc"
+#line 350 "scanner.cc"
yy43: yych = *++YYCURSOR;
goto yy34;
yy44: yych = *++YYCURSOR;
goto yy36;
yy45: ++YYCURSOR;
yych = *YYCURSOR;
- goto yy58;
+ goto yy61;
yy46:
-#line 191 "scanner.re"
+#line 197 "scanner.re"
{ cur = cursor;
yylval.symbol = Symbol::find(token());
return ID; }
-#line 355 "scanner.cc"
+#line 363 "scanner.cc"
yy47: ++YYCURSOR;
goto yy48;
yy48:
-#line 195 "scanner.re"
+#line 201 "scanner.re"
{ cur = cursor;
yylval.regexp = mkDot();
return RANGE;
}
-#line 364 "scanner.cc"
+#line 372 "scanner.cc"
yy49: ++YYCURSOR;
yych = *YYCURSOR;
- goto yy56;
+ goto yy59;
yy50:
-#line 200 "scanner.re"
+#line 206 "scanner.re"
{ goto scan; }
-#line 371 "scanner.cc"
+#line 379 "scanner.cc"
yy51: ++YYCURSOR;
+ if((yych = *YYCURSOR) == 0x0A) goto yy56;
goto yy52;
yy52:
-#line 202 "scanner.re"
-{ if(cursor == eof) RETURN(0);
- pos = cursor; cline++;
+#line 217 "scanner.re"
+{ std::cerr << "line " << tline << ", column " << (tchar + 1)
+ << ": unexpected character: ";
+ if (isprint(*tok))
+ {
+ std::cerr << *tok << std::endl;
+ }
+ else
+ {
+ std::cerr << "0x" << hexCh(*tok >> 4) << hexCh(*tok) << std::endl;
+ }
goto scan;
- }
-#line 380 "scanner.cc"
+ }
+#line 397 "scanner.cc"
yy53: ++YYCURSOR;
goto yy54;
yy54:
-#line 207 "scanner.re"
-{ std::cerr << "unexpected character: " << *tok << std::endl;
+#line 212 "scanner.re"
+{ if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
goto scan;
- }
-#line 388 "scanner.cc"
-yy55: ++YYCURSOR;
+ }
+#line 406 "scanner.cc"
+yy55: yych = *++YYCURSOR;
+ goto yy52;
+yy56: ++YYCURSOR;
+ goto yy57;
+yy57:
+#line 208 "scanner.re"
+{ if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
+#line 417 "scanner.cc"
+yy58: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy56;
-yy56: if(yych == 0x09) goto yy55;
- if(yych == ' ') goto yy55;
+ goto yy59;
+yy59: if(yych == 0x09) goto yy58;
+ if(yych == ' ') goto yy58;
goto yy50;
-yy57: ++YYCURSOR;
+yy60: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy58;
-yy58: if(yych <= '@'){
+ goto yy61;
+yy61: if(yych <= '@'){
if(yych <= '/') goto yy46;
- if(yych <= '9') goto yy57;
+ if(yych <= '9') goto yy60;
goto yy46;
} else {
- if(yych <= 'Z') goto yy57;
+ if(yych <= 'Z') goto yy60;
if(yych <= '`') goto yy46;
- if(yych <= 'z') goto yy57;
+ if(yych <= 'z') goto yy60;
goto yy46;
}
-yy59: ++YYCURSOR;
+yy62: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy60;
-yy60: if(yych <= '['){
- if(yych != 0x0A) goto yy59;
- goto yy61;
+ goto yy63;
+yy63: if(yych <= '['){
+ if(yych != 0x0A) goto yy62;
+ goto yy64;
} else {
- if(yych <= '\\') goto yy64;
- if(yych <= ']') goto yy65;
- goto yy59;
+ if(yych <= '\\') goto yy67;
+ if(yych <= ']') goto yy68;
+ goto yy62;
}
-yy61: YYCURSOR = YYMARKER;
+yy64: YYCURSOR = YYMARKER;
switch(yyaccept){
case 0: goto yy32;
case 1: goto yy38;
case 2: goto yy40;
case 3: goto yy42;
- case 4: goto yy88;
+ case 4: goto yy91;
}
-yy62: ++YYCURSOR;
+yy65: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy63;
-yy63: if(yych <= '['){
- if(yych == 0x0A) goto yy61;
- goto yy62;
+ goto yy66;
+yy66: if(yych <= '['){
+ if(yych == 0x0A) goto yy64;
+ goto yy65;
} else {
- if(yych <= '\\') goto yy67;
- if(yych <= ']') goto yy68;
- goto yy62;
+ if(yych <= '\\') goto yy70;
+ if(yych <= ']') goto yy71;
+ goto yy65;
}
-yy64: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
- if(yych == 0x0A) goto yy61;
- goto yy59;
-yy65: ++YYCURSOR;
- goto yy66;
-yy66:
-#line 163 "scanner.re"
-{ cur = cursor;
- yylval.regexp = ranToRE(token());
- return RANGE; }
-#line 454 "scanner.cc"
yy67: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- if(yych == 0x0A) goto yy61;
+ if(yych == 0x0A) goto yy64;
goto yy62;
yy68: ++YYCURSOR;
goto yy69;
yy69:
-#line 159 "scanner.re"
+#line 169 "scanner.re"
{ cur = cursor;
- yylval.regexp = invToRE(token());
+ yylval.regexp = ranToRE(token());
return RANGE; }
-#line 467 "scanner.cc"
+#line 483 "scanner.cc"
yy70: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy71;
-yy71: if(yych <= '&'){
- if(yych == 0x0A) goto yy61;
- goto yy70;
+ if(yych == 0x0A) goto yy64;
+ goto yy65;
+yy71: ++YYCURSOR;
+ goto yy72;
+yy72:
+#line 165 "scanner.re"
+{ cur = cursor;
+ yylval.regexp = invToRE(token());
+ return RANGE; }
+#line 496 "scanner.cc"
+yy73: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy74;
+yy74: if(yych <= '&'){
+ if(yych == 0x0A) goto yy64;
+ goto yy73;
} else {
- if(yych <= '\'') goto yy73;
- if(yych != '\\') goto yy70;
- goto yy72;
+ if(yych <= '\'') goto yy76;
+ if(yych != '\\') goto yy73;
+ goto yy75;
}
-yy72: ++YYCURSOR;
+yy75: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- if(yych == 0x0A) goto yy61;
- goto yy70;
-yy73: ++YYCURSOR;
- goto yy74;
-yy74:
-#line 152 "scanner.re"
+ if(yych == 0x0A) goto yy64;
+ goto yy73;
+yy76: ++YYCURSOR;
+ goto yy77;
+yy77:
+#line 158 "scanner.re"
{ cur = cursor;
yylval.regexp = strToCaseInsensitiveRE(token());
return STRING; }
-#line 492 "scanner.cc"
-yy75: ++YYCURSOR;
+#line 521 "scanner.cc"
+yy78: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy76;
-yy76: if(yych <= '!'){
- if(yych == 0x0A) goto yy61;
- goto yy75;
+ goto yy79;
+yy79: if(yych <= '!'){
+ if(yych == 0x0A) goto yy64;
+ goto yy78;
} else {
- if(yych <= '"') goto yy78;
- if(yych != '\\') goto yy75;
- goto yy77;
+ if(yych <= '"') goto yy81;
+ if(yych != '\\') goto yy78;
+ goto yy80;
}
-yy77: ++YYCURSOR;
+yy80: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- if(yych == 0x0A) goto yy61;
- goto yy75;
-yy78: ++YYCURSOR;
- goto yy79;
-yy79:
-#line 148 "scanner.re"
+ if(yych == 0x0A) goto yy64;
+ goto yy78;
+yy81: ++YYCURSOR;
+ goto yy82;
+yy82:
+#line 154 "scanner.re"
{ cur = cursor;
yylval.regexp = strToRE(token());
return STRING; }
-#line 517 "scanner.cc"
-yy80: ++YYCURSOR;
- goto yy81;
-yy81:
-#line 145 "scanner.re"
+#line 546 "scanner.cc"
+yy83: ++YYCURSOR;
+ goto yy84;
+yy84:
+#line 151 "scanner.re"
{ tok = cursor;
RETURN(0); }
-#line 524 "scanner.cc"
-yy82: ++YYCURSOR;
- goto yy83;
-yy83:
-#line 142 "scanner.re"
+#line 553 "scanner.cc"
+yy85: ++YYCURSOR;
+ goto yy86;
+yy86:
+#line 148 "scanner.re"
{ depth = 1;
goto comment; }
-#line 531 "scanner.cc"
-yy84: yych = *++YYCURSOR;
- if(yych == ',') goto yy98;
- goto yy86;
-yy85: ++YYCURSOR;
+#line 560 "scanner.cc"
+yy87: yych = *++YYCURSOR;
+ if(yych == ',') goto yy101;
+ goto yy89;
+yy88: ++YYCURSOR;
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
- goto yy86;
-yy86: if(yych <= '/'){
- if(yych == ',') goto yy91;
- goto yy61;
+ goto yy89;
+yy89: if(yych <= '/'){
+ if(yych == ',') goto yy94;
+ goto yy64;
} else {
- if(yych <= '9') goto yy85;
- if(yych == '}') goto yy89;
- goto yy61;
+ if(yych <= '9') goto yy88;
+ if(yych == '}') goto yy92;
+ goto yy64;
}
-yy87: ++YYCURSOR;
- goto yy88;
-yy88:
-#line 189 "scanner.re"
+yy90: ++YYCURSOR;
+ goto yy91;
+yy91:
+#line 195 "scanner.re"
{ fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); }
-#line 552 "scanner.cc"
-yy89: ++YYCURSOR;
- goto yy90;
-yy90:
-#line 177 "scanner.re"
-{ yylval.extop.minsize = atoi((char *)tok+1);
- yylval.extop.maxsize = atoi((char *)tok+1);
- RETURN(CLOSESIZE); }
-#line 560 "scanner.cc"
-yy91: yyaccept = 4;
- yych = *(YYMARKER = ++YYCURSOR);
- if(yych <= '/') goto yy88;
- if(yych <= '9') goto yy94;
- if(yych != '}') goto yy88;
- goto yy92;
+#line 581 "scanner.cc"
yy92: ++YYCURSOR;
goto yy93;
yy93:
-#line 185 "scanner.re"
+#line 183 "scanner.re"
{ yylval.extop.minsize = atoi((char *)tok+1);
- yylval.extop.maxsize = -1;
+ yylval.extop.maxsize = atoi((char *)tok+1);
RETURN(CLOSESIZE); }
-#line 574 "scanner.cc"
-yy94: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
+#line 589 "scanner.cc"
+yy94: yyaccept = 4;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych <= '/') goto yy91;
+ if(yych <= '9') goto yy97;
+ if(yych != '}') goto yy91;
goto yy95;
-yy95: if(yych <= '/') goto yy61;
- if(yych <= '9') goto yy94;
- if(yych != '}') goto yy61;
+yy95: ++YYCURSOR;
goto yy96;
-yy96: ++YYCURSOR;
- goto yy97;
-yy97:
-#line 181 "scanner.re"
+yy96:
+#line 191 "scanner.re"
{ yylval.extop.minsize = atoi((char *)tok+1);
- yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1));
+ yylval.extop.maxsize = -1;
RETURN(CLOSESIZE); }
-#line 590 "scanner.cc"
-yy98: yyaccept = 4;
- yych = *(YYMARKER = ++YYCURSOR);
- if(yych <= '/') goto yy88;
- if(yych <= '9') goto yy94;
- if(yych != '}') goto yy88;
+#line 603 "scanner.cc"
+yy97: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy98;
+yy98: if(yych <= '/') goto yy64;
+ if(yych <= '9') goto yy97;
+ if(yych != '}') goto yy64;
goto yy99;
yy99: ++YYCURSOR;
goto yy100;
yy100:
-#line 174 "scanner.re"
+#line 187 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)tok+1);
+ yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1));
+ RETURN(CLOSESIZE); }
+#line 619 "scanner.cc"
+yy101: yyaccept = 4;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych <= '/') goto yy91;
+ if(yych <= '9') goto yy97;
+ if(yych != '}') goto yy91;
+ goto yy102;
+yy102: ++YYCURSOR;
+ goto yy103;
+yy103:
+#line 180 "scanner.re"
{ yylval.op = '*';
RETURN(CLOSE); }
-#line 603 "scanner.cc"
+#line 632 "scanner.cc"
}
-#line 210 "scanner.re"
+#line 229 "scanner.re"
code:
-#line 610 "scanner.cc"
+#line 639 "scanner.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
- goto yy101;
+ goto yy104;
++YYCURSOR;
-yy101:
+yy104:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '&'){
if(yych <= 0x0A){
- if(yych <= 0x09) goto yy109;
- goto yy107;
+ if(yych <= 0x09) goto yy112;
+ goto yy110;
} else {
- if(yych == '"') goto yy111;
- goto yy109;
+ if(yych == '"') goto yy114;
+ goto yy112;
}
} else {
if(yych <= '{'){
- if(yych <= '\'') goto yy112;
- if(yych <= 'z') goto yy109;
- goto yy105;
+ if(yych <= '\'') goto yy115;
+ if(yych <= 'z') goto yy112;
+ goto yy108;
} else {
- if(yych != '}') goto yy109;
- goto yy103;
+ if(yych != '}') goto yy112;
+ goto yy106;
}
}
-yy103: ++YYCURSOR;
- goto yy104;
-yy104:
-#line 214 "scanner.re"
+yy106: ++YYCURSOR;
+ goto yy107;
+yy107:
+#line 233 "scanner.re"
{ if(--depth == 0){
cur = cursor;
yylval.token = new Token(token(), tline);
return CODE;
}
goto code; }
-#line 647 "scanner.cc"
-yy105: ++YYCURSOR;
- goto yy106;
-yy106:
-#line 220 "scanner.re"
+#line 676 "scanner.cc"
+yy108: ++YYCURSOR;
+ goto yy109;
+yy109:
+#line 239 "scanner.re"
{ ++depth;
goto code; }
-#line 654 "scanner.cc"
-yy107: ++YYCURSOR;
- goto yy108;
-yy108:
-#line 222 "scanner.re"
+#line 683 "scanner.cc"
+yy110: ++YYCURSOR;
+ goto yy111;
+yy111:
+#line 241 "scanner.re"
{ if(cursor == eof) fatal("missing '}'");
pos = cursor; cline++;
goto code;
}
-#line 663 "scanner.cc"
-yy109: ++YYCURSOR;
- goto yy110;
-yy110:
-#line 226 "scanner.re"
+#line 692 "scanner.cc"
+yy112: ++YYCURSOR;
+ goto yy113;
+yy113:
+#line 245 "scanner.re"
{ goto code; }
-#line 669 "scanner.cc"
-yy111: yyaccept = 0;
+#line 698 "scanner.cc"
+yy114: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
- if(yych == 0x0A) goto yy110;
- goto yy118;
-yy112: yyaccept = 0;
+ if(yych == 0x0A) goto yy113;
+ goto yy121;
+yy115: yyaccept = 0;
yych = *(YYMARKER = ++YYCURSOR);
- if(yych == 0x0A) goto yy110;
- goto yy114;
-yy113: ++YYCURSOR;
+ if(yych == 0x0A) goto yy113;
+ goto yy117;
+yy116: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy114;
-yy114: if(yych <= '&'){
- if(yych != 0x0A) goto yy113;
- goto yy115;
+ goto yy117;
+yy117: if(yych <= '&'){
+ if(yych != 0x0A) goto yy116;
+ goto yy118;
} else {
- if(yych <= '\'') goto yy109;
- if(yych == '\\') goto yy116;
- goto yy113;
+ if(yych <= '\'') goto yy112;
+ if(yych == '\\') goto yy119;
+ goto yy116;
}
-yy115: YYCURSOR = YYMARKER;
+yy118: YYCURSOR = YYMARKER;
switch(yyaccept){
- case 0: goto yy110;
+ case 0: goto yy113;
}
-yy116: ++YYCURSOR;
+yy119: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- if(yych == 0x0A) goto yy115;
- goto yy113;
-yy117: ++YYCURSOR;
+ if(yych == 0x0A) goto yy118;
+ goto yy116;
+yy120: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- goto yy118;
-yy118: if(yych <= '!'){
- if(yych == 0x0A) goto yy115;
- goto yy117;
+ goto yy121;
+yy121: if(yych <= '!'){
+ if(yych == 0x0A) goto yy118;
+ goto yy120;
} else {
- if(yych <= '"') goto yy109;
- if(yych != '\\') goto yy117;
- goto yy119;
+ if(yych <= '"') goto yy112;
+ if(yych != '\\') goto yy120;
+ goto yy122;
}
-yy119: ++YYCURSOR;
+yy122: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
- if(yych == 0x0A) goto yy115;
- goto yy117;
+ if(yych == 0x0A) goto yy118;
+ goto yy120;
}
-#line 227 "scanner.re"
+#line 246 "scanner.re"
comment:
-#line 722 "scanner.cc"
+#line 751 "scanner.cc"
{
YYCTYPE yych;
- goto yy120;
+ goto yy123;
++YYCURSOR;
-yy120:
+yy123:
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= ')'){
- if(yych == 0x0A) goto yy125;
- goto yy127;
+ if(yych == 0x0A) goto yy128;
+ goto yy130;
} else {
- if(yych <= '*') goto yy122;
- if(yych == '/') goto yy124;
- goto yy127;
+ if(yych <= '*') goto yy125;
+ if(yych == '/') goto yy127;
+ goto yy130;
}
-yy122: ++YYCURSOR;
- if((yych = *YYCURSOR) == '/') goto yy130;
- goto yy123;
-yy123:
-#line 241 "scanner.re"
-{ goto comment; }
-#line 744 "scanner.cc"
-yy124: yych = *++YYCURSOR;
- if(yych == '*') goto yy128;
- goto yy123;
yy125: ++YYCURSOR;
+ if((yych = *YYCURSOR) == '/') goto yy133;
goto yy126;
yy126:
-#line 237 "scanner.re"
-{ if(cursor == eof) RETURN(0);
- tok = pos = cursor; cline++;
- goto comment;
- }
-#line 756 "scanner.cc"
+#line 260 "scanner.re"
+{ goto comment; }
+#line 773 "scanner.cc"
yy127: yych = *++YYCURSOR;
- goto yy123;
+ if(yych == '*') goto yy131;
+ goto yy126;
yy128: ++YYCURSOR;
goto yy129;
yy129:
-#line 235 "scanner.re"
+#line 256 "scanner.re"
+{ if(cursor == eof) RETURN(0);
+ tok = pos = cursor; cline++;
+ goto comment;
+ }
+#line 785 "scanner.cc"
+yy130: yych = *++YYCURSOR;
+ goto yy126;
+yy131: ++YYCURSOR;
+ goto yy132;
+yy132:
+#line 254 "scanner.re"
{ ++depth;
goto comment; }
-#line 765 "scanner.cc"
-yy130: ++YYCURSOR;
- goto yy131;
-yy131:
-#line 231 "scanner.re"
+#line 794 "scanner.cc"
+yy133: ++YYCURSOR;
+ goto yy134;
+yy134:
+#line 250 "scanner.re"
{ if(--depth == 0)
goto scan;
else
goto comment; }
-#line 774 "scanner.cc"
+#line 803 "scanner.cc"
}
-#line 242 "scanner.re"
+#line 261 "scanner.re"
}
}
} // end namespace re2c
-
namespace re2c
{
-inline char octCh(uint c)
+void prtChOrHex(std::ostream& o, uint c)
{
- return '0' + c % 8;
-}
-
-inline char hexCh(uint c)
-{
- const char * sHex = "0123456789ABCDEF";
-
- return sHex[c & 0x0F];
-}
-
-void prtChOrHex(std::ostream& o, uchar c)
-{
- uchar oc = talx[c];
+ int oc = (int)(re2c::wFlag ? c : re2c::talx[c]);
- if (isprint(oc))
+ if ((oc < 256) && isprint(oc))
{
o << '\'';
- prtCh(o, c);
+ prtCh(o, oc);
o << '\'';
}
+ else if (re2c::wFlag)
+ {
+ o << "0x"
+ << hexCh(oc >> 12)
+ << hexCh(oc >> 8)
+ << hexCh(oc >> 4)
+ << hexCh(oc);
+ }
else
{
- o << "0x" << hexCh(c >> 4) << hexCh(c);
+ o << "0x"
+ << hexCh(oc >> 4)
+ << hexCh(oc);
}
}
-void prtCh(std::ostream &o, uchar c)
+void prtCh(std::ostream &o, uint c)
{
- uchar oc = talx[c];
+ int oc = (int)(re2c::wFlag ? c : re2c::talx[c]);
switch (oc)
{
-
case '\'':
o << "\\'";
break;
default:
- if (isprint(oc))
+ if ((oc < 256) && isprint(oc))
+ {
o << (char) oc;
+ }
+ else if (re2c::wFlag)
+ {
+ o << "0x"
+ << hexCh(oc >> 12)
+ << hexCh(oc >> 8)
+ << hexCh(oc >> 4)
+ << hexCh(oc);
+ }
else
- o << '\\' << octCh(c / 64) << octCh(c / 8) << octCh(c);
+ {
+ o << '\\' << octCh(oc / 64) << octCh(oc / 8) << octCh(oc);
+ }
}
}
void printSpan(std::ostream &o, uint lb, uint ub)
{
if (lb > ub)
+ {
o << "*";
+ }
o << "[";
o << "state " << s.label;
if (s.rule)
+ {
o << " accepts " << s.rule->accept;
+ }
o << "\n";
uint lb = 0;
for (uint i = 0; i < s.go.nSpans; ++i)
+ {
lb = s.go.span[i].show(o, lb);
+ }
return o;
}
{
State *to = (State*) goTo[rep[j]].to;
- while (++j < nc && goTo[rep[j]].to == to)
+ while (++j < nc && goTo[rep[j]].to == to) ;
- ;
span[s->go.nSpans].ub = lb + j;
span[s->go.nSpans].to = to;
namespace re2c
{
-extern void prtCh(std::ostream&, uchar);
-extern void prtChOrHex(std::ostream&, uchar);
+extern void prtCh(std::ostream&, uint);
+extern void prtChOrHex(std::ostream&, uint);
extern void printSpan(std::ostream&, uint, uint);
class DFA;
extern char *fileName;
extern char *outputFileName;
-extern bool sFlag;
extern bool bFlag;
extern bool dFlag;
+extern bool eFlag;
extern bool iFlag;
+extern bool sFlag;
+extern bool wFlag;
+
extern bool bUsedYYAccept;
extern unsigned int oline;
extern uint maxFill;
-extern uchar asc2ebc[256];
-extern uchar ebc2asc[256];
+extern uint asc2ebc[256];
+extern uint ebc2asc[256];
-extern uchar *xlat, *talx;
+extern uint *xlat, *talx;
extern int vFillIndexes;
extern label_list<uint> vUsedLabels;
+extern uint nRealChars;
+
+inline char octCh(uint c)
+{
+ return '0' + c % 8;
+}
+
+inline char hexCh(uint c)
+{
+ static const char * sHex = "0123456789ABCDEF";
+
+ return sHex[c & 0x0F];
+}
+
} // end namespace re2c
#endif
namespace re2c
{
-const uint nChars = 256;
-typedef uchar Char;
+const uint nChars = (1<<16);
+typedef unsigned short Char;
const uint CHAR = 0;
const uint GOTO = 1;
char *fileName = 0;
char *outputFileName = 0;
-bool sFlag = false;
bool bFlag = false;
-bool dFlag = false;
+bool dFlag = false;
+bool eFlag = false;
bool iFlag = false;
+bool sFlag = false;
+bool wFlag = false;
bool bUsedYYAccept = false;
unsigned int oline = 1;
uint maxFill = 1;
+uint nRealChars = 256;
int vFillIndexes = -1;
label_list<uint> vUsedLabels;
mbo_opt_struct('s', 0, "nested-ifs"),
mbo_opt_struct('v', 0, "version"),
mbo_opt_struct('V', 0, "vernum"),
+ mbo_opt_struct('w', 0, "wide-chars"),
mbo_opt_struct('-', 0, NULL) /* end of args */
};
{
cerr << "usage: re2c [-esbvhd] file\n"
"\n"
- "-? -h --help Display this info.\n"
+ "-? -h --help Display this info.\n"
"\n"
- "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
+ "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
" coax better code out of the compiler. Most useful for\n"
" specifications with more than a few keywords (e.g. for\n"
" most programming languages).\n"
"\n"
- "-e --ecb Cross-compile from an ASCII platform to\n"
+ "-d --debug-output Creates a parser that dumps information during\n"
+ " about the current position and in which state the\n"
+ " parser is.\n"
+ "\n"
+ "-e --ecb Cross-compile from an ASCII platform to\n"
" an EBCDIC one.\n"
"\n"
- "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
- " need this assist to generate better code.\n"
+ "-f --storable-state Generate a scanner with support for storable state\n"
"\n"
- "-f --storable-state Generate a scanner with support for storable state\n"
+ "-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n"
"\n"
- "-o --output=output Specify the output file instead of stdout\n"
+ "-o --output=output Specify the output file instead of stdout\n"
+ " This cannot be used together with switches -b or -e.\n"
"\n"
- "-d --debug-output Creates a parser that dumps information during\n"
- " about the current position and in which state the\n"
- " parser is.\n"
+ "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
+ " need this assist to generate better code.\n"
"\n"
- "-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n"
+ "-v --version Show version information.\n"
+ "-V --vernum Show version as one number.\n"
"\n"
- "-v --version Show version information.\n"
- "-V --vernum Show version as one number.\n"
+ "-w --wide-chars Create a parser that supports wide chars (UCS-2).\n"
;
}
case 'e':
xlat = asc2ebc;
talx = ebc2asc;
+ eFlag = true;
break;
case 's':
case 'v':
cout << "re2c " << PACKAGE_VERSION << "\n";
return 2;
-
+
case 'V': {
int v1, v2, v3;
char version[16];
return 2;
}
+ case 'w':
+ nRealChars = (1<<16);
+ wFlag = true;
+ break;
+
case 'h':
-
case '?':
-
default:
usage();
return 2;
}
}
- if (argc == opt_ind + 1)
+ if (wFlag && (bFlag || eFlag))
+ {
+ usage();
+ return 2;
+ }
+ else if (argc == opt_ind + 1)
{
fileName = argv[opt_ind];
}
.ds rx regular expression
.ds lx \fIl\fP-expression
\"$Log$
+\"Revision 1.27 2005/12/28 18:33:37 helly
+\"- Added experimental unicode support
+\"
\"Revision 1.26 2005/12/18 18:47:06 helly
\"- Apply #1362806 Addition to man on flag -f
\"
.SH OPTIONS
\*(re provides the following options:
.TP
-\fB-e\fP
-Cross-compile from an ASCII platform to an EBCDIC one.
-.TP
-\fB-f\fP
-Generate a scanner with support for storable state.
-For details see below at \fBSCANNER WITH STORABLE STATES\fP.
-.TP
-\fB-s\fP
-Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
-assist to generate better code.
+\fB-?\fP
+\fB-h\fP
+Invoke a short help.
.TP
\fB-b\fP
Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
\fIvoid YYDEBUG(int state, char current)\fP. The first parameter receives the
state or -1 and the second parameter receives the input at the current cursor.
.TP
+\fB-e\fP
+Cross-compile from an ASCII platform to an EBCDIC one.
+.TP
+\fB-f\fP
+Generate a scanner with support for storable state.
+For details see below at \fBSCANNER WITH STORABLE STATES\fP.
+.TP
\fB-i\fP
Do not output #line information. This is useful when you want to use a CMS tool
with the re2c output, which you might want if you do not require your users to
have re2c themselves when building from your source.
+\fB-o output\fP
+Specify the output file.
.TP
-\fB-h\fP
-\fB-?\fP
-Invoke a short help.
+\fB-s\fP
+Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
+assist to generate better code.
.TP
\fB-v\fP
Show version information.
\fB-V\fP
Show the version as a number XXYYZZ.
.TP
-\fB-o output\fP
-Specify the output file.
-
+\fB-w\fP
+Create a parser that supports wide chars (UCS-2). This cannot be used together
+with switches \fB-b\fP or \fB-e\fP.
.SH "INTERFACE CODE"
Unlike other scanner generators, \*(re does not generate complete scanners:
the user must supply some interface code.
#include <iosfwd>
#include "token.h"
#include "re.h"
+#include "globals.h"
namespace re2c
{
int echo(std::ostream&);
int scan();
void fatal(char*) const;
- SubStr token();
- uint line();
-
- uchar unescape(SubStr &s) const;
+ SubStr token() const;
+ uint line() const;
+ uint xlat(uint c) const;
+ uint unescape(SubStr &s) const;
Range * getRange(SubStr &s) const;
RegExp * matchChar(uint c) const;
RegExp * strToRE(SubStr s) const;
RegExp * mkDot() const;
};
-inline SubStr Scanner::token()
+inline SubStr Scanner::token() const // current token: SubStr starting at tok with length cur - tok
{
return SubStr(tok, cur - tok);
}
-inline uint Scanner::line()
+inline uint Scanner::line() const // returns cline, the line counter that starts at 1 and is bumped on each newline rule
{
return cline;
}
+// Map a character code for code generation.
+// When wFlag is set the value is passed through unchanged; otherwise it is
+// looked up in the table re2c::xlat points to (asc2asc by default), which has
+// exactly 256 entries.
+inline uint Scanner::xlat(uint c) const
+{
+ // Mask the index to 8 bits: c is an unchecked uint and can exceed 0xFF
+ // (e.g. a sign-extended char), which would read past the end of the table.
+ return re2c::wFlag ? c : re2c::xlat[c & 0xFF];
+}
+
} // end namespace re2c
#endif
Scanner::Scanner(std::istream& i) : in(i),
bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL),
- top(NULL), eof(NULL), tchar(0), tline(0), cline(1) {
+ top(NULL), eof(NULL), tchar(0), tline(0), cline(1)
+{ // all buffer pointers start out NULL (fill() allocates on first use); cline starts at 1
;
}
-char *Scanner::fill(char *cursor){
- if(!eof){
- uint cnt = tok - bot;
- if(cnt){
- memcpy(bot, tok, lim - tok);
- tok = bot;
- ptr -= cnt;
- cursor -= cnt;
- pos -= cnt;
- lim -= cnt;
- }
- if((top - lim) < BSIZE){
- char *buf = new char[(lim - bot) + BSIZE];
- memcpy(buf, tok, lim - tok);
- tok = buf;
- ptr = &buf[ptr - bot];
- cursor = &buf[cursor - bot];
- pos = &buf[pos - bot];
- lim = &buf[lim - bot];
- top = &lim[BSIZE];
- delete [] bot;
- bot = buf;
- }
- if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE){
- eof = &lim[cnt]; *eof++ = '\0';
+// Refill the input buffer from the stream and return the cursor, adjusted
+// for any shifting or reallocation of the buffer done here.
+// Does nothing once eof has been set (a short read was already seen).
+char *Scanner::fill(char *cursor)
+{
+ if(!eof)
+ {
+ // Number of bytes in front of the current token that can be discarded.
+ uint cnt = tok - bot;
+ if(cnt)
+ {
+ // Source and destination lie in the same buffer and overlap whenever
+ // the kept data is longer than the discarded prefix, so memmove is
+ // required here; memcpy on overlapping ranges is undefined.
+ memmove(bot, tok, lim - tok);
+ tok = bot;
+ ptr -= cnt;
+ cursor -= cnt;
+ pos -= cnt;
+ lim -= cnt;
+ }
+ // Grow the buffer when fewer than BSIZE bytes are free behind lim.
+ if((top - lim) < BSIZE)
+ {
+ char *buf = new char[(lim - bot) + BSIZE];
+ memcpy(buf, tok, lim - tok);
+ tok = buf;
+ // Rebase every pointer from the old buffer onto the new one.
+ ptr = &buf[ptr - bot];
+ cursor = &buf[cursor - bot];
+ pos = &buf[pos - bot];
+ lim = &buf[lim - bot];
+ top = &lim[BSIZE];
+ delete [] bot;
+ bot = buf;
+ }
+ // A short read marks end of input: terminate the data with a NUL and
+ // leave eof pointing just past it.
+ if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE)
+ {
+ eof = &lim[cnt]; *eof++ = '\0';
+ }
+ lim += cnt;
}
- lim += cnt;
- }
- return cursor;
+ return cursor;
}
/*!re2c
[ \t]+ { goto scan; }
+ "\r\n" { if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
"\n" { if(cursor == eof) RETURN(0);
pos = cursor; cline++;
goto scan;
}
- any { std::cerr << "unexpected character: " << *tok << std::endl;
+ any { std::cerr << "line " << tline << ", column " << (tchar + 1)
+ << ": unexpected character: ";
+ // Report the offending byte: printable bytes verbatim, others as hex.
+ // The byte is converted to unsigned char first because isprint is
+ // undefined for negative arguments and plain char may be signed.
+ if (isprint((unsigned char)*tok))
+ {
+ std::cerr << *tok << std::endl;
+ }
+ else
+ {
+ std::cerr << "0x" << hexCh((unsigned char)*tok >> 4) << hexCh((unsigned char)*tok) << std::endl;
+ }
goto scan;
}
*/
}
} // end namespace re2c
-
namespace re2c
{
-uchar asc2asc[256] =
+uint asc2asc[256] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
-uchar *xlat = asc2asc;
-uchar *talx = asc2asc;
+uint *xlat = asc2asc;
+uint *talx = asc2asc;
-uchar asc2ebc[256] =
+uint asc2ebc[256] =
{ /* Based on ISO 8859/1 and Code Page 37 */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0xae, 0xdf
};
-uchar ebc2asc[256] =
+uint ebc2asc[256] =
{ /* Based on ISO 8859/1 and Code Page 37 */
0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
};
} // end namespace re2c
-