src/ir/regexp/encoding/utf16/utf16_regexp.h \
src/ir/regexp/encoding/utf16/utf16.h \
src/ir/regexp/empty_class_policy.h \
- src/ir/regexp/regexp_alt.h \
- src/ir/regexp/regexp_match.h \
- src/ir/regexp/regexp_rule.h \
- src/ir/regexp/regexp_cat.h \
- src/ir/regexp/regexp_null.h \
src/ir/regexp/regexp.h \
- src/ir/regexp/regexp_close.h \
src/ir/compile.h \
src/ir/rule_rank.h \
src/ir/skeleton/path.h \
src/conf/msg.cc \
src/conf/opt.cc \
src/conf/warn.cc \
- src/ir/nfa/calc_size.cc \
src/ir/nfa/nfa.cc \
- src/ir/nfa/split.cc \
src/ir/adfa/adfa.cc \
src/ir/adfa/prepare.cc \
src/ir/dfa/determinization.cc \
src/ir/dfa/fillpoints.cc \
src/ir/dfa/minimization.cc \
- src/ir/regexp/display.cc \
src/ir/regexp/encoding/enc.cc \
src/ir/regexp/encoding/range_suffix.cc \
src/ir/regexp/encoding/utf8/utf8_regexp.cc \
src/ir/regexp/encoding/utf16/utf16_regexp.cc \
src/ir/regexp/encoding/utf16/utf16.cc \
src/ir/regexp/encoding/utf16/utf16_range.cc \
- src/ir/regexp/fixed_length.cc \
src/ir/regexp/nullable.cc \
src/ir/regexp/regexp.cc \
+ src/ir/regexp/split_charset.cc \
src/ir/compile.cc \
src/ir/rule_rank.cc \
src/ir/skeleton/control_flow.cc \
-/* Generated by re2c 0.16 on Sat Feb 20 16:51:22 2016 */
+/* Generated by re2c 0.16 on Mon Feb 22 12:14:07 2016 */
#line 1 "../src/parse/lex.re"
#include "src/util/c99_stdint.h"
#include <stddef.h>
#include "src/globals.h"
#include "src/ir/regexp/encoding/enc.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_null.h"
#include "src/parse/code.h"
#include "src/parse/extop.h"
#include "src/parse/input.h"
// source code is in ASCII: pointers have type 'char *'
// but re2c makes an implicit assumption that YYCTYPE is unsigned
// when it generates comparisons
-#line 42 "../src/parse/lex.re"
+#line 41 "../src/parse/lex.re"
-#line 62 "../src/parse/lex.re"
+#line 61 "../src/parse/lex.re"
Scanner::ParseMode Scanner::echo()
tok = cur;
echo:
-#line 62 "src/parse/lex.cc"
+#line 61 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
}
}
++YYCURSOR;
-#line 202 "../src/parse/lex.re"
+#line 201 "../src/parse/lex.re"
{
if (!ignore_eoc && opts->target == opt_t::CODE)
{
return Stop;
}
}
-#line 132 "src/parse/lex.cc"
+#line 131 "src/parse/lex.cc"
yy4:
++YYCURSOR;
yy5:
-#line 213 "../src/parse/lex.re"
+#line 212 "../src/parse/lex.re"
{
goto echo;
}
-#line 140 "src/parse/lex.cc"
+#line 139 "src/parse/lex.cc"
yy6:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
if (yych == '#') goto yy14;
yy7:
-#line 189 "../src/parse/lex.re"
+#line 188 "../src/parse/lex.re"
{
if (ignore_eoc)
{
cline++;
goto echo;
}
-#line 163 "src/parse/lex.cc"
+#line 162 "src/parse/lex.cc"
yy8:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '{') goto yy16;
}
yy16:
++YYCURSOR;
-#line 78 "../src/parse/lex.re"
+#line 77 "../src/parse/lex.re"
{
if (opts->rFlag)
{
tok = cur;
return Parse;
}
-#line 226 "src/parse/lex.cc"
+#line 225 "src/parse/lex.cc"
yy18:
yyaccept = 2;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych == '\n') goto yy22;
if (yych == '\r') goto yy24;
yy19:
-#line 168 "../src/parse/lex.re"
+#line 167 "../src/parse/lex.re"
{
if (ignore_eoc)
{
tok = pos = cur;
goto echo;
}
-#line 251 "src/parse/lex.cc"
+#line 250 "src/parse/lex.cc"
yy20:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '!') goto yy25;
goto yy13;
yy22:
++YYCURSOR;
-#line 150 "../src/parse/lex.re"
+#line 149 "../src/parse/lex.re"
{
cline++;
if (ignore_eoc)
tok = pos = cur;
goto echo;
}
-#line 281 "src/parse/lex.cc"
+#line 280 "src/parse/lex.cc"
yy24:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy22;
yy67:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 185 "../src/parse/lex.re"
+#line 184 "../src/parse/lex.re"
{
set_sourceline ();
goto echo;
}
-#line 491 "src/parse/lex.cc"
+#line 490 "src/parse/lex.cc"
yy69:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy67;
goto yy13;
yy88:
++YYCURSOR;
-#line 119 "../src/parse/lex.re"
+#line 118 "../src/parse/lex.re"
{
if (opts->target != opt_t::DOT)
{
ignore_eoc = true;
goto echo;
}
-#line 585 "src/parse/lex.cc"
+#line 584 "src/parse/lex.cc"
yy90:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '2') goto yy96;
goto yy13;
yy92:
++YYCURSOR;
-#line 105 "../src/parse/lex.re"
+#line 104 "../src/parse/lex.re"
{
if (!opts->rFlag)
{
tok = cur;
return Reuse;
}
-#line 611 "src/parse/lex.cc"
+#line 610 "src/parse/lex.cc"
yy94:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'r') goto yy98;
goto yy13;
yy100:
++YYCURSOR;
-#line 93 "../src/parse/lex.re"
+#line 92 "../src/parse/lex.re"
{
if (opts->rFlag)
{
tok = cur;
return Rules;
}
-#line 651 "src/parse/lex.cc"
+#line 650 "src/parse/lex.cc"
yy102:
++YYCURSOR;
-#line 139 "../src/parse/lex.re"
+#line 138 "../src/parse/lex.re"
{
tok = pos = cur;
ignore_eoc = true;
}
goto echo;
}
-#line 666 "src/parse/lex.cc"
+#line 665 "src/parse/lex.cc"
yy104:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '2') goto yy107;
goto yy13;
yy105:
++YYCURSOR;
-#line 134 "../src/parse/lex.re"
+#line 133 "../src/parse/lex.re"
{
tok = pos = cur;
ignore_eoc = true;
goto echo;
}
-#line 679 "src/parse/lex.cc"
+#line 678 "src/parse/lex.cc"
yy107:
yych = (YYCTYPE)*++YYCURSOR;
if (yych != 'c') goto yy13;
++YYCURSOR;
-#line 128 "../src/parse/lex.re"
+#line 127 "../src/parse/lex.re"
{
tok = pos = cur;
out.wdelay_state_goto (opts->topIndent);
ignore_eoc = true;
goto echo;
}
-#line 691 "src/parse/lex.cc"
+#line 690 "src/parse/lex.cc"
}
-#line 216 "../src/parse/lex.re"
+#line 215 "../src/parse/lex.re"
}
start:
-#line 713 "src/parse/lex.cc"
+#line 712 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
yy112:
++YYCURSOR;
yy113:
-#line 388 "../src/parse/lex.re"
+#line 387 "../src/parse/lex.re"
{
fatalf("unexpected character: '%c'", *tok);
goto scan;
}
-#line 823 "src/parse/lex.cc"
+#line 822 "src/parse/lex.cc"
yy114:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
if (yybm[0+yych] & 16) {
goto yy114;
}
-#line 372 "../src/parse/lex.re"
+#line 371 "../src/parse/lex.re"
{
goto scan;
}
-#line 835 "src/parse/lex.cc"
+#line 834 "src/parse/lex.cc"
yy117:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych == '#') goto yy145;
}
yy118:
-#line 381 "../src/parse/lex.re"
+#line 380 "../src/parse/lex.re"
{
if (cur == eof) return 0;
pos = cur;
cline++;
goto scan;
}
-#line 853 "src/parse/lex.cc"
+#line 852 "src/parse/lex.cc"
yy119:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy117;
goto yy113;
yy120:
++YYCURSOR;
-#line 265 "../src/parse/lex.re"
+#line 264 "../src/parse/lex.re"
{ yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; }
-#line 862 "src/parse/lex.cc"
+#line 861 "src/parse/lex.cc"
yy122:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '}') goto yy147;
goto yy113;
yy123:
++YYCURSOR;
-#line 264 "../src/parse/lex.re"
+#line 263 "../src/parse/lex.re"
{ yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; }
-#line 871 "src/parse/lex.cc"
+#line 870 "src/parse/lex.cc"
yy125:
++YYCURSOR;
yy126:
-#line 275 "../src/parse/lex.re"
+#line 274 "../src/parse/lex.re"
{
return *tok;
}
-#line 879 "src/parse/lex.cc"
+#line 878 "src/parse/lex.cc"
yy127:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy147;
-#line 279 "../src/parse/lex.re"
+#line 278 "../src/parse/lex.re"
{
yylval.op = *tok;
return TOKEN_STAR;
}
-#line 888 "src/parse/lex.cc"
+#line 887 "src/parse/lex.cc"
yy129:
++YYCURSOR;
-#line 283 "../src/parse/lex.re"
+#line 282 "../src/parse/lex.re"
{
yylval.op = *tok;
return TOKEN_CLOSE;
}
-#line 896 "src/parse/lex.cc"
+#line 895 "src/parse/lex.cc"
yy131:
++YYCURSOR;
-#line 367 "../src/parse/lex.re"
+#line 366 "../src/parse/lex.re"
{
yylval.regexp = mkDot();
return TOKEN_REGEXP;
}
-#line 904 "src/parse/lex.cc"
+#line 903 "src/parse/lex.cc"
yy133:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '*') goto yy149;
yy137:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy165;
-#line 266 "../src/parse/lex.re"
+#line 265 "../src/parse/lex.re"
{ yylval.regexp = lex_cls(false); return TOKEN_REGEXP; }
-#line 929 "src/parse/lex.cc"
+#line 928 "src/parse/lex.cc"
yy139:
yych = (YYCTYPE)*++YYCURSOR;
YYCTXMARKER = YYCURSOR;
}
}
yy141:
-#line 235 "../src/parse/lex.re"
+#line 234 "../src/parse/lex.re"
{
depth = 1;
goto code;
}
-#line 958 "src/parse/lex.cc"
+#line 957 "src/parse/lex.cc"
yy142:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
}
yy147:
++YYCURSOR;
-#line 259 "../src/parse/lex.re"
+#line 258 "../src/parse/lex.re"
{
tok = cur;
return 0;
}
-#line 1003 "src/parse/lex.cc"
+#line 1002 "src/parse/lex.cc"
yy149:
++YYCURSOR;
-#line 253 "../src/parse/lex.re"
+#line 252 "../src/parse/lex.re"
{
depth = 1;
goto comment;
}
-#line 1011 "src/parse/lex.cc"
+#line 1010 "src/parse/lex.cc"
yy151:
++YYCURSOR;
-#line 250 "../src/parse/lex.re"
+#line 249 "../src/parse/lex.re"
{
goto nextLine;
}
-#line 1018 "src/parse/lex.cc"
+#line 1017 "src/parse/lex.cc"
yy153:
++YYCURSOR;
YYCURSOR -= 1;
-#line 351 "../src/parse/lex.re"
+#line 350 "../src/parse/lex.re"
{
if (!opts->FFlag) {
yylval.str = new std::string (tok, tok_len());
const uint32_t c = static_cast<uint8_t>(*s);
r = doCat(r, casing ? ichr(c) : schr(c));
}
- yylval.regexp = r ? r : new NullOp;
+ yylval.regexp = r ? r : RegExp::nil();
return TOKEN_REGEXP;
}
}
-#line 1038 "src/parse/lex.cc"
+#line 1037 "src/parse/lex.cc"
yy155:
yych = (YYCTYPE)*++YYCURSOR;
goto yy178;
yy156:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 346 "../src/parse/lex.re"
+#line 345 "../src/parse/lex.re"
{
yylval.str = new std::string (tok, tok_len ());
return TOKEN_ID;
}
-#line 1050 "src/parse/lex.cc"
+#line 1049 "src/parse/lex.cc"
yy158:
++YYCURSOR;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yy160:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) == '>') goto yy179;
-#line 244 "../src/parse/lex.re"
+#line 243 "../src/parse/lex.re"
{
tok += 2; /* skip ":=" */
depth = 0;
goto code;
}
-#line 1083 "src/parse/lex.cc"
+#line 1082 "src/parse/lex.cc"
yy162:
++YYCURSOR;
-#line 272 "../src/parse/lex.re"
+#line 271 "../src/parse/lex.re"
{
return TOKEN_SETUP;
}
-#line 1090 "src/parse/lex.cc"
+#line 1089 "src/parse/lex.cc"
yy164:
yych = (YYCTYPE)*++YYCURSOR;
YYCTXMARKER = YYCURSOR;
goto yy182;
yy165:
++YYCURSOR;
-#line 267 "../src/parse/lex.re"
+#line 266 "../src/parse/lex.re"
{ yylval.regexp = lex_cls(true); return TOKEN_REGEXP; }
-#line 1099 "src/parse/lex.cc"
+#line 1098 "src/parse/lex.cc"
yy167:
yych = (YYCTYPE)*++YYCURSOR;
YYCTXMARKER = YYCURSOR;
yy168:
++YYCURSOR;
yy169:
-#line 319 "../src/parse/lex.re"
+#line 318 "../src/parse/lex.re"
{
fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers");
}
-#line 1112 "src/parse/lex.cc"
+#line 1111 "src/parse/lex.cc"
yy170:
++YYCURSOR;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yy175:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 333 "../src/parse/lex.re"
+#line 332 "../src/parse/lex.re"
{
yylval.str = new std::string (tok, tok_len ());
if (opts->FFlag)
return TOKEN_ID;
}
}
-#line 1178 "src/parse/lex.cc"
+#line 1177 "src/parse/lex.cc"
yy177:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yy179:
++YYCURSOR;
YYCURSOR -= 2;
-#line 240 "../src/parse/lex.re"
+#line 239 "../src/parse/lex.re"
{
return *tok;
}
-#line 1205 "src/parse/lex.cc"
+#line 1204 "src/parse/lex.cc"
yy181:
++YYCURSOR;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yy185:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 269 "../src/parse/lex.re"
+#line 268 "../src/parse/lex.re"
{
return TOKEN_NOCOND;
}
-#line 1243 "src/parse/lex.cc"
+#line 1242 "src/parse/lex.cc"
yy187:
yych = (YYCTYPE)*++YYCURSOR;
YYCTXMARKER = YYCURSOR;
goto yy169;
yy189:
++YYCURSOR;
-#line 288 "../src/parse/lex.re"
+#line 287 "../src/parse/lex.re"
{
if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min))
{
yylval.extop.max = yylval.extop.min;
return TOKEN_CLOSESIZE;
}
-#line 1267 "src/parse/lex.cc"
+#line 1266 "src/parse/lex.cc"
yy191:
++YYCURSOR;
-#line 323 "../src/parse/lex.re"
+#line 322 "../src/parse/lex.re"
{
if (!opts->FFlag) {
fatal("curly braces for names only allowed with -F switch");
yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces
return TOKEN_ID;
}
-#line 1278 "src/parse/lex.cc"
+#line 1277 "src/parse/lex.cc"
yy193:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'n') goto yy199;
goto yy144;
yy197:
++YYCURSOR;
-#line 310 "../src/parse/lex.re"
+#line 309 "../src/parse/lex.re"
{
if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min))
{
yylval.extop.max = std::numeric_limits<uint32_t>::max();
return TOKEN_CLOSESIZE;
}
-#line 1307 "src/parse/lex.cc"
+#line 1306 "src/parse/lex.cc"
yy199:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'e') goto yy204;
goto yy144;
yy200:
++YYCURSOR;
-#line 331 "../src/parse/lex.re"
+#line 330 "../src/parse/lex.re"
{ lex_conf (); return TOKEN_CONF; }
-#line 1316 "src/parse/lex.cc"
+#line 1315 "src/parse/lex.cc"
yy202:
++YYCURSOR;
-#line 297 "../src/parse/lex.re"
+#line 296 "../src/parse/lex.re"
{
const char * p = strchr (tok, ',');
if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min))
}
return TOKEN_CLOSESIZE;
}
-#line 1332 "src/parse/lex.cc"
+#line 1331 "src/parse/lex.cc"
yy204:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '0') goto yy206;
yy211:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 376 "../src/parse/lex.re"
+#line 375 "../src/parse/lex.re"
{
set_sourceline ();
goto scan;
}
-#line 1393 "src/parse/lex.cc"
+#line 1392 "src/parse/lex.cc"
yy213:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy211;
if (yych == '\n') goto yy144;
goto yy214;
}
-#line 392 "../src/parse/lex.re"
+#line 391 "../src/parse/lex.re"
flex_name:
-#line 1423 "src/parse/lex.cc"
+#line 1422 "src/parse/lex.cc"
{
YYCTYPE yych;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
if (yych == '\r') goto yy224;
++YYCURSOR;
yy221:
-#line 403 "../src/parse/lex.re"
+#line 402 "../src/parse/lex.re"
{
YYCURSOR = tok;
goto start;
}
-#line 1437 "src/parse/lex.cc"
+#line 1436 "src/parse/lex.cc"
yy222:
++YYCURSOR;
-#line 397 "../src/parse/lex.re"
+#line 396 "../src/parse/lex.re"
{
YYCURSOR = tok;
lexer_state = LEX_NORMAL;
return TOKEN_FID_END;
}
-#line 1446 "src/parse/lex.cc"
+#line 1445 "src/parse/lex.cc"
yy224:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) == '\n') goto yy222;
goto yy221;
}
-#line 407 "../src/parse/lex.re"
+#line 406 "../src/parse/lex.re"
code:
-#line 1457 "src/parse/lex.cc"
+#line 1456 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
}
yy227:
++YYCURSOR;
-#line 470 "../src/parse/lex.re"
+#line 469 "../src/parse/lex.re"
{
if (cur == eof)
{
}
goto code;
}
-#line 1530 "src/parse/lex.cc"
+#line 1529 "src/parse/lex.cc"
yy229:
++YYCURSOR;
yy230:
-#line 484 "../src/parse/lex.re"
+#line 483 "../src/parse/lex.re"
{
goto code;
}
-#line 1538 "src/parse/lex.cc"
+#line 1537 "src/parse/lex.cc"
yy231:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
}
yy232:
-#line 451 "../src/parse/lex.re"
+#line 450 "../src/parse/lex.re"
{
if (depth == 0)
{
cline++;
goto code;
}
-#line 1577 "src/parse/lex.cc"
+#line 1576 "src/parse/lex.cc"
yy233:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
goto yy251;
yy235:
++YYCURSOR;
-#line 423 "../src/parse/lex.re"
+#line 422 "../src/parse/lex.re"
{
if (depth == 0)
{
}
goto code;
}
-#line 1602 "src/parse/lex.cc"
+#line 1601 "src/parse/lex.cc"
yy237:
++YYCURSOR;
-#line 411 "../src/parse/lex.re"
+#line 410 "../src/parse/lex.re"
{
if (depth == 0)
{
}
goto code;
}
-#line 1618 "src/parse/lex.cc"
+#line 1617 "src/parse/lex.cc"
yy239:
yyaccept = 2;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy240:
YYCURSOR -= 1;
-#line 438 "../src/parse/lex.re"
+#line 437 "../src/parse/lex.re"
{
if (depth == 0)
{
cline++;
goto code;
}
-#line 1644 "src/parse/lex.cc"
+#line 1643 "src/parse/lex.cc"
yy241:
yych = (YYCTYPE)*++YYCURSOR;
goto yy240;
if (yych >= '#') goto yy249;
yy247:
++YYCURSOR;
-#line 481 "../src/parse/lex.re"
+#line 480 "../src/parse/lex.re"
{
goto code;
}
-#line 1683 "src/parse/lex.cc"
+#line 1682 "src/parse/lex.cc"
yy249:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yy265:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 434 "../src/parse/lex.re"
+#line 433 "../src/parse/lex.re"
{
set_sourceline ();
goto code;
}
-#line 1778 "src/parse/lex.cc"
+#line 1777 "src/parse/lex.cc"
yy267:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy265;
if (yych == '\n') goto yy244;
goto yy268;
}
-#line 487 "../src/parse/lex.re"
+#line 486 "../src/parse/lex.re"
comment:
-#line 1812 "src/parse/lex.cc"
+#line 1811 "src/parse/lex.cc"
{
YYCTYPE yych;
static const unsigned char yybm[] = {
}
++YYCURSOR;
yy275:
-#line 519 "../src/parse/lex.re"
+#line 518 "../src/parse/lex.re"
{
if (cur == eof)
{
}
goto comment;
}
-#line 1867 "src/parse/lex.cc"
+#line 1866 "src/parse/lex.cc"
yy276:
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yybm[0+yych] & 32) {
}
if (yych == '#') goto yy283;
yy277:
-#line 510 "../src/parse/lex.re"
+#line 509 "../src/parse/lex.re"
{
if (cur == eof)
{
cline++;
goto comment;
}
-#line 1885 "src/parse/lex.cc"
+#line 1884 "src/parse/lex.cc"
yy278:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '/') goto yy285;
}
yy285:
++YYCURSOR;
-#line 491 "../src/parse/lex.re"
+#line 490 "../src/parse/lex.re"
{
if (--depth == 0)
{
goto comment;
}
}
-#line 1930 "src/parse/lex.cc"
+#line 1929 "src/parse/lex.cc"
yy287:
++YYCURSOR;
-#line 501 "../src/parse/lex.re"
+#line 500 "../src/parse/lex.re"
{
++depth;
fatal("ambiguous /* found");
goto comment;
}
-#line 1939 "src/parse/lex.cc"
+#line 1938 "src/parse/lex.cc"
yy289:
yych = (YYCTYPE)*++YYCURSOR;
if (yych != 'i') goto yy282;
yy299:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 506 "../src/parse/lex.re"
+#line 505 "../src/parse/lex.re"
{
set_sourceline ();
goto comment;
}
-#line 2001 "src/parse/lex.cc"
+#line 2000 "src/parse/lex.cc"
yy301:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy299;
if (yych == '\n') goto yy282;
goto yy302;
}
-#line 526 "../src/parse/lex.re"
+#line 525 "../src/parse/lex.re"
nextLine:
-#line 2031 "src/parse/lex.cc"
+#line 2030 "src/parse/lex.cc"
{
YYCTYPE yych;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yych = (YYCTYPE)*YYCURSOR;
if (yych == '\n') goto yy310;
++YYCURSOR;
-#line 537 "../src/parse/lex.re"
+#line 536 "../src/parse/lex.re"
{ if(cur == eof) {
return 0;
}
goto nextLine;
}
-#line 2044 "src/parse/lex.cc"
+#line 2043 "src/parse/lex.cc"
yy310:
++YYCURSOR;
-#line 530 "../src/parse/lex.re"
+#line 529 "../src/parse/lex.re"
{ if(cur == eof) {
return 0;
}
cline++;
goto scan;
}
-#line 2055 "src/parse/lex.cc"
+#line 2054 "src/parse/lex.cc"
}
-#line 542 "../src/parse/lex.re"
+#line 541 "../src/parse/lex.re"
}
uint32_t u, l;
fst:
-#line 2081 "src/parse/lex.cc"
+#line 2080 "src/parse/lex.cc"
{
YYCTYPE yych;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yych = (YYCTYPE)*YYCURSOR;
if (yych == ']') goto yy315;
-#line 566 "../src/parse/lex.re"
+#line 565 "../src/parse/lex.re"
{ l = lex_cls_chr(); goto snd; }
-#line 2089 "src/parse/lex.cc"
+#line 2088 "src/parse/lex.cc"
yy315:
++YYCURSOR;
-#line 565 "../src/parse/lex.re"
+#line 564 "../src/parse/lex.re"
{ goto end; }
-#line 2094 "src/parse/lex.cc"
+#line 2093 "src/parse/lex.cc"
}
-#line 567 "../src/parse/lex.re"
+#line 566 "../src/parse/lex.re"
snd:
-#line 2100 "src/parse/lex.cc"
+#line 2099 "src/parse/lex.cc"
{
YYCTYPE yych;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = (YYCTYPE)*(YYMARKER = YYCURSOR);
if (yych == '-') goto yy320;
yy319:
-#line 570 "../src/parse/lex.re"
+#line 569 "../src/parse/lex.re"
{ u = l; goto add; }
-#line 2109 "src/parse/lex.cc"
+#line 2108 "src/parse/lex.cc"
yy320:
yych = (YYCTYPE)*++YYCURSOR;
if (yych != ']') goto yy322;
yy322:
++YYCURSOR;
YYCURSOR -= 1;
-#line 571 "../src/parse/lex.re"
+#line 570 "../src/parse/lex.re"
{
u = lex_cls_chr();
if (l > u) {
}
goto add;
}
-#line 2127 "src/parse/lex.cc"
+#line 2126 "src/parse/lex.cc"
}
-#line 579 "../src/parse/lex.re"
+#line 578 "../src/parse/lex.re"
add:
if (!(s = opts->encoding.encodeRange(l, u))) {
{
tok = cur;
-#line 2148 "src/parse/lex.cc"
+#line 2147 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
if (yych == '\n') goto yy328;
if (yych == '\\') goto yy330;
++YYCURSOR;
-#line 602 "../src/parse/lex.re"
+#line 601 "../src/parse/lex.re"
{ return static_cast<uint8_t>(tok[0]); }
-#line 2159 "src/parse/lex.cc"
+#line 2158 "src/parse/lex.cc"
yy328:
++YYCURSOR;
-#line 597 "../src/parse/lex.re"
+#line 596 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error"); }
-#line 2164 "src/parse/lex.cc"
+#line 2163 "src/parse/lex.cc"
yy330:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) <= '`') {
}
}
}
-#line 600 "../src/parse/lex.re"
+#line 599 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
-#line 2215 "src/parse/lex.cc"
+#line 2214 "src/parse/lex.cc"
yy332:
++YYCURSOR;
-#line 615 "../src/parse/lex.re"
+#line 614 "../src/parse/lex.re"
{
warn.useless_escape(tline, tok - pos, tok[1]);
return static_cast<uint8_t>(tok[1]);
}
-#line 2223 "src/parse/lex.cc"
+#line 2222 "src/parse/lex.cc"
yy334:
++YYCURSOR;
-#line 613 "../src/parse/lex.re"
+#line 612 "../src/parse/lex.re"
{ return static_cast<uint8_t>('-'); }
-#line 2228 "src/parse/lex.cc"
+#line 2227 "src/parse/lex.cc"
yy336:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych <= '/') goto yy337;
if (yych <= '7') goto yy361;
yy337:
-#line 599 "../src/parse/lex.re"
+#line 598 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
-#line 2237 "src/parse/lex.cc"
+#line 2236 "src/parse/lex.cc"
yy338:
yych = (YYCTYPE)*++YYCURSOR;
goto yy337;
if (yych <= 'f') goto yy363;
}
yy340:
-#line 598 "../src/parse/lex.re"
+#line 597 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
-#line 2255 "src/parse/lex.cc"
+#line 2254 "src/parse/lex.cc"
yy341:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy342:
++YYCURSOR;
-#line 612 "../src/parse/lex.re"
+#line 611 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\\'); }
-#line 2273 "src/parse/lex.cc"
+#line 2272 "src/parse/lex.cc"
yy344:
++YYCURSOR;
-#line 614 "../src/parse/lex.re"
+#line 613 "../src/parse/lex.re"
{ return static_cast<uint8_t>(']'); }
-#line 2278 "src/parse/lex.cc"
+#line 2277 "src/parse/lex.cc"
yy346:
++YYCURSOR;
-#line 605 "../src/parse/lex.re"
+#line 604 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\a'); }
-#line 2283 "src/parse/lex.cc"
+#line 2282 "src/parse/lex.cc"
yy348:
++YYCURSOR;
-#line 606 "../src/parse/lex.re"
+#line 605 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\b'); }
-#line 2288 "src/parse/lex.cc"
+#line 2287 "src/parse/lex.cc"
yy350:
++YYCURSOR;
-#line 607 "../src/parse/lex.re"
+#line 606 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\f'); }
-#line 2293 "src/parse/lex.cc"
+#line 2292 "src/parse/lex.cc"
yy352:
++YYCURSOR;
-#line 608 "../src/parse/lex.re"
+#line 607 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\n'); }
-#line 2298 "src/parse/lex.cc"
+#line 2297 "src/parse/lex.cc"
yy354:
++YYCURSOR;
-#line 609 "../src/parse/lex.re"
+#line 608 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\r'); }
-#line 2303 "src/parse/lex.cc"
+#line 2302 "src/parse/lex.cc"
yy356:
++YYCURSOR;
-#line 610 "../src/parse/lex.re"
+#line 609 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\t'); }
-#line 2308 "src/parse/lex.cc"
+#line 2307 "src/parse/lex.cc"
yy358:
++YYCURSOR;
-#line 611 "../src/parse/lex.re"
+#line 610 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\v'); }
-#line 2313 "src/parse/lex.cc"
+#line 2312 "src/parse/lex.cc"
yy360:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy366:
++YYCURSOR;
-#line 604 "../src/parse/lex.re"
+#line 603 "../src/parse/lex.re"
{ return unesc_oct(tok, cur); }
-#line 2378 "src/parse/lex.cc"
+#line 2377 "src/parse/lex.cc"
yy368:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
}
yy370:
++YYCURSOR;
-#line 603 "../src/parse/lex.re"
+#line 602 "../src/parse/lex.re"
{ return unesc_hex(tok, cur); }
-#line 2407 "src/parse/lex.cc"
+#line 2406 "src/parse/lex.cc"
yy372:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
goto yy362;
}
}
-#line 619 "../src/parse/lex.re"
+#line 618 "../src/parse/lex.re"
}
end = false;
tok = cur;
-#line 2440 "src/parse/lex.cc"
+#line 2439 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
if (yych == '\n') goto yy378;
if (yych == '\\') goto yy380;
++YYCURSOR;
-#line 632 "../src/parse/lex.re"
+#line 631 "../src/parse/lex.re"
{
end = tok[0] == quote;
return static_cast<uint8_t>(tok[0]);
}
-#line 2454 "src/parse/lex.cc"
+#line 2453 "src/parse/lex.cc"
yy378:
++YYCURSOR;
-#line 627 "../src/parse/lex.re"
+#line 626 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error"); }
-#line 2459 "src/parse/lex.cc"
+#line 2458 "src/parse/lex.cc"
yy380:
++YYCURSOR;
if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') {
}
}
}
-#line 630 "../src/parse/lex.re"
+#line 629 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
-#line 2507 "src/parse/lex.cc"
+#line 2506 "src/parse/lex.cc"
yy382:
++YYCURSOR;
-#line 646 "../src/parse/lex.re"
+#line 645 "../src/parse/lex.re"
{
if (tok[1] != quote) {
warn.useless_escape(tline, tok - pos, tok[1]);
}
return static_cast<uint8_t>(tok[1]);
}
-#line 2517 "src/parse/lex.cc"
+#line 2516 "src/parse/lex.cc"
yy384:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych <= '/') goto yy385;
if (yych <= '7') goto yy407;
yy385:
-#line 629 "../src/parse/lex.re"
+#line 628 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
-#line 2526 "src/parse/lex.cc"
+#line 2525 "src/parse/lex.cc"
yy386:
yych = (YYCTYPE)*++YYCURSOR;
goto yy385;
if (yych <= 'f') goto yy409;
}
yy388:
-#line 628 "../src/parse/lex.re"
+#line 627 "../src/parse/lex.re"
{ fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
-#line 2544 "src/parse/lex.cc"
+#line 2543 "src/parse/lex.cc"
yy389:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy390:
++YYCURSOR;
-#line 645 "../src/parse/lex.re"
+#line 644 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\\'); }
-#line 2562 "src/parse/lex.cc"
+#line 2561 "src/parse/lex.cc"
yy392:
++YYCURSOR;
-#line 638 "../src/parse/lex.re"
+#line 637 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\a'); }
-#line 2567 "src/parse/lex.cc"
+#line 2566 "src/parse/lex.cc"
yy394:
++YYCURSOR;
-#line 639 "../src/parse/lex.re"
+#line 638 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\b'); }
-#line 2572 "src/parse/lex.cc"
+#line 2571 "src/parse/lex.cc"
yy396:
++YYCURSOR;
-#line 640 "../src/parse/lex.re"
+#line 639 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\f'); }
-#line 2577 "src/parse/lex.cc"
+#line 2576 "src/parse/lex.cc"
yy398:
++YYCURSOR;
-#line 641 "../src/parse/lex.re"
+#line 640 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\n'); }
-#line 2582 "src/parse/lex.cc"
+#line 2581 "src/parse/lex.cc"
yy400:
++YYCURSOR;
-#line 642 "../src/parse/lex.re"
+#line 641 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\r'); }
-#line 2587 "src/parse/lex.cc"
+#line 2586 "src/parse/lex.cc"
yy402:
++YYCURSOR;
-#line 643 "../src/parse/lex.re"
+#line 642 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\t'); }
-#line 2592 "src/parse/lex.cc"
+#line 2591 "src/parse/lex.cc"
yy404:
++YYCURSOR;
-#line 644 "../src/parse/lex.re"
+#line 643 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\v'); }
-#line 2597 "src/parse/lex.cc"
+#line 2596 "src/parse/lex.cc"
yy406:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy412:
++YYCURSOR;
-#line 637 "../src/parse/lex.re"
+#line 636 "../src/parse/lex.re"
{ return unesc_oct(tok, cur); }
-#line 2662 "src/parse/lex.cc"
+#line 2661 "src/parse/lex.cc"
yy414:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
}
yy416:
++YYCURSOR;
-#line 636 "../src/parse/lex.re"
+#line 635 "../src/parse/lex.re"
{ return unesc_hex(tok, cur); }
-#line 2691 "src/parse/lex.cc"
+#line 2690 "src/parse/lex.cc"
yy418:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
goto yy408;
}
}
-#line 652 "../src/parse/lex.re"
+#line 651 "../src/parse/lex.re"
}
for (bool end;;) {
const uint32_t c = lex_str_chr(quote, end);
if (end) {
- return r ? r : new NullOp;
+ return r ? r : RegExp::nil();
}
r = doCat(r, casing ? ichr(c) : schr(c));
}
sourceline:
tok = cur;
-#line 2736 "src/parse/lex.cc"
+#line 2735 "src/parse/lex.cc"
{
YYCTYPE yych;
static const unsigned char yybm[] = {
yy422:
++YYCURSOR;
yy423:
-#line 695 "../src/parse/lex.re"
+#line 694 "../src/parse/lex.re"
{
goto sourceline;
}
-#line 2789 "src/parse/lex.cc"
+#line 2788 "src/parse/lex.cc"
yy424:
++YYCURSOR;
-#line 683 "../src/parse/lex.re"
+#line 682 "../src/parse/lex.re"
{
if (cur == eof)
{
tok = cur;
return;
}
-#line 2805 "src/parse/lex.cc"
+#line 2804 "src/parse/lex.cc"
yy426:
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych == '\n') goto yy423;
if (yybm[0+yych] & 64) {
goto yy427;
}
-#line 672 "../src/parse/lex.re"
+#line 671 "../src/parse/lex.re"
{
if (!s_to_u32_unsafe (tok, cur, cline))
{
}
goto sourceline;
}
-#line 2825 "src/parse/lex.cc"
+#line 2824 "src/parse/lex.cc"
yy430:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
goto yy423;
yy433:
++YYCURSOR;
-#line 679 "../src/parse/lex.re"
+#line 678 "../src/parse/lex.re"
{
escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes
goto sourceline;
}
-#line 2847 "src/parse/lex.cc"
+#line 2846 "src/parse/lex.cc"
yy435:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
if (yych == '\n') goto yy432;
goto yy430;
}
-#line 698 "../src/parse/lex.re"
+#line 697 "../src/parse/lex.re"
}
#include "src/ir/regexp/encoding/enc.h"
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/rule_rank.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/parse/code.h"
static std::vector<std::string> condnames;
static re2c::SpecMap specMap;
static Spec spec;
-static RuleOp *specNone = NULL;
-static RuleOpList specStar;
-static RuleOp * star_default = NULL;
+static RegExp *specNone = NULL;
+static RuleList specStar;
+static RegExp *star_default = NULL;
static Scanner *in = NULL;
static Scanner::ParseMode parseMode;
static SetupMap ruleSetupMap;
condnames.push_back (*it);
}
- RuleOp * rule = new RuleOp
+ RegExp *rule = RegExp::rule
( loc
, expr
, look
context_check(clist);
for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it)
{
- RuleOp * def = new RuleOp
+ RegExp * def = RegExp::rule
( code->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, code
, NULL
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 193, 193, 195, 199, 203, 211, 219, 223, 227,
- 231, 247, 264, 268, 274, 279, 285, 289, 303, 319,
- 324, 330, 345, 362, 381, 387, 395, 398, 405, 411,
- 421, 424, 432, 435, 442, 446, 453, 457, 464, 468,
- 475, 479, 494, 513, 517, 521, 525, 532, 542, 546
+ 0, 189, 189, 191, 195, 199, 207, 215, 219, 223,
+ 227, 243, 260, 264, 270, 275, 281, 285, 299, 315,
+ 320, 326, 341, 358, 377, 383, 391, 394, 401, 407,
+ 417, 420, 428, 431, 438, 442, 449, 453, 460, 464,
+ 471, 475, 490, 509, 513, 517, 521, 528, 538, 542
};
#endif
{
in->fatal("condition or '<*>' required when using -c switch");
}
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( (yyvsp[(3) - (3)].code)->loc
, (yyvsp[(1) - (3)].regexp)
, (yyvsp[(2) - (3)].regexp)
{
if (opts->cFlag)
in->fatal("condition or '<*>' required when using -c switch");
- RuleOp * def = new RuleOp
+ RegExp * def = RegExp::rule
( (yyvsp[(2) - (2)].code)->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, (yyvsp[(2) - (2)].code)
, NULL
{
context_check(NULL);
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( (yyvsp[(7) - (7)].code)->loc
, (yyvsp[(4) - (7)].regexp)
, (yyvsp[(5) - (7)].regexp)
assert((yyvsp[(7) - (7)].str));
context_check(NULL);
Loc loc (in->get_fname (), in->get_cline ());
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( loc
, (yyvsp[(4) - (7)].regexp)
, (yyvsp[(5) - (7)].regexp)
{
in->fatal ("code to default rule '*' is already defined");
}
- star_default = new RuleOp
+ star_default = RegExp::rule
( (yyvsp[(5) - (5)].code)->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, (yyvsp[(5) - (5)].code)
, NULL
{
in->fatal("code to handle illegal condition already defined");
}
- (yyval.regexp) = specNone = new RuleOp
+ (yyval.regexp) = specNone = RegExp::rule
( (yyvsp[(3) - (3)].code)->loc
- , new NullOp
- , new NullOp
+ , RegExp::nil()
+ , RegExp::nil()
, rank_counter.next ()
, (yyvsp[(3) - (3)].code)
, (yyvsp[(2) - (3)].str)
in->fatal("code to handle illegal condition already defined");
}
Loc loc (in->get_fname (), in->get_cline ());
- (yyval.regexp) = specNone = new RuleOp
+ (yyval.regexp) = specNone = RegExp::rule
( loc
- , new NullOp
- , new NullOp
+ , RegExp::nil()
+ , RegExp::nil()
, rank_counter.next ()
, NULL
, (yyvsp[(3) - (3)].str)
case 32:
{
- (yyval.regexp) = new NullOp;
+ (yyval.regexp) = RegExp::nil();
;}
break;
case 39:
{
- (yyval.regexp) = new CatOp((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp));
+ (yyval.regexp) = RegExp::cat((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp));
;}
break;
switch((yyvsp[(2) - (2)].op))
{
case '*':
- (yyval.regexp) = new CloseOp((yyvsp[(1) - (2)].regexp));
+ (yyval.regexp) = RegExp::iter((yyvsp[(1) - (2)].regexp));
break;
case '+':
- (yyval.regexp) = new CatOp (new CloseOp((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp));
+ (yyval.regexp) = RegExp::cat(RegExp::iter((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp));
break;
case '?':
- (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), new NullOp());
+ (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), RegExp::nil());
break;
}
;}
{
(yyval.regexp) = repeat_from_to ((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].extop).min, (yyvsp[(2) - (2)].extop).max);
}
- (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : new NullOp;
+ (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : RegExp::nil();
;}
break;
// merge <*> rules to all conditions with lowest priority
for (it = specMap.begin(); it != specMap.end(); ++it)
{
- for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
+ for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
{
- RuleOp *r = new RuleOp(*itOp, rank_counter.next());
+ RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next());
it->second.add (r);
}
if (star_default)
void parse_cleanup()
{
- RegExp::vFreeList.clear();
+ RegExp::flist.clear();
Range::vFreeList.clear();
RangeSuffix::freeList.clear();
Code::freelist.clear();
// Don't forget to include zero and upper bound, even if they
// do not explicitly appear in ranges.
std::set<uint32_t> bounds;
- spec.re->split(bounds);
+ split(spec.re, bounds);
bounds.insert(0);
bounds.insert(cunits);
charset_t cs;
// skeleton must be constructed after DFA construction
// but prior to any other DFA transformations
Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line);
- spec.re->nullable_rules(skeleton->nullable_rules);
+ nullable_rules(spec.re, skeleton->nullable_rules);
minimization(dfa);
+++ /dev/null
-#include "src/util/c99_stdint.h"
-
-#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
-
-namespace re2c
-{
-
-uint32_t AltOp::calc_size() const
-{
- return exp1->calc_size()
- + exp2->calc_size()
- + 1;
-}
-
-uint32_t CatOp::calc_size() const
-{
- return exp1->calc_size()
- + exp2->calc_size();
-}
-
-uint32_t CloseOp::calc_size() const
-{
- return exp->calc_size() + 1;
-}
-
-uint32_t MatchOp::calc_size() const
-{
- return 1;
-}
-
-uint32_t NullOp::calc_size() const
-{
- return 0;
-}
-
-uint32_t RuleOp::calc_size() const
-{
- const uint32_t n = ctx->calc_size();
- return exp->calc_size()
- + (n > 0 ? n + 1 : 0)
- + 1;
-}
-
-} // end namespace re2c
#include "src/ir/nfa/nfa.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
namespace re2c {
+static uint32_t calc_size(const RegExp *re);
+static nfa_state_t *compile(const RegExp *re, nfa_t &nfa, nfa_state_t *n);
+
nfa_t::nfa_t(RegExp *re)
- : max_size(re->calc_size())
+ : max_size(calc_size(re))
, size(0)
, states(new nfa_state_t[max_size])
- , root(re->compile(*this, NULL))
+ , root(compile(re, *this, NULL))
{}
nfa_t::~nfa_t()
delete[] states;
}
-nfa_state_t *AltOp::compile(nfa_t &nfa, nfa_state_t *t)
-{
- nfa_state_t *s = &nfa.states[nfa.size++];
- s->alt(exp1->compile(nfa, t),
- exp2->compile(nfa, t));
- return s;
-}
-
-nfa_state_t *CatOp::compile(nfa_t &nfa, nfa_state_t *t)
+uint32_t calc_size(const RegExp *re) // number of NFA states to reserve for `re`; nfa_t's constructor uses it as max_size for the `states` array
{
- nfa_state_t *s2 = exp2->compile(nfa, t);
- nfa_state_t *s1 = exp1->compile(nfa, s2);
- return s1;
-}
-
-nfa_state_t *CloseOp::compile(nfa_t &nfa, nfa_state_t *t)
-{
- nfa_state_t *s = &nfa.states[nfa.size++];
- s->alt(t, exp->compile(nfa, s));
- return s;
+ switch (re->tag) {
+ default: // any unrecognized tag is sized like NIL
+ case RegExp::NIL:
+ return 0; // compile() allocates no state for NIL
+ case RegExp::SYM:
+ return 1;
+ case RegExp::ALT:
+ return calc_size(re->pld.alt.re1)
+ + calc_size(re->pld.alt.re2)
+ + 1; // one state for the alternation itself
+ case RegExp::CAT:
+ return calc_size(re->pld.cat.re1)
+ + calc_size(re->pld.cat.re2);
+ case RegExp::ITER:
+ return calc_size(re->pld.iter.re)
+ + 1; // one state for the loop
+ case RegExp::RULE: {
+ const uint32_t re_size = calc_size(re->pld.rule.re) + 1; // +1 for the rule's final state
+ uint32_t ctx_size = calc_size(re->pld.rule.ctx);
+ if (ctx_size > 0) {
+ ctx_size += 1; // reserve the extra state compile() allocates for a dynamic context
+ }
+ return re_size + ctx_size;
+ }
+ }
}
-nfa_state_t *MatchOp::compile(nfa_t &nfa, nfa_state_t *t)
+nfa_state_t *compile(const RegExp *re, nfa_t &nfa, nfa_state_t *t) // builds the NFA fragment for `re` in nfa.states; `t` is the state to continue to afterwards; returns the fragment's entry state
{
- nfa_state_t *s = &nfa.states[nfa.size++];
- s->ran(t, match);
+ nfa_state_t *s = NULL;
+ switch (re->tag) {
+ case RegExp::NIL:
+ s = t; // nothing to build: the entry is the continuation itself
+ break;
+ case RegExp::SYM:
+ s = &nfa.states[nfa.size++];
+ s->ran(t, re->pld.sym.range);
+ break;
+ case RegExp::ALT:
+ s = &nfa.states[nfa.size++];
+ s->alt(compile(re->pld.alt.re1, nfa, t),
+ compile(re->pld.alt.re2, nfa, t)); // both branches continue to the same `t`
+ break;
+ case RegExp::CAT:
+ s = compile(re->pld.cat.re2, nfa, t); // build the tail first so the head can continue to its entry
+ s = compile(re->pld.cat.re1, nfa, s);
+ break;
+ case RegExp::ITER:
+ s = &nfa.states[nfa.size++];
+ s->alt(t, compile(re->pld.iter.re, nfa, s)); // either leave to `t` or run the body, which continues back to `s`
+ break;
+ case RegExp::RULE: { // note: `t` is not used for RULE nodes
+ s = &nfa.states[nfa.size++];
+ RuleInfo *info = re->pld.rule.info;
+ s->fin(info);
+ if (info->ctx_len == ~0u) {
+ // dynamic context
+ nfa_state_t *q = compile(re->pld.rule.ctx, nfa, s);
+ s = &nfa.states[nfa.size++]; // extra state placed in front of the compiled context
+ s->ctx(q);
+ } else if (info->ctx_len > 0) {
+ // static context
+ s = compile(re->pld.rule.ctx, nfa, s);
+ }
+ s = compile(re->pld.rule.re, nfa, s); // the rule's own regexp comes before the context (if any)
+ break;
+ }
+ }
return s;
}
-nfa_state_t *NullOp::compile(nfa_t &, nfa_state_t *t)
-{
- return t;
-}
-
-nfa_state_t *RuleOp::compile(nfa_t &nfa, nfa_state_t *)
-{
- nfa_state_t *s3 = &nfa.states[nfa.size++];
- s3->fin(info);
-
- if (info->ctx_len == ~0u)
- {
- nfa_state_t *s2 = &nfa.states[nfa.size++];
- s2->ctx(ctx->compile(nfa, s3));
- s3 = s2;
- }
- else if (info->ctx_len > 0)
- {
- s3 = ctx->compile(nfa, s3);
- }
-
- nfa_state_t *s1 = exp->compile(nfa, s3);
- return s1;
-}
} // namespace re2c
+++ /dev/null
-#include "src/util/c99_stdint.h"
-#include <set>
-
-#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
-#include "src/util/range.h"
-
-namespace re2c {
-
-void AltOp::split (std::set<uint32_t> & cs)
-{
- exp1->split (cs);
- exp2->split (cs);
-}
-
-void CatOp::split (std::set<uint32_t> & cs)
-{
- exp1->split (cs);
- exp2->split (cs);
-}
-
-void CloseOp::split (std::set<uint32_t> & cs)
-{
- exp->split (cs);
-}
-
-void MatchOp::split (std::set<uint32_t> & cs)
-{
- for (Range *r = match; r; r = r->next ())
- {
- cs.insert (r->lower ());
- cs.insert (r->upper ());
- }
-}
-
-void NullOp::split (std::set<uint32_t> &) {}
-
-void RuleOp::split (std::set<uint32_t> & cs)
-{
- exp->split (cs);
- ctx->split (cs);
-}
-
-} // namespace re2c
+++ /dev/null
-#include <iostream>
-
-#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
-
-namespace re2c
-{
-
-std::ostream & operator << (std::ostream & o, const RegExp & re)
-{
- re.display (o);
- return o;
-}
-
-void AltOp::display (std::ostream & o) const
-{
- o << exp1 << "|" << exp2;
-}
-
-void CatOp::display (std::ostream & o) const
-{
- o << exp1 << exp2;
-}
-
-void CloseOp::display (std::ostream & o) const
-{
- o << exp << "+";
-}
-
-void MatchOp::display (std::ostream & o) const
-{
- o << match;
-}
-
-void NullOp::display (std::ostream & o) const
-{
- o << "_";
-}
-
-void RuleOp::display (std::ostream & o) const
-{
- o << exp << "/" << ctx << ";";
-}
-
-} // end namespace re2c
-
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_match.h"
#include "src/util/range.h"
namespace re2c {
{
return p
? emit (p, NULL)
- : new MatchOp (NULL);
+ : RegExp::sym(NULL);
}
/*
RegExp * regexp = NULL;
for (; p != NULL; p = p->next)
{
- RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re);
+ RegExp * re1 = doCat(RegExp::sym(Range::ran (p->l, p->h + 1)), re);
regexp = doAlt(regexp, emit(p->child, re1));
}
return regexp;
#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/encoding/utf16/utf16_range.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp.h"
#include "src/util/range.h"
namespace re2c {
RegExp * UTF16Symbol(utf16::rune r)
{
if (r <= utf16::MAX_1WORD_RUNE)
- return new MatchOp(Range::sym (r));
+ return RegExp::sym(Range::sym (r)); // rune not above MAX_1WORD_RUNE: a single SYM node
else
{
const uint32_t ld = utf16::lead_surr(r);
const uint32_t tr = utf16::trail_surr(r);
- return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr)));
+ return RegExp::cat(RegExp::sym(Range::sym (ld)), RegExp::sym(Range::sym (tr))); // otherwise: lead surrogate symbol followed by trail surrogate symbol
}
}
#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/encoding/utf8/utf8_range.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp.h"
#include "src/util/range.h"
namespace re2c {
{
uint32_t chars[utf8::MAX_RUNE_LENGTH];
const uint32_t chars_count = utf8::rune_to_bytes(chars, r);
- RegExp * re = new MatchOp(Range::sym (chars[0]));
+ RegExp * re = RegExp::sym(Range::sym (chars[0]));
for (uint32_t i = 1; i < chars_count; ++i)
- re = new CatOp(re, new MatchOp(Range::sym (chars[i])));
+ re = RegExp::cat(re, RegExp::sym(Range::sym (chars[i])));
return re;
}
+++ /dev/null
-#include "src/util/c99_stdint.h"
-
-#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-
-namespace re2c
-{
-
-uint32_t RegExp::fixedLength ()
-{
- return ~0u;
-}
-
-uint32_t AltOp::fixedLength ()
-{
- uint32_t l1 = exp1->fixedLength ();
- uint32_t l2 = exp2->fixedLength ();
-
- if (l1 != l2 || l1 == ~0u)
- {
- return ~0u;
- }
-
- return l1;
-}
-
-uint32_t CatOp::fixedLength ()
-{
- const uint32_t l1 = exp1->fixedLength ();
- if (l1 != ~0u)
- {
- const uint32_t l2 = exp2->fixedLength ();
- if (l2 != ~0u)
- {
- return l1 + l2;
- }
- }
- return ~0u;
-}
-
-uint32_t MatchOp::fixedLength ()
-{
- return 1;
-}
-
-uint32_t NullOp::fixedLength ()
-{
- return 0;
-}
-
-} // end namespace re2c
-
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
namespace re2c
{
-bool AltOp::nullable() const
-{
- return exp1->nullable()
- || exp2->nullable();
-}
-
-bool CatOp::nullable() const
-{
- return exp1->nullable()
- && exp2->nullable();
-}
-
-bool CloseOp::nullable() const
-{
- return true;
-}
-
-bool MatchOp::nullable() const
-{
- return false;
-}
-
-bool NullOp::nullable() const
-{
- return true;
-}
-
-bool RuleOp::nullable() const
-{
- return exp->nullable();
-}
-
-void RegExp::nullable_rules(std::vector<RuleInfo*>&) const {}
-
-void AltOp::nullable_rules(std::vector<RuleInfo*> &rs) const
-{
- exp1->nullable_rules(rs);
- exp2->nullable_rules(rs);
+static bool nullable(const RegExp *re) // true if `re` can match the empty string
+{
+ switch (re->tag) {
+ default: // any unrecognized tag is treated like NIL
+ case RegExp::NIL:
+ return true;
+ case RegExp::SYM:
+ return false;
+ case RegExp::ALT:
+ return nullable(re->pld.alt.re1)
+ || nullable(re->pld.alt.re2);
+ case RegExp::CAT:
+ return nullable(re->pld.cat.re1)
+ && nullable(re->pld.cat.re2);
+ case RegExp::ITER:
+ return true; // iteration is always considered nullable, whatever its body
+ case RegExp::RULE:
+ return nullable(re->pld.rule.re); // only the rule's regexp is examined, not its context
+ }
}
-void RuleOp::nullable_rules(std::vector<RuleInfo*> &rs) const
-{
- if (exp->nullable())
- {
- rs.push_back(info);
+void nullable_rules(const RegExp *re, std::vector<RuleInfo*> &rs) // appends to `rs` the RuleInfo of each RULE node, reached through ALT nodes, whose regexp is nullable
+{
+ switch (re->tag) {
+ case RegExp::ALT:
+ nullable_rules(re->pld.alt.re1, rs);
+ nullable_rules(re->pld.alt.re2, rs);
+ break;
+ case RegExp::RULE:
+ if (nullable(re->pld.rule.re)) {
+ rs.push_back(re->pld.rule.info);
+ }
+ break;
+ default: // other node kinds are not descended into
+ break;
}
}
#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_alt.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_match.h"
-#include "src/ir/regexp/regexp_null.h"
#include "src/parse/scanner.h"
#include "src/util/range.h"
namespace re2c
{
-static MatchOp * merge (MatchOp * m1, MatchOp * m2);
+static uint32_t fixlen(const RegExp *re);
-free_list<RegExp*> RegExp::vFreeList;
+free_list<RegExp*> RegExp::flist;
-RegExp * doAlt (RegExp * e1, RegExp * e2)
+RegExp *doAlt(RegExp *re1, RegExp *re2) // alternation where NULL means "no operand": returns the other argument as is (NULL if both are NULL)
{
- if (!e1)
- {
- return e2;
+ if (!re1) {
+ return re2;
}
- if (!e2)
- {
- return e1;
+ if (!re2) {
+ return re1;
}
- return new AltOp (e1, e2);
+ return RegExp::alt(re1, re2); // both present: allocate a new ALT node
}
-RegExp * mkAlt (RegExp * e1, RegExp * e2)
+static RegExp *merge(RegExp *sym1, RegExp *sym2) // combines two SYM nodes into one; a NULL argument yields the other one unchanged
{
- AltOp * a;
- MatchOp * m1;
- MatchOp * m2;
-
- a = dynamic_cast<AltOp*> (e1);
- if (a != NULL)
- {
- m1 = dynamic_cast<MatchOp*> (a->exp1);
- if (m1 != NULL)
- {
- e1 = a->exp2;
- }
+ if (!sym1) {
+ return sym2;
}
- else
- {
- m1 = dynamic_cast<MatchOp*> (e1);
- if (m1 != NULL)
- {
- e1 = NULL;
- }
+ if (!sym2) {
+ return sym1;
}
- a = dynamic_cast<AltOp*> (e2);
- if (a != NULL)
- {
- m2 = dynamic_cast<MatchOp*> (a->exp1);
- if (m2 != NULL)
- {
- e2 = a->exp2;
- }
+ return RegExp::sym(Range::add( // new SYM over Range::add of both ranges; the tags are not checked, callers must pass SYM nodes
+ sym1->pld.sym.range,
+ sym2->pld.sym.range));
+}
+
+static RegExp *lift_sym(RegExp *&re) // detaches a leading SYM from `re` (taken by reference): returns the SYM and leaves the remainder in `re`, or returns NULL and leaves `re` untouched
+{
+ if (!re) {
+ return NULL;
}
- else
- {
- m2 = dynamic_cast<MatchOp*> (e2);
- if (m2 != NULL)
- {
- e2 = NULL;
+ if (re->tag == RegExp::SYM) {
+ RegExp *sym = re;
+ re = NULL; // the whole expression was the symbol: nothing remains
+ return sym;
+ }
+ if (re->tag == RegExp::ALT) {
+ // second alternative cannot be SYM by construction
+ RegExp *alt1 = re->pld.alt.re1;
+ if (alt1 && alt1->tag == RegExp::SYM) {
+ re = re->pld.alt.re2; // keep only the non-symbol alternative; the ALT node itself is dropped from the result
+ return alt1;
}
}
-
- return doAlt (merge (m1, m2), doAlt (e1, e2));
+ return NULL; // no symbol to lift
}
-MatchOp * merge (MatchOp * m1, MatchOp * m2)
+RegExp *mkAlt(RegExp *re1, RegExp *re2) // alternation that first folds the operands' leading SYM nodes into a single SYM
{
- if (!m1)
- {
- return m2;
- }
- if (!m2)
- {
- return m1;
- }
- MatchOp * m = new MatchOp (Range::add (m1->match, m2->match));
- return m;
+ RegExp *sym1 = lift_sym(re1); // may set re1 to NULL or to its non-symbol remainder
+ RegExp *sym2 = lift_sym(re2);
+ return doAlt(
+ merge(sym1, sym2), // merged symbol (if any) becomes the first alternative
+ doAlt(re1, re2));
}
-RegExp * doCat (RegExp * e1, RegExp * e2)
+RegExp *doCat(RegExp *re1, RegExp *re2) // concatenation where NULL means "no operand": returns the other argument as is (NULL if both are NULL)
{
- if (!e1)
- {
- return e2;
+ if (!re1) {
+ return re2;
}
- if (!e2)
- {
- return e1;
+ if (!re2) {
+ return re1;
}
- return new CatOp (e1, e2);
+ return RegExp::cat(re1, re2); // both present: allocate a new CAT node
}
RegExp *Scanner::schr(uint32_t c) const
switch (opts->encoding.type ()) {
case Enc::UTF16: return UTF16Symbol(c);
case Enc::UTF8: return UTF8Symbol(c);
- default: return new MatchOp(Range::sym(c));
+ default: return RegExp::sym(Range::sym(c));
}
}
RegExp *Scanner::cls(Range *r) const
{
- if (!r)
- {
- switch (opts->empty_class_policy)
- {
+ if (!r) { // NULL range: handling is selected by opts->empty_class_policy
+ switch (opts->empty_class_policy) {
case EMPTY_CLASS_MATCH_EMPTY:
- warn.empty_class (get_line ());
- return new NullOp;
+ warn.empty_class(get_line());
+ return RegExp::nil(); // this policy turns the class into a NIL node
case EMPTY_CLASS_MATCH_NONE:
- warn.empty_class (get_line ());
+ warn.empty_class(get_line()); // warn only, then continue below with the NULL range
break;
case EMPTY_CLASS_ERROR:
- fatal ("empty character class");
+ fatal("empty character class");
break;
}
}
- switch (opts->encoding.type ())
- {
+ switch (opts->encoding.type()) { // build the class according to the configured encoding
case Enc::UTF16: return UTF16Range(r);
case Enc::UTF8: return UTF8Range(r);
- default: return new MatchOp(r);
+ default: return RegExp::sym(r); // any other encoding: one SYM node holding the range as is
}
}
-RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const
+RegExp *Scanner::mkDiff(RegExp *re1, RegExp *re2) const // class difference re1 minus re2; both operands must be non-NULL SYM nodes, otherwise fatal() is called
{
- MatchOp * m1 = dynamic_cast<MatchOp *> (e1);
- MatchOp * m2 = dynamic_cast<MatchOp *> (e2);
- if (m1 == NULL || m2 == NULL)
- {
- fatal("can only difference char sets");
+ if (re1 && re2
+ && re1->tag == RegExp::SYM
+ && re2->tag == RegExp::SYM) {
+ return cls(Range::sub( // result goes through cls(), so an empty difference follows the empty-class policy
+ re1->pld.sym.range,
+ re2->pld.sym.range));
}
- Range * r = Range::sub (m1->match, m2->match);
-
- return cls(r);
+ fatal("can only difference char sets");
+ return NULL; // NOTE(review): presumably only reached if fatal() returns -- confirm
}
-RegExp * Scanner::mkDot() const
+RegExp *Scanner::mkDot() const // builds the class "full encoding range minus the encoded newline"
{
- Range * full = opts->encoding.fullRange();
uint32_t c = '\n';
- if (!opts->encoding.encode(c))
+ if (!opts->encoding.encode(c)) { // encode() takes `c` as an argument and reports failure through its result
fatalf("Bad code point: '0x%X'", c);
- Range * ran = Range::sym (c);
- Range * inv = Range::sub (full, ran);
-
- return cls(inv);
+ }
+ return cls(Range::sub(
+ opts->encoding.fullRange(),
+ Range::sym(c)));
}
/*
* Also note that default range doesn't respect encoding policy
* (the way invalid code points are treated).
*/
-RegExp * Scanner::mkDefault() const
+RegExp *Scanner::mkDefault() const // single SYM node over Range::ran(0, nCodeUnits())
{
- Range * def = Range::ran (0, opts->encoding.nCodeUnits());
- return new MatchOp(def);
+ return RegExp::sym(Range::ran(0, // built with RegExp::sym directly rather than through cls()
+ opts->encoding.nCodeUnits()));
}
/*
*/
// see note [counted repetition expansion]
-RegExp * repeat (RegExp * e, uint32_t n)
+RegExp *repeat(RegExp *re, uint32_t n) // concatenation of n occurrences of `re`; NULL when n == 0
{
- RegExp * r = NULL;
- for (uint32_t i = 0; i < n; ++i)
- {
- r = doCat (r, e);
+ RegExp *r = NULL;
+ for (uint32_t i = 0; i < n; ++i) {
+ r = doCat(r, re); // the same `re` node is reused for every occurrence (no copy is made)
}
return r;
}
// see note [counted repetition expansion]
-RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m)
+RegExp *repeat_from_to(RegExp *re, uint32_t n, uint32_t m) // n mandatory occurrences of `re` followed by (m - n) nested optional ones
{
- RegExp * r1 = repeat (e, n);
- RegExp * r2 = NULL;
- for (uint32_t i = n; i < m; ++i)
- {
- r2 = mkAlt (new NullOp, doCat (e, r2));
+ RegExp *r1 = repeat(re, n);
+ RegExp *r2 = NULL;
+ for (uint32_t i = n; i < m; ++i) { // runs zero times when m <= n, leaving r2 NULL
+ r2 = mkAlt( // each pass wraps the previous tail: (nil | re tail)
+ RegExp::nil(),
+ doCat(re, r2));
}
- return doCat (r1, r2);
+ return doCat(r1, r2); // NULL when both parts are empty
}
// see note [counted repetition expansion]
-RegExp * repeat_from (RegExp * e, uint32_t n)
+RegExp *repeat_from(RegExp *re, uint32_t n) // n mandatory occurrences of `re` followed by an ITER node over `re`
+{
+ return doCat(
+ repeat(re, n), // NULL when n == 0, in which case doCat returns just the iteration
+ RegExp::iter(re));
+}
+
+RegExp* RegExp::rule(const Loc &loc, RegExp *r1, RegExp *r2,
+ rule_rank_t rank, const Code *code, const std::string *newcond)
+{ // creates a RULE node with regexp r1, context r2 and a newly allocated RuleInfo
+ RegExp *re = new RegExp(RULE);
+ re->pld.rule.re = r1;
+ re->pld.rule.ctx = r2;
+
+ uint32_t ctx_len = fixlen(r2); // fixed context length, or ~0u if not fixed; fixlen() dereferences r2, so it must not be NULL
+ // cannot emulate 'YYCURSOR -= N' operation with generic API
+ if (ctx_len != 0
+ && opts->input_api.type() == InputAPI::CUSTOM)
+ {
+ ctx_len = ~0u; // treat any non-empty context as variable-length
+ }
+
+ re->pld.rule.info = new RuleInfo(loc, rank, code, newcond, ctx_len); // released by ~RegExp() for RULE nodes
+ return re;
+}
+
+// Shallow-copies the regexps (re and ctx pointers are shared with `rule`), but allocates a new RuleInfo.
+// Used to duplicate <*> rules in conditions; `rule` must be a RULE node.
+RegExp* RegExp::rule_copy(const RegExp *rule, rule_rank_t rank)
{
- RegExp * r1 = repeat (e, n);
- RegExp * r2 = new CloseOp (e);
- return doCat (r1, r2);
+ RegExp *re = new RegExp(RULE);
+ re->pld.rule.re = rule->pld.rule.re;
+ re->pld.rule.ctx = rule->pld.rule.ctx;
+ const RuleInfo *info = rule->pld.rule.info;
+ re->pld.rule.info = new RuleInfo(info->loc, rank, // same loc, code, newcond and ctx_len as the source; only the rank differs
+ info->code, &info->newcond, info->ctx_len);
+ return re;
+}
+
+uint32_t fixlen(const RegExp *re) // length of `re` counted in SYM nodes when it is the same for every match, otherwise ~0u
+{
+ switch (re->tag) {
+ case RegExp::NIL:
+ return 0;
+ case RegExp::SYM:
+ return 1;
+ case RegExp::ALT:
+ {
+ const uint32_t l1 = fixlen(re->pld.alt.re1);
+ const uint32_t l2 = fixlen(re->pld.alt.re2);
+ return l1 == l2 ? l1 : ~0u; // fixed only if both branches agree; two ~0u branches also give ~0u
+ }
+ case RegExp::CAT:
+ {
+ const uint32_t l1 = fixlen(re->pld.cat.re1);
+ if (l1 == ~0u) {
+ return ~0u; // second operand is not examined in this case
+ }
+ const uint32_t l2 = fixlen(re->pld.cat.re2);
+ if (l2 == ~0u) {
+ return ~0u;
+ }
+ return l1 + l2;
+ }
+ case RegExp::ITER:
+ case RegExp::RULE:
+ default:
+ return ~0u; // iterations, rules and unknown tags are never reported as fixed-length
+ }
}
} // namespace re2c
#define _RE2C_IR_REGEXP_REGEXP_
#include "src/util/c99_stdint.h"
-#include <iosfwd>
#include <set>
#include <vector>
#include "src/parse/rules.h"
#include "src/util/free_list.h"
-#include "src/util/forbid_copy.h"
+#include "src/util/range.h"
namespace re2c
{
typedef std::vector<uint32_t> charset_t;
-class RegExp
+struct RegExp // regexp node stored as a tagged union: `tag` tells which member of `pld` is in use
{
-public:
- static free_list <RegExp *> vFreeList;
+ enum tag_t
+ {
+ NIL, // no payload
+ SYM, // pld.sym
+ ALT, // pld.alt
+ CAT, // pld.cat
+ ITER, // pld.iter
+ RULE // pld.rule
+ };
+ union payload_t
+ {
+ struct
+ {
+ Range *range;
+ } sym;
+ struct
+ {
+ RegExp *re1;
+ RegExp *re2;
+ } alt;
+ struct
+ {
+ RegExp *re1;
+ RegExp *re2;
+ } cat;
+ struct
+ {
+ RegExp *re;
+ } iter;
+ struct
+ {
+ RegExp *re;
+ RegExp *ctx;
+ RuleInfo *info;
+ } rule;
+ };
+
+ static free_list<RegExp*> flist; // every node inserts itself on construction and erases itself on destruction
+
+ tag_t tag;
+ payload_t pld;
- inline RegExp ()
+ static RegExp *nil() // factory functions: the constructor is private, so nodes are created only through these
+ {
+ return new RegExp(NIL);
+ }
+ static RegExp *sym(Range *r)
+ {
+ RegExp *re = new RegExp(SYM);
+ re->pld.sym.range = r;
+ return re;
+ }
+ static RegExp *alt(RegExp *r1, RegExp *r2)
{
- vFreeList.insert (this);
+ RegExp *re = new RegExp(ALT);
+ re->pld.alt.re1 = r1;
+ re->pld.alt.re2 = r2;
+ return re;
}
- inline virtual ~RegExp ()
+ static RegExp *cat(RegExp *r1, RegExp *r2)
{
- vFreeList.erase (this);
+ RegExp *re = new RegExp(CAT);
+ re->pld.cat.re1 = r1;
+ re->pld.cat.re2 = r2;
+ return re;
+ }
+ static RegExp *iter(RegExp *r)
+ {
+ RegExp *re = new RegExp(ITER);
+ re->pld.iter.re = r;
+ return re;
+ }
+ static RegExp *rule(const Loc &loc, RegExp *r1, RegExp *r2,
+ rule_rank_t rank, const Code *code, const std::string *newcond);
+ static RegExp *rule_copy(const RegExp *rule, rule_rank_t rank);
+ inline ~RegExp()
+ {
+ if (tag == RULE) {
+ delete pld.rule.info; // NOTE(review): child regexps are not deleted here; with FORBID_COPY gone, an implicit copy of a RULE node would delete this info twice
+ }
+ flist.erase(this);
+ }
+
+private:
+ inline RegExp(tag_t t) : tag(t), pld() // payload is value-initialized; the factories fill in the member matching `t`
+ {
+ flist.insert(this);
}
- virtual void split (std::set<uint32_t> &) = 0;
- virtual uint32_t calc_size() const = 0;
- virtual uint32_t fixedLength ();
- virtual bool nullable() const = 0;
- virtual void nullable_rules(std::vector<RuleInfo*>&) const;
- virtual nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n) = 0;
- virtual void display (std::ostream &) const = 0;
- friend std::ostream & operator << (std::ostream & o, const RegExp & re);
-
- FORBID_COPY (RegExp);
};
-RegExp * doAlt (RegExp * e1, RegExp * e2);
-RegExp * mkAlt (RegExp * e1, RegExp * e2);
-RegExp * doCat (RegExp * e1, RegExp * e2);
-RegExp * repeat (RegExp * e, uint32_t n);
-RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m);
-RegExp * repeat_from (RegExp * e, uint32_t n);
+void split(const RegExp *re, std::set<uint32_t> &cs);
+void nullable_rules(const RegExp *re, std::vector<RuleInfo*> &rs);
+
+RegExp *mkAlt(RegExp *re1, RegExp *re2);
+RegExp *doAlt(RegExp *re1, RegExp *re2);
+RegExp *doCat(RegExp *re1, RegExp *re2);
+RegExp *repeat(RegExp *re, uint32_t n);
+RegExp *repeat_from_to(RegExp *re, uint32_t n, uint32_t m);
+RegExp *repeat_from(RegExp *re, uint32_t n);
} // end namespace re2c
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_
-#define _RE2C_IR_REGEXP_REGEXP_ALT_
-
-#include "src/ir/regexp/regexp.h"
-
-namespace re2c
-{
-
-class AltOp: public RegExp
-{
- RegExp * exp1;
- RegExp * exp2;
-
-public:
- inline AltOp (RegExp * e1, RegExp * e2)
- : exp1 (e1)
- , exp2 (e2)
- {}
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- uint32_t fixedLength ();
- bool nullable() const;
- void nullable_rules(std::vector<RuleInfo*>&) const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
- void display (std::ostream & o) const;
- friend RegExp * mkAlt (RegExp *, RegExp *);
-
- FORBID_COPY (AltOp);
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_ALT_
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_
-#define _RE2C_IR_REGEXP_REGEXP_CAT_
-
-#include "src/ir/regexp/regexp.h"
-
-namespace re2c
-{
-
-class CatOp: public RegExp
-{
- RegExp * exp1;
- RegExp * exp2;
-
-public:
- inline CatOp (RegExp * e1, RegExp * e2)
- : exp1 (e1)
- , exp2 (e2)
- {}
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- uint32_t fixedLength ();
- bool nullable() const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
- void display (std::ostream & o) const;
-
- FORBID_COPY (CatOp);
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_CAT_
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_
-#define _RE2C_IR_REGEXP_REGEXP_CLOSE_
-
-#include "src/ir/regexp/regexp.h"
-
-namespace re2c
-{
-
-class CloseOp: public RegExp
-{
- RegExp * exp;
-
-public:
- inline CloseOp (RegExp * e)
- : exp (e)
- {}
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- bool nullable() const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
- void display (std::ostream & o) const;
-
- FORBID_COPY (CloseOp);
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_
-#define _RE2C_IR_REGEXP_REGEXP_MATCH_
-
-#include "src/ir/regexp/regexp.h"
-#include "src/util/range.h"
-
-namespace re2c
-{
-
-class MatchOp: public RegExp
-{
-public:
- Range * match;
-
- inline MatchOp (Range * m)
- : match (m)
- {}
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- uint32_t fixedLength ();
- bool nullable() const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
- void display (std::ostream & o) const;
-
- FORBID_COPY (MatchOp);
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_
-#define _RE2C_IR_REGEXP_REGEXP_NULL_
-
-#include "src/ir/regexp/regexp.h"
-
-namespace re2c
-{
-
-class NullOp: public RegExp
-{
-public:
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- uint32_t fixedLength ();
- bool nullable() const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
- void display (std::ostream & o) const;
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_NULL_
+++ /dev/null
-#ifndef _RE2C_IR_REGEXP_REGEXP_RULE_
-#define _RE2C_IR_REGEXP_REGEXP_RULE_
-
-#include <string>
-
-#include "src/ir/regexp/regexp.h"
-#include "src/globals.h"
-
-namespace re2c
-{
-
-class RuleOp: public RegExp
-{
- RegExp * exp;
-
-public:
- RegExp * ctx;
- RuleInfo *info;
-
- inline RuleOp
- ( const Loc & l
- , RegExp * r1
- , RegExp * r2
- , rule_rank_t r
- , const Code * c
- , const std::string * cond
- )
- : exp (r1)
- , ctx (r2)
- , info (NULL)
-
- {
- uint32_t ctx_len = r2->fixedLength();
- // cannot emulate 'YYCURSOR -= N' operation with generic API
- if (ctx_len != 0
- && opts->input_api.type() == InputAPI::CUSTOM)
- {
- ctx_len = ~0u;
- }
- info = new RuleInfo(l, r, c, cond, ctx_len);
- }
- RuleOp(RuleOp *rule, rule_rank_t r)
- : exp(rule->exp)
- , ctx(rule->ctx)
- , info(new RuleInfo(rule->info->loc, r, rule->info->code,
- &rule->info->newcond, rule->info->ctx_len))
- {}
- ~RuleOp()
- {
- delete info;
- }
- bool nullable() const;
- void nullable_rules(std::vector<RuleInfo*>&) const;
- void display (std::ostream & o) const;
- void split (std::set<uint32_t> &);
- uint32_t calc_size() const;
- nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
-
- FORBID_COPY (RuleOp);
-};
-
-} // end namespace re2c
-
-#endif // _RE2C_IR_REGEXP_REGEXP_RULE_
--- /dev/null
+#include "src/util/c99_stdint.h"
+#include <set>
+
+#include "src/ir/regexp/regexp.h"
+#include "src/util/range.h"
+
+namespace re2c {
+
+void split(const RegExp* re, std::set<uint32_t> &cs) // inserts lower() and upper() of every range held by any SYM node of `re` into `cs`
+{
+ switch (re->tag) {
+ case RegExp::NIL:
+ break;
+ case RegExp::SYM:
+ for (Range *r = re->pld.sym.range; r; r = r->next()) { // a NULL range contributes nothing
+ cs.insert(r->lower());
+ cs.insert(r->upper());
+ }
+ break;
+ case RegExp::ALT:
+ split(re->pld.alt.re1, cs);
+ split(re->pld.alt.re2, cs);
+ break;
+ case RegExp::CAT:
+ split(re->pld.cat.re1, cs);
+ split(re->pld.cat.re2, cs);
+ break;
+ case RegExp::ITER:
+ split(re->pld.iter.re, cs);
+ break;
+ case RegExp::RULE:
+ split(re->pld.rule.re, cs); // both the rule's regexp and its context are scanned
+ split(re->pld.rule.ctx, cs);
+ break;
+ }
+}
+
+} // namespace re2c
#include "src/globals.h"
#include "src/ir/regexp/encoding/enc.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_null.h"
#include "src/parse/code.h"
#include "src/parse/extop.h"
#include "src/parse/input.h"
const uint32_t c = static_cast<uint8_t>(*s);
r = doCat(r, casing ? ichr(c) : schr(c));
}
- yylval.regexp = r ? r : new NullOp;
+ yylval.regexp = r ? r : RegExp::nil();
return TOKEN_REGEXP;
}
}
for (bool end;;) {
const uint32_t c = lex_str_chr(quote, end);
if (end) {
- return r ? r : new NullOp;
+ return r ? r : RegExp::nil();
}
r = doCat(r, casing ? ichr(c) : schr(c));
}
#ifndef _RE2C_PARSE_PARSER_
#define _RE2C_PARSE_PARSER_
-#include <map>
#include <list>
+#include <map>
#include <string>
#include "src/codegen/output.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_rule.h"
#include "src/parse/scanner.h"
#include "src/parse/spec.h"
extern void parse_cleanup();
typedef std::set<std::string> CondList;
-typedef std::list<RuleOp*> RuleOpList;
+typedef std::list<RegExp*> RuleList;
typedef std::map<std::string, Spec> SpecMap;
typedef std::map<std::string, std::pair<uint32_t, std::string> > SetupMap;
typedef std::map<std::string, const Code *> DefaultMap;
#include "src/ir/regexp/encoding/enc.h"
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_cat.h"
-#include "src/ir/regexp/regexp_close.h"
-#include "src/ir/regexp/regexp_null.h"
-#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/rule_rank.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/parse/code.h"
// File-static parser state.
// This hunk retypes the rule holders from RuleOp* / RuleOpList to
// RegExp* / RuleList:
//   specNone     - set by the grammar actions that report "code to handle
//                  illegal condition already defined" on a second definition.
//   specStar     - rules that are later copied into every entry of specMap
//                  (see the "merge <*> rules to all conditions" loop).
//   star_default - set by the action that reports "code to default rule '*'
//                  is already defined" on a second definition.
static std::vector<std::string> condnames;
static re2c::SpecMap specMap;
static Spec spec;
-static RuleOp *specNone = NULL;
-static RuleOpList specStar;
-static RuleOp * star_default = NULL;
+static RegExp *specNone = NULL;
+static RuleList specStar;
+static RegExp *star_default = NULL;
static Scanner *in = NULL;
static Scanner::ParseMode parseMode;
static SetupMap ruleSetupMap;
condnames.push_back (*it);
}
- RuleOp * rule = new RuleOp
+ RegExp *rule = RegExp::rule
( loc
, expr
, look
context_check(clist);
for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it)
{
- RuleOp * def = new RuleOp
+ RegExp * def = RegExp::rule
( code->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, code
, NULL
{
in->fatal("condition or '<*>' required when using -c switch");
}
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( $3->loc
, $1
, $2
{
if (opts->cFlag)
in->fatal("condition or '<*>' required when using -c switch");
- RuleOp * def = new RuleOp
+ RegExp * def = RegExp::rule
( $2->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, $2
, NULL
| '<' TOKEN_STAR '>' expr look newcond TOKEN_CODE
{
context_check(NULL);
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( $7->loc
, $4
, $5
assert($7);
context_check(NULL);
Loc loc (in->get_fname (), in->get_cline ());
- RuleOp * rule = new RuleOp
+ RegExp * rule = RegExp::rule
( loc
, $4
, $5
{
in->fatal ("code to default rule '*' is already defined");
}
- star_default = new RuleOp
+ star_default = RegExp::rule
( $5->loc
, in->mkDefault ()
- , new NullOp
+ , RegExp::nil()
, rule_rank_t::def ()
, $5
, NULL
{
in->fatal("code to handle illegal condition already defined");
}
- $$ = specNone = new RuleOp
+ $$ = specNone = RegExp::rule
( $3->loc
- , new NullOp
- , new NullOp
+ , RegExp::nil()
+ , RegExp::nil()
, rank_counter.next ()
, $3
, $2
in->fatal("code to handle illegal condition already defined");
}
Loc loc (in->get_fname (), in->get_cline ());
- $$ = specNone = new RuleOp
+ $$ = specNone = RegExp::rule
( loc
- , new NullOp
- , new NullOp
+ , RegExp::nil()
+ , RegExp::nil()
, rank_counter.next ()
, NULL
, $3
look:
/* empty */
{
- $$ = new NullOp;
+ $$ = RegExp::nil();
}
| '/' expr
{
}
| term factor
{
- $$ = new CatOp($1, $2);
+ $$ = RegExp::cat($1, $2);
}
;
switch($2)
{
case '*':
- $$ = new CloseOp($1);
+ $$ = RegExp::iter($1);
break;
case '+':
- $$ = new CatOp (new CloseOp($1), $1);
+ $$ = RegExp::cat(RegExp::iter($1), $1);
break;
case '?':
- $$ = mkAlt($1, new NullOp());
+ $$ = mkAlt($1, RegExp::nil());
break;
}
}
{
$$ = repeat_from_to ($1, $2.min, $2.max);
}
- $$ = $$ ? $$ : new NullOp;
+ $$ = $$ ? $$ : RegExp::nil();
}
;
// merge <*> rules to all conditions with lowest priority
for (it = specMap.begin(); it != specMap.end(); ++it)
{
- for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
+ for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
{
- RuleOp *r = new RuleOp(*itOp, rank_counter.next());
+ RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next());
it->second.add (r);
}
if (star_default)
void parse_cleanup()
{
- RegExp::vFreeList.clear();
+ RegExp::flist.clear();
Range::vFreeList.clear();
RangeSuffix::freeList.clear();
Code::freelist.clear();
#include <algorithm>
#include "src/ir/regexp/regexp.h"
-#include "src/ir/regexp/regexp_rule.h"
#include "src/parse/rules.h"
namespace re2c
rules = spec.rules;
return *this;
}
- bool add_def (RuleOp * r)
+ bool add_def (RegExp * r)
{
if (std::find_if(rules.begin(), rules.end(), is_def) != rules.end())
{
return true;
}
}
// Registers rule `r` with this spec: appends the rule's info record to `rules`
// and combines `r` into `re` through mkAlt (presumably an alternation with the
// rules added so far -- mkAlt is defined elsewhere; confirm).
// This hunk retypes the parameter from RuleOp* to RegExp*, so the info record
// is now read through r->pld.rule.info instead of r->info.
// NOTE(review): reading pld.rule assumes `r` was built as a rule node (e.g. by
// RegExp::rule / RegExp::rule_copy) -- nothing here checks that; confirm callers.
- void add (RuleOp * r)
+ void add (RegExp * r)
{
- rules.push_back(r->info);
+ rules.push_back(r->pld.rule.info);
re = mkAlt (re, r);
}
void clear ()