-/* Generated by re2c 0.16 on Fri Mar 3 02:04:11 2017 */
+/* Generated by re2c 0.16 on Sat Mar 4 16:56:34 2017 */
#line 1 "../src/parse/lex.re"
#include "src/util/c99_stdint.h"
#include <stddef.h>
yy159:
++YYCURSOR;
yy160:
-#line 376 "../src/parse/lex.re"
+#line 380 "../src/parse/lex.re"
{
fatalf("unexpected character: '%c'", *tok);
goto scan;
if (yybm[0+yych] & 16) {
goto yy161;
}
-#line 360 "../src/parse/lex.re"
+#line 364 "../src/parse/lex.re"
{
goto scan;
}
if (yych == '#') goto yy190;
}
yy165:
-#line 369 "../src/parse/lex.re"
+#line 373 "../src/parse/lex.re"
{
if (cur == eof) return 0;
pos = cur;
goto yy168;
yy175:
++YYCURSOR;
-#line 355 "../src/parse/lex.re"
+#line 359 "../src/parse/lex.re"
{
- yylval.regexp = mkDot();
+ yylval.regexp = RegExp::make_dot(cline, get_column(), opts, warn);
return TOKEN_REGEXP;
}
#line 1058 "src/parse/lex.cc"
const RegExp *r = NULL;
const bool casing = opts->bCaseInsensitive || opts->bCaseInverted;
for (char *s = tok; s < cur; ++s) {
- const uint32_t c = static_cast<uint8_t>(*s);
- r = doCat(r, casing ? ichr(c) : schr(c));
+ const uint32_t
+ c = static_cast<uint8_t>(*s),
+ column = static_cast<uint32_t>(s - pos);
+ r = RegExp::make_cat(r, casing
+ ? RegExp::make_ichar(cline, column, c, opts)
+ : RegExp::make_schar(cline, column, c, opts));
}
- yylval.regexp = r ? r : RegExp::make_nil();
+ yylval.regexp = r ? r : RegExp::make_nil(cline, get_column());
return TOKEN_REGEXP;
}
}
-#line 1225 "src/parse/lex.cc"
+#line 1229 "src/parse/lex.cc"
yy200:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yylval.str = new std::string (tok, tok_len ());
return TOKEN_ID;
}
-#line 1250 "src/parse/lex.cc"
+#line 1254 "src/parse/lex.cc"
yy204:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '>') goto yy221;
code_line = cline;
goto code;
}
-#line 1261 "src/parse/lex.cc"
+#line 1265 "src/parse/lex.cc"
yy206:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
fatal("tags are only allowed with '-T, --tags' option");
}
const std::string *name = new std::string(tok + 1, tok_len() - 1);
- yylval.regexp = RegExp::make_tag(name);
+ yylval.regexp = RegExp::make_tag(cline, get_column(), name);
return TOKEN_REGEXP;
}
-#line 1288 "src/parse/lex.cc"
+#line 1292 "src/parse/lex.cc"
yy209:
++YYCURSOR;
#line 263 "../src/parse/lex.re"
{ yylval.regexp = lex_cls(true); return TOKEN_REGEXP; }
-#line 1293 "src/parse/lex.cc"
+#line 1297 "src/parse/lex.cc"
yy211:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '2') goto yy223;
{
fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers");
}
-#line 1305 "src/parse/lex.cc"
+#line 1309 "src/parse/lex.cc"
yy214:
++YYCURSOR;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
return TOKEN_ID;
}
}
-#line 1371 "src/parse/lex.cc"
+#line 1375 "src/parse/lex.cc"
yy221:
++YYCURSOR;
YYCURSOR -= 2;
{
return *tok;
}
-#line 1379 "src/parse/lex.cc"
+#line 1383 "src/parse/lex.cc"
yy223:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'c') goto yy230;
yylval.extop.max = yylval.extop.min;
return TOKEN_CLOSESIZE;
}
-#line 1402 "src/parse/lex.cc"
+#line 1406 "src/parse/lex.cc"
yy227:
++YYCURSOR;
#line 311 "../src/parse/lex.re"
yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces
return TOKEN_ID;
}
-#line 1413 "src/parse/lex.cc"
+#line 1417 "src/parse/lex.cc"
yy229:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'n') goto yy235;
yylval.extop.max = std::numeric_limits<uint32_t>::max();
return TOKEN_CLOSESIZE;
}
-#line 1441 "src/parse/lex.cc"
+#line 1445 "src/parse/lex.cc"
yy235:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == 'e') goto yy240;
++YYCURSOR;
#line 319 "../src/parse/lex.re"
{ lex_conf (); return TOKEN_CONF; }
-#line 1450 "src/parse/lex.cc"
+#line 1454 "src/parse/lex.cc"
yy238:
++YYCURSOR;
#line 285 "../src/parse/lex.re"
}
return TOKEN_CLOSESIZE;
}
-#line 1466 "src/parse/lex.cc"
+#line 1470 "src/parse/lex.cc"
yy240:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '0') goto yy242;
yy247:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 364 "../src/parse/lex.re"
+#line 368 "../src/parse/lex.re"
{
set_sourceline ();
goto scan;
}
-#line 1527 "src/parse/lex.cc"
+#line 1531 "src/parse/lex.cc"
yy249:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy247;
if (yych == '\n') goto yy189;
goto yy250;
}
-#line 380 "../src/parse/lex.re"
+#line 384 "../src/parse/lex.re"
flex_name:
-#line 1557 "src/parse/lex.cc"
+#line 1561 "src/parse/lex.cc"
{
YYCTYPE yych;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
if (yych == '\r') goto yy260;
++YYCURSOR;
yy257:
-#line 391 "../src/parse/lex.re"
+#line 395 "../src/parse/lex.re"
{
YYCURSOR = tok;
goto start;
}
-#line 1571 "src/parse/lex.cc"
+#line 1575 "src/parse/lex.cc"
yy258:
++YYCURSOR;
-#line 385 "../src/parse/lex.re"
+#line 389 "../src/parse/lex.re"
{
YYCURSOR = tok;
lexer_state = LEX_NORMAL;
return TOKEN_FID_END;
}
-#line 1580 "src/parse/lex.cc"
+#line 1584 "src/parse/lex.cc"
yy260:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy258;
goto yy257;
}
-#line 395 "../src/parse/lex.re"
+#line 399 "../src/parse/lex.re"
code:
-#line 1591 "src/parse/lex.cc"
+#line 1595 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
}
yy263:
++YYCURSOR;
-#line 458 "../src/parse/lex.re"
+#line 462 "../src/parse/lex.re"
{
if (cur == eof)
{
}
goto code;
}
-#line 1664 "src/parse/lex.cc"
+#line 1668 "src/parse/lex.cc"
yy265:
++YYCURSOR;
yy266:
-#line 472 "../src/parse/lex.re"
+#line 476 "../src/parse/lex.re"
{
goto code;
}
-#line 1672 "src/parse/lex.cc"
+#line 1676 "src/parse/lex.cc"
yy267:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
}
yy268:
-#line 439 "../src/parse/lex.re"
+#line 443 "../src/parse/lex.re"
{
if (depth == 0)
{
cline++;
goto code;
}
-#line 1711 "src/parse/lex.cc"
+#line 1715 "src/parse/lex.cc"
yy269:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
goto yy287;
yy271:
++YYCURSOR;
-#line 411 "../src/parse/lex.re"
+#line 415 "../src/parse/lex.re"
{
if (depth == 0)
{
}
goto code;
}
-#line 1736 "src/parse/lex.cc"
+#line 1740 "src/parse/lex.cc"
yy273:
++YYCURSOR;
-#line 399 "../src/parse/lex.re"
+#line 403 "../src/parse/lex.re"
{
if (depth == 0)
{
}
goto code;
}
-#line 1752 "src/parse/lex.cc"
+#line 1756 "src/parse/lex.cc"
yy275:
yyaccept = 2;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy276:
YYCURSOR -= 1;
-#line 426 "../src/parse/lex.re"
+#line 430 "../src/parse/lex.re"
{
if (depth == 0)
{
cline++;
goto code;
}
-#line 1778 "src/parse/lex.cc"
+#line 1782 "src/parse/lex.cc"
yy277:
++YYCURSOR;
goto yy276;
if (yych >= '#') goto yy285;
yy283:
++YYCURSOR;
-#line 469 "../src/parse/lex.re"
+#line 473 "../src/parse/lex.re"
{
goto code;
}
-#line 1817 "src/parse/lex.cc"
+#line 1821 "src/parse/lex.cc"
yy285:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yy301:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 422 "../src/parse/lex.re"
+#line 426 "../src/parse/lex.re"
{
set_sourceline ();
goto code;
}
-#line 1912 "src/parse/lex.cc"
+#line 1916 "src/parse/lex.cc"
yy303:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy301;
if (yych == '\n') goto yy280;
goto yy304;
}
-#line 475 "../src/parse/lex.re"
+#line 479 "../src/parse/lex.re"
comment:
-#line 1946 "src/parse/lex.cc"
+#line 1950 "src/parse/lex.cc"
{
YYCTYPE yych;
static const unsigned char yybm[] = {
}
++YYCURSOR;
yy311:
-#line 507 "../src/parse/lex.re"
+#line 511 "../src/parse/lex.re"
{
if (cur == eof)
{
}
goto comment;
}
-#line 2001 "src/parse/lex.cc"
+#line 2005 "src/parse/lex.cc"
yy312:
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yybm[0+yych] & 32) {
}
if (yych == '#') goto yy319;
yy313:
-#line 498 "../src/parse/lex.re"
+#line 502 "../src/parse/lex.re"
{
if (cur == eof)
{
cline++;
goto comment;
}
-#line 2019 "src/parse/lex.cc"
+#line 2023 "src/parse/lex.cc"
yy314:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '/') goto yy321;
}
yy321:
++YYCURSOR;
-#line 479 "../src/parse/lex.re"
+#line 483 "../src/parse/lex.re"
{
if (--depth == 0)
{
goto comment;
}
}
-#line 2064 "src/parse/lex.cc"
+#line 2068 "src/parse/lex.cc"
yy323:
++YYCURSOR;
-#line 489 "../src/parse/lex.re"
+#line 493 "../src/parse/lex.re"
{
++depth;
fatal("ambiguous /* found");
goto comment;
}
-#line 2073 "src/parse/lex.cc"
+#line 2077 "src/parse/lex.cc"
yy325:
yych = (YYCTYPE)*++YYCURSOR;
if (yych != 'i') goto yy318;
yy335:
++YYCURSOR;
YYCURSOR = YYCTXMARKER;
-#line 494 "../src/parse/lex.re"
+#line 498 "../src/parse/lex.re"
{
set_sourceline ();
goto comment;
}
-#line 2135 "src/parse/lex.cc"
+#line 2139 "src/parse/lex.cc"
yy337:
yych = (YYCTYPE)*++YYCURSOR;
if (yych == '\n') goto yy335;
if (yych == '\n') goto yy318;
goto yy338;
}
-#line 514 "../src/parse/lex.re"
+#line 518 "../src/parse/lex.re"
nextLine:
-#line 2165 "src/parse/lex.cc"
+#line 2169 "src/parse/lex.cc"
{
YYCTYPE yych;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yych = (YYCTYPE)*YYCURSOR;
if (yych == '\n') goto yy346;
++YYCURSOR;
-#line 525 "../src/parse/lex.re"
+#line 529 "../src/parse/lex.re"
{ if(cur == eof) {
return 0;
}
goto nextLine;
}
-#line 2178 "src/parse/lex.cc"
+#line 2182 "src/parse/lex.cc"
yy346:
++YYCURSOR;
-#line 518 "../src/parse/lex.re"
+#line 522 "../src/parse/lex.re"
{ if(cur == eof) {
return 0;
}
cline++;
goto scan;
}
-#line 2189 "src/parse/lex.cc"
+#line 2193 "src/parse/lex.cc"
}
-#line 530 "../src/parse/lex.re"
+#line 534 "../src/parse/lex.re"
}
const RegExp *Scanner::lex_cls(bool neg)
{
+ const uint32_t column = get_column();
Range *r = NULL, *s;
uint32_t u, l;
fst:
-#line 2215 "src/parse/lex.cc"
+#line 2220 "src/parse/lex.cc"
{
YYCTYPE yych;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
yych = (YYCTYPE)*YYCURSOR;
if (yych == ']') goto yy351;
-#line 554 "../src/parse/lex.re"
+#line 559 "../src/parse/lex.re"
{ l = lex_cls_chr(); goto snd; }
-#line 2223 "src/parse/lex.cc"
+#line 2228 "src/parse/lex.cc"
yy351:
++YYCURSOR;
-#line 553 "../src/parse/lex.re"
+#line 558 "../src/parse/lex.re"
{ goto end; }
-#line 2228 "src/parse/lex.cc"
+#line 2233 "src/parse/lex.cc"
}
-#line 555 "../src/parse/lex.re"
+#line 560 "../src/parse/lex.re"
snd:
-#line 2234 "src/parse/lex.cc"
+#line 2239 "src/parse/lex.cc"
{
YYCTYPE yych;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = (YYCTYPE)*(YYMARKER = YYCURSOR);
if (yych == '-') goto yy356;
yy355:
-#line 558 "../src/parse/lex.re"
+#line 563 "../src/parse/lex.re"
{ u = l; goto add; }
-#line 2243 "src/parse/lex.cc"
+#line 2248 "src/parse/lex.cc"
yy356:
yych = (YYCTYPE)*++YYCURSOR;
if (yych != ']') goto yy358;
yy358:
++YYCURSOR;
YYCURSOR -= 1;
-#line 559 "../src/parse/lex.re"
+#line 564 "../src/parse/lex.re"
{
u = lex_cls_chr();
if (l > u) {
}
goto add;
}
-#line 2261 "src/parse/lex.cc"
+#line 2266 "src/parse/lex.cc"
}
-#line 567 "../src/parse/lex.re"
+#line 572 "../src/parse/lex.re"
add:
if (!(s = opts->encoding.encodeRange(l, u))) {
if (neg) {
r = Range::sub(opts->encoding.fullRange(), r);
}
- return cls(r);
+ return RegExp::make_class(cline, column, r, opts, warn);
}
uint32_t Scanner::lex_cls_chr()
{
tok = cur;
-#line 2282 "src/parse/lex.cc"
+#line 2287 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
if (yych == '\n') goto yy364;
if (yych == '\\') goto yy366;
++YYCURSOR;
-#line 590 "../src/parse/lex.re"
+#line 595 "../src/parse/lex.re"
{ return static_cast<uint8_t>(tok[0]); }
-#line 2293 "src/parse/lex.cc"
+#line 2298 "src/parse/lex.cc"
yy364:
++YYCURSOR;
-#line 585 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error"); }
-#line 2298 "src/parse/lex.cc"
+#line 590 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error"); }
+#line 2303 "src/parse/lex.cc"
yy366:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '`') {
}
}
}
-#line 588 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
-#line 2349 "src/parse/lex.cc"
+#line 593 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in escape sequence"); }
+#line 2354 "src/parse/lex.cc"
yy368:
++YYCURSOR;
-#line 603 "../src/parse/lex.re"
+#line 608 "../src/parse/lex.re"
{
- warn.useless_escape(cline, tok - pos, tok[1]);
+ warn.useless_escape(cline, get_column(), tok[1]);
return static_cast<uint8_t>(tok[1]);
}
-#line 2357 "src/parse/lex.cc"
+#line 2362 "src/parse/lex.cc"
yy370:
++YYCURSOR;
-#line 601 "../src/parse/lex.re"
+#line 606 "../src/parse/lex.re"
{ return static_cast<uint8_t>('-'); }
-#line 2362 "src/parse/lex.cc"
+#line 2367 "src/parse/lex.cc"
yy372:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych <= '/') goto yy373;
if (yych <= '7') goto yy397;
yy373:
-#line 587 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
-#line 2371 "src/parse/lex.cc"
+#line 592 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in octal escape sequence"); }
+#line 2376 "src/parse/lex.cc"
yy374:
++YYCURSOR;
goto yy373;
if (yych <= 'f') goto yy399;
}
yy376:
-#line 586 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
-#line 2389 "src/parse/lex.cc"
+#line 591 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in hexadecimal escape sequence"); }
+#line 2394 "src/parse/lex.cc"
yy377:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy378:
++YYCURSOR;
-#line 600 "../src/parse/lex.re"
+#line 605 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\\'); }
-#line 2407 "src/parse/lex.cc"
+#line 2412 "src/parse/lex.cc"
yy380:
++YYCURSOR;
-#line 602 "../src/parse/lex.re"
+#line 607 "../src/parse/lex.re"
{ return static_cast<uint8_t>(']'); }
-#line 2412 "src/parse/lex.cc"
+#line 2417 "src/parse/lex.cc"
yy382:
++YYCURSOR;
-#line 593 "../src/parse/lex.re"
+#line 598 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\a'); }
-#line 2417 "src/parse/lex.cc"
+#line 2422 "src/parse/lex.cc"
yy384:
++YYCURSOR;
-#line 594 "../src/parse/lex.re"
+#line 599 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\b'); }
-#line 2422 "src/parse/lex.cc"
+#line 2427 "src/parse/lex.cc"
yy386:
++YYCURSOR;
-#line 595 "../src/parse/lex.re"
+#line 600 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\f'); }
-#line 2427 "src/parse/lex.cc"
+#line 2432 "src/parse/lex.cc"
yy388:
++YYCURSOR;
-#line 596 "../src/parse/lex.re"
+#line 601 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\n'); }
-#line 2432 "src/parse/lex.cc"
+#line 2437 "src/parse/lex.cc"
yy390:
++YYCURSOR;
-#line 597 "../src/parse/lex.re"
+#line 602 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\r'); }
-#line 2437 "src/parse/lex.cc"
+#line 2442 "src/parse/lex.cc"
yy392:
++YYCURSOR;
-#line 598 "../src/parse/lex.re"
+#line 603 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\t'); }
-#line 2442 "src/parse/lex.cc"
+#line 2447 "src/parse/lex.cc"
yy394:
++YYCURSOR;
-#line 599 "../src/parse/lex.re"
+#line 604 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\v'); }
-#line 2447 "src/parse/lex.cc"
+#line 2452 "src/parse/lex.cc"
yy396:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy402:
++YYCURSOR;
-#line 592 "../src/parse/lex.re"
+#line 597 "../src/parse/lex.re"
{ return unesc_oct(tok, cur); }
-#line 2512 "src/parse/lex.cc"
+#line 2517 "src/parse/lex.cc"
yy404:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
}
yy406:
++YYCURSOR;
-#line 591 "../src/parse/lex.re"
+#line 596 "../src/parse/lex.re"
{ return unesc_hex(tok, cur); }
-#line 2541 "src/parse/lex.cc"
+#line 2546 "src/parse/lex.cc"
yy408:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
goto yy398;
}
}
-#line 607 "../src/parse/lex.re"
+#line 612 "../src/parse/lex.re"
}
end = false;
tok = cur;
-#line 2574 "src/parse/lex.cc"
+#line 2579 "src/parse/lex.cc"
{
YYCTYPE yych;
unsigned int yyaccept = 0;
if (yych == '\n') goto yy414;
if (yych == '\\') goto yy416;
++YYCURSOR;
-#line 620 "../src/parse/lex.re"
+#line 625 "../src/parse/lex.re"
{
end = tok[0] == quote;
return static_cast<uint8_t>(tok[0]);
}
-#line 2588 "src/parse/lex.cc"
+#line 2593 "src/parse/lex.cc"
yy414:
++YYCURSOR;
-#line 615 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error"); }
-#line 2593 "src/parse/lex.cc"
+#line 620 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error"); }
+#line 2598 "src/parse/lex.cc"
yy416:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= 'a') {
}
}
}
-#line 618 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
-#line 2641 "src/parse/lex.cc"
+#line 623 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in escape sequence"); }
+#line 2646 "src/parse/lex.cc"
yy418:
++YYCURSOR;
-#line 634 "../src/parse/lex.re"
+#line 639 "../src/parse/lex.re"
{
if (tok[1] != quote) {
- warn.useless_escape(cline, tok - pos, tok[1]);
+ warn.useless_escape(cline, get_column(), tok[1]);
}
return static_cast<uint8_t>(tok[1]);
}
-#line 2651 "src/parse/lex.cc"
+#line 2656 "src/parse/lex.cc"
yy420:
yyaccept = 0;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych <= '/') goto yy421;
if (yych <= '7') goto yy443;
yy421:
-#line 617 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
-#line 2660 "src/parse/lex.cc"
+#line 622 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in octal escape sequence"); }
+#line 2665 "src/parse/lex.cc"
yy422:
++YYCURSOR;
goto yy421;
if (yych <= 'f') goto yy445;
}
yy424:
-#line 616 "../src/parse/lex.re"
- { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
-#line 2678 "src/parse/lex.cc"
+#line 621 "../src/parse/lex.re"
+ { fatal (get_column() - tchar, "syntax error in hexadecimal escape sequence"); }
+#line 2683 "src/parse/lex.cc"
yy425:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy426:
++YYCURSOR;
-#line 633 "../src/parse/lex.re"
+#line 638 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\\'); }
-#line 2696 "src/parse/lex.cc"
+#line 2701 "src/parse/lex.cc"
yy428:
++YYCURSOR;
-#line 626 "../src/parse/lex.re"
+#line 631 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\a'); }
-#line 2701 "src/parse/lex.cc"
+#line 2706 "src/parse/lex.cc"
yy430:
++YYCURSOR;
-#line 627 "../src/parse/lex.re"
+#line 632 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\b'); }
-#line 2706 "src/parse/lex.cc"
+#line 2711 "src/parse/lex.cc"
yy432:
++YYCURSOR;
-#line 628 "../src/parse/lex.re"
+#line 633 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\f'); }
-#line 2711 "src/parse/lex.cc"
+#line 2716 "src/parse/lex.cc"
yy434:
++YYCURSOR;
-#line 629 "../src/parse/lex.re"
+#line 634 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\n'); }
-#line 2716 "src/parse/lex.cc"
+#line 2721 "src/parse/lex.cc"
yy436:
++YYCURSOR;
-#line 630 "../src/parse/lex.re"
+#line 635 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\r'); }
-#line 2721 "src/parse/lex.cc"
+#line 2726 "src/parse/lex.cc"
yy438:
++YYCURSOR;
-#line 631 "../src/parse/lex.re"
+#line 636 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\t'); }
-#line 2726 "src/parse/lex.cc"
+#line 2731 "src/parse/lex.cc"
yy440:
++YYCURSOR;
-#line 632 "../src/parse/lex.re"
+#line 637 "../src/parse/lex.re"
{ return static_cast<uint8_t>('\v'); }
-#line 2731 "src/parse/lex.cc"
+#line 2736 "src/parse/lex.cc"
yy442:
yyaccept = 1;
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
}
yy448:
++YYCURSOR;
-#line 625 "../src/parse/lex.re"
+#line 630 "../src/parse/lex.re"
{ return unesc_oct(tok, cur); }
-#line 2796 "src/parse/lex.cc"
+#line 2801 "src/parse/lex.cc"
yy450:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
}
yy452:
++YYCURSOR;
-#line 624 "../src/parse/lex.re"
+#line 629 "../src/parse/lex.re"
{ return unesc_hex(tok, cur); }
-#line 2825 "src/parse/lex.cc"
+#line 2830 "src/parse/lex.cc"
yy454:
yych = (YYCTYPE)*++YYCURSOR;
if (yych <= '@') {
goto yy444;
}
}
-#line 640 "../src/parse/lex.re"
+#line 645 "../src/parse/lex.re"
}
for (bool end;;) {
const uint32_t c = lex_str_chr(quote, end);
if (end) {
- return r ? r : RegExp::make_nil();
+ return r ? r : RegExp::make_nil(cline, get_column());
}
- r = doCat(r, casing ? ichr(c) : schr(c));
+ r = RegExp::make_cat(r, casing
+ ? RegExp::make_ichar(cline, get_column(), c, opts)
+ : RegExp::make_schar(cline, get_column(), c, opts));
}
}
sourceline:
tok = cur;
-#line 2870 "src/parse/lex.cc"
+#line 2877 "src/parse/lex.cc"
{
YYCTYPE yych;
static const unsigned char yybm[] = {
yy458:
++YYCURSOR;
yy459:
-#line 683 "../src/parse/lex.re"
+#line 690 "../src/parse/lex.re"
{
goto sourceline;
}
-#line 2923 "src/parse/lex.cc"
+#line 2930 "src/parse/lex.cc"
yy460:
++YYCURSOR;
-#line 671 "../src/parse/lex.re"
+#line 678 "../src/parse/lex.re"
{
if (cur == eof)
{
tok = cur;
return;
}
-#line 2939 "src/parse/lex.cc"
+#line 2946 "src/parse/lex.cc"
yy462:
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR);
if (yych == '\n') goto yy459;
if (yybm[0+yych] & 64) {
goto yy463;
}
-#line 660 "../src/parse/lex.re"
+#line 667 "../src/parse/lex.re"
{
if (!s_to_u32_unsafe (tok, cur, cline))
{
}
goto sourceline;
}
-#line 2959 "src/parse/lex.cc"
+#line 2966 "src/parse/lex.cc"
yy466:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
goto yy459;
yy469:
++YYCURSOR;
-#line 667 "../src/parse/lex.re"
+#line 674 "../src/parse/lex.re"
{
escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes
goto sourceline;
}
-#line 2981 "src/parse/lex.cc"
+#line 2988 "src/parse/lex.cc"
yy471:
++YYCURSOR;
if (YYLIMIT <= YYCURSOR) YYFILL(1);
if (yych == '\n') goto yy468;
goto yy466;
}
-#line 686 "../src/parse/lex.re"
+#line 693 "../src/parse/lex.re"
}
-/* Generated by re2c 0.16 on Fri Mar 3 02:03:51 2017 */
+/* Generated by re2c 0.16 on Sat Mar 4 15:12:07 2017 */
#line 1 "../src/parse/lex_conf.re"
#include "src/util/c99_stdint.h"
#include <string>
++cur;
yy3:
#line 40 "../src/parse/lex_conf.re"
- { fatal ((tok - pos) - tchar, "unrecognized configuration"); }
+ { fatal(get_column() - tchar, "unrecognized configuration"); }
#line 80 "src/parse/lex_conf.cc"
yy4:
yyaccept = 0;
// merge default rule with the lowest priority
for (i = b; i != e; ++i) {
if (!i->defs.empty()) {
- i->rules.push_back(RegExpRule(in.mkDefault(), i->defs[0]));
+ const Code *c = i->defs[0];
+ const RegExp *r = RegExp::make_default(c->fline, 0, in.opts);
+ i->rules.push_back(RegExpRule(r, c));
}
}
/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 199, 199, 201, 202, 203, 207, 214, 219, 222,
- 226, 226, 229, 233, 237, 244, 251, 258, 263, 265,
- 271, 278, 279, 285, 291, 298, 299, 304, 312, 316,
- 323, 327, 334, 338, 345, 346, 352, 357, 358, 362,
- 363, 364, 368, 369, 379
+ 0, 201, 201, 203, 204, 205, 209, 216, 221, 224,
+ 228, 228, 231, 235, 239, 246, 253, 260, 266, 268,
+ 274, 281, 282, 288, 294, 301, 302, 307, 315, 319,
+ 326, 330, 337, 341, 348, 349, 355, 360, 361, 365,
+ 366, 367, 371, 372, 382
};
#endif
case 17:
{
- find(context.specs, "0").rules.push_back(RegExpRule(RegExp::make_nil(), (yyvsp[0].code)));
+ const RegExp *r = RegExp::make_nil(context.input.get_cline(), 0);
+ find(context.specs, "0").rules.push_back(RegExpRule(r, (yyvsp[0].code)));
}
break;
case 26:
{
- (yyval.regexp) = RegExp::make_cat((yyvsp[-2].regexp), RegExp::make_cat(RegExp::make_tag(NULL), (yyvsp[0].regexp)));
+ (yyval.regexp) = RegExp::make_cat((yyvsp[-2].regexp), RegExp::make_cat(RegExp::make_tag(context.input.get_cline(), 0, NULL), (yyvsp[0].regexp)));
}
break;
case 29:
{
- (yyval.regexp) = mkAlt((yyvsp[-2].regexp), (yyvsp[0].regexp));
+ (yyval.regexp) = RegExp::make_alt((yyvsp[-2].regexp), (yyvsp[0].regexp));
}
break;
case 31:
{
- (yyval.regexp) = context.input.mkDiff((yyvsp[-2].regexp), (yyvsp[0].regexp));
+ (yyval.regexp) = RegExp::make_diff((yyvsp[-2].regexp), (yyvsp[0].regexp), context.input.opts, context.input.warn);
}
break;
context.input.fatal("can't find symbol");
}
(yyval.regexp) = i->second;
- if (context.input.opts->posix_captures && need_wrap((yyval.regexp))) {
+ if (context.input.opts->posix_captures && RegExp::need_wrap((yyval.regexp))) {
(yyval.regexp) = RegExp::make_ref((yyval.regexp), *(yyvsp[0].str));
}
delete (yyvsp[0].str);
#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string>
#include "config.h"
fprintf (stderr, "\n");
}
+/*
+ * Report a fatal error and terminate the program.
+ * Writes "re2c: error: line <line>, column <column>: " to stderr, followed
+ * by the printf-style message built from 'fmt' and the variadic arguments
+ * and a newline, then calls exit(1).
+ */
+void fatal_error(uint32_t line, uint32_t column, const char *fmt, ...)
+{
+ fprintf (stderr, "re2c: error: line %u, column %u: ", line, column);
+
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
+
+ fprintf (stderr, "\n");
+ // control never returns to the caller past this point
+ exit(1);
+}
+
void error_encoding ()
{
error ("only one of switches -e, -w, -x, -u and -8 must be set");
namespace re2c {
void error (const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2)));
+void fatal_error(uint32_t line, uint32_t column, const char *fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 3, 4)));
void error_encoding ();
void error_arg (const char * option);
void warning_start (uint32_t line, bool error);
namespace re2c {
-static const RegExp * emit (RangeSuffix * p, const RegExp * re);
+static const RegExp *emit(uint32_t l, uint32_t c, RangeSuffix * p, const RegExp * re);
free_list<RangeSuffix *> RangeSuffix::freeList;
-const RegExp * to_regexp (RangeSuffix * p)
+/*
+ * Convert a suffix tree into a regexp.
+ * A NULL tree yields a symbol regexp with a NULL range; any other tree
+ * is expanded by emit(). 'l' and 'c' are forwarded unchanged to emit()
+ * and RegExp::make_sym().
+ */
+const RegExp *to_regexp(uint32_t l, uint32_t c, RangeSuffix * p)
{
- return p ? emit(p, NULL)
- : RegExp::make_sym(NULL);
+ return p ? emit(l, c, p, NULL)
+ : RegExp::make_sym(l, c, NULL);
}
/*
* Build regexp from suffix tree.
*/
-const RegExp * emit(RangeSuffix * p, const RegExp * re)
+const RegExp *emit(uint32_t l, uint32_t c, RangeSuffix * p, const RegExp * re)
{
if (p == NULL) {
return re;
} else {
const RegExp *regexp = NULL;
for (; p != NULL; p = p->next) {
- const RegExp *re1 = doCat(RegExp::make_sym(Range::ran(p->l, p->h + 1)), re);
- regexp = doAlt(regexp, emit(p->child, re1));
+ const RegExp *re1 = RegExp::make_cat(
+ RegExp::make_sym(l, c, Range::ran(p->l, p->h + 1)), re);
+ regexp = RegExp::make_alt(regexp, emit(l, c, p->child, re1));
}
return regexp;
}
FORBID_COPY (RangeSuffix);
};
-const RegExp * to_regexp (RangeSuffix * p);
+const RegExp *to_regexp(uint32_t l, uint32_t c, RangeSuffix * p);
} // namespace re2c
namespace re2c {
-const RegExp * UTF16Symbol(utf16::rune r)
+/*
+ * Build the regexp for a single rune 'r'.
+ * Runes not greater than utf16::MAX_1WORD_RUNE become one symbol; larger
+ * runes become the concatenation of two symbols taken from
+ * utf16::lead_surr(r) and utf16::trail_surr(r).
+ * 'l' and 'c' are passed to every RegExp::make_sym call.
+ */
+const RegExp *UTF16Symbol(uint32_t l, uint32_t c, utf16::rune r)
{
if (r <= utf16::MAX_1WORD_RUNE) {
- return RegExp::make_sym(Range::sym(r));
+ return RegExp::make_sym(l, c, Range::sym(r));
} else {
const uint32_t ld = utf16::lead_surr(r);
const uint32_t tr = utf16::trail_surr(r);
- return RegExp::make_cat(RegExp::make_sym(Range::sym(ld)),
- RegExp::make_sym(Range::sym(tr)));
+ return RegExp::make_cat(RegExp::make_sym(l, c, Range::sym(ld)),
+ RegExp::make_sym(l, c, Range::sym(tr)));
}
}
* them. We store partially built range in suffix tree, which
* allows to eliminate common suffixes while building.
*/
-const RegExp * UTF16Range(const Range * r)
+const RegExp *UTF16Range(uint32_t l, uint32_t c, const Range * r)
{
RangeSuffix * root = NULL;
+ // walk the Range list; each node's bounds go to UTF16splitByRuneLength
+ // together with the tree root (upper bound is passed as upper() - 1)
for (; r != NULL; r = r->next ())
UTF16splitByRuneLength(root, r->lower (), r->upper () - 1);
- return to_regexp (root);
+ // l and c are handed on to to_regexp() unchanged
+ return to_regexp(l, c, root);
}
} // namespace re2c
class Range;
struct RegExp;
-const RegExp * UTF16Symbol(utf16::rune r);
-const RegExp * UTF16Range(const Range * r);
+const RegExp * UTF16Symbol(uint32_t l, uint32_t c, utf16::rune r);
+const RegExp * UTF16Range(uint32_t l, uint32_t c, const Range * r);
} // namespace re2c
namespace re2c {
-const RegExp * UTF8Symbol(utf8::rune r)
+/*
+ * Build the regexp for a single rune 'r'.
+ * utf8::rune_to_bytes() fills 'chars' and returns how many entries it
+ * wrote; the result is the concatenation of one symbol per entry, in
+ * order. 'l' and 'c' are passed to every RegExp::make_sym call.
+ */
+const RegExp * UTF8Symbol(uint32_t l, uint32_t c, utf8::rune r)
{
uint32_t chars[utf8::MAX_RUNE_LENGTH];
const uint32_t chars_count = utf8::rune_to_bytes(chars, r);
- const RegExp *re = RegExp::make_sym(Range::sym(chars[0]));
+ const RegExp *re = RegExp::make_sym(l, c, Range::sym(chars[0]));
for (uint32_t i = 1; i < chars_count; ++i) {
- re = RegExp::make_cat(re, RegExp::make_sym(Range::sym(chars[i])));
+ re = RegExp::make_cat(re, RegExp::make_sym(l, c, Range::sym(chars[i])));
}
return re;
}
* them. We store partially built range in suffix tree, which
* allows to eliminate common suffixes while building.
*/
-const RegExp * UTF8Range(const Range * r)
+const RegExp * UTF8Range(uint32_t l, uint32_t c, const Range * r)
{
RangeSuffix * root = NULL;
+ // walk the Range list; each node's bounds go to UTF8splitByRuneLength
+ // together with the tree root (upper bound is passed as upper() - 1)
for (; r != NULL; r = r->next ())
UTF8splitByRuneLength(root, r->lower (), r->upper () - 1);
- return to_regexp (root);
+ // l and c are handed on to to_regexp() unchanged
+ return to_regexp (l, c, root);
}
} // namespace re2c
class Range;
struct RegExp;
-const RegExp * UTF8Symbol(utf8::rune r);
-const RegExp * UTF8Range(const Range * r);
+const RegExp * UTF8Symbol(uint32_t l, uint32_t c, utf8::rune r);
+const RegExp * UTF8Range(uint32_t l, uint32_t c, const Range * r);
} // namespace re2c
#include <limits>
#include <stddef.h>
+#include "src/conf/msg.h"
+#include "src/conf/opt.h"
#include "src/ir/regexp/empty_class_policy.h"
#include "src/ir/regexp/encoding/case.h"
#include "src/ir/regexp/encoding/enc.h"
const uint32_t RegExp::MANY = std::numeric_limits<uint32_t>::max();
-const RegExp *doAlt(const RegExp *re1, const RegExp *re2)
-{
- if (!re1) {
- return re2;
- }
- if (!re2) {
- return re1;
- }
- return RegExp::make_alt(re1, re2);
-}
-
-const RegExp *mkAlt(const RegExp *re1, const RegExp *re2)
-{
- if (!re1) return re2;
- if (!re2) return re1;
- if (re1->type == RegExp::SYM && re2->type == RegExp::SYM) {
- return RegExp::make_sym(Range::add(re1->sym, re2->sym));
- }
- return RegExp::make_alt(re1, re2);
-}
-
-const RegExp *doCat(const RegExp *re1, const RegExp *re2)
-{
- if (!re1) {
- return re2;
- }
- if (!re2) {
- return re1;
- }
- return RegExp::make_cat(re1, re2);
-}
-
-const RegExp *Scanner::schr(uint32_t c) const
+/*
+ * Build the regexp for a single code point 'c' under the encoding in 'opts'.
+ * If opts->encoding.encode(c) fails, a fatal error is reported at
+ * line/column. UTF-16 and UTF-8 encodings delegate to UTF16Symbol and
+ * UTF8Symbol; any other encoding type produces one symbol regexp.
+ */
+const RegExp *RegExp::make_schar(uint32_t line, uint32_t column, uint32_t c, Opt &opts)
{
if (!opts->encoding.encode(c)) {
- fatalf("Bad code point: '0x%X'", c);
+ fatal_error(line, column, "bad code point: '0x%X'", c);
}
switch (opts->encoding.type ()) {
- case Enc::UTF16: return UTF16Symbol(c);
- case Enc::UTF8: return UTF8Symbol(c);
- default: return RegExp::make_sym(Range::sym(c));
+ case Enc::UTF16: return UTF16Symbol(line, column, c);
+ case Enc::UTF8: return UTF8Symbol(line, column, c);
+ default: return RegExp::make_sym(line, column, Range::sym(c));
}
}
-const RegExp *Scanner::ichr(uint32_t c) const
+/*
+ * Case-insensitive counterpart of make_schar.
+ * When is_alpha(c) holds, the result is the alternative of the regexps
+ * for to_lower_unsafe(c) and to_upper_unsafe(c); otherwise it is the
+ * same as make_schar(line, column, c, opts).
+ */
+const RegExp *RegExp::make_ichar(uint32_t line, uint32_t column, uint32_t c, Opt &opts)
{
if (is_alpha(c)) {
- const RegExp *l = schr(to_lower_unsafe(c));
- const RegExp *u = schr(to_upper_unsafe(c));
- return mkAlt(l, u);
+ const RegExp *l = RegExp::make_schar(line, column, to_lower_unsafe(c), opts);
+ const RegExp *u = RegExp::make_schar(line, column, to_upper_unsafe(c), opts);
+ return RegExp::make_alt(l, u);
} else {
- return schr(c);
+ return RegExp::make_schar(line, column, c, opts);
}
}
-const RegExp *Scanner::cls(const Range *r) const
+/*
+ * Build the regexp for the character class described by range list 'r'.
+ * A NULL 'r' is handled according to opts->empty_class_policy:
+ *   EMPTY_CLASS_MATCH_EMPTY - warn and return a NIL regexp;
+ *   EMPTY_CLASS_MATCH_NONE  - warn and continue with the NULL range;
+ *   EMPTY_CLASS_ERROR       - report a fatal error at line/column.
+ * Otherwise the range is converted by UTF16Range, UTF8Range or
+ * RegExp::make_sym depending on the encoding type.
+ */
+const RegExp *RegExp::make_class(uint32_t line, uint32_t column, const Range *r, Opt &opts, Warn &warn)
{
if (!r) {
switch (opts->empty_class_policy) {
case EMPTY_CLASS_MATCH_EMPTY:
- warn.empty_class(cline);
- return RegExp::make_nil();
+ warn.empty_class(line);
+ return RegExp::make_nil(line, column);
case EMPTY_CLASS_MATCH_NONE:
- warn.empty_class(cline);
+ warn.empty_class(line);
break;
case EMPTY_CLASS_ERROR:
- fatal("empty character class");
+ fatal_error(line, column, "empty character class");
+ break;
}
}
switch (opts->encoding.type()) {
- case Enc::UTF16: return UTF16Range(r);
- case Enc::UTF8: return UTF8Range(r);
- default: return RegExp::make_sym(r);
+ case Enc::UTF16: return UTF16Range(line, column, r);
+ case Enc::UTF8: return UTF8Range(line, column, r);
+ default: return RegExp::make_sym(line, column, r);
}
}
-const RegExp *Scanner::mkDiff(const RegExp *re1, const RegExp *re2) const
+/*
+ * Build the difference of two character sets.
+ * Both operands must be non-NULL SYM regexps; the result is the class
+ * for Range::sub(re1->sym, re2->sym), located at re1's line and column.
+ * Any other combination of operands is a fatal error.
+ */
+const RegExp *RegExp::make_diff(const RegExp *re1, const RegExp *re2, Opt &opts, Warn &warn)
{
if (re1 && re2
&& re1->type == RegExp::SYM
&& re2->type == RegExp::SYM) {
- return cls(Range::sub(re1->sym, re2->sym));
+ return RegExp::make_class(re1->line, re1->column,
+ Range::sub(re1->sym, re2->sym), opts, warn);
}
- fatal("can only difference char sets");
+ // The check above also fails when an operand is NULL, so take the
+ // error location from whichever operand exists instead of
+ // dereferencing re1 unconditionally.
+ const RegExp *loc = re1 ? re1 : re2;
+ fatal_error(loc ? loc->line : 0, loc ? loc->column : 0,
+ "can only difference char sets");
return NULL;
}
-const RegExp *Scanner::mkDot() const
+/*
+ * Build the regexp for '.': the class obtained by subtracting the symbol
+ * for '\n' (after opts->encoding.encode) from opts->encoding.fullRange().
+ * A failing encode() is a fatal error reported at line/column.
+ */
+const RegExp *RegExp::make_dot(uint32_t line, uint32_t column, Opt &opts, Warn &warn)
{
uint32_t c = '\n';
if (!opts->encoding.encode(c)) {
- fatalf("Bad code point: '0x%X'", c);
+ fatal_error(line, column, "bad code point: '0x%X'", c);
}
- return cls(Range::sub(opts->encoding.fullRange(),
- Range::sym(c)));
+ return RegExp::make_class(line, column,
+ Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn);
}
/*
* Also note that default range doesn't respect encoding policy
* (the way invalid code points are treated).
*/
-const RegExp *Scanner::mkDefault() const
+const RegExp *RegExp::make_default(uint32_t line, uint32_t column, Opt &opts)
{
+ // one symbol regexp over Range::ran(0, opts->encoding.nCodeUnits()),
+ // created with the given line and column
- return RegExp::make_sym(Range::ran(0,
+ return RegExp::make_sym(line, column, Range::ran(0,
opts->encoding.nCodeUnits()));
}
-bool need_wrap(const RegExp *re)
+bool RegExp::need_wrap(const RegExp *re)
{
switch (re->type) {
case RegExp::ITER:
namespace re2c
{
+struct Opt;
+
struct RegExp
{
static free_list<RegExp*> flist;
const std::string *name;
} ref;
};
+ uint32_t line;
+ uint32_t column;
// Allocates a NIL node. After this change the node also records the source
// position (l = line, c = column) where it was created.
- static const RegExp *make_nil()
+ static const RegExp *make_nil(uint32_t l, uint32_t c)
{
- return new RegExp(NIL);
+ return new RegExp(l, c, NIL);
}
// Allocates a SYM node at position (l, c) whose symbol range is r.
// The Range pointer is stored as given; it is not copied.
- static const RegExp *make_sym(const Range *r)
+ static const RegExp *make_sym(uint32_t l, uint32_t c, const Range *r)
{
- RegExp *re = new RegExp(SYM);
+ RegExp *re = new RegExp(l, c, SYM);
re->sym = r;
return re;
}
// Builds the alternative r1 | r2.
//  - If either operand is NULL, the other operand is returned unchanged.
//  - If both are SYM nodes, they are merged into a single SYM node over
//    Range::add(r1->sym, r2->sym), positioned at r1.
//  - Otherwise an ALT node is allocated, positioned at r1.
// NOTE(review): the NULL and SYM shortcuts are new in this hunk; presumably
// they take over from the removed free functions mkAlt/doAlt. Confirm.
static const RegExp *make_alt(const RegExp *r1, const RegExp *r2)
{
- RegExp *re = new RegExp(ALT);
+ if (!r1) return r2;
+ if (!r2) return r1;
+ if (r1->type == RegExp::SYM && r2->type == RegExp::SYM) {
+ return RegExp::make_sym(r1->line, r1->column,
+ Range::add(r1->sym, r2->sym));
+ }
+ RegExp *re = new RegExp(r1->line, r1->column, ALT);
re->alt.re1 = r1;
re->alt.re2 = r2;
return re;
}
// Builds the concatenation r1 r2. If either operand is NULL the other one is
// returned unchanged, which lets callers accumulate into a pointer that starts
// as NULL. Otherwise a CAT node is allocated, positioned at r1.
static const RegExp *make_cat(const RegExp *r1, const RegExp *r2)
{
- RegExp *re = new RegExp(CAT);
+ if (!r1) return r2;
+ if (!r2) return r1;
+ RegExp *re = new RegExp(r1->line, r1->column, CAT);
re->cat.re1 = r1;
re->cat.re2 = r2;
return re;
}
// Allocates an ITER node that wraps r with min = n and max = m. The source
// position is copied from r, which is dereferenced without a NULL check, so r
// must not be NULL.
static const RegExp *make_iter(const RegExp *r, uint32_t n, uint32_t m)
{
- RegExp *re = new RegExp(ITER);
+ RegExp *re = new RegExp(r->line, r->column, ITER);
re->iter.re = r;
re->iter.min = n;
re->iter.max = m;
return re;
}
// Allocates a TAG node at position (l, c). The name pointer t is stored as
// given; it is not copied.
- static const RegExp *make_tag(const std::string *t)
+ static const RegExp *make_tag(uint32_t l, uint32_t c, const std::string *t)
{
- RegExp *re = new RegExp(TAG);
+ RegExp *re = new RegExp(l, c, TAG);
re->tag = t;
return re;
}
// Allocates a CAP node that wraps r. The source position is copied from r,
// which is dereferenced without a NULL check, so r must not be NULL.
static const RegExp *make_cap(const RegExp *r)
{
- RegExp *re = new RegExp(CAP);
+ RegExp *re = new RegExp(r->line, r->column, CAP);
re->cap = r;
return re;
}
static const RegExp *make_ref(const RegExp *r, const std::string &n)
{
- RegExp *re = new RegExp(REF);
+ RegExp *re = new RegExp(r->line, r->column, REF);
re->ref.re = r;
re->ref.name = new std::string(n);
return re;
delete ref.name;
}
}
+ static const RegExp *make_schar(uint32_t line, uint32_t column, uint32_t c, Opt &opts);
+ static const RegExp *make_ichar(uint32_t line, uint32_t column, uint32_t c, Opt &opts);
+ static const RegExp *make_class(uint32_t line, uint32_t column, const Range *r, Opt &opts, Warn &warn);
+ static const RegExp *make_diff(const RegExp * e1, const RegExp * e2, Opt &opts, Warn &warn);
+ static const RegExp *make_dot(uint32_t line, uint32_t column, Opt &opts, Warn &warn);
+ static const RegExp *make_default(uint32_t line, uint32_t column, Opt &opts);
+ static bool need_wrap(const RegExp *re);
private:
// Private constructor used by the static make_* factories. It records the
// node type and, after this change, the source position (l = line,
// c = column), then inserts the new node into the static list `flist`.
- inline RegExp(type_t t) : type(t)
+ inline RegExp(uint32_t l, uint32_t c, type_t t)
+ : type(t), line(l), column(c)
{
flist.insert(this);
}
{}
};
-const RegExp *mkAlt(const RegExp *re1, const RegExp *re2);
-const RegExp *doAlt(const RegExp *re1, const RegExp *re2);
-const RegExp *doCat(const RegExp *re1, const RegExp *re2);
-bool need_wrap(const RegExp *re);
-
} // end namespace re2c
#endif // _RE2C_IR_REGEXP_REGEXP_
fatal("tags are only allowed with '-T, --tags' option");
}
const std::string *name = new std::string(tok + 1, tok_len() - 1);
- yylval.regexp = RegExp::make_tag(name);
+ yylval.regexp = RegExp::make_tag(cline, get_column(), name);
return TOKEN_REGEXP;
}
const RegExp *r = NULL;
const bool casing = opts->bCaseInsensitive || opts->bCaseInverted;
for (char *s = tok; s < cur; ++s) {
- const uint32_t c = static_cast<uint8_t>(*s);
- r = doCat(r, casing ? ichr(c) : schr(c));
+ const uint32_t
+ c = static_cast<uint8_t>(*s),
+ column = static_cast<uint32_t>(s - pos);
+ r = RegExp::make_cat(r, casing
+ ? RegExp::make_ichar(cline, column, c, opts)
+ : RegExp::make_schar(cline, column, c, opts));
}
- yylval.regexp = r ? r : RegExp::make_nil();
+ yylval.regexp = r ? r : RegExp::make_nil(cline, get_column());
return TOKEN_REGEXP;
}
}
"." {
- yylval.regexp = mkDot();
+ yylval.regexp = RegExp::make_dot(cline, get_column(), opts, warn);
return TOKEN_REGEXP;
}
const RegExp *Scanner::lex_cls(bool neg)
{
+ const uint32_t column = get_column();
Range *r = NULL, *s;
uint32_t u, l;
fst:
if (neg) {
r = Range::sub(opts->encoding.fullRange(), r);
}
- return cls(r);
+ return RegExp::make_class(cline, column, r, opts, warn);
}
uint32_t Scanner::lex_cls_chr()
{
tok = cur;
/*!re2c
- * { fatal ((tok - pos) - tchar, "syntax error"); }
- esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
- esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
- esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
+ * { fatal (get_column() - tchar, "syntax error"); }
+ esc [xXuU] { fatal (get_column() - tchar, "syntax error in hexadecimal escape sequence"); }
+ esc [0-7] { fatal (get_column() - tchar, "syntax error in octal escape sequence"); }
+ esc { fatal (get_column() - tchar, "syntax error in escape sequence"); }
. \ esc { return static_cast<uint8_t>(tok[0]); }
esc_hex { return unesc_hex(tok, cur); }
esc "-" { return static_cast<uint8_t>('-'); }
esc "]" { return static_cast<uint8_t>(']'); }
esc . {
- warn.useless_escape(cline, tok - pos, tok[1]);
+ warn.useless_escape(cline, get_column(), tok[1]);
return static_cast<uint8_t>(tok[1]);
}
*/
end = false;
tok = cur;
/*!re2c
- * { fatal ((tok - pos) - tchar, "syntax error"); }
- esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); }
- esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); }
- esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); }
+ * { fatal (get_column() - tchar, "syntax error"); }
+ esc [xXuU] { fatal (get_column() - tchar, "syntax error in hexadecimal escape sequence"); }
+ esc [0-7] { fatal (get_column() - tchar, "syntax error in octal escape sequence"); }
+ esc { fatal (get_column() - tchar, "syntax error in escape sequence"); }
. \ esc {
end = tok[0] == quote;
esc "\\" { return static_cast<uint8_t>('\\'); }
esc . {
if (tok[1] != quote) {
- warn.useless_escape(cline, tok - pos, tok[1]);
+ warn.useless_escape(cline, get_column(), tok[1]);
}
return static_cast<uint8_t>(tok[1]);
}
for (bool end;;) {
const uint32_t c = lex_str_chr(quote, end);
if (end) {
- return r ? r : RegExp::make_nil();
+ return r ? r : RegExp::make_nil(cline, get_column());
}
- r = doCat(r, casing ? ichr(c) : schr(c));
+ r = RegExp::make_cat(r, casing
+ ? RegExp::make_ichar(cline, get_column(), c, opts)
+ : RegExp::make_schar(cline, get_column(), c, opts));
}
}
{
tok = cur;
/*!re2c
- * { fatal ((tok - pos) - tchar, "unrecognized configuration"); }
+ * { fatal(get_column() - tchar, "unrecognized configuration"); }
"flags:" ("D" | "emit-dot") { opts.set_target(opt_t::DOT); lex_conf_semicolon(); return; }
"flags:" ("S" | "skeleton") { opts.set_target(opt_t::SKELETON); lex_conf_semicolon(); return; }
// merge default rule with the lowest priority
for (i = b; i != e; ++i) {
if (!i->defs.empty()) {
- i->rules.push_back(RegExpRule(in.mkDefault(), i->defs[0]));
+ const Code *c = i->defs[0];
+ const RegExp *r = RegExp::make_default(c->fline, 0, in.opts);
+ i->rules.push_back(RegExpRule(r, c));
}
}
}
| '<' '>' ccode {
- find(context.specs, "0").rules.push_back(RegExpRule(RegExp::make_nil(), $3));
+ const RegExp *r = RegExp::make_nil(context.input.get_cline(), 0);
+ find(context.specs, "0").rules.push_back(RegExpRule(r, $3));
};
ccode
trailexpr
: capexpr
| capexpr '/' expr {
- $$ = RegExp::make_cat($1, RegExp::make_cat(RegExp::make_tag(NULL), $3));
+ $$ = RegExp::make_cat($1, RegExp::make_cat(RegExp::make_tag(context.input.get_cline(), 0, NULL), $3));
};
capexpr
}
| expr '|' diff
{
- $$ = mkAlt($1, $3);
+ $$ = RegExp::make_alt($1, $3);
}
;
}
| diff '\\' term
{
- $$ = context.input.mkDiff($1, $3);
+ $$ = RegExp::make_diff($1, $3, context.input.opts, context.input.warn);
}
;
context.input.fatal("can't find symbol");
}
$$ = i->second;
- if (context.input.opts->posix_captures && need_wrap($$)) {
+ if (context.input.opts->posix_captures && RegExp::need_wrap($$)) {
$$ = RegExp::make_ref($$, *$1);
}
delete $1;
uint32_t lex_str_chr(char quote, bool &end);
const RegExp *lex_cls(bool neg);
const RegExp *lex_str(char quote, bool casing);
- const RegExp *schr(uint32_t c) const;
- const RegExp *ichr(uint32_t c) const;
- const RegExp *cls(const Range *r) const;
-
void lex_conf ();
void lex_conf_encoding_policy();
void lex_conf_input();
void restore_state(const ScannerState&);
uint32_t get_cline() const;
+ uint32_t get_column() const;
const std::string & get_fname () const;
void fatal_at(uint32_t line, ptrdiff_t ofs, const char *msg) const RE2C_GXX_ATTRIBUTE ((noreturn));
void fatalf_at(uint32_t line, const char*, ...) const RE2C_GXX_ATTRIBUTE ((format (printf, 3, 4))) RE2C_GXX_ATTRIBUTE ((noreturn));
void fatalf(const char*, ...) const RE2C_GXX_ATTRIBUTE ((format (printf, 2, 3))) RE2C_GXX_ATTRIBUTE ((noreturn));
void fatal(const char*) const RE2C_GXX_ATTRIBUTE ((noreturn));
void fatal(ptrdiff_t, const char*) const RE2C_GXX_ATTRIBUTE ((noreturn));
-
- const RegExp * mkDiff (const RegExp * e1, const RegExp * e2) const;
- const RegExp * mkDot () const;
- const RegExp * mkDefault () const;
-
FORBID_COPY (Scanner);
};
return cline;
}
// Returns the column of the current token, computed as the offset of `tok`
// from `pos` and narrowed to uint32_t. It replaces the repeated `tok - pos`
// expressions at the call sites changed in this patch.
// NOTE(review): presumably `pos` marks the start of the current line, so that
// tok >= pos always holds. Confirm against the scanner's line handling.
+inline uint32_t Scanner::get_column() const
+{
+ return static_cast<uint32_t>(tok - pos);
+}
+
inline void Scanner::save_state(ScannerState& state) const
{
state = *this;
-re2c: error: line 2, column 5: empty character class
+re2c: error: line 2, column 4: empty character class
re2c: warning: line 10: rule matches empty string [-Wmatch-empty-string]
re2c: warning: line 14: empty character class [-Wempty-character-class]
re2c: warning: line 14: empty character class [-Wempty-character-class]
-re2c: error: line 14, column 11: can only difference char sets
+re2c: error: line 14, column 4: can only difference char sets
re2c: warning: line 10: rule matches empty string [-Wmatch-empty-string]
re2c: warning: line 14: empty character class [-Wempty-character-class]
re2c: warning: line 14: empty character class [-Wempty-character-class]
-re2c: error: line 14, column 11: can only difference char sets
+re2c: error: line 14, column 4: can only difference char sets
-re2c: error: line 2, column 29: empty character class
+re2c: error: line 2, column 4: empty character class
-re2c: error: line 2, column 5: empty character class
+re2c: error: line 2, column 4: empty character class
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: error: line 2, column 31: can only difference char sets
+re2c: error: line 2, column 4: can only difference char sets
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: error: line 2, column 31: can only difference char sets
+re2c: error: line 2, column 4: can only difference char sets
-re2c: error: line 2, column 5: empty character class
+re2c: error: line 2, column 4: empty character class
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: error: line 2, column 11: can only difference char sets
+re2c: error: line 2, column 4: can only difference char sets
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: error: line 2, column 11: can only difference char sets
+re2c: error: line 2, column 4: can only difference char sets
-re2c: error: line 2, column 5: empty character class
+re2c: error: line 2, column 4: empty character class
-re2c: error: line 2, column 5: empty character class
+re2c: error: line 2, column 4: empty character class
-re2c: error: line 2, column 1: Bad code point: '0x110000'
+re2c: error: line 2, column 21: bad code point: '0x110000'
-re2c: error: line 2, column 1: Bad code point: '0xFFFF'
+re2c: error: line 2, column 5: bad code point: '0xFFFF'
-re2c: error: line 2, column 1: Bad code point: '0xFFFF'
+re2c: error: line 2, column 5: bad code point: '0xFFFF'
-re2c: error: line 2, column 1: Bad code point: '0x110000'
+re2c: error: line 2, column 21: bad code point: '0x110000'
-re2c: error: line 2, column 1: Bad code point: '0x10FFFF'
+re2c: error: line 2, column 11: bad code point: '0x10FFFF'
-re2c: error: line 2, column 1: Bad code point: '0x110000'
+re2c: error: line 2, column 21: bad code point: '0x110000'
-re2c: error: line 3, column 1: empty character class
+re2c: error: line 3, column 0: empty character class
-re2c: error: line 3, column 1: empty character class
+re2c: error: line 3, column 0: empty character class
re2c: warning: line 253: rule matches empty string [-Wmatch-empty-string]
-re2c: error: line 288, column 1: empty character class
+re2c: error: line 288, column 0: empty character class