From 2af08e55d24dc0f2c821eb61300b58edb98f8301 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 15 Jan 2019 00:24:22 +0000 Subject: [PATCH] libre2c_posix: parse repetition counters in the lexer. --- re2c/bootstrap/libre2c_posix/lex.cc | 295 +++++++++++++++++--------- re2c/bootstrap/libre2c_posix/parse.cc | 111 ++++------ re2c/bootstrap/libre2c_posix/parse.h | 6 +- re2c/libre2c_posix/lex.re | 49 +++-- re2c/libre2c_posix/parse.ypp | 23 +- 5 files changed, 290 insertions(+), 194 deletions(-) diff --git a/re2c/bootstrap/libre2c_posix/lex.cc b/re2c/bootstrap/libre2c_posix/lex.cc index 794c0155..c8df8efe 100644 --- a/re2c/bootstrap/libre2c_posix/lex.cc +++ b/re2c/bootstrap/libre2c_posix/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 1.1.1 on Mon Jan 14 22:54:34 2019 */ +/* Generated by re2c 1.1.1 on Tue Jan 15 00:23:54 2019 */ #line 1 "../libre2c_posix/lex.re" #include @@ -8,6 +8,7 @@ #include "src/parse/ast.h" #include "src/options/msg.h" #include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" #include "parse.h" #include "libre2c_posix/lex.h" @@ -18,206 +19,306 @@ namespace re2c { static int32_t lex_cls_chr(const char *&, uint32_t &); -#line 27 "../libre2c_posix/lex.re" +#line 29 "../libre2c_posix/lex.re" int lex(const char *&cur) { - const char *mar; + const char *yyt1;const char *yyt2; + const char *mar, *x, *y; std::vector cls; bool neg = false; uint32_t l, u; -#line 33 "libre2c_posix/lex.cc" +#line 35 "libre2c_posix/lex.cc" { char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; yych = *cur; if (yych <= '>') { - if (yych <= '-') { + if (yych <= '\'') { if (yych <= 0x00) goto yy2; - if (yych <= '\'') goto yy4; - if (yych <= '+') goto yy6; + if (yych == '$') goto yy6; goto yy4; } else { - if (yych <= '.') goto yy8; - if (yych <= '/') goto yy4; - if (yych <= '9') goto yy10; + if (yych <= '+') goto yy8; + if (yych == '.') goto yy10; goto yy4; } } else { - if (yych <= '[') { - if (yych <= '?') goto yy6; - if (yych <= '@') goto yy4; - if (yych <= 'Z') goto yy12; - goto yy14; - } else { - if (yych <= '`') goto yy4; - if (yych <= 'z') goto yy12; - if (yych <= '}') goto yy6; + if (yych <= ']') { + if (yych <= '?') goto yy8; + if (yych == '[') goto yy12; goto yy4; + } else { + if (yych <= 'z') { + if (yych <= '^') goto yy6; + goto yy4; + } else { + if (yych <= '{') goto yy14; + if (yych <= '|') goto yy8; + goto yy4; + } } } yy2: ++cur; -#line 39 "../libre2c_posix/lex.re" +#line 42 "../libre2c_posix/lex.re" { return 0; } -#line 66 "libre2c_posix/lex.cc" +#line 103 "libre2c_posix/lex.cc" yy4: ++cur; -#line 37 "../libre2c_posix/lex.re" - { goto err; } -#line 71 "libre2c_posix/lex.cc" +yy5: +#line 77 "../libre2c_posix/lex.re" + { + ASTChar c(static_cast(cur[-1]), 0); + std::vector *str = new std::vector; + str->push_back(c); + yylval.regexp = ast_str(0, 0, str, false); + return REGEXP; + } +#line 115 "libre2c_posix/lex.cc" yy6: ++cur; -#line 41 "../libre2c_posix/lex.re" - { return cur[-1]; } -#line 76 "libre2c_posix/lex.cc" +#line 46 "../libre2c_posix/lex.re" + { + error("anchors are not supported"); + return ERROR; + } +#line 123 "libre2c_posix/lex.cc" yy8: ++cur; -#line 46 "../libre2c_posix/lex.re" +#line 44 "../libre2c_posix/lex.re" + { return cur[-1]; } +#line 128 "libre2c_posix/lex.cc" +yy10: + ++cur; +#line 72 "../libre2c_posix/lex.re" { yylval.regexp = ast_dot(0, 0); return REGEXP; } -#line 84 "libre2c_posix/lex.cc" -yy10: +#line 136 "libre2c_posix/lex.cc" +yy12: + yych = *++cur; + if (yych == '^') goto yy15; +#line 52 "../libre2c_posix/lex.re" + { goto cls; } +#line 142 "libre2c_posix/lex.cc" +yy14: + yych = *(mar = ++cur); + if (yych <= '/') goto yy5; + if (yych <= '9') { + yyt1 = cur; + goto yy17; + } + goto yy5; +yy15: ++cur; #line 51 "../libre2c_posix/lex.re" + { neg = true; goto cls; } +#line 155 "libre2c_posix/lex.cc" +yy17: + yych = *++cur; + if (yybm[0+yych] & 128) { + goto yy17; + } + if (yych == ',') goto yy20; + if (yych == '}') goto yy21; +yy19: + cur = mar; + goto yy5; +yy20: + yych = *++cur; + if (yych <= '/') goto yy19; + if (yych <= '9') { + yyt2 = cur; + goto yy23; + } + if (yych == '}') goto yy25; + goto yy19; +yy21: + ++cur; + x = yyt1; +#line 54 "../libre2c_posix/lex.re" { - yylval.number = static_cast(cur[-1] - '0'); - return DIGIT; + if (!s_to_u32_unsafe(x, cur - 1, yylval.bounds.min)) goto err_cnt; + yylval.bounds.max = yylval.bounds.min; + return COUNT; } -#line 92 "libre2c_posix/lex.cc" -yy12: +#line 184 "libre2c_posix/lex.cc" +yy23: + yych = *++cur; + if (yych <= '/') goto yy19; + if (yych <= '9') goto yy23; + if (yych == '}') goto yy27; + goto yy19; +yy25: ++cur; -#line 56 "../libre2c_posix/lex.re" + x = yyt1; +#line 66 "../libre2c_posix/lex.re" { - ASTChar c(static_cast(cur[-1]), 0); - std::vector *str = new std::vector; - str->push_back(c); - yylval.regexp = ast_str(0, 0, str, false); - return REGEXP; + if (!s_to_u32_unsafe(x, cur - 2, yylval.bounds.min)) goto err_cnt; + yylval.bounds.max = AST::MANY; + return COUNT; } -#line 103 "libre2c_posix/lex.cc" -yy14: - yych = *++cur; - if (yych == '^') goto yy16; -#line 44 "../libre2c_posix/lex.re" - { goto cls; } -#line 109 "libre2c_posix/lex.cc" -yy16: +#line 200 "libre2c_posix/lex.cc" +yy27: ++cur; -#line 43 "../libre2c_posix/lex.re" - { neg = true; goto cls; } -#line 114 "libre2c_posix/lex.cc" + x = yyt1; + y = yyt2; +#line 60 "../libre2c_posix/lex.re" + { + if (!s_to_u32_unsafe(x, y - 1, yylval.bounds.min) + || !s_to_u32_unsafe(y, cur - 1, yylval.bounds.max)) goto err_cnt; + return COUNT; + } +#line 211 "libre2c_posix/lex.cc" } -#line 63 "../libre2c_posix/lex.re" +#line 84 "../libre2c_posix/lex.re" cls: if (lex_cls_chr(cur, l) != 0) goto err; -#line 122 "libre2c_posix/lex.cc" +#line 219 "libre2c_posix/lex.cc" { char yych; yych = *(mar = cur); - if (yych == '-') goto yy21; -yy20: -#line 68 "../libre2c_posix/lex.re" + if (yych == '-') goto yy32; +yy31: +#line 89 "../libre2c_posix/lex.re" { u = l; goto add; } -#line 130 "libre2c_posix/lex.cc" -yy21: +#line 227 "libre2c_posix/lex.cc" +yy32: yych = *++cur; - if (yych != ']') goto yy23; + if (yych != ']') goto yy34; cur = mar; - goto yy20; -yy23: + goto yy31; +yy34: ++cur; cur -= 1; -#line 69 "../libre2c_posix/lex.re" +#line 90 "../libre2c_posix/lex.re" { if (lex_cls_chr(cur, u) != 0) goto err; goto add; } -#line 141 "libre2c_posix/lex.cc" +#line 238 "libre2c_posix/lex.cc" } -#line 70 "../libre2c_posix/lex.re" +#line 91 "../libre2c_posix/lex.re" add: if (l > u) goto err; cls.push_back(ASTRange(l, u, 0)); -#line 149 "libre2c_posix/lex.cc" +#line 246 "libre2c_posix/lex.cc" { char yych; yych = *cur; - if (yych == ']') goto yy28; -#line 75 "../libre2c_posix/lex.re" + if (yych == ']') goto yy39; +#line 96 "../libre2c_posix/lex.re" { goto cls; } -#line 156 "libre2c_posix/lex.cc" -yy28: +#line 253 "libre2c_posix/lex.cc" +yy39: ++cur; -#line 76 "../libre2c_posix/lex.re" +#line 97 "../libre2c_posix/lex.re" { std::vector *p = new std::vector; p->swap(cls); yylval.regexp = ast_cls(0, 0, p, neg); return REGEXP; } -#line 166 "libre2c_posix/lex.cc" +#line 263 "libre2c_posix/lex.cc" } -#line 82 "../libre2c_posix/lex.re" +#line 103 "../libre2c_posix/lex.re" err: error("syntax error: %s\n", cur); return ERROR; + +err_cnt: + error("repetition count overflow"); + return ERROR; } int32_t lex_cls_chr(const char *&cur, uint32_t &c) { -#line 179 "libre2c_posix/lex.cc" +#line 280 "libre2c_posix/lex.cc" { char yych; yych = *cur; - if (yych <= 0x00) goto yy32; - if (yych == '[') goto yy36; - goto yy34; -yy32: + if (yych <= 0x00) goto yy43; + if (yych == '[') goto yy47; + goto yy45; +yy43: ++cur; -#line 92 "../libre2c_posix/lex.re" +#line 117 "../libre2c_posix/lex.re" { return 1; } -#line 190 "libre2c_posix/lex.cc" -yy34: +#line 291 "libre2c_posix/lex.cc" +yy45: ++cur; -yy35: -#line 97 "../libre2c_posix/lex.re" +yy46: +#line 122 "../libre2c_posix/lex.re" { c = static_cast(cur[-1]); return 0; } -#line 196 "libre2c_posix/lex.cc" -yy36: +#line 297 "libre2c_posix/lex.cc" +yy47: yych = *++cur; if (yych <= '9') { - if (yych != '.') goto yy35; + if (yych != '.') goto yy46; } else { - if (yych <= ':') goto yy39; - if (yych == '=') goto yy41; - goto yy35; + if (yych <= ':') goto yy50; + if (yych == '=') goto yy52; + goto yy46; } ++cur; -#line 93 "../libre2c_posix/lex.re" +#line 118 "../libre2c_posix/lex.re" { error("collating characters not supported"); return 1; } -#line 209 "libre2c_posix/lex.cc" -yy39: +#line 310 "libre2c_posix/lex.cc" +yy50: ++cur; -#line 94 "../libre2c_posix/lex.re" +#line 119 "../libre2c_posix/lex.re" { error("character classes not supported"); return 1; } -#line 214 "libre2c_posix/lex.cc" -yy41: +#line 315 "libre2c_posix/lex.cc" +yy52: ++cur; -#line 95 "../libre2c_posix/lex.re" +#line 120 "../libre2c_posix/lex.re" { error("equivalence classes not supported"); return 1; } -#line 219 "libre2c_posix/lex.cc" +#line 320 "libre2c_posix/lex.cc" } -#line 98 "../libre2c_posix/lex.re" +#line 123 "../libre2c_posix/lex.re" } diff --git a/re2c/bootstrap/libre2c_posix/parse.cc b/re2c/bootstrap/libre2c_posix/parse.cc index ad896dbd..3ee8ee40 100644 --- a/re2c/bootstrap/libre2c_posix/parse.cc +++ b/re2c/bootstrap/libre2c_posix/parse.cc @@ -116,7 +116,7 @@ extern int yydebug; # define YYTOKENTYPE enum yytokentype { - DIGIT = 258, + COUNT = 258, ERROR = 259, REGEXP = 260 }; @@ -129,8 +129,8 @@ union YYSTYPE { #line 25 "../libre2c_posix/parse.ypp" /* yacc.c:355 */ - const re2c::AST * regexp; - uint32_t number; + const re2c::AST *regexp; + re2c::ASTBounds bounds; #line 136 "libre2c_posix/parse.cc" /* yacc.c:355 */ }; @@ -391,16 +391,16 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 9 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 22 +#define YYLAST 13 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 15 +#define YYNTOKENS 12 /* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 7 +#define YYNNTS 6 /* YYNRULES -- Number of rules. */ -#define YYNRULES 17 +#define YYNRULES 13 /* YYNSTATES -- Number of states. */ -#define YYNSTATES 26 +#define YYNSTATES 18 /* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned by yylex, with out-of-bounds checking. */ @@ -418,7 +418,7 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 13, 14, 7, 8, 12, 2, 2, 2, 2, 2, + 10, 11, 7, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -426,7 +426,7 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 10, 6, 11, 2, 2, 2, 2, + 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -448,7 +448,7 @@ static const yytype_uint8 yytranslate[] = static const yytype_uint8 yyrline[] = { 0, 39, 39, 42, 43, 47, 48, 52, 53, 54, - 55, 56, 57, 58, 62, 63, 67, 68 + 55, 56, 60, 61 }; #endif @@ -457,9 +457,9 @@ static const yytype_uint8 yyrline[] = First, the terminals, then, starting at YYNTOKENS, nonterminals. */ static const char *const yytname[] = { - "$end", "error", "$undefined", "DIGIT", "ERROR", "REGEXP", "'|'", "'*'", - "'+'", "'?'", "'{'", "'}'", "','", "'('", "')'", "$accept", "regexp", - "expr", "term", "factor", "number", "primary", YY_NULLPTR + "$end", "error", "$undefined", "COUNT", "ERROR", "REGEXP", "'|'", "'*'", + "'+'", "'?'", "'('", "')'", "$accept", "regexp", "expr", "term", + "factor", "primary", YY_NULLPTR }; #endif @@ -469,14 +469,14 @@ static const char *const yytname[] = static const yytype_uint16 yytoknum[] = { 0, 256, 257, 258, 259, 260, 124, 42, 43, 63, - 123, 125, 44, 40, 41 + 40, 41 }; # endif -#define YYPACT_NINF -7 +#define YYPACT_NINF -5 #define yypact_value_is_default(Yystate) \ - (!!((Yystate) == (-7))) + (!!((Yystate) == (-5))) #define YYTABLE_NINF -1 @@ -487,9 +487,8 @@ static const yytype_uint16 yytoknum[] = STATE-NUM. */ static const yytype_int8 yypact[] = { - -2, -7, -2, 6, -5, -7, -2, 12, -4, -7, - -2, -7, -7, -7, -7, 4, -7, -7, -7, -3, - -7, -7, 1, -7, 2, -7 + -2, -5, -2, 1, 4, -5, -2, -3, -4, -5, + -2, -5, -5, -5, -5, -5, -5, -5 }; /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. @@ -497,21 +496,20 @@ static const yytype_int8 yypact[] = means the default is an error. */ static const yytype_uint8 yydefact[] = { - 0, 16, 0, 0, 2, 3, 5, 7, 0, 1, - 0, 6, 8, 9, 10, 0, 17, 4, 14, 0, - 15, 11, 0, 12, 0, 13 + 0, 12, 0, 0, 2, 3, 5, 7, 0, 1, + 0, 6, 11, 8, 9, 10, 13, 4 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int8 yypgoto[] = { - -7, -7, 13, 8, -7, -6, -7 + -5, -5, 9, 3, -5, -5 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int8 yydefgoto[] = { - -1, 3, 4, 5, 6, 19, 7 + -1, 3, 4, 5, 6, 7 }; /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If @@ -519,39 +517,36 @@ static const yytype_int8 yydefgoto[] = number is the opposite. If YYTABLE_NINF, syntax error. */ static const yytype_uint8 yytable[] = { - 20, 10, 10, 1, 18, 20, 9, 18, 21, 22, - 16, 2, 23, 25, 11, 8, 24, 0, 17, 12, - 13, 14, 15 + 12, 9, 10, 1, 13, 14, 15, 16, 2, 11, + 10, 8, 0, 17 }; static const yytype_int8 yycheck[] = { - 3, 6, 6, 5, 3, 3, 0, 3, 11, 12, - 14, 13, 11, 11, 6, 2, 22, -1, 10, 7, - 8, 9, 10 + 3, 0, 6, 5, 7, 8, 9, 11, 10, 6, + 6, 2, -1, 10 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_uint8 yystos[] = { - 0, 5, 13, 16, 17, 18, 19, 21, 17, 0, - 6, 18, 7, 8, 9, 10, 14, 18, 3, 20, - 3, 11, 12, 11, 20, 11 + 0, 5, 10, 13, 14, 15, 16, 17, 14, 0, + 6, 15, 3, 7, 8, 9, 11, 15 }; /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { - 0, 15, 16, 17, 17, 18, 18, 19, 19, 19, - 19, 19, 19, 19, 20, 20, 21, 21 + 0, 12, 13, 14, 14, 15, 15, 16, 16, 16, + 16, 16, 17, 17 }; /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ static const yytype_uint8 yyr2[] = { 0, 2, 1, 1, 3, 1, 2, 1, 2, 2, - 2, 4, 5, 6, 1, 2, 1, 3 + 2, 2, 1, 3 }; @@ -1232,71 +1227,53 @@ yyreduce: case 2: #line 39 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { regexp = (yyval.regexp); } -#line 1236 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1231 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 4: #line 43 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_alt((yyvsp[-2].regexp), (yyvsp[0].regexp)); } -#line 1242 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1237 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 6: #line 48 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_cat((yyvsp[-1].regexp), (yyvsp[0].regexp)); } -#line 1248 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1243 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 8: #line 53 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, AST::MANY); } -#line 1254 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1249 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 9: #line 54 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 1, AST::MANY); } -#line 1260 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1255 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 10: #line 55 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, 1); } -#line 1266 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1261 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 11: #line 56 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ - { (yyval.regexp) = ast_iter((yyvsp[-3].regexp), (yyvsp[-1].number), (yyvsp[-1].number)); } -#line 1272 "libre2c_posix/parse.cc" /* yacc.c:1646 */ - break; - - case 12: -#line 57 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ - { (yyval.regexp) = ast_iter((yyvsp[-4].regexp), (yyvsp[-2].number), AST::MANY); } -#line 1278 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), (yyvsp[0].bounds).min, (yyvsp[0].bounds).max); } +#line 1267 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 13: -#line 58 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ - { (yyval.regexp) = ast_iter((yyvsp[-5].regexp), (yyvsp[-3].number), (yyvsp[-1].number)); } -#line 1284 "libre2c_posix/parse.cc" /* yacc.c:1646 */ - break; - - case 15: -#line 63 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ - { (yyval.number) = (yyvsp[-1].number) * 10 + (yyvsp[0].number); } -#line 1290 "libre2c_posix/parse.cc" /* yacc.c:1646 */ - break; - - case 17: -#line 68 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 61 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_cap((yyvsp[-1].regexp)); } -#line 1296 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1273 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; -#line 1300 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1277 "libre2c_posix/parse.cc" /* yacc.c:1646 */ default: break; } /* User semantic actions sometimes alter yychar, and that requires @@ -1524,7 +1501,7 @@ yyreturn: #endif return yyresult; } -#line 71 "../libre2c_posix/parse.ypp" /* yacc.c:1906 */ +#line 64 "../libre2c_posix/parse.ypp" /* yacc.c:1906 */ extern "C" { diff --git a/re2c/bootstrap/libre2c_posix/parse.h b/re2c/bootstrap/libre2c_posix/parse.h index 5045350a..beb69438 100644 --- a/re2c/bootstrap/libre2c_posix/parse.h +++ b/re2c/bootstrap/libre2c_posix/parse.h @@ -45,7 +45,7 @@ extern int yydebug; # define YYTOKENTYPE enum yytokentype { - DIGIT = 258, + COUNT = 258, ERROR = 259, REGEXP = 260 }; @@ -58,8 +58,8 @@ union YYSTYPE { #line 25 "../libre2c_posix/parse.ypp" /* yacc.c:1909 */ - const re2c::AST * regexp; - uint32_t number; + const re2c::AST *regexp; + re2c::ASTBounds bounds; #line 65 "libre2c_posix/parse.h" /* yacc.c:1909 */ }; diff --git a/re2c/libre2c_posix/lex.re b/re2c/libre2c_posix/lex.re index 337cb3da..266e0f34 100644 --- a/re2c/libre2c_posix/lex.re +++ b/re2c/libre2c_posix/lex.re @@ -6,6 +6,7 @@ #include "src/parse/ast.h" #include "src/options/msg.h" #include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" #include "parse.h" #include "libre2c_posix/lex.h" @@ -17,18 +18,20 @@ namespace re2c { static int32_t lex_cls_chr(const char *&, uint32_t &); /*!re2c + re2c:flags:tags = 1; re2c:yyfill:enable = 0; re2c:define:YYCURSOR = cur; re2c:define:YYMARKER = mar; re2c:define:YYCTYPE = char; - eof = "\x00"; - esc = "\\"; + nil = "\x00"; + num = [0-9]+; */ int lex(const char *&cur) { - const char *mar; + /*!stags:re2c format = "const char *@@;"; */ + const char *mar, *x, *y; std::vector cls; bool neg = false; uint32_t l, u; @@ -36,24 +39,42 @@ int lex(const char *&cur) /*!re2c * { goto err; } - eof { return 0; } + nil { return 0; } - [{}()|*+?] { return cur[-1]; } + [()|*+?] { return cur[-1]; } + + [$^] { + error("anchors are not supported"); + return ERROR; + } "[^" { neg = true; goto cls; } "[" { goto cls; } + "{" @x num "}" { + if (!s_to_u32_unsafe(x, cur - 1, yylval.bounds.min)) goto err_cnt; + yylval.bounds.max = yylval.bounds.min; + return COUNT; + } + + "{" @x num "," @y num "}" { + if (!s_to_u32_unsafe(x, y - 1, yylval.bounds.min) + || !s_to_u32_unsafe(y, cur - 1, yylval.bounds.max)) goto err_cnt; + return COUNT; + } + + "{" @x num ",}" { + if (!s_to_u32_unsafe(x, cur - 2, yylval.bounds.min)) goto err_cnt; + yylval.bounds.max = AST::MANY; + return COUNT; + } + "." { yylval.regexp = ast_dot(0, 0); return REGEXP; } - [0-9] { - yylval.number = static_cast(cur[-1] - '0'); - return DIGIT; - } - - [a-zA-Z] { + [^] \ nil { ASTChar c(static_cast(cur[-1]), 0); std::vector *str = new std::vector; str->push_back(c); @@ -84,6 +105,10 @@ add: err: error("syntax error: %s\n", cur); return ERROR; + +err_cnt: + error("repetition count overflow"); + return ERROR; } int32_t lex_cls_chr(const char *&cur, uint32_t &c) @@ -94,7 +119,7 @@ int32_t lex_cls_chr(const char *&cur, uint32_t &c) "[:" { error("character classes not supported"); return 1; } "[=" { error("equivalence classes not supported"); return 1; } - [^] \ eof { c = static_cast(cur[-1]); return 0; } + [^] \ nil { c = static_cast(cur[-1]); return 0; } */ } diff --git a/re2c/libre2c_posix/parse.ypp b/re2c/libre2c_posix/parse.ypp index a679fa4f..cc7d0e8f 100644 --- a/re2c/libre2c_posix/parse.ypp +++ b/re2c/libre2c_posix/parse.ypp @@ -23,16 +23,16 @@ void yyerror(const char *pattern, const char*); %start regexp %union { - const re2c::AST * regexp; - uint32_t number; + const re2c::AST *regexp; + re2c::ASTBounds bounds; }; -%token DIGIT +%token COUNT %token ERROR %token REGEXP %type REGEXP regexp expr term factor primary -%type DIGIT number +%type COUNT %% @@ -50,17 +50,10 @@ term factor : primary -| primary '*' { $$ = ast_iter($1, 0, AST::MANY); } -| primary '+' { $$ = ast_iter($1, 1, AST::MANY); } -| primary '?' { $$ = ast_iter($1, 0, 1); } -| primary '{' number '}' { $$ = ast_iter($1, $3, $3); } -| primary '{' number ',' '}' { $$ = ast_iter($1, $3, AST::MANY); } -| primary '{' number ',' number '}' { $$ = ast_iter($1, $3, $5); } -; - -number -: DIGIT -| number DIGIT { $$ = $1 * 10 + $2; } +| primary '*' { $$ = ast_iter($1, 0, AST::MANY); } +| primary '+' { $$ = ast_iter($1, 1, AST::MANY); } +| primary '?' { $$ = ast_iter($1, 0, 1); } +| primary COUNT { $$ = ast_iter($1, $2.min, $2.max); } ; primary -- 2.40.0