From: Ulya Trofimovich Date: Mon, 14 Jan 2019 23:04:23 +0000 (+0000) Subject: libre2c_posix: initial support for character classes in parser. X-Git-Tag: 1.2~218 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ef425c63d947f95e828f8635629a84d1738a6492;p=re2c libre2c_posix: initial support for character classes in parser. --- diff --git a/re2c/bootstrap/libre2c_posix/lex.cc b/re2c/bootstrap/libre2c_posix/lex.cc index f7c8d148..794c0155 100644 --- a/re2c/bootstrap/libre2c_posix/lex.cc +++ b/re2c/bootstrap/libre2c_posix/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 1.1.1 on Sat Jan 12 23:45:53 2019 */ +/* Generated by re2c 1.1.1 on Mon Jan 14 22:54:34 2019 */ #line 1 "../libre2c_posix/lex.re" #include @@ -6,36 +6,51 @@ #include "src/encoding/enc.h" #include "src/parse/ast.h" +#include "src/options/msg.h" #include "src/util/range.h" #include "parse.h" #include "libre2c_posix/lex.h" + extern YYSTYPE yylval; namespace re2c { +static int32_t lex_cls_chr(const char *&, uint32_t &); + +#line 27 "../libre2c_posix/lex.re" + + int lex(const char *&cur) { - -#line 21 "libre2c_posix/lex.cc" + const char *mar; + std::vector cls; + bool neg = false; + uint32_t l, u; + + +#line 33 "libre2c_posix/lex.cc" { char yych; yych = *cur; - if (yych <= '9') { - if (yych <= '+') { + if (yych <= '>') { + if (yych <= '-') { if (yych <= 0x00) goto yy2; if (yych <= '\'') goto yy4; - goto yy6; + if (yych <= '+') goto yy6; + goto yy4; } else { - if (yych == '.') goto yy8; + if (yych <= '.') goto yy8; if (yych <= '/') goto yy4; - goto yy10; + if (yych <= '9') goto yy10; + goto yy4; } } else { - if (yych <= 'Z') { - if (yych == '?') goto yy6; + if (yych <= '[') { + if (yych <= '?') goto yy6; if (yych <= '@') goto yy4; - goto yy12; + if (yych <= 'Z') goto yy12; + goto yy14; } else { if (yych <= '`') goto yy4; if (yych <= 'z') goto yy12; @@ -45,48 +60,164 @@ int lex(const char *&cur) } yy2: ++cur; -#line 23 "../libre2c_posix/lex.re" +#line 39 "../libre2c_posix/lex.re" { return 0; } -#line 51 "libre2c_posix/lex.cc" +#line 66 "libre2c_posix/lex.cc" yy4: ++cur; -#line 22 "../libre2c_posix/lex.re" - { printf("syntax error: %s\n", cur); exit(1); } -#line 56 "libre2c_posix/lex.cc" +#line 37 "../libre2c_posix/lex.re" + { goto err; } +#line 71 "libre2c_posix/lex.cc" yy6: ++cur; -#line 24 "../libre2c_posix/lex.re" +#line 41 "../libre2c_posix/lex.re" { return cur[-1]; } -#line 61 "libre2c_posix/lex.cc" +#line 76 "libre2c_posix/lex.cc" yy8: ++cur; -#line 26 "../libre2c_posix/lex.re" +#line 46 "../libre2c_posix/lex.re" { - yylval.regexp = ast_dot(0, 0); - return REGEXP; - } -#line 69 "libre2c_posix/lex.cc" + yylval.regexp = ast_dot(0, 0); + return REGEXP; + } +#line 84 "libre2c_posix/lex.cc" yy10: ++cur; -#line 31 "../libre2c_posix/lex.re" +#line 51 "../libre2c_posix/lex.re" { - yylval.number = static_cast(cur[-1] - '0'); - return DIGIT; - } -#line 77 "libre2c_posix/lex.cc" + yylval.number = static_cast(cur[-1] - '0'); + return DIGIT; + } +#line 92 "libre2c_posix/lex.cc" yy12: ++cur; -#line 36 "../libre2c_posix/lex.re" +#line 56 "../libre2c_posix/lex.re" { - ASTChar c(static_cast(cur[-1]), 0); - std::vector *str = new std::vector; - str->push_back(c); - yylval.regexp = ast_str(0, 0, str, false); - return REGEXP; - } -#line 88 "libre2c_posix/lex.cc" -} + ASTChar c(static_cast(cur[-1]), 0); + std::vector *str = new std::vector; + str->push_back(c); + yylval.regexp = ast_str(0, 0, str, false); + return REGEXP; + } +#line 103 "libre2c_posix/lex.cc" +yy14: + yych = *++cur; + if (yych == '^') goto yy16; +#line 44 "../libre2c_posix/lex.re" + { goto cls; } +#line 109 "libre2c_posix/lex.cc" +yy16: + ++cur; #line 43 "../libre2c_posix/lex.re" + { neg = true; goto cls; } +#line 114 "libre2c_posix/lex.cc" +} +#line 63 "../libre2c_posix/lex.re" + + +cls: + if (lex_cls_chr(cur, l) != 0) goto err; + +#line 122 "libre2c_posix/lex.cc" +{ + char yych; + yych = *(mar = cur); + if (yych == '-') goto yy21; +yy20: +#line 68 "../libre2c_posix/lex.re" + { u = l; goto add; } +#line 130 "libre2c_posix/lex.cc" +yy21: + yych = *++cur; + if (yych != ']') goto yy23; + cur = mar; + goto yy20; +yy23: + ++cur; + cur -= 1; +#line 69 "../libre2c_posix/lex.re" + { if (lex_cls_chr(cur, u) != 0) goto err; goto add; } +#line 141 "libre2c_posix/lex.cc" +} +#line 70 "../libre2c_posix/lex.re" + +add: + if (l > u) goto err; + cls.push_back(ASTRange(l, u, 0)); + +#line 149 "libre2c_posix/lex.cc" +{ + char yych; + yych = *cur; + if (yych == ']') goto yy28; +#line 75 "../libre2c_posix/lex.re" + { goto cls; } +#line 156 "libre2c_posix/lex.cc" +yy28: + ++cur; +#line 76 "../libre2c_posix/lex.re" + { + std::vector *p = new std::vector; + p->swap(cls); + yylval.regexp = ast_cls(0, 0, p, neg); + return REGEXP; + } +#line 166 "libre2c_posix/lex.cc" +} +#line 82 "../libre2c_posix/lex.re" + + +err: + error("syntax error: %s\n", cur); + return ERROR; +} + +int32_t lex_cls_chr(const char *&cur, uint32_t &c) +{ + +#line 179 "libre2c_posix/lex.cc" +{ + char yych; + yych = *cur; + if (yych <= 0x00) goto yy32; + if (yych == '[') goto yy36; + goto yy34; +yy32: + ++cur; +#line 92 "../libre2c_posix/lex.re" + { return 1; } +#line 190 "libre2c_posix/lex.cc" +yy34: + ++cur; +yy35: +#line 97 "../libre2c_posix/lex.re" + { c = static_cast(cur[-1]); return 0; } +#line 196 "libre2c_posix/lex.cc" +yy36: + yych = *++cur; + if (yych <= '9') { + if (yych != '.') goto yy35; + } else { + if (yych <= ':') goto yy39; + if (yych == '=') goto yy41; + goto yy35; + } + ++cur; +#line 93 "../libre2c_posix/lex.re" + { error("collating characters not supported"); return 1; } +#line 209 "libre2c_posix/lex.cc" +yy39: + ++cur; +#line 94 "../libre2c_posix/lex.re" + { error("character classes not supported"); return 1; } +#line 214 "libre2c_posix/lex.cc" +yy41: + ++cur; +#line 95 "../libre2c_posix/lex.re" + { error("equivalence classes not supported"); return 1; } +#line 219 "libre2c_posix/lex.cc" +} +#line 98 "../libre2c_posix/lex.re" } diff --git a/re2c/bootstrap/libre2c_posix/parse.cc b/re2c/bootstrap/libre2c_posix/parse.cc index 7082434e..ad896dbd 100644 --- a/re2c/bootstrap/libre2c_posix/parse.cc +++ b/re2c/bootstrap/libre2c_posix/parse.cc @@ -117,7 +117,8 @@ extern int yydebug; enum yytokentype { DIGIT = 258, - REGEXP = 259 + ERROR = 259, + REGEXP = 260 }; #endif @@ -131,7 +132,7 @@ union YYSTYPE const re2c::AST * regexp; uint32_t number; -#line 135 "libre2c_posix/parse.cc" /* yacc.c:355 */ +#line 136 "libre2c_posix/parse.cc" /* yacc.c:355 */ }; typedef union YYSTYPE YYSTYPE; @@ -148,7 +149,7 @@ int yyparse (const char *&pattern); /* Copy the second part of user declarations. */ -#line 152 "libre2c_posix/parse.cc" /* yacc.c:358 */ +#line 153 "libre2c_posix/parse.cc" /* yacc.c:358 */ #ifdef short # undef short @@ -390,10 +391,10 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 9 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 21 +#define YYLAST 22 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 14 +#define YYNTOKENS 15 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 7 /* YYNRULES -- Number of rules. */ @@ -404,7 +405,7 @@ union yyalloc /* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned by yylex, with out-of-bounds checking. */ #define YYUNDEFTOK 2 -#define YYMAXUTOK 259 +#define YYMAXUTOK 260 #define YYTRANSLATE(YYX) \ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) @@ -417,15 +418,15 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 12, 13, 6, 7, 11, 2, 2, 2, 2, 2, + 13, 14, 7, 8, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 8, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 9, 5, 10, 2, 2, 2, 2, + 2, 2, 2, 10, 6, 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -438,15 +439,16 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4 + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5 }; #if YYDEBUG /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_uint8 yyrline[] = { - 0, 38, 38, 41, 42, 46, 47, 51, 52, 53, - 54, 55, 56, 57, 61, 62, 66, 67 + 0, 39, 39, 42, 43, 47, 48, 52, 53, 54, + 55, 56, 57, 58, 62, 63, 67, 68 }; #endif @@ -455,9 +457,9 @@ static const yytype_uint8 yyrline[] = First, the terminals, then, starting at YYNTOKENS, nonterminals. */ static const char *const yytname[] = { - "$end", "error", "$undefined", "DIGIT", "REGEXP", "'|'", "'*'", "'+'", - "'?'", "'{'", "'}'", "','", "'('", "')'", "$accept", "regexp", "expr", - "term", "factor", "number", "primary", YY_NULLPTR + "$end", "error", "$undefined", "DIGIT", "ERROR", "REGEXP", "'|'", "'*'", + "'+'", "'?'", "'{'", "'}'", "','", "'('", "')'", "$accept", "regexp", + "expr", "term", "factor", "number", "primary", YY_NULLPTR }; #endif @@ -466,15 +468,15 @@ static const char *const yytname[] = (internal) symbol number NUM (which must be that of a token). */ static const yytype_uint16 yytoknum[] = { - 0, 256, 257, 258, 259, 124, 42, 43, 63, 123, - 125, 44, 40, 41 + 0, 256, 257, 258, 259, 260, 124, 42, 43, 63, + 123, 125, 44, 40, 41 }; # endif -#define YYPACT_NINF -8 +#define YYPACT_NINF -7 #define yypact_value_is_default(Yystate) \ - (!!((Yystate) == (-8))) + (!!((Yystate) == (-7))) #define YYTABLE_NINF -1 @@ -485,9 +487,9 @@ static const yytype_uint16 yytoknum[] = STATE-NUM. */ static const yytype_int8 yypact[] = { - -2, -8, -2, 3, 9, -8, -2, 12, -4, -8, - -2, -8, -8, -8, -8, 13, -8, -8, -8, -3, - -8, -8, 1, -8, 2, -8 + -2, -7, -2, 6, -5, -7, -2, 12, -4, -7, + -2, -7, -7, -7, -7, 4, -7, -7, -7, -3, + -7, -7, 1, -7, 2, -7 }; /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. @@ -503,7 +505,7 @@ static const yytype_uint8 yydefact[] = /* YYPGOTO[NTERM-NUM]. */ static const yytype_int8 yypgoto[] = { - -8, -8, 4, 7, -8, -7, -8 + -7, -7, 13, 8, -7, -6, -7 }; /* YYDEFGOTO[NTERM-NUM]. */ @@ -517,32 +519,32 @@ static const yytype_int8 yydefgoto[] = number is the opposite. If YYTABLE_NINF, syntax error. */ static const yytype_uint8 yytable[] = { - 20, 10, 1, 9, 18, 20, 8, 21, 22, 16, - 2, 23, 25, 11, 10, 24, 18, 17, 12, 13, - 14, 15 + 20, 10, 10, 1, 18, 20, 9, 18, 21, 22, + 16, 2, 23, 25, 11, 8, 24, 0, 17, 12, + 13, 14, 15 }; -static const yytype_uint8 yycheck[] = +static const yytype_int8 yycheck[] = { - 3, 5, 4, 0, 3, 3, 2, 10, 11, 13, - 12, 10, 10, 6, 5, 22, 3, 10, 6, 7, - 8, 9 + 3, 6, 6, 5, 3, 3, 0, 3, 11, 12, + 14, 13, 11, 11, 6, 2, 22, -1, 10, 7, + 8, 9, 10 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_uint8 yystos[] = { - 0, 4, 12, 15, 16, 17, 18, 20, 16, 0, - 5, 17, 6, 7, 8, 9, 13, 17, 3, 19, - 3, 10, 11, 10, 19, 10 + 0, 5, 13, 16, 17, 18, 19, 21, 17, 0, + 6, 18, 7, 8, 9, 10, 14, 18, 3, 20, + 3, 11, 12, 11, 20, 11 }; /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { - 0, 14, 15, 16, 16, 17, 17, 18, 18, 18, - 18, 18, 18, 18, 19, 19, 20, 20 + 0, 15, 16, 17, 17, 18, 18, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 21, 21 }; /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ @@ -1228,73 +1230,73 @@ yyreduce: switch (yyn) { case 2: -#line 38 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 39 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { regexp = (yyval.regexp); } -#line 1234 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1236 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 4: -#line 42 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 43 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_alt((yyvsp[-2].regexp), (yyvsp[0].regexp)); } -#line 1240 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1242 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 6: -#line 47 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 48 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_cat((yyvsp[-1].regexp), (yyvsp[0].regexp)); } -#line 1246 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1248 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 8: -#line 52 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 53 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, AST::MANY); } -#line 1252 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1254 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 9: -#line 53 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 54 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 1, AST::MANY); } -#line 1258 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1260 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 10: -#line 54 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 55 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, 1); } -#line 1264 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1266 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 11: -#line 55 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 56 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-3].regexp), (yyvsp[-1].number), (yyvsp[-1].number)); } -#line 1270 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1272 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 12: -#line 56 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 57 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-4].regexp), (yyvsp[-2].number), AST::MANY); } -#line 1276 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1278 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 13: -#line 57 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 58 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_iter((yyvsp[-5].regexp), (yyvsp[-3].number), (yyvsp[-1].number)); } -#line 1282 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1284 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 15: -#line 62 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 63 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.number) = (yyvsp[-1].number) * 10 + (yyvsp[0].number); } -#line 1288 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1290 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; case 17: -#line 67 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ +#line 68 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ { (yyval.regexp) = ast_cap((yyvsp[-1].regexp)); } -#line 1294 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1296 "libre2c_posix/parse.cc" /* yacc.c:1646 */ break; -#line 1298 "libre2c_posix/parse.cc" /* yacc.c:1646 */ +#line 1300 "libre2c_posix/parse.cc" /* yacc.c:1646 */ default: break; } /* User semantic actions sometimes alter yychar, and that requires @@ -1522,7 +1524,7 @@ yyreturn: #endif return yyresult; } -#line 70 "../libre2c_posix/parse.ypp" /* yacc.c:1906 */ +#line 71 "../libre2c_posix/parse.ypp" /* yacc.c:1906 */ extern "C" { diff --git a/re2c/bootstrap/libre2c_posix/parse.h b/re2c/bootstrap/libre2c_posix/parse.h index 50362db5..5045350a 100644 --- a/re2c/bootstrap/libre2c_posix/parse.h +++ b/re2c/bootstrap/libre2c_posix/parse.h @@ -46,7 +46,8 @@ extern int yydebug; enum yytokentype { DIGIT = 258, - REGEXP = 259 + ERROR = 259, + REGEXP = 260 }; #endif @@ -60,7 +61,7 @@ union YYSTYPE const re2c::AST * regexp; uint32_t number; -#line 64 "libre2c_posix/parse.h" /* yacc.c:1909 */ +#line 65 "libre2c_posix/parse.h" /* yacc.c:1909 */ }; typedef union YYSTYPE YYSTYPE; diff --git a/re2c/libre2c_posix/lex.re b/re2c/libre2c_posix/lex.re index 7c2ec2c0..337cb3da 100644 --- a/re2c/libre2c_posix/lex.re +++ b/re2c/libre2c_posix/lex.re @@ -4,43 +4,98 @@ #include "src/encoding/enc.h" #include "src/parse/ast.h" +#include "src/options/msg.h" #include "src/util/range.h" #include "parse.h" #include "libre2c_posix/lex.h" + extern YYSTYPE yylval; namespace re2c { +static int32_t lex_cls_chr(const char *&, uint32_t &); + +/*!re2c + re2c:yyfill:enable = 0; + re2c:define:YYCURSOR = cur; + re2c:define:YYMARKER = mar; + re2c:define:YYCTYPE = char; + + eof = "\x00"; + esc = "\\"; +*/ + int lex(const char *&cur) { - /*!re2c - re2c:yyfill:enable = 0; - re2c:define:YYCURSOR = cur; - re2c:define:YYCTYPE = char; - - * { printf("syntax error: %s\n", cur); exit(1); } - "\x00" { return 0; } - [{}()|*+?] { return cur[-1]; } - - "." { - yylval.regexp = ast_dot(0, 0); - return REGEXP; - } - - [0-9] { - yylval.number = static_cast(cur[-1] - '0'); - return DIGIT; - } - - [a-zA-Z] { - ASTChar c(static_cast(cur[-1]), 0); - std::vector *str = new std::vector; - str->push_back(c); - yylval.regexp = ast_str(0, 0, str, false); - return REGEXP; - } - */ + const char *mar; + std::vector cls; + bool neg = false; + uint32_t l, u; + +/*!re2c + * { goto err; } + + eof { return 0; } + + [{}()|*+?] { return cur[-1]; } + + "[^" { neg = true; goto cls; } + "[" { goto cls; } + + "." { + yylval.regexp = ast_dot(0, 0); + return REGEXP; + } + + [0-9] { + yylval.number = static_cast(cur[-1] - '0'); + return DIGIT; + } + + [a-zA-Z] { + ASTChar c(static_cast(cur[-1]), 0); + std::vector *str = new std::vector; + str->push_back(c); + yylval.regexp = ast_str(0, 0, str, false); + return REGEXP; + } +*/ + +cls: + if (lex_cls_chr(cur, l) != 0) goto err; +/*!re2c + "" { u = l; goto add; } + "-" / [^\]] { if (lex_cls_chr(cur, u) != 0) goto err; goto add; } +*/ +add: + if (l > u) goto err; + cls.push_back(ASTRange(l, u, 0)); +/*!re2c + "" { goto cls; } + "]" { + std::vector *p = new std::vector; + p->swap(cls); + yylval.regexp = ast_cls(0, 0, p, neg); + return REGEXP; + } +*/ + +err: + error("syntax error: %s\n", cur); + return ERROR; +} + +int32_t lex_cls_chr(const char *&cur, uint32_t &c) +{ +/*!re2c + * { return 1; } + "[." { error("collating characters not supported"); return 1; } + "[:" { error("character classes not supported"); return 1; } + "[=" { error("equivalence classes not supported"); return 1; } + + [^] \ eof { c = static_cast(cur[-1]); return 0; } +*/ } } // namespace re2c diff --git a/re2c/libre2c_posix/parse.ypp b/re2c/libre2c_posix/parse.ypp index 0d92e7b2..a679fa4f 100644 --- a/re2c/libre2c_posix/parse.ypp +++ b/re2c/libre2c_posix/parse.ypp @@ -28,6 +28,7 @@ void yyerror(const char *pattern, const char*); }; %token DIGIT +%token ERROR %token REGEXP %type REGEXP regexp expr term factor primary diff --git a/re2c/libre2c_posix/test.cpp b/re2c/libre2c_posix/test.cpp index 4e216acd..a249e249 100644 --- a/re2c/libre2c_posix/test.cpp +++ b/re2c/libre2c_posix/test.cpp @@ -92,6 +92,21 @@ int main() T3("(a)|(a)", "a", 0,1, 0,1, -1,-1); T3("(a)*(a)*", "a", 0,1, 0,1, -1,-1); + T1("[a]", "a", 0,1); + T0("[a]", "b"); + T0("[^a]", "a"); + T1("[^a]", "b", 0,1); + T1("[ac]*", "ac", 0,2); + T1("[a-c]*", "abc", 0,3); + T1("[]]", "]", 0,1); + T0("[^]]", "]"); + T1("[^]]", "a", 0,1); + T1("[-]", "-", 0,1); + T1("[]-]*", "]-", 0,2); + T1("[-a]*", "-a", 0,2); + T1("[a-]*", "-a", 0,2); + T1("[-a-]*", "-a", 0,2); + // categorize T4("(a*)(ab)*(b*)", "abc", 0,2, 0,1, -1,-1, 1,2); T7("((a*)(ab)*)((b*)(a*))", "aba", 0,3, 0,2, 0,0, 0,2, 2,3, 2,2, 2,3);