From 51d59d304bd18ce8519cb2e96101110ddc351e2d Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 12 Jan 2019 12:50:47 +0000 Subject: [PATCH] Experimental: initial implementation of libre2c_posix, POSIX regexp library based on re2c. --- re2c/Makefile.am | 6 +- re2c/Makefile.libre2c_posix.am | 172 +++ re2c/bootstrap/libre2c_posix/lex.cc | 85 ++ re2c/bootstrap/libre2c_posix/parse.cc | 1550 +++++++++++++++++++++++++ re2c/bootstrap/libre2c_posix/parse.h | 76 ++ re2c/configure.ac | 1 + re2c/libre2c_posix/lex.h | 15 + re2c/libre2c_posix/lex.re | 41 + re2c/libre2c_posix/parse.ypp | 94 ++ re2c/libre2c_posix/regcomp.cc | 56 + re2c/libre2c_posix/regex.h | 36 + re2c/libre2c_posix/regexec.cc | 96 ++ re2c/libre2c_posix/stubs.cc | 2 + re2c/libre2c_posix/test.cpp | 68 ++ 14 files changed, 2296 insertions(+), 2 deletions(-) create mode 100644 re2c/Makefile.libre2c_posix.am create mode 100644 re2c/bootstrap/libre2c_posix/lex.cc create mode 100644 re2c/bootstrap/libre2c_posix/parse.cc create mode 100644 re2c/bootstrap/libre2c_posix/parse.h create mode 100644 re2c/libre2c_posix/lex.h create mode 100644 re2c/libre2c_posix/lex.re create mode 100644 re2c/libre2c_posix/parse.ypp create mode 100644 re2c/libre2c_posix/regcomp.cc create mode 100644 re2c/libre2c_posix/regex.h create mode 100644 re2c/libre2c_posix/regexec.cc create mode 100644 re2c/libre2c_posix/stubs.cc create mode 100644 re2c/libre2c_posix/test.cpp diff --git a/re2c/Makefile.am b/re2c/Makefile.am index c3121dcc..092af731 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -261,7 +261,9 @@ CLEANFILES = \ cp $(top_srcdir)/bootstrap/$(@:cc={cc,h}) $(@D); \ else \ $(BISON) --output=$@ --defines=$(@:cc=h) $< && \ - if test -x $(RE2C); then cp $@ $(@:cc=h) $(top_srcdir)/bootstrap/$(@D); fi; \ + if test -x $(RE2C); then \ + cp $@ $(@:cc=h) $(top_srcdir)/bootstrap/$(@D); \ + fi; \ fi # lexer depends on bison-generated header @@ -338,4 +340,4 @@ TESTS = \ $(re2c_TESTSUITE) \ $(check_PROGRAMS) -#include 
$(top_srcdir)/Makefile.libre2cposix.am +include $(top_srcdir)/Makefile.libre2c_posix.am diff --git a/re2c/Makefile.libre2c_posix.am b/re2c/Makefile.libre2c_posix.am new file mode 100644 index 00000000..34036407 --- /dev/null +++ b/re2c/Makefile.libre2c_posix.am @@ -0,0 +1,172 @@ +# this file is incomplete and should be included from the main Makefile.am + +noinst_LIBRARIES = libre2c_posix.a + +libre2c_posix_a_CXXFLAGS = $(AM_CXXFLAGS) -fPIC + +libre2c_posix_a_HDR = \ + libre2c_posix/lex.h \ + libre2c_posix/regex.h \ + src/codegen/bitmap.h \ + src/codegen/emit.h \ + src/codegen/go.h \ + src/codegen/input_api.h \ + src/codegen/label.h \ + src/codegen/output.h \ + src/codegen/print.h \ + src/options/msg.h \ + src/options/opt.h \ + src/options/ver_to_vernum.h \ + src/options/warn.h \ + src/adfa/action.h \ + src/adfa/adfa.h \ + src/cfg/cfg.h \ + src/dfa/determinization.h \ + src/dfa/dfa.h \ + src/dfa/tag_history.h \ + src/dfa/tagver_table.h \ + src/dfa/tcmd.h \ + src/nfa/nfa.h \ + src/encoding/case.h \ + src/encoding/ebcdic/ebcdic.h \ + src/encoding/ebcdic/ebcdic_regexp.h \ + src/encoding/enc.h \ + src/encoding/range_suffix.h \ + src/encoding/utf8/utf8.h \ + src/encoding/utf8/utf8_regexp.h \ + src/encoding/utf16/utf16_regexp.h \ + src/encoding/utf16/utf16.h \ + src/regexp/empty_class_policy.h \ + src/regexp/re.h \ + src/regexp/rule.h \ + src/regexp/tag.h \ + src/compile.h \ + src/skeleton/path.h \ + src/skeleton/skeleton.h \ + src/parse/ast.h \ + src/parse/input.h \ + src/parse/parser.h \ + src/parse/scanner.h \ + src/parse/unescape.h \ + src/debug/debug.h \ + src/util/allocate.h \ + src/util/attribute.h \ + src/util/c99_stdint.h \ + src/util/counter.h \ + src/util/forbid_copy.h \ + src/util/free_list.h \ + src/util/get_dir.h \ + src/util/hash32.h \ + src/util/local_increment.h \ + src/util/lookup.h \ + src/util/range.h \ + src/util/s_to_n32_unsafe.h \ + src/util/slab_allocator.h \ + src/util/smart_ptr.h \ + src/util/static_assert.h \ + src/util/string_utils.h \ + 
src/util/u32lim.h \ + src/util/uniq_vector.h \ + src/util/wrap_iter.h + +libre2c_posix_a_SRC = \ + libre2c_posix/regcomp.cc \ + libre2c_posix/regexec.cc \ + libre2c_posix/stubs.cc \ + src/parse/ast.cc \ + src/options/msg.cc \ + src/options/opt.cc \ + src/options/warn.cc \ + src/cfg/cfg.cc \ + src/cfg/compact.cc \ + src/cfg/dce.cc \ + src/cfg/freeze.cc \ + src/cfg/interfere.cc \ + src/cfg/liveanal.cc \ + src/cfg/normalize.cc \ + src/cfg/optimize.cc \ + src/cfg/rename.cc \ + src/cfg/varalloc.cc \ + src/dfa/closure.cc \ + src/dfa/closure_leftmost.cc \ + src/dfa/closure_posix.cc \ + src/debug/dump_adfa.cc \ + src/debug/dump_cfg.cc \ + src/debug/dump_dfa.cc \ + src/debug/dump_interf.cc \ + src/debug/dump_nfa.cc \ + src/dfa/dead_rules.cc \ + src/dfa/determinization.cc \ + src/dfa/fallback_tags.cc \ + src/dfa/fillpoints.cc \ + src/dfa/find_state.cc \ + src/dfa/minimization.cc \ + src/dfa/posix_precedence.cc \ + src/dfa/tag_history.cc \ + src/dfa/tagver_table.cc \ + src/dfa/tcmd.cc \ + src/nfa/estimate_size.cc \ + src/nfa/re_to_nfa.cc \ + src/encoding/enc.cc \ + src/encoding/range_suffix.cc \ + src/encoding/ebcdic/ebcdic_regexp.cc \ + src/encoding/utf16/utf16.cc \ + src/encoding/utf16/utf16_regexp.cc \ + src/encoding/utf8/utf8.cc \ + src/encoding/utf8/utf8_regexp.cc \ + src/regexp/ast_to_re.cc \ + src/regexp/default_tags.cc \ + src/regexp/fixed_tags.cc \ + src/regexp/nullable.cc \ + src/regexp/rule.cc \ + src/regexp/split_charset.cc \ + src/regexp/tag.cc \ + src/skeleton/control_flow.cc \ + src/skeleton/maxpath.cc \ + src/skeleton/skeleton.cc \ + src/util/range.cc \ + src/util/s_to_n32_unsafe.cc + +libre2c_posix_a_SOURCES = \ + $(libre2c_posix_a_HDR) \ + $(libre2c_posix_a_SRC) + +libre2c_posix_a_CUSTOM = \ + libre2c_posix/lex.re \ + libre2c_posix/parse.ypp \ + src/options/ver_to_vernum.re + +libre2c_posix_a_GEN_SRC = \ + libre2c_posix/lex.cc \ + libre2c_posix/parse.cc \ + src/options/ver_to_vernum.cc +libre2c_posix_a_GEN_HDR = \ + libre2c_posix/parse.h +libre2c_posix_a_GEN 
= \ + $(libre2c_posix_a_GEN_SRC) \ + $(libre2c_posix_a_GEN_HDR) + +libre2c_posix_a_BOOTSTRAP = \ + bootstrap/libre2c_posix/parse.cc \ + bootstrap/libre2c_posix/parse.h \ + bootstrap/libre2c_posix/lex.cc \ + bootstrap/src/options/ver_to_vernum.cc + +nodist_libre2c_posix_a_SOURCES = $(libre2c_posix_a_GEN) + +EXTRA_DIST += \ + $(libre2c_posix_a_BOOTSTRAP) \ + $(libre2c_posix_a_CUSTOM) + +CLEANFILES += $(libre2c_posix_a_GEN) + +# lexer depends on bison-generated header +libre2c_posix/lex.cc: libre2c_posix/parse.cc + +# generate all sources before compiling +all: $(libre2c_posix_a_GEN_SRC) + +libre2c_posix_a_test_SOURCES = libre2c_posix/test.cpp +libre2c_posix_a_test_LDADD = libre2c_posix.a +libre2c_posix_a_test_CXXFLAGS = $(libre2c_posix_a_CXXFLAGS) +check_PROGRAMS += libre2c_posix_a_test diff --git a/re2c/bootstrap/libre2c_posix/lex.cc b/re2c/bootstrap/libre2c_posix/lex.cc new file mode 100644 index 00000000..1e1fdff2 --- /dev/null +++ b/re2c/bootstrap/libre2c_posix/lex.cc @@ -0,0 +1,85 @@ +/* Generated by re2c 1.1.1 on Sat Jan 12 12:49:21 2019 */ +#line 1 "../libre2c_posix/lex.re" +#include + +#include "src/util/c99_stdint.h" + +#include "src/encoding/enc.h" +#include "src/parse/ast.h" +#include "src/util/range.h" +#include "parse.h" +#include "libre2c_posix/lex.h" + +extern YYSTYPE yylval; + +namespace re2c { + +int lex(const char *&cur) +{ + +#line 21 "libre2c_posix/lex.cc" +{ + char yych; + yych = *cur; + if (yych <= '>') { + if (yych <= '+') { + if (yych <= 0x00) goto yy2; + if (yych <= '\'') goto yy4; + goto yy6; + } else { + if (yych <= '/') goto yy4; + if (yych <= '9') goto yy8; + goto yy4; + } + } else { + if (yych <= 'Z') { + if (yych <= '?') goto yy6; + if (yych <= '@') goto yy4; + goto yy10; + } else { + if (yych <= '`') goto yy4; + if (yych <= 'z') goto yy10; + if (yych <= '}') goto yy6; + goto yy4; + } + } +yy2: + ++cur; +#line 23 "../libre2c_posix/lex.re" + { return 0; } +#line 51 "libre2c_posix/lex.cc" +yy4: + ++cur; +#line 22 "../libre2c_posix/lex.re" 
+ { printf("syntax error: %s\n", cur); exit(1); } +#line 56 "libre2c_posix/lex.cc" +yy6: + ++cur; +#line 24 "../libre2c_posix/lex.re" + { return cur[-1]; } +#line 61 "libre2c_posix/lex.cc" +yy8: + ++cur; +#line 26 "../libre2c_posix/lex.re" + { + yylval.number = static_cast(cur[-1] - '0'); + return DIGIT; + } +#line 69 "libre2c_posix/lex.cc" +yy10: + ++cur; +#line 31 "../libre2c_posix/lex.re" + { + ASTChar c(static_cast(cur[-1]), 0); + std::vector *str = new std::vector; + str->push_back(c); + yylval.regexp = ast_str(0, 0, str, false); + return REGEXP; + } +#line 80 "libre2c_posix/lex.cc" +} +#line 38 "../libre2c_posix/lex.re" + +} + +} // namespace re2c diff --git a/re2c/bootstrap/libre2c_posix/parse.cc b/re2c/bootstrap/libre2c_posix/parse.cc new file mode 100644 index 00000000..7082434e --- /dev/null +++ b/re2c/bootstrap/libre2c_posix/parse.cc @@ -0,0 +1,1550 @@ +/* A Bison parser, made by GNU Bison 3.0.4. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. 
Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "3.0.4" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 0 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + + + +/* Copy the first part of user declarations. */ +#line 1 "../libre2c_posix/parse.ypp" /* yacc.c:339 */ + + +#include + +#include "src/util/c99_stdint.h" +#include "src/parse/ast.h" +#include "libre2c_posix/lex.h" + +using namespace re2c; + +extern "C" { + +int yylex(const char *&pattern); +void yyerror(const char *pattern, const char*); + +} // extern "C" + + +#line 85 "libre2c_posix/parse.cc" /* yacc.c:339 */ + +# ifndef YY_NULLPTR +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* In a future release of Bison, this section will be replaced + by #include "parse.h". 
*/ +#ifndef YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED +# define YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + DIGIT = 258, + REGEXP = 259 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 25 "../libre2c_posix/parse.ypp" /* yacc.c:355 */ + + const re2c::AST * regexp; + uint32_t number; + +#line 135 "libre2c_posix/parse.cc" /* yacc.c:355 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (const char *&pattern); + +#endif /* !YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED */ + +/* Copy the second part of user declarations. */ + +#line 152 "libre2c_posix/parse.cc" /* yacc.c:358 */ + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! 
defined YYSIZE_T +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + +#ifndef YY_ATTRIBUTE +# if (defined __GNUC__ \ + && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \ + || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C +# define YY_ATTRIBUTE(Spec) __attribute__(Spec) +# else +# define YY_ATTRIBUTE(Spec) /* empty */ +# endif +#endif + +#ifndef YY_ATTRIBUTE_PURE +# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__)) +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__)) +#endif + +#if !defined _Noreturn \ + && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112) +# if defined _MSC_VER && 1200 <= _MSC_VER +# define _Noreturn __declspec (noreturn) +# else +# define _Noreturn YY_ATTRIBUTE ((__noreturn__)) +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YYUSE(E) ((void) (E)) +#else +# define YYUSE(E) /* empty */ +#endif + +#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ +/* Suppress an incorrect diagnostic about yylval being uninitialized. 
*/ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. 
*/ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. 
*/ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 9 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 21 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 14 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 7 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 17 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 26 + +/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned + by yylex, with out-of-bounds checking. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 259 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, without out-of-bounds checking. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 13, 6, 7, 11, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 8, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 9, 5, 10, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4 +}; + +#if YYDEBUG + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_uint8 yyrline[] = +{ + 0, 38, 38, 41, 42, 46, 47, 51, 52, 53, + 54, 55, 56, 57, 61, 62, 66, 67 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || 0 +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "DIGIT", "REGEXP", "'|'", "'*'", "'+'", + "'?'", "'{'", "'}'", "','", "'('", "')'", "$accept", "regexp", "expr", + "term", "factor", "number", "primary", YY_NULLPTR +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). 
*/ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 124, 42, 43, 63, 123, + 125, 44, 40, 41 +}; +# endif + +#define YYPACT_NINF -8 + +#define yypact_value_is_default(Yystate) \ + (!!((Yystate) == (-8))) + +#define YYTABLE_NINF -1 + +#define yytable_value_is_error(Yytable_value) \ + 0 + + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int8 yypact[] = +{ + -2, -8, -2, 3, 9, -8, -2, 12, -4, -8, + -2, -8, -8, -8, -8, 13, -8, -8, -8, -3, + -8, -8, 1, -8, 2, -8 +}; + + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ +static const yytype_uint8 yydefact[] = +{ + 0, 16, 0, 0, 2, 3, 5, 7, 0, 1, + 0, 6, 8, 9, 10, 0, 17, 4, 14, 0, + 15, 11, 0, 12, 0, 13 +}; + + /* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -8, -8, 4, 7, -8, -7, -8 +}; + + /* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 3, 4, 5, 6, 19, 7 +}; + + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ +static const yytype_uint8 yytable[] = +{ + 20, 10, 1, 9, 18, 20, 8, 21, 22, 16, + 2, 23, 25, 11, 10, 24, 18, 17, 12, 13, + 14, 15 +}; + +static const yytype_uint8 yycheck[] = +{ + 3, 5, 4, 0, 3, 3, 2, 10, 11, 13, + 12, 10, 10, 6, 5, 22, 3, 10, 6, 7, + 8, 9 +}; + + /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_uint8 yystos[] = +{ + 0, 4, 12, 15, 16, 17, 18, 20, 16, 0, + 5, 17, 6, 7, 8, 9, 13, 17, 3, 19, + 3, 10, 11, 10, 19, 10 +}; + + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ +static const yytype_uint8 yyr1[] = +{ + 0, 14, 15, 16, 16, 17, 17, 18, 18, 18, + 18, 18, 18, 18, 19, 19, 20, 20 +}; + + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 1, 1, 3, 1, 2, 1, 2, 2, + 2, 4, 5, 6, 1, 2, 1, 3 +}; + + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (pattern, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (0) + +/* Error token number */ +#define YYTERROR 1 +#define YYERRCODE 256 + + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +/* This macro is provided for backward compatibility. */ +#ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +#endif + + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value, pattern); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*----------------------------------------. +| Print this symbol's value on YYOUTPUT. 
| +`----------------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, const char *&pattern) +{ + FILE *yyo = yyoutput; + YYUSE (yyo); + YYUSE (pattern); + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + YYUSE (yytype); +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, const char *&pattern) +{ + YYFPRINTF (yyoutput, "%s %s (", + yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep, pattern); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +static void +yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule, const char *&pattern) +{ + unsigned long int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. 
*/ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + yystos[yyssp[yyi + 1 - yynrhs]], + &(yyvsp[(yyi + 1) - (yynrhs)]) + , pattern); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, pattern); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +static YYSIZE_T +yystrlen (const char *yystr) +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. 
*/ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return 2 if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + yytype_int16 *yyssp, int yytoken) +{ + YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); + YYSIZE_T yysize = yysize0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat. 
*/ + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + /* Number of reported tokens (one for the "unexpected", one per + "expected"). */ + int yycount = 0; + + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. + */ + if (yytoken != YYEMPTY) + { + int yyn = yypact[*yyssp]; + yyarg[yycount++] = yytname[yytoken]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. 
*/ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + break; + } + yyarg[yycount++] = yytname[yyx]; + { + YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + } + } + } + + switch (yycount) + { +# define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +# undef YYCASE_ + } + + { + YYSIZE_T yysize1 = yysize + yystrlen (yyformat); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 1; + } + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; +} +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, const char *&pattern) +{ + YYUSE (yyvaluep); + YYUSE (pattern); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YYUSE (yytype); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; +/* Number of syntax errors so far. */ +int yynerrs; + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (const char *&pattern) +{ + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + 'yyss': related to states. + 'yyvs': related to semantic values. + + Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. 
*/ + int yylen = 0; + + yyssp = yyss = yyssa; + yyvsp = yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! 
yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = yylex (pattern); + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. 
*/ + yychar = YYEMPTY; + + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: +#line 38 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { regexp = (yyval.regexp); } +#line 1234 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 4: +#line 42 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_alt((yyvsp[-2].regexp), (yyvsp[0].regexp)); } +#line 1240 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 6: +#line 47 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_cat((yyvsp[-1].regexp), (yyvsp[0].regexp)); } +#line 1246 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 8: +#line 52 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, AST::MANY); } +#line 1252 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 9: +#line 53 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 1, AST::MANY); } +#line 1258 
"libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 10: +#line 54 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, 1); } +#line 1264 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 11: +#line 55 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-3].regexp), (yyvsp[-1].number), (yyvsp[-1].number)); } +#line 1270 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 12: +#line 56 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-4].regexp), (yyvsp[-2].number), AST::MANY); } +#line 1276 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 13: +#line 57 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_iter((yyvsp[-5].regexp), (yyvsp[-3].number), (yyvsp[-1].number)); } +#line 1282 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 15: +#line 62 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.number) = (yyvsp[-1].number) * 10 + (yyvsp[0].number); } +#line 1288 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + case 17: +#line 67 "../libre2c_posix/parse.ypp" /* yacc.c:1646 */ + { (yyval.regexp) = ast_cap((yyvsp[-1].regexp)); } +#line 1294 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + break; + + +#line 1298 "libre2c_posix/parse.cc" /* yacc.c:1646 */ + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. 
In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); + + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! 
YYERROR_VERBOSE + yyerror (pattern, YY_("syntax error")); +#else +# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ + yyssp, yytoken) + { + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = YYSYNTAX_ERROR; + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == 1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (!yymsg) + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = 2; + } + else + { + yysyntax_error_status = YYSYNTAX_ERROR; + yymsgp = yymsg; + } + } + yyerror (pattern, yymsgp); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + } +# undef YYSYNTAX_ERROR +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, pattern); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. 
| +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp, pattern); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined yyoverflow || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (pattern, YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, pattern); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. 
*/ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp, pattern); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + return yyresult; +} +#line 70 "../libre2c_posix/parse.ypp" /* yacc.c:1906 */ + + +extern "C" { + +void yyerror(const char *pattern, const char *msg) +{ + fprintf(stderr, "%s (on RE %s)", msg, pattern); +} + +int yylex(const char *&pattern) +{ + return lex(pattern); +} + +} // extern "C" + +namespace re2c { + +const AST *parse(const char *pattern) +{ + yyparse(pattern); + return regexp; +} + +} // namespace re2c diff --git a/re2c/bootstrap/libre2c_posix/parse.h b/re2c/bootstrap/libre2c_posix/parse.h new file mode 100644 index 00000000..50362db5 --- /dev/null +++ b/re2c/bootstrap/libre2c_posix/parse.h @@ -0,0 +1,76 @@ +/* A Bison parser, made by GNU Bison 3.0.4. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +#ifndef YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED +# define YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + DIGIT = 258, + REGEXP = 259 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 25 "../libre2c_posix/parse.ypp" /* yacc.c:1909 */ + + const re2c::AST * regexp; + uint32_t number; + +#line 64 "libre2c_posix/parse.h" /* yacc.c:1909 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (const char *&pattern); + +#endif /* !YY_YY_LIBRE2C_POSIX_PARSE_H_INCLUDED */ diff --git a/re2c/configure.ac b/re2c/configure.ac index 50b81cf7..3466e21a 100644 --- a/re2c/configure.ac +++ b/re2c/configure.ac @@ -32,6 +32,7 @@ AM_COND_IF([REBUILD_DOCS], [ # checks for programs +AC_PROG_RANLIB AC_PATH_PROG(BISON, bison, no) AC_PROG_CC # used in skeleton tests AC_PROG_CXX diff --git a/re2c/libre2c_posix/lex.h b/re2c/libre2c_posix/lex.h new file mode 100644 index 00000000..1e47656f --- /dev/null +++ b/re2c/libre2c_posix/lex.h @@ -0,0 +1,15 @@ +#ifndef _RE2C_LIB_LEX_ +#define _RE2C_LIB_LEX_ + +#include "src/regexp/re.h" +#include "src/parse/ast.h" + +namespace re2c { + +int lex(const char *&pattern); +const AST *parse(const char *pattern); +extern const AST *regexp; + +} // namespace re2c + +#endif // _RE2C_LIB_LEX_ diff --git a/re2c/libre2c_posix/lex.re b/re2c/libre2c_posix/lex.re new file mode 100644 index 00000000..a160ebd7 --- /dev/null +++ b/re2c/libre2c_posix/lex.re @@ -0,0 +1,41 @@ +#include + +#include "src/util/c99_stdint.h" + +#include "src/encoding/enc.h" +#include "src/parse/ast.h" +#include "src/util/range.h" +#include "parse.h" +#include "libre2c_posix/lex.h" + +extern YYSTYPE yylval; + +namespace re2c { + +int lex(const char *&cur) +{ + /*!re2c + re2c:yyfill:enable = 0; + re2c:define:YYCURSOR = cur; + re2c:define:YYCTYPE = char; + + * { printf("syntax error: %s\n", cur); exit(1); } + "\x00" { return 0; } + [{}()|*+?] 
{ return cur[-1]; }
+
+    [0-9] {
+        yylval.number = static_cast<uint32_t>(cur[-1] - '0'); // value of one decimal digit
+        return DIGIT;
+    }
+
+    [a-zA-Z] {
+        ASTChar c(static_cast<uint32_t>(cur[-1]), 0); // the letter that was just consumed
+        std::vector<ASTChar> *str = new std::vector<ASTChar>;
+        str->push_back(c);
+        yylval.regexp = ast_str(0, 0, str, false); // one-character string node
+        return REGEXP;
+    }
+    */
+}
+
+} // namespace re2c
diff --git a/re2c/libre2c_posix/parse.ypp b/re2c/libre2c_posix/parse.ypp
new file mode 100644
index 00000000..0d92e7b2
--- /dev/null
+++ b/re2c/libre2c_posix/parse.ypp
@@ -0,0 +1,94 @@
+%{
+
+#include <stdio.h>
+
+#include "src/util/c99_stdint.h"
+#include "src/parse/ast.h"
+#include "libre2c_posix/lex.h"
+
+using namespace re2c;
+
+extern "C" {
+
+int yylex(const char *&pattern);
+void yyerror(const char *pattern, const char*);
+
+} // extern "C"
+
+%}
+
+%lex-param {const char *&pattern}
+%parse-param {const char *&pattern}
+
+%start regexp
+
+%union {
+    const re2c::AST * regexp;
+    uint32_t number;
+};
+
+%token DIGIT
+%token REGEXP
+
+%type <regexp> REGEXP regexp expr term factor primary
+%type <number> DIGIT number
+
+%%
+
+regexp: expr { regexp = $1; }; // publish the finished AST through the global 'regexp'
+
+expr
+: term
+| expr '|' term { $$ = ast_alt($1, $3); }
+;
+
+term
+: factor
+| factor term { $$ = ast_cat($1, $2); } // in POSIX concatenation is right-associative
+;
+
+factor
+: primary
+| primary '*' { $$ = ast_iter($1, 0, AST::MANY); }
+| primary '+' { $$ = ast_iter($1, 1, AST::MANY); }
+| primary '?' 
{ $$ = ast_iter($1, 0, 1); }
+| primary '{' number '}' { $$ = ast_iter($1, $3, $3); }
+| primary '{' number ',' '}' { $$ = ast_iter($1, $3, AST::MANY); }
+| primary '{' number ',' number '}' { $$ = ast_iter($1, $3, $5); }
+;
+
+number
+: DIGIT
+| number DIGIT { $$ = $1 * 10 + $2; } // append one decimal digit
+;
+
+primary
+: REGEXP
+| '(' expr ')' { $$ = ast_cap($2); }
+;
+
+%%
+
+extern "C" {
+
+void yyerror(const char *pattern, const char *msg) // prints MSG and PATTERN to stderr
+{
+    fprintf(stderr, "%s (on RE %s)", msg, pattern);
+}
+
+int yylex(const char *&pattern) // adapter: the generated parser calls yylex(), the lexer is lex()
+{
+    return lex(pattern);
+}
+
+} // extern "C"
+
+namespace re2c {
+
+const AST *parse(const char *pattern) // returns the AST, or NULL if yyparse() rejects PATTERN
+{
+    const int status = yyparse(pattern);
+    return status == 0 ? regexp : NULL; // a failed parse may still have run the start rule's action
+}
+
+} // namespace re2c
diff --git a/re2c/libre2c_posix/regcomp.cc b/re2c/libre2c_posix/regcomp.cc
new file mode 100644
index 00000000..0a2e994f
--- /dev/null
+++ b/re2c/libre2c_posix/regcomp.cc
@@ -0,0 +1,78 @@
+#include <stddef.h>
+
+#include "libre2c_posix/lex.h"
+#include "libre2c_posix/regex.h"
+#include "src/options/opt.h"
+#include "src/options/warn.h"
+#include "src/nfa/nfa.h"
+#include "src/dfa/dfa.h"
+
+
+namespace re2c {
+
+// Result of the last parse; assigned by the start rule's action in parse.ypp.
+const AST *regexp;
+
+} // namespace re2c
+
+using namespace re2c;
+
+// regex.h defines no dedicated error code besides REG_NOMATCH; callers only
+// need a non-zero value to detect a failed compilation.
+static const int REGCOMP_FAILURE = 1;
+
+// Compile PATTERN into *PREG: parse it into an AST, fill the byte-to-class
+// table, then build the NFA and the DFA that are stored in *PREG.
+// Returns 0 on success and non-zero if the pattern cannot be parsed.
+// NOTE(review): nothing visible here releases the NFA, DFA or the options
+// snapshot -- presumably regfree() is meant to; confirm.
+int regcomp(regex_t *preg, const char *pattern, int /* cflags */)
+{
+    conopt_t globopts;
+    globopts.FFlag = true;
+    Opt opts(globopts);
+    opts.set_posix_captures(true);
+    const opt_t *opt = opts.snapshot();
+
+    Warn warn;
+
+    // Clear the global first so that a failed parse can never hand back the
+    // AST left over from an earlier regcomp() call.
+    regexp = NULL;
+    const AST *a = parse(pattern);
+    if (a == NULL) {
+        preg->nfa = NULL;
+        preg->dfa = NULL;
+        return REGCOMP_FAILURE;
+    }
+
+    RangeMgr rangemgr;
+
+    ASTRule ar(a, new Code ("", 0));
+    std::vector<ASTRule> arv;
+    arv.push_back(ar);
+    RESpec re(arv, opt, warn, rangemgr);
+
+    split_charset(re);
+    // Every byte below boundary charset[i] that has no class yet gets class
+    // i - 1. Never step outside the fixed-size lookup table.
+    const size_t nchars = sizeof(preg->char2class) / sizeof(preg->char2class[0]);
+    for (uint32_t i = 1, j = 0; i < re.charset.size(); ++i) {
+        for (; j < re.charset[i] && j < nchars; ++j) {
+            preg->char2class[j] = i - 1;
+        }
+    }
+
+    find_fixed_tags(re);
+
+    insert_default_tags(re);
+
+    nfa_t *nfa = new nfa_t(re);
+
+    dfa_t *dfa = new dfa_t(*nfa, opt, "", warn);
+
+    preg->nfa = nfa;
+    preg->dfa = dfa;
+
+    return 0;
+}
diff --git a/re2c/libre2c_posix/regex.h b/re2c/libre2c_posix/regex.h
new file mode 100644
index 00000000..31242d1a
--- /dev/null
+++ b/re2c/libre2c_posix/regex.h
@@ -0,0 +1,40 @@
+#ifndef _RE2C_LIB_REGEX_
+#define _RE2C_LIB_REGEX_
+
+#include <limits.h>    // INT_MAX
+#include <sys/types.h> // size_t, ssize_t
+
+// fwd
+namespace re2c {
+struct nfa_t;
+struct dfa_t;
+}
+
+// Offset into the subject string; regexec() stores -1 for an unset boundary.
+typedef ssize_t regoff_t;
+
+// Compiled regular expression, filled in by regcomp().
+struct regex_t {
+    const re2c::nfa_t *nfa;
+    const re2c::dfa_t *dfa;
+    size_t char2class[256]; // input byte -> character class index
+};
+
+// One submatch: start offset and end offset.
+struct regmatch_t {
+    regoff_t rm_so;
+    regoff_t rm_eo;
+};
+
+// Returned by regexec() when the string does not match.
+static const int REG_NOMATCH = INT_MAX;
+
+int regcomp(regex_t *preg, const char *pattern, int cflags);
+
+size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
+
+int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+
+void regfree(regex_t *preg);
+
+#endif // _RE2C_LIB_REGEX_
diff --git a/re2c/libre2c_posix/regexec.cc b/re2c/libre2c_posix/regexec.cc
new file mode 100644
index 00000000..c5ca1cbc
--- /dev/null
+++ b/re2c/libre2c_posix/regexec.cc
@@ -0,0 +1,113 @@
+#include <stddef.h>
+#include <algorithm>
+
+#include "libre2c_posix/lex.h"
+#include "libre2c_posix/regex.h"
+#include "src/options/opt.h"
+#include "src/options/warn.h"
+#include "src/nfa/nfa.h"
+#include "src/debug/debug.h"
+#include "src/dfa/dfa.h"
+
+
+using namespace re2c;
+
+// Run the tag command list CMD on the register file REGS: a copy command
+// moves one register into another; a set command stores -1 when its history
+// entry is TAGVER_BOTTOM and the input offset POS otherwise.
+static void apply_regops(regoff_t *regs, const tcmd_t *cmd, regoff_t pos)
+{
+    for (const tcmd_t *p = cmd; p; p = p->next) {
+        if (tcmd_t::iscopy(p)) {
+            regs[p->lhs] = regs[p->rhs];
+        }
+        else if (tcmd_t::isset(p)) {
+            regs[p->lhs] = *p->history == TAGVER_BOTTOM ? -1 : pos;
+        }
+        else {
+            DASSERT(false);
+        }
+    }
+}
+
+// Match STRING against the compiled expression PREG by running its DFA from
+// the first character. Returns 0 on a match and REG_NOMATCH otherwise. On a
+// match pmatch[0] is set to [0, match length) and the following entries get
+// the capture offsets; at most NMATCH entries of PMATCH are written.
+int regexec(const regex_t *preg, const char *string, size_t nmatch,
+    regmatch_t pmatch[], int /* eflags */)
+{
+    const dfa_t *dfa = preg->dfa;
+    int result = REG_NOMATCH;
+    regoff_t *regs = new regoff_t[dfa->maxtagver + 1];
+    size_t i = 0;
+    const char *p = string, *q = string;
+    const dfa_state_t *s, *x = NULL;
+
+    apply_regops(regs, dfa->tcmd0, 0);
+
+    for (;;) {
+        s = dfa->states[i];
+        // Go through unsigned char: plain char may be signed, and a negative
+        // value would index char2class out of bounds.
+        const int32_t c = static_cast<unsigned char>(*p++);
+        const size_t j = preg->char2class[c];
+        i = s->arcs[j];
+
+        // Remember the most recent accepting state to fall back to.
+        if (s->rule != Rule::NONE) {
+            q = p;
+            x = s;
+        }
+
+        if (i == dfa_t::NIL || c == 0) break;
+
+        apply_regops(regs, s->tcmd[j], p - string - 1);
+    }
+
+    if (s->rule == Rule::NONE && x != NULL) {
+        s = x;
+        p = q;
+    }
+
+    if (s->rule != Rule::NONE) {
+        result = 0;
+        // p was advanced past the last character read, hence the -1.
+        const regoff_t mlen = p - string - 1;
+
+        apply_regops(regs, s->tcmd[dfa->nchars], mlen);
+
+        const Rule &rule = dfa->rules[0];
+        const size_t last = std::min(nmatch * 2, rule.htag);
+
+        if (nmatch > 0) {
+            pmatch[0].rm_so = 0;
+            pmatch[0].rm_eo = mlen;
+        }
+
+        for (size_t t = rule.ltag; t < last; ++t) {
+            const Tag &tag = dfa->tags[t];
+            if (fictive(tag)) continue;
+
+            // Never write past the NMATCH entries the caller provided.
+            const size_t group = tag.ncap / 2 + 1;
+            if (group >= nmatch) continue;
+
+            regoff_t off;
+            if (!fixed(tag)) {
+                off = regs[dfa->finvers[t]];
+            }
+            else {
+                off = -static_cast<regoff_t>(tag.dist);
+                off += tag.base == Tag::RIGHTMOST
+                    ? mlen : regs[dfa->finvers[tag.base]];
+            }
+
+            regmatch_t *rm = &pmatch[group];
+            *((tag.ncap % 2 == 0) ? &rm->rm_so : &rm->rm_eo) = off;
+        }
+    }
+
+    delete[] regs;
+    return result;
+}
diff --git a/re2c/libre2c_posix/stubs.cc b/re2c/libre2c_posix/stubs.cc
new file mode 100644
index 00000000..0dae4b9e
--- /dev/null
+++ b/re2c/libre2c_posix/stubs.cc
@@ -0,0 +1,2 @@
+extern const char *help;
+const char *help = "";
diff --git a/re2c/libre2c_posix/test.cpp b/re2c/libre2c_posix/test.cpp
new file mode 100644
index 00000000..c1463c53
--- /dev/null
+++ b/re2c/libre2c_posix/test.cpp
@@ -0,0 +1,76 @@
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "src/util/c99_stdint.h"
+#include "regex.h"
+
+
+// Compile PATTERN, match it against STRING and compare the first NMATCH
+// submatches with the expected offsets, which follow NMATCH as pairs of int
+// (start, end). Returns 0 on success and non-zero on any failure.
+int test(const char *pattern, const char *string, size_t nmatch, ...)
+{
+    regex_t re;
+    regmatch_t *pmatch = new regmatch_t[nmatch];
+    int result;
+
+    // Start the argument list before the first 'goto end': the cleanup code
+    // calls va_end(), which is only valid after va_start().
+    va_list vl;
+    va_start(vl, nmatch);
+
+    result = regcomp(&re, pattern, 0);
+    if (result != 0) {
+        fprintf(stderr, "regcomp() failed for RE %s\n", pattern);
+        goto end;
+    }
+
+    result = regexec(&re, string, nmatch, pmatch, 0);
+    if (result != 0) {
+        fprintf(stderr, "regexec() failed for RE %s and string %s\n"
+            , pattern, string);
+        goto end;
+    }
+
+    for (uint32_t i = 0; i < nmatch; ++i) {
+        // Callers pass plain int literals, so the arguments must be read
+        // back as int; reading them as regoff_t is undefined behaviour.
+        regoff_t so = va_arg(vl, int);
+        regoff_t eo = va_arg(vl, int);
+        const regmatch_t want = {so, eo}, &have = pmatch[i];
+
+        if (want.rm_so != have.rm_so || want.rm_eo != have.rm_eo) {
+            result = 1;
+            fprintf(stderr, "incorrect submatch for RE %s and string %s,"
+                " group %u:\n", pattern, string, i);
+            for (uint32_t j = 0; j < nmatch; ++j) {
+                // regoff_t is signed (unset offsets are -1): print as long.
+                fprintf(stderr,
+                    "pmatch[%u].rm_so = %ld, "
+                    "pmatch[%u].rm_eo = %ld\n"
+                    , j, static_cast<long>(pmatch[j].rm_so)
+                    , j, static_cast<long>(pmatch[j].rm_eo));
+            }
+            fprintf(stderr, "\n");
+            goto end;
+        }
+    }
+
+end:
+    va_end(vl);
+    delete[] pmatch;
+    return result;
+}
+
+int main()
+{
+    int err = 0;
+
+    err |= test("a", "a", 1, 0,1);
+    err |= test("(a)", "a", 2, 0,1, 0,1);
+    err |= test("(a*)", "aaa", 2, 0,3, 0,3);
+    err |= test("(a*)(b*)", "aabb", 3, 0,4, 0,2, 2,4);
+    err |= test("(a*)(a*)", "aa", 3, 0,2, 0,2, 2,2);
+
+    return err;
+}
-- 
2.50.1