From ebea1d58ff72b562ec2765b89eb4f716f0d39ea8 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 14 May 2015 18:20:13 +0100 Subject: [PATCH] Forbid copying of 're2c::SubStr'. Removed useless methods (most of them became useless after 're2c::Str' removal) and obsolete autoconf check for 'strndup'. Removed 'Scanner::token' methods. They used 'Scanner::check_token_length', which was pretty useless: 1. checking for the lower bound should always have succeeded, because 'Scanner::tok' is always set to the buffer start in 'Scanner::fill' and if the token was too long, its start would be lost anyway. 2. checking for the upper bound could fail if a re2c dev passed some trash into it, but any normal function would do so and there is no particular reason to have special runtime checks here. Now substrings are constructed in the lexer, where all the lengths and bounds are easier to verify from the lexing context (for a re2c dev). --- re2c/Makefile.am | 3 +- re2c/bootstrap/scanner_lex.cc | 29 +++++++----- re2c/configure.ac | 2 +- re2c/src/dfa/actions.cc | 8 ++-- re2c/src/parse/scanner.cc | 14 +++--- re2c/src/parse/scanner.h | 37 ++++----------- re2c/src/parse/scanner_lex.re | 27 ++++++----- re2c/src/util/substr.cc | 34 ------------- re2c/src/util/substr.h | 89 +++++++---------------------------- 9 files changed, 71 insertions(+), 172 deletions(-) delete mode 100644 re2c/src/util/substr.cc diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 47134af8..a5bfd0a3 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -94,8 +94,7 @@ SRC = \ $(srcdir)/src/mbo_getopt.cc \ $(srcdir)/src/parse/input.cc \ $(srcdir)/src/parse/scanner.cc \ - $(srcdir)/src/util/range.cc \ - $(srcdir)/src/util/substr.cc + $(srcdir)/src/util/range.cc # omit SRC_PARSER here; include it in EXTRA_DIST instead # (automake generates standard build rules for all YACC-ish # sources, they will conflict with our custom build rule). 
diff --git a/re2c/bootstrap/scanner_lex.cc b/re2c/bootstrap/scanner_lex.cc index 9f35bbb2..311c78ed 100644 --- a/re2c/bootstrap/scanner_lex.cc +++ b/re2c/bootstrap/scanner_lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.1.dev on Thu May 14 13:56:12 2015*/ +/* Generated by re2c 0.14.1.dev on Thu May 14 17:19:41 2015*/ #include #include #include @@ -948,13 +948,14 @@ yy164: } else { /* Add one char in front and one behind instead of 's or "s */ cur = cursor; + SubStr s (tok, cur - tok); if (bCaseInsensitive || bCaseInverted) { - yylval.regexp = strToCaseInsensitiveRE(token()); + yylval.regexp = strToCaseInsensitiveRE (s); } else { - yylval.regexp = strToRE(token()); + yylval.regexp = strToRE (s); } return STRING; } @@ -1166,7 +1167,8 @@ yy191: ++YYCURSOR; { cur = cursor; - yylval.regexp = ranToRE(token()); + SubStr s (tok, cur - tok); + yylval.regexp = ranToRE (s); return RANGE; } yy193: @@ -1179,7 +1181,8 @@ yy194: ++YYCURSOR; { cur = cursor; - yylval.regexp = invToRE(token()); + SubStr s (tok, cur - tok); + yylval.regexp = invToRE (s); return RANGE; } yy196: @@ -1278,13 +1281,14 @@ yy218: ++YYCURSOR; { cur = cursor; + SubStr s (tok + 1, cur - tok - 2); if (bCaseInverted) { - yylval.regexp = strToRE(token(1, cur - tok - 2)); + yylval.regexp = strToRE (s); } else { - yylval.regexp = strToCaseInsensitiveRE(token(1, cur - tok - 2)); + yylval.regexp = strToCaseInsensitiveRE (s); } return STRING; } @@ -1307,13 +1311,14 @@ yy223: ++YYCURSOR; { cur = cursor; + SubStr s (tok + 1, cur - tok - 2); if (bCaseInsensitive || bCaseInverted) { - yylval.regexp = strToCaseInsensitiveRE(token(1, cur - tok - 2)); + yylval.regexp = strToCaseInsensitiveRE (s); } else { - yylval.regexp = strToRE(token(1, cur - tok - 2)); + yylval.regexp = strToRE (s); } return STRING; } @@ -2185,7 +2190,7 @@ yy351: yy352: { cur = cursor; - yylval.number = atoi(token().to_string().c_str()); + yylval.number = atoi(std::string (tok, cur - tok).c_str()); iscfg = 0; return NUMBER; } @@ -2429,7 +2434,7 @@ 
yy379: yy380: { cur = cursor; - cline = atoi(token().to_string().c_str()); + cline = atoi(std::string (tok, cur - tok).c_str()); goto sourceline; } yy381: @@ -2465,7 +2470,7 @@ yy387: ++YYCURSOR; { cur = cursor; - escape (in.file_name, token(1, cur - tok - 2).to_string()); + escape (in.file_name, std::string (tok + 1, cur - tok - 2)); goto sourceline; } } diff --git a/re2c/configure.ac b/re2c/configure.ac index 86baefa2..6137fc09 100644 --- a/re2c/configure.ac +++ b/re2c/configure.ac @@ -50,7 +50,7 @@ AC_CHECK_SIZEOF([0i8], [], [[]]) # checks for library functions -AC_CHECK_FUNCS([strdup strndup]) +AC_CHECK_FUNCS([strdup]) AC_CONFIG_FILES([\ diff --git a/re2c/src/dfa/actions.cc b/re2c/src/dfa/actions.cc index b3d576eb..4a533f4b 100644 --- a/re2c/src/dfa/actions.cc +++ b/re2c/src/dfa/actions.cc @@ -757,7 +757,7 @@ RegExp * Scanner::matchSymbol(uint32_t c) const return new MatchOp(new Range(c, c + 1)); } -RegExp * Scanner::strToRE(SubStr s) const +RegExp * Scanner::strToRE (SubStr & s) const { if (s.len == 0) return new NullOp; @@ -770,7 +770,7 @@ RegExp * Scanner::strToRE(SubStr s) const return re; } -RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const +RegExp * Scanner::strToCaseInsensitiveRE (SubStr & s) const { if (s.len == 0) return new NullOp; @@ -828,7 +828,7 @@ RegExp * Scanner::matchSymbolRange(Range * r) const return new MatchOp(r); } -RegExp * Scanner::ranToRE(SubStr s) const +RegExp * Scanner::ranToRE (SubStr & s) const { s.len -= 2; s.str += 1; @@ -839,7 +839,7 @@ RegExp * Scanner::ranToRE(SubStr s) const return matchSymbolRange(mkRange(s)); } -RegExp * Scanner::invToRE(SubStr s) const +RegExp * Scanner::invToRE (SubStr & s) const { s.len -= 3; s.str += 2; diff --git a/re2c/src/parse/scanner.cc b/re2c/src/parse/scanner.cc index 168d5a77..4a4a2b0c 100644 --- a/re2c/src/parse/scanner.cc +++ b/re2c/src/parse/scanner.cc @@ -1,7 +1,13 @@ #include +#include #include "scanner.h" +// used by Scanner::fatal_at and Scanner::fatalf +#if defined(_MSC_VER) && 
!defined(vsnprintf) +# define vsnprintf _vsnprintf +#endif + namespace re2c { void Scanner::config(const std::string & cfg, int num) @@ -442,14 +448,6 @@ Scanner::~Scanner() } } -void Scanner::check_token_length(char *pos, uint32_t len) const -{ - if (pos < bot || pos + len > top) - { - fatal("Token exceeds limit"); - } -} - void Scanner::reuse() { next_label = 0; diff --git a/re2c/src/parse/scanner.h b/re2c/src/parse/scanner.h index 9031a9c6..88e81d90 100644 --- a/re2c/src/parse/scanner.h +++ b/re2c/src/parse/scanner.h @@ -83,24 +83,19 @@ public: void config(const std::string &, int); void config(const std::string &, const std::string &); - void check_token_length(char *pos, uint32_t len) const; - SubStr token() const; - SubStr token(uint32_t start, uint32_t len) const; - uint32_t unescape(SubStr &s) const; std::string& unescape(SubStr& str_in, std::string& str_out) const; - Range * mkRange(SubStr &s) const; - Range * getRange(SubStr &s) const; - RegExp * matchSymbol(uint32_t c) const; - RegExp * matchSymbolRange(Range * r) const; - RegExp * strToName(SubStr s) const; - RegExp * strToRE(SubStr s) const; - RegExp * strToCaseInsensitiveRE(SubStr s) const; - RegExp * ranToRE(SubStr s) const; - RegExp * invToRE(SubStr s) const; - RegExp * mkDot() const; - RegExp * mkDefault() const; + Range * mkRange (SubStr & s) const; + Range * getRange (SubStr & s) const; + RegExp * matchSymbol (uint32_t c) const; + RegExp * matchSymbolRange (Range * r) const; + RegExp * strToRE (SubStr & s) const; + RegExp * strToCaseInsensitiveRE (SubStr & s) const; + RegExp * ranToRE (SubStr & s) const; + RegExp * invToRE (SubStr & s) const; + RegExp * mkDot () const; + RegExp * mkDefault () const; FORBID_COPY (Scanner); }; @@ -130,18 +125,6 @@ inline void Scanner::fatal(const char *msg) const fatal(0, msg); } -inline SubStr Scanner::token() const -{ - check_token_length(tok, cur - tok); - return SubStr(tok, cur - tok); -} - -inline SubStr Scanner::token(uint32_t start, uint32_t len) const -{ - 
check_token_length(tok + start, len); - return SubStr(tok + start, len); -} - } // end namespace re2c #endif diff --git a/re2c/src/parse/scanner_lex.re b/re2c/src/parse/scanner_lex.re index 869ee04d..8a5c24c1 100644 --- a/re2c/src/parse/scanner_lex.re +++ b/re2c/src/parse/scanner_lex.re @@ -254,26 +254,28 @@ scan: dstring { cur = cursor; + SubStr s (tok + 1, cur - tok - 2); if (bCaseInsensitive || bCaseInverted) { - yylval.regexp = strToCaseInsensitiveRE(token(1, cur - tok - 2)); + yylval.regexp = strToCaseInsensitiveRE (s); } else { - yylval.regexp = strToRE(token(1, cur - tok - 2)); + yylval.regexp = strToRE (s); } return STRING; } sstring { cur = cursor; + SubStr s (tok + 1, cur - tok - 2); if (bCaseInverted) { - yylval.regexp = strToRE(token(1, cur - tok - 2)); + yylval.regexp = strToRE (s); } else { - yylval.regexp = strToCaseInsensitiveRE(token(1, cur - tok - 2)); + yylval.regexp = strToCaseInsensitiveRE (s); } return STRING; } @@ -287,13 +289,15 @@ scan: istring { cur = cursor; - yylval.regexp = invToRE(token()); + SubStr s (tok, cur - tok); + yylval.regexp = invToRE (s); return RANGE; } cstring { cur = cursor; - yylval.regexp = ranToRE(token()); + SubStr s (tok, cur - tok); + yylval.regexp = ranToRE (s); return RANGE; } @@ -384,13 +388,14 @@ scan: } else { /* Add one char in front and one behind instead of 's or "s */ cur = cursor; + SubStr s (tok, cur - tok); if (bCaseInsensitive || bCaseInverted) { - yylval.regexp = strToCaseInsensitiveRE(token()); + yylval.regexp = strToCaseInsensitiveRE (s); } else { - yylval.regexp = strToRE(token()); + yylval.regexp = strToRE (s); } return STRING; } @@ -580,7 +585,7 @@ value: /*!re2c number { cur = cursor; - yylval.number = atoi(token().to_string().c_str()); + yylval.number = atoi(std::string (tok, cur - tok).c_str()); iscfg = 0; return NUMBER; } @@ -614,12 +619,12 @@ sourceline: /*!re2c lineno { cur = cursor; - cline = atoi(token().to_string().c_str()); + cline = atoi(std::string (tok, cur - tok).c_str()); goto 
sourceline; } dstring { cur = cursor; - escape (in.file_name, token(1, cur - tok - 2).to_string()); + escape (in.file_name, std::string (tok + 1, cur - tok - 2)); goto sourceline; } "\n" { diff --git a/re2c/src/util/substr.cc b/re2c/src/util/substr.cc deleted file mode 100644 index 9bf4a41d..00000000 --- a/re2c/src/util/substr.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* $Id$ */ -#include -#include - -#include "src/globals.h" -#include "src/util/substr.h" - -#ifndef HAVE_STRNDUP - -char *strndup(const char *str, size_t len) -{ - char * ret = (char*)malloc(len + 1); - - memcpy(ret, str, len); - ret[len] = '\0'; - return ret; -} - -#endif - -namespace re2c -{ - -void SubStr::out(std::ostream& o) const -{ - o.write(str, len); -} - -bool operator==(const SubStr &s1, const SubStr &s2) -{ - return (bool) (s1.len == s2.len && memcmp(s1.str, s2.str, s1.len) == 0); -} - -} // end namespace re2c diff --git a/re2c/src/util/substr.h b/re2c/src/util/substr.h index 4f6f5683..7e6ca64f 100644 --- a/re2c/src/util/substr.h +++ b/re2c/src/util/substr.h @@ -1,13 +1,8 @@ -/* $Id$ */ -#ifndef _substr_h -#define _substr_h +#ifndef __SUBSTR__ +#define __SUBSTR__ -#include -#include -#include - -#include "config.h" #include "src/util/c99_stdint.h" +#include "src/util/forbid_copy.h" namespace re2c { @@ -19,71 +14,19 @@ public: const char * const org; uint32_t len; -public: - friend bool operator==(const SubStr &, const SubStr &); - SubStr(const uint8_t *, uint32_t); - SubStr(const char*, uint32_t); - explicit SubStr(const char*); - SubStr(const SubStr&); - virtual ~SubStr(); - void out(std::ostream&) const; - std::string to_string() const; - uint32_t ofs() const; - -private: - SubStr & operator = (const SubStr &); + inline SubStr (const char * s, uint32_t l) + : str (s) + , org (s) + , len (l) + {} + inline uint32_t ofs () const + { + return str - org; + } + + FORBID_COPY (SubStr); }; -inline std::ostream& operator<<(std::ostream& o, const SubStr &s) -{ - s.out(o); - return o; -} - -inline 
std::ostream& operator<<(std::ostream& o, const SubStr* s) -{ - return o << *s; -} - -inline SubStr::SubStr(const uint8_t *s, uint32_t l) - : str((char*)s), org((char*)s), len(l) -{ } - -inline SubStr::SubStr(const char *s, uint32_t l) - : str(s), org(s), len(l) -{ } - -inline SubStr::SubStr(const char *s) - : str(s), org(s), len(strlen(s)) -{ } - -inline SubStr::SubStr(const SubStr &s) - : str(s.str), org(s.str), len(s.len) -{ } - -inline SubStr::~SubStr() -{ } - -inline std::string SubStr::to_string() const -{ - return str && len ? std::string(str, len) : std::string(); -} - -inline uint32_t SubStr::ofs() const -{ - return str - org; -} - -} // end namespace re2c - -#ifndef HAVE_STRNDUP - -char *strndup(const char *str, size_t len); - -#endif - -#if defined(_MSC_VER) && !defined(vsnprintf) -#define vsnprintf _vsnprintf -#endif +} // namespace re2c -#endif +#endif // __SUBSTR__ -- 2.40.0