From: Ulya Trofimovich Date: Fri, 14 Aug 2015 12:57:58 +0000 (+0100) Subject: Lexer: unified token length calculation. X-Git-Tag: 0.15~123 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=48c067251c0554a961938ac018147042ae2fff3d;p=re2c Lexer: unified token length calculation. Token length equals the difference between two pointers: the YYCURSOR value at the moment of a successful match and the YYCURSOR value on entering the DFA. This difference should be nonnegative and fit within the buffer size. --- diff --git a/re2c/bootstrap/src/parse/scanner_lex.cc b/re2c/bootstrap/src/parse/scanner_lex.cc index 820cf8f4..2358193c 100644 --- a/re2c/bootstrap/src/parse/scanner_lex.cc +++ b/re2c/bootstrap/src/parse/scanner_lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Wed Aug 12 22:30:20 2015 */ +/* Generated by re2c 0.14.3 on Fri Aug 14 12:58:19 2015 */ #line 1 "../src/parse/scanner_lex.re" #include #include @@ -113,7 +113,7 @@ echo: { if (!(ignore_eoc || DFlag || flag_skeleton)) { - out.write(tok, cur - tok - 1); + out.write(tok, tok_len () - 1); // -1 so we don't write out the \0 } if(cur == eof) @@ -146,7 +146,7 @@ yy7: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; cline++; @@ -266,7 +266,7 @@ yy32: const size_t lexeme_len = cur[-1] == '{' ? 
sizeof ("%{") - 1 : sizeof ("/*!re2c") - 1; - out.write(tok, cur - tok - lexeme_len); + out.write(tok, tok_len () - lexeme_len); } tok = cur; return Parse; @@ -296,7 +296,7 @@ yy34: if (!(DFlag || flag_skeleton)) { const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; - out.write(tok, cur - tok - lexeme_len); + out.write(tok, tok_len () - lexeme_len); } tok = cur; return Reuse; @@ -439,7 +439,7 @@ yy85: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; goto echo; @@ -464,7 +464,7 @@ yy87: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; goto echo; @@ -952,7 +952,7 @@ yy149: if (!FFlag) { fatal("curly braces for names only allowed with -F switch"); } - yylval.str = new std::string (tok + 1, cur - tok - 2); + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces return ID; } #line 959 "src/parse/scanner_lex.cc" @@ -1014,11 +1014,11 @@ yy164: #line 391 "../src/parse/scanner_lex.re" { if (!FFlag) { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return ID; } else { /* Add one char in front and one behind instead of 's or "s */ - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); if (bCaseInsensitive || bCaseInverted) { yylval.regexp = strToCaseInsensitiveRE (s); @@ -1040,7 +1040,7 @@ yy167: YYCURSOR = YYCTXMARKER; #line 386 "../src/parse/scanner_lex.re" { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return ID; } #line 1047 "src/parse/scanner_lex.cc" @@ -1126,7 +1126,7 @@ yy176: { tok += 5; /* skip "re2c:" */ lexer_state = LEX_CONFIG; - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return CONFIG; } #line 1133 "src/parse/scanner_lex.cc" @@ -1190,7 +1190,7 @@ yy181: YYCURSOR = YYCTXMARKER; #line 373 "../src/parse/scanner_lex.re" { - yylval.str = new 
std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); if (FFlag) { lexer_state = LEX_FLEX_NAME; @@ -1249,7 +1249,7 @@ yy191: ++YYCURSOR; #line 302 "../src/parse/scanner_lex.re" { - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); yylval.regexp = ranToRE (s); return RANGE; } @@ -1264,7 +1264,7 @@ yy194: ++YYCURSOR; #line 296 "../src/parse/scanner_lex.re" { - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); yylval.regexp = invToRE (s); return RANGE; } @@ -1378,7 +1378,7 @@ yy218: ++YYCURSOR; #line 276 "../src/parse/scanner_lex.re" { - SubStr s (tok + 1, cur - tok - 2); + SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes if (bCaseInverted) { yylval.regexp = strToRE (s); @@ -1409,7 +1409,7 @@ yy223: ++YYCURSOR; #line 263 "../src/parse/scanner_lex.re" { - SubStr s (tok + 1, cur - tok - 2); + SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes if (bCaseInsensitive || bCaseInverted) { yylval.regexp = strToCaseInsensitiveRE (s); @@ -1705,7 +1705,7 @@ yy264: { --cur; } - yylval.code = new Code (tok, cur - tok, get_fname (), tline); + yylval.code = new Code (tok, tok_len (), get_fname (), tline); return CODE; } else if (cur == eof) @@ -1752,7 +1752,7 @@ yy269: } else if (--depth == 0) { - yylval.code = new Code (tok, cur - tok, get_fname (), tline); + yylval.code = new Code (tok, tok_len (), get_fname (), tline); return CODE; } goto code; @@ -2359,7 +2359,7 @@ value: yy358: #line 608 "../src/parse/scanner_lex.re" { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); lexer_state = LEX_NORMAL; return VALUE; } @@ -2372,7 +2372,7 @@ yy359: yy360: #line 603 "../src/parse/scanner_lex.re" { - yylval.number = atoi(std::string (tok, cur - tok).c_str()); + yylval.number = atoi(std::string (tok, tok_len ()).c_str()); lexer_state = LEX_NORMAL; return NUMBER; } @@ -2623,7 +2623,7 @@ yy387: yy388: #line 635 "../src/parse/scanner_lex.re" { - cline = atoi(std::string (tok, cur - 
tok).c_str()); + cline = atoi(std::string (tok, tok_len ()).c_str()); goto sourceline; } #line 2630 "src/parse/scanner_lex.cc" @@ -2660,7 +2660,7 @@ yy395: ++YYCURSOR; #line 639 "../src/parse/scanner_lex.re" { - escape (in.file_name, std::string (tok + 1, cur - tok - 2)); + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } #line 2667 "src/parse/scanner_lex.cc" diff --git a/re2c/src/codegen/output.cc b/re2c/src/codegen/output.cc index d24ac8f8..bd5b945a 100644 --- a/re2c/src/codegen/output.cc +++ b/re2c/src/codegen/output.cc @@ -90,9 +90,9 @@ std::ostream & OutputFile::stream () return blocks.back ()->fragments.back ()->stream; } -void OutputFile::write (const char * s, std::streamsize n) +void OutputFile::write (const char * s, size_t n) { - stream ().write (s, n); + stream ().write (s, static_cast<std::streamsize> (n)); } void OutputFile::write_hex (uint32_t n) diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h index 73b0c4ef..4a26865c 100644 --- a/re2c/src/codegen/output.h +++ b/re2c/src/codegen/output.h @@ -68,7 +68,7 @@ public: void new_block (); - void write (const char * s, std::streamsize n); + void write (const char * s, size_t n); void write_hex (uint32_t n); void write_char_hex (uint32_t n); void write_range (uint32_t u, uint32_t l); diff --git a/re2c/src/parse/scanner.h b/re2c/src/parse/scanner.h index 379b8df7..34f160c0 100644 --- a/re2c/src/parse/scanner.h +++ b/re2c/src/parse/scanner.h @@ -59,6 +59,7 @@ private: private: void fill (uint32_t); void set_sourceline (); + size_t tok_len () const; public: Scanner(Input &, OutputFile &); @@ -110,9 +111,16 @@ public: FORBID_COPY (Scanner); }; +inline size_t Scanner::tok_len () const +{ + // lexing and fill procedures must maintain: token pointer <= cursor pointer + return static_cast<size_t> (cur - tok); +} + inline size_t Scanner::get_pos() const { - return cur - bot; + // lexing and fill procedures must maintain: buffer bottom <= cursor pointer + return 
static_cast<size_t> (cur - bot); } inline const std::string & Scanner::get_fname () const diff --git a/re2c/src/parse/scanner_lex.re b/re2c/src/parse/scanner_lex.re index 6683be0d..9b8eaa3f 100644 --- a/re2c/src/parse/scanner_lex.re +++ b/re2c/src/parse/scanner_lex.re @@ -79,7 +79,7 @@ echo: const size_t lexeme_len = cur[-1] == '{' ? sizeof ("%{") - 1 : sizeof ("/*!re2c") - 1; - out.write(tok, cur - tok - lexeme_len); + out.write(tok, tok_len () - lexeme_len); } tok = cur; return Parse; @@ -105,7 +105,7 @@ echo: if (!(DFlag || flag_skeleton)) { const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; - out.write(tok, cur - tok - lexeme_len); + out.write(tok, tok_len () - lexeme_len); } tok = cur; return Reuse; @@ -156,7 +156,7 @@ echo: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; goto echo; @@ -174,7 +174,7 @@ echo: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; goto echo; @@ -190,7 +190,7 @@ echo: } else if (!(DFlag || flag_skeleton)) { - out.write(tok, cur - tok); + out.write(tok, tok_len ()); } tok = pos = cur; cline++; @@ -199,7 +199,7 @@ echo: zero { if (!(ignore_eoc || DFlag || flag_skeleton)) { - out.write(tok, cur - tok - 1); + out.write(tok, tok_len () - 1); // -1 so we don't write out the \0 } if(cur == eof) @@ -261,7 +261,7 @@ start: } dstring { - SubStr s (tok + 1, cur - tok - 2); + SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes if (bCaseInsensitive || bCaseInverted) { yylval.regexp = strToCaseInsensitiveRE (s); @@ -274,7 +274,7 @@ start: } sstring { - SubStr s (tok + 1, cur - tok - 2); + SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes if (bCaseInverted) { yylval.regexp = strToRE (s); @@ -294,13 +294,13 @@ start: } istring { - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); yylval.regexp = invToRE (s); return RANGE; } cstring { - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); 
yylval.regexp = ranToRE (s); return RANGE; } @@ -359,19 +359,19 @@ start: if (!FFlag) { fatal("curly braces for names only allowed with -F switch"); } - yylval.str = new std::string (tok + 1, cur - tok - 2); + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces return ID; } config { tok += 5; /* skip "re2c:" */ lexer_state = LEX_CONFIG; - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return CONFIG; } name / (space+ [^=>,]) { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); if (FFlag) { lexer_state = LEX_FLEX_NAME; @@ -384,17 +384,17 @@ start: } name / (space* [=>,]) { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return ID; } name / [^] { if (!FFlag) { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); return ID; } else { /* Add one char in front and one behind instead of 's or "s */ - SubStr s (tok, cur - tok); + SubStr s (tok, tok_len ()); if (bCaseInsensitive || bCaseInverted) { yylval.regexp = strToCaseInsensitiveRE (s); @@ -458,7 +458,7 @@ code: } else if (--depth == 0) { - yylval.code = new Code (tok, cur - tok, get_fname (), tline); + yylval.code = new Code (tok, tok_len (), get_fname (), tline); return CODE; } goto code; @@ -499,7 +499,7 @@ code: { --cur; } - yylval.code = new Code (tok, cur - tok, get_fname (), tline); + yylval.code = new Code (tok, tok_len (), get_fname (), tline); return CODE; } else if (cur == eof) @@ -601,12 +601,12 @@ config: value: /*!re2c number { - yylval.number = atoi(std::string (tok, cur - tok).c_str()); + yylval.number = atoi(std::string (tok, tok_len ()).c_str()); lexer_state = LEX_NORMAL; return NUMBER; } value { - yylval.str = new std::string (tok, cur - tok); + yylval.str = new std::string (tok, tok_len ()); lexer_state = LEX_NORMAL; return VALUE; } @@ -633,11 +633,11 @@ sourceline: tok = cur; /*!re2c 
lineno { - cline = atoi(std::string (tok, cur - tok).c_str()); + cline = atoi(std::string (tok, tok_len ()).c_str()); goto sourceline; } dstring { - escape (in.file_name, std::string (tok + 1, cur - tok - 2)); + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } "\n" {