granicus.if.org Git - re2c/commitdiff
Lexer: unified token length calculation.
author Ulya Trofimovich <skvadrik@gmail.com>
Fri, 14 Aug 2015 12:57:58 +0000 (13:57 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Fri, 14 Aug 2015 12:57:58 +0000 (13:57 +0100)
Token length equals the difference between two pointers: the YYCURSOR
value at the moment of a successful match and the YYCURSOR value on
entering the DFA. This difference should be nonnegative and fit within
the buffer size.

re2c/bootstrap/src/parse/scanner_lex.cc
re2c/src/codegen/output.cc
re2c/src/codegen/output.h
re2c/src/parse/scanner.h
re2c/src/parse/scanner_lex.re

index 820cf8f4fe0f9570064ec57e71ec7e67fe54e575..2358193c36d55770093086458b4a589100670092 100644 (file)
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.14.3 on Wed Aug 12 22:30:20 2015 */
+/* Generated by re2c 0.14.3 on Fri Aug 14 12:58:19 2015 */
 #line 1 "../src/parse/scanner_lex.re"
 #include <stdlib.h>
 #include <string.h>
@@ -113,7 +113,7 @@ echo:
        {
                                        if (!(ignore_eoc || DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok - 1);
+                                               out.write(tok, tok_len () - 1);
                                                // -1 so we don't write out the \0
                                        }
                                        if(cur == eof)
@@ -146,7 +146,7 @@ yy7:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        cline++;
@@ -266,7 +266,7 @@ yy32:
                                                const size_t lexeme_len = cur[-1] == '{'
                                                        ? sizeof ("%{") - 1
                                                        : sizeof ("/*!re2c") - 1;
-                                               out.write(tok, cur - tok - lexeme_len);
+                                               out.write(tok, tok_len () - lexeme_len);
                                        }
                                        tok = cur;
                                        return Parse;
@@ -296,7 +296,7 @@ yy34:
                                        if (!(DFlag || flag_skeleton))
                                        {
                                                const size_t lexeme_len = sizeof ("/*!use:re2c") - 1;
-                                               out.write(tok, cur - tok - lexeme_len);
+                                               out.write(tok, tok_len () - lexeme_len);
                                        }
                                        tok = cur;
                                        return Reuse;
@@ -439,7 +439,7 @@ yy85:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        goto echo;
@@ -464,7 +464,7 @@ yy87:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        goto echo;
@@ -952,7 +952,7 @@ yy149:
                                        if (!FFlag) {
                                                fatal("curly braces for names only allowed with -F switch");
                                        }
-                                       yylval.str = new std::string (tok + 1, cur - tok - 2);
+                                       yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces
                                        return ID;
                                }
 #line 959 "src/parse/scanner_lex.cc"
@@ -1014,11 +1014,11 @@ yy164:
 #line 391 "../src/parse/scanner_lex.re"
        {
                                        if (!FFlag) {
-                                               yylval.str = new std::string (tok, cur - tok);
+                                               yylval.str = new std::string (tok, tok_len ());
                                                return ID;
                                        } else {
                                                /* Add one char in front and one behind instead of 's or "s */
-                                               SubStr s (tok, cur - tok);
+                                               SubStr s (tok, tok_len ());
                                                if (bCaseInsensitive || bCaseInverted)
                                                {
                                                        yylval.regexp = strToCaseInsensitiveRE (s);
@@ -1040,7 +1040,7 @@ yy167:
        YYCURSOR = YYCTXMARKER;
 #line 386 "../src/parse/scanner_lex.re"
        {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        return ID;
                                }
 #line 1047 "src/parse/scanner_lex.cc"
@@ -1126,7 +1126,7 @@ yy176:
        {
                                        tok += 5; /* skip "re2c:" */
                                        lexer_state = LEX_CONFIG;
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        return CONFIG;
                                }
 #line 1133 "src/parse/scanner_lex.cc"
@@ -1190,7 +1190,7 @@ yy181:
        YYCURSOR = YYCTXMARKER;
 #line 373 "../src/parse/scanner_lex.re"
        {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        if (FFlag)
                                        {
                                                lexer_state = LEX_FLEX_NAME;
@@ -1249,7 +1249,7 @@ yy191:
        ++YYCURSOR;
 #line 302 "../src/parse/scanner_lex.re"
        {
-                                       SubStr s (tok, cur - tok);
+                                       SubStr s (tok, tok_len ());
                                        yylval.regexp = ranToRE (s);
                                        return RANGE;
                                }
@@ -1264,7 +1264,7 @@ yy194:
        ++YYCURSOR;
 #line 296 "../src/parse/scanner_lex.re"
        {
-                                       SubStr s (tok, cur - tok);
+                                       SubStr s (tok, tok_len ());
                                        yylval.regexp = invToRE (s);
                                        return RANGE;
                                }
@@ -1378,7 +1378,7 @@ yy218:
        ++YYCURSOR;
 #line 276 "../src/parse/scanner_lex.re"
        {
-                                       SubStr s (tok + 1, cur - tok - 2);
+                                       SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes
                                        if (bCaseInverted)
                                        {
                                                yylval.regexp = strToRE (s);
@@ -1409,7 +1409,7 @@ yy223:
        ++YYCURSOR;
 #line 263 "../src/parse/scanner_lex.re"
        {
-                                       SubStr s (tok + 1, cur - tok - 2);
+                                       SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes
                                        if (bCaseInsensitive || bCaseInverted)
                                        {
                                                yylval.regexp = strToCaseInsensitiveRE (s);
@@ -1705,7 +1705,7 @@ yy264:
                                                {
                                                        --cur;
                                                }
-                                               yylval.code = new Code (tok, cur - tok, get_fname (), tline);
+                                               yylval.code = new Code (tok, tok_len (), get_fname (), tline);
                                                return CODE;
                                        }
                                        else if (cur == eof)
@@ -1752,7 +1752,7 @@ yy269:
                                        }
                                        else if (--depth == 0)
                                        {
-                                               yylval.code = new Code (tok, cur - tok, get_fname (), tline);
+                                               yylval.code = new Code (tok, tok_len (), get_fname (), tline);
                                                return CODE;
                                        }
                                        goto code;
@@ -2359,7 +2359,7 @@ value:
 yy358:
 #line 608 "../src/parse/scanner_lex.re"
        {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        lexer_state = LEX_NORMAL;
                                        return VALUE;
                                }
@@ -2372,7 +2372,7 @@ yy359:
 yy360:
 #line 603 "../src/parse/scanner_lex.re"
        {
-                                       yylval.number = atoi(std::string (tok, cur - tok).c_str());
+                                       yylval.number = atoi(std::string (tok, tok_len ()).c_str());
                                        lexer_state = LEX_NORMAL;
                                        return NUMBER;
                                }
@@ -2623,7 +2623,7 @@ yy387:
 yy388:
 #line 635 "../src/parse/scanner_lex.re"
        {
-                                       cline = atoi(std::string (tok, cur - tok).c_str());
+                                       cline = atoi(std::string (tok, tok_len ()).c_str());
                                        goto sourceline; 
                                }
 #line 2630 "src/parse/scanner_lex.cc"
@@ -2660,7 +2660,7 @@ yy395:
        ++YYCURSOR;
 #line 639 "../src/parse/scanner_lex.re"
        {
-                                       escape (in.file_name, std::string (tok + 1, cur - tok - 2));
+                                       escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes
                                        goto sourceline; 
                                }
 #line 2667 "src/parse/scanner_lex.cc"
index d24ac8f8ee964f3bdf91e27d62b00670a463ca88..bd5b945a0a6cd66ece1a869fde36afcd507d5a86 100644 (file)
@@ -90,9 +90,9 @@ std::ostream & OutputFile::stream ()
        return blocks.back ()->fragments.back ()->stream;
 }
 
-void OutputFile::write (const char * s, std::streamsize n)
+void OutputFile::write (const char * s, size_t n)
 {
-       stream ().write (s, n);
+       stream ().write (s, static_cast<std::streamsize> (n));
 }
 
 void OutputFile::write_hex (uint32_t n)
index 73b0c4ef1cec8cf51686b56dbfd7ef026cb0358c..4a26865c0aba67539d4e3faa8bd06415fc0fc7c0 100644 (file)
@@ -68,7 +68,7 @@ public:
 
        void new_block ();
 
-       void write (const char * s, std::streamsize n);
+       void write (const char * s, size_t n);
        void write_hex (uint32_t n);
        void write_char_hex (uint32_t n);
        void write_range (uint32_t u, uint32_t l);
index 379b8df7a49f541e0b7ecf11f46f98cea1e7f74e..34f160c03e5762194cb0d04f2939ab2985336b11 100644 (file)
@@ -59,6 +59,7 @@ private:
 private:
        void fill (uint32_t);
        void set_sourceline ();
+       size_t tok_len () const;
 
 public:
        Scanner(Input &, OutputFile &);
@@ -110,9 +111,16 @@ public:
        FORBID_COPY (Scanner);
 };
 
+inline size_t Scanner::tok_len () const
+{
+       // lexing and fill procedures must maintain: token pointer <= cursor pointer
+       return static_cast<size_t> (cur - tok);
+}
+
 inline size_t Scanner::get_pos() const
 {
-       return cur - bot;
+       // lexing and fill procedures must maintain: buffer bottom <= cursor pointer
+       return static_cast<size_t> (cur - bot);
 }
 
 inline const std::string & Scanner::get_fname () const
index 6683be0d48b65f79c377140de6c38d6e09bb2e4d..9b8eaa3f3f5005fed98817aa108e14b5089d181c 100644 (file)
@@ -79,7 +79,7 @@ echo:
                                                const size_t lexeme_len = cur[-1] == '{'
                                                        ? sizeof ("%{") - 1
                                                        : sizeof ("/*!re2c") - 1;
-                                               out.write(tok, cur - tok - lexeme_len);
+                                               out.write(tok, tok_len () - lexeme_len);
                                        }
                                        tok = cur;
                                        return Parse;
@@ -105,7 +105,7 @@ echo:
                                        if (!(DFlag || flag_skeleton))
                                        {
                                                const size_t lexeme_len = sizeof ("/*!use:re2c") - 1;
-                                               out.write(tok, cur - tok - lexeme_len);
+                                               out.write(tok, tok_len () - lexeme_len);
                                        }
                                        tok = cur;
                                        return Reuse;
@@ -156,7 +156,7 @@ echo:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        goto echo;
@@ -174,7 +174,7 @@ echo:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        goto echo;
@@ -190,7 +190,7 @@ echo:
                                        }
                                        else if (!(DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok);
+                                               out.write(tok, tok_len ());
                                        }
                                        tok = pos = cur;
                                        cline++;
@@ -199,7 +199,7 @@ echo:
        zero            {
                                        if (!(ignore_eoc || DFlag || flag_skeleton))
                                        {
-                                               out.write(tok, cur - tok - 1);
+                                               out.write(tok, tok_len () - 1);
                                                // -1 so we don't write out the \0
                                        }
                                        if(cur == eof)
@@ -261,7 +261,7 @@ start:
                                }
 
        dstring         {
-                                       SubStr s (tok + 1, cur - tok - 2);
+                                       SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes
                                        if (bCaseInsensitive || bCaseInverted)
                                        {
                                                yylval.regexp = strToCaseInsensitiveRE (s);
@@ -274,7 +274,7 @@ start:
                                }
 
        sstring         {
-                                       SubStr s (tok + 1, cur - tok - 2);
+                                       SubStr s (tok + 1, tok_len () - 2); // -2 to omit quotes
                                        if (bCaseInverted)
                                        {
                                                yylval.regexp = strToRE (s);
@@ -294,13 +294,13 @@ start:
                                }
 
        istring         {
-                                       SubStr s (tok, cur - tok);
+                                       SubStr s (tok, tok_len ());
                                        yylval.regexp = invToRE (s);
                                        return RANGE;
                                }
 
        cstring         {
-                                       SubStr s (tok, cur - tok);
+                                       SubStr s (tok, tok_len ());
                                        yylval.regexp = ranToRE (s);
                                        return RANGE;
                                }
@@ -359,19 +359,19 @@ start:
                                        if (!FFlag) {
                                                fatal("curly braces for names only allowed with -F switch");
                                        }
-                                       yylval.str = new std::string (tok + 1, cur - tok - 2);
+                                       yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces
                                        return ID;
                                }
 
        config          {
                                        tok += 5; /* skip "re2c:" */
                                        lexer_state = LEX_CONFIG;
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        return CONFIG;
                                }
 
        name / (space+ [^=>,])  {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        if (FFlag)
                                        {
                                                lexer_state = LEX_FLEX_NAME;
@@ -384,17 +384,17 @@ start:
                                }
 
        name / (space* [=>,])   {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        return ID;
                                }
 
        name / [^]      {
                                        if (!FFlag) {
-                                               yylval.str = new std::string (tok, cur - tok);
+                                               yylval.str = new std::string (tok, tok_len ());
                                                return ID;
                                        } else {
                                                /* Add one char in front and one behind instead of 's or "s */
-                                               SubStr s (tok, cur - tok);
+                                               SubStr s (tok, tok_len ());
                                                if (bCaseInsensitive || bCaseInverted)
                                                {
                                                        yylval.regexp = strToCaseInsensitiveRE (s);
@@ -458,7 +458,7 @@ code:
                                        }
                                        else if (--depth == 0)
                                        {
-                                               yylval.code = new Code (tok, cur - tok, get_fname (), tline);
+                                               yylval.code = new Code (tok, tok_len (), get_fname (), tline);
                                                return CODE;
                                        }
                                        goto code;
@@ -499,7 +499,7 @@ code:
                                                {
                                                        --cur;
                                                }
-                                               yylval.code = new Code (tok, cur - tok, get_fname (), tline);
+                                               yylval.code = new Code (tok, tok_len (), get_fname (), tline);
                                                return CODE;
                                        }
                                        else if (cur == eof)
@@ -601,12 +601,12 @@ config:
 value:
 /*!re2c
        number          {
-                                       yylval.number = atoi(std::string (tok, cur - tok).c_str());
+                                       yylval.number = atoi(std::string (tok, tok_len ()).c_str());
                                        lexer_state = LEX_NORMAL;
                                        return NUMBER;
                                }
        value           {
-                                       yylval.str = new std::string (tok, cur - tok);
+                                       yylval.str = new std::string (tok, tok_len ());
                                        lexer_state = LEX_NORMAL;
                                        return VALUE;
                                }
@@ -633,11 +633,11 @@ sourceline:
        tok = cur;
 /*!re2c        
        lineno          {
-                                       cline = atoi(std::string (tok, cur - tok).c_str());
+                                       cline = atoi(std::string (tok, tok_len ()).c_str());
                                        goto sourceline; 
                                }
        dstring         {
-                                       escape (in.file_name, std::string (tok + 1, cur - tok - 2));
+                                       escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes
                                        goto sourceline; 
                                }
        "\n"                    {