From: Ulya Trofimovich Date: Tue, 10 Nov 2015 14:41:04 +0000 (+0000) Subject: Fixed C++98 lexer example (program code). X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f214a2b4a08a4a057dd415eb2a1356bd365a6aeb;p=re2c Fixed C++98 lexer example (program code). Fix: check length when parsing integer sequences. If a string contains e.g. "\uFFFFf", it must be recognized as "\xffff\x66" and not as "\xfffff". --- diff --git a/src/examples/07_c++98.re b/src/examples/07_c++98.re index 36a685ba..2be8488c 100644 --- a/src/examples/07_c++98.re +++ b/src/examples/07_c++98.re @@ -61,54 +61,45 @@ static bool adddgt(unsigned long &u, unsigned long d) return true; } -static bool lex_int_sfx(const unsigned char *s, unsigned long u) +static bool lex_oct(const unsigned char *s, const unsigned char *e, unsigned long &u) { + for (u = 0, ++s; s < e;) { /*!re2c re2c:yyfill:enable = 0; re2c:define:YYCURSOR = s; - * { return u < INT_MAX; } - 'u' { return u < UINT_MAX; } - 'l' { return u < LONG_MAX; } - 'ul' | 'lu' { return true; } - */ -} - -static bool lex_oct(const unsigned char *s, bool sfx, unsigned long &u) -{ - for (u = 0, ++s;;) { - /*!re2c - re2c:yyfill:enable = 0; - re2c:define:YYCURSOR = s; - [0-7] { if (adddgt<8>(u, s[-1] - 0x30u)) continue; return false; } - "" { return !sfx || lex_int_sfx(s, u); } + [0-7] { if (!adddgt<8>(u, s[-1] - 0x30u)) return false; continue; } + * { return false; } */ } + return true; } -static bool lex_dec(const unsigned char *s, bool sfx, unsigned long &u) +static bool lex_dec(const unsigned char *s, const unsigned char *e, unsigned long &u) { - for (u = 0;;) { + for (u = 0; s < e;) { /*!re2c re2c:yyfill:enable = 0; re2c:define:YYCURSOR = s; - [0-9] { if (adddgt<10>(u, s[-1] - 0x30u)) continue; return false; } - "" { return !sfx || lex_int_sfx(s, u); } + [0-9] { if (!adddgt<10>(u, s[-1] - 0x30u)) return false; continue; } + * { return false; } */ } + return true; } -static bool lex_hex(const unsigned char *s, bool sfx, unsigned long 
&u) +static bool lex_hex(const unsigned char *s, const unsigned char *e, unsigned long &u) { - for (u = 0, s += 2;;) { + for (u = 0, s += 2; s < e;) { /*!re2c re2c:yyfill:enable = 0; re2c:define:YYCURSOR = s; - [0-9] { if (adddgt<16>(u, s[-1] - 0x30u)) continue; return false; } - [a-f] { if (adddgt<16>(u, s[-1] - 0x61u + 10)) continue; return false; } - [A-F] { if (adddgt<16>(u, s[-1] - 0x41u + 10)) continue; return false; } - "" { return !sfx || lex_int_sfx(s, u); } + [0-9] { if (!adddgt<16>(u, s[-1] - 0x30u)) return false; continue; } + [a-f] { if (!adddgt<16>(u, s[-1] - 0x61u + 10)) return false; continue; } + [A-F] { if (!adddgt<16>(u, s[-1] - 0x41u + 10)) return false; continue; } + * { return false; } */ } + return true; } static bool lex_str(input_t &in, unsigned char q) @@ -136,10 +127,10 @@ static bool lex_str(input_t &in, unsigned char q) "\\'" { u = '\''; continue; } "\\\"" { u = '"'; continue; } "\\?" { u = '?'; continue; } - "\\" [0-7]{1,3} { lex_oct(in.tok, false, u); continue; } - "\\u" [0-9a-fA-F]{4} { lex_hex(in.tok, false, u); continue; } - "\\U" [0-9a-fA-F]{8} { lex_hex(in.tok, false, u); continue; } - "\\x" [0-9a-fA-F]+ { if (!lex_hex(in.tok, false, u)) return false; continue; } + "\\" [0-7]{1,3} { lex_oct(in.tok, in.cur, u); continue; } + "\\u" [0-9a-fA-F]{4} { lex_hex(in.tok, in.cur, u); continue; } + "\\U" [0-9a-fA-F]{8} { lex_hex(in.tok, in.cur, u); continue; } + "\\x" [0-9a-fA-F]+ { if (!lex_hex(in.tok, in.cur, u)) return false; continue; } */ } printf("%c", q); @@ -220,13 +211,12 @@ static bool lex(input_t &in) "L"? 
"''" { return false; } // integer literals - int_sfx = 'u' | 'l' | 'ul' | 'lu'; - oct = "0" [0-7]* int_sfx?; - dec = [1-9][0-9]* int_sfx?; - hex = '0x' [0-9a-fA-F]+ int_sfx?; - oct { if (!lex_oct(in.tok, true, u)) return false; printf("%lu", u); continue; } - dec { if (!lex_dec(in.tok, true, u)) return false; printf("%lu", u); continue; } - hex { if (!lex_hex(in.tok, true, u)) return false; printf("%lu", u); continue; } + oct = "0" [0-7]*; + dec = [1-9][0-9]*; + hex = '0x' [0-9a-fA-F]+; + oct { if (!lex_oct(in.tok, in.cur, u)) return false; goto sfx; } + dec { if (!lex_dec(in.tok, in.cur, u)) return false; goto sfx; } + hex { if (!lex_hex(in.tok, in.cur, u)) return false; goto sfx; } // floating literals frc = [0-9]* "." [0-9]+ | [0-9]+ "."; @@ -356,6 +346,13 @@ static bool lex(input_t &in) id = [a-zA-Z_][a-zA-Z_0-9]*; id { printf("%.*s", in.cur - in.tok, in.tok); continue; } */ +sfx: + /*!re2c + "" { if (u > INT_MAX) return false; printf("%d", static_cast<int>(u)); continue; } + 'u' { if (u > UINT_MAX) return false; printf("%u", static_cast<unsigned>(u)); continue; } + 'l' { if (u > LONG_MAX) return false; printf("%ld", static_cast<long>(u)); continue; } + 'ul' | 'lu' { printf("%lu", u); continue; } + */ } } diff --git a/src/examples/example_07.rst b/src/examples/example_07.rst index 8b1686ad..14fbf509 100644 --- a/src/examples/example_07.rst +++ b/src/examples/example_07.rst @@ -52,22 +52,23 @@ Generate, compile and run: eof = true; memset(lim, 0, YYMAXFILL); lim += YYMAXFILL; } RETURN true; } }; TE MPLATE<INT base> STATIC BOOL adddgt(UNSIGNED LONG &u, UNSIGNED LONG d) { IF (u > (ULONG_MAX - d) / base) { RETURN false; } u = u * base + d; RETURN true; } STAT - IC BOOL lex_int_sfx(CONST UNSIGNED CHAR *s, UNSIGNED LONG u) { } STATIC BOOL lex - _oct(CONST UNSIGNED CHAR *s, BOOL sfx, UNSIGNED LONG &u) { FOR (u = 0, ++s;;) { - } } STATIC BOOL lex_dec(CONST UNSIGNED CHAR *s, BOOL sfx, UNSIGNED LONG &u) { FO - R (u = 0;;) { } } STATIC BOOL lex_hex(CONST UNSIGNED CHAR *s, BOOL sfx, UNSIGNED - LONG &u) 
{ FOR (u = 0, s += 2;;) { } } STATIC BOOL lex_str(input_t &in, UNSIGNE - D CHAR q) { printf("\x25\x63", q); FOR (UNSIGNED LONG u = q;; printf("\x5c\x78\x - 25\x6c\x78", u)) { in.tok = in.cur; } printf("\x25\x63", q); RETURN true; } STAT - IC BOOL lex_flt(CONST UNSIGNED CHAR *s) { DOUBLE d = 0; DOUBLE x = 1; INT e = 0; - mant_int: mant_frac: exp_sign: exp: sfx: end: printf("\x25\x67", d); RETURN tru - e; } STATIC BOOL lex(input_t &in) { UNSIGNED LONG u; FOR (;;) { in.tok = in.cur; - } } INT main(INT argc, CHAR **argv) { IF (argc != 2) { printf ("\x75\x73\x61\x6 - 7\x65\x3a\x20\x2e\x2f\x65\x78\x61\x6d\x70\x6c\x65\x20\x3c\x66\x69\x6c\x65\x6e\x6 - 1\x6d\x65\x3e\xa"); RETURN 1; } FILE *file = fopen(argv[1], "\x72\x62"); IF (!fi - le) { printf("\x65\x72\x72\x6f\x72\x3a\x20\x63\x61\x6e\x6e\x6f\x74\x20\x6f\x70\x - 65\x6e\x20\x66\x69\x6c\x65\x3a\x20\x25\x73\xa", argv[1]); RETURN 1; } input_t in - (file); IF (!lex(in)) { printf("\x2e\x2e\x2e\x20\x65\x72\x72\x6f\x72\xa"); } ELS - E { printf("\xa"); } fclose(file); RETURN 0; } + IC BOOL lex_oct(CONST UNSIGNED CHAR *s, CONST UNSIGNED CHAR *e, UNSIGNED LONG &u + ) { FOR (u = 0, ++s; s < e;) { } RETURN true; } STATIC BOOL lex_dec(CONST UNSIGN + ED CHAR *s, CONST UNSIGNED CHAR *e, UNSIGNED LONG &u) { FOR (u = 0; s < e;) { } + RETURN true; } STATIC BOOL lex_hex(CONST UNSIGNED CHAR *s, CONST UNSIGNED CHAR * + e, UNSIGNED LONG &u) { FOR (u = 0, s += 2; s < e;) { } RETURN true; } STATIC BOO + L lex_str(input_t &in, UNSIGNED CHAR q) { printf("\x25\x63", q); FOR (UNSIGNED L + ONG u = q;; printf("\x5c\x78\x25\x6c\x78", u)) { in.tok = in.cur; } printf("\x25 + \x63", q); RETURN true; } STATIC BOOL lex_flt(CONST UNSIGNED CHAR *s) { DOUBLE d + = 0; DOUBLE x = 1; INT e = 0; mant_int: mant_frac: exp_sign: exp: sfx: end: pri + ntf("\x25\x67", d); RETURN true; } STATIC BOOL lex(input_t &in) { UNSIGNED LONG + u; FOR (;;) { in.tok = in.cur; sfx: } } INT main(INT argc, CHAR **argv) { IF (ar + gc != 2) { printf 
("\x75\x73\x61\x67\x65\x3a\x20\x2e\x2f\x65\x78\x61\x6d\x70\x6c + \x65\x20\x3c\x66\x69\x6c\x65\x6e\x61\x6d\x65\x3e\xa"); RETURN 1; } FILE *file = + fopen(argv[1], "\x72\x62"); IF (!file) { printf("\x65\x72\x72\x6f\x72\x3a\x20\x6 + 3\x61\x6e\x6e\x6f\x74\x20\x6f\x70\x65\x6e\x20\x66\x69\x6c\x65\x3a\x20\x25\x73\xa + ", argv[1]); RETURN 1; } input_t in(file); IF (!lex(in)) { printf("\x2e\x2e\x2e\ + x20\x65\x72\x72\x6f\x72\xa"); } ELSE { printf("\xa"); } fclose(file); RETURN 0; + }