From de1414b6c0c0de8ff3d67d4ed8259df5bcb51e6f Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 11 Feb 2019 21:43:52 +0000 Subject: [PATCH] Added test for EOF rule. --- re2c/test/eof/utf8_any.i.c | 2170 +++++++++++++++++++++++++++++++++++ re2c/test/eof/utf8_any.i.re | 179 +++ 2 files changed, 2349 insertions(+) create mode 100644 re2c/test/eof/utf8_any.i.c create mode 100644 re2c/test/eof/utf8_any.i.re diff --git a/re2c/test/eof/utf8_any.i.c b/re2c/test/eof/utf8_any.i.c new file mode 100644 index 00000000..e4702f54 --- /dev/null +++ b/re2c/test/eof/utf8_any.i.c @@ -0,0 +1,2170 @@ +/* Generated by re2c */ +/* + +assuming filename a.re: + +$ re2c -i a.re -oa.cc -W && g++ a.cc -oa -Wall -Wextra && ./a +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 +skipping one byte ... +a: 61 +lex error: ff 61 +skipping one byte ... +a: 61 +done! + +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 ff ff ff ff +skipping one byte ... +a: 61 +lex error: ff 61 ff ff ff ff +skipping one byte ... +a: 61 +done! + +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 +skipping one byte ... +a: 61 +lex error: ff 61 +skipping one byte ... +a: 61 +done! + +*/ + +#include +#include +#include +#include + + + +enum Ecode {EOK, EEOF, ENOLEX}; + +struct Result { + Ecode err; + uint8_t *pos; +}; + +static void print_bytes(const uint8_t *s, const uint8_t *e) +{ + for (; s < e; ++s) printf(" %02x", *s); + printf("\n"); +} + +typedef Result (lex1_t)(uint8_t*, uint8_t*, uint8_t*); + +static Result lex1_sentinel(uint8_t *cur, uint8_t *end, uint8_t* /* unused */) +{ + uint8_t *YYMARKER, *tok = cur; + +{ + uint8_t yych; + yych = *cur; + switch (yych) { + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case '\t': + case '\v': + case '\f': + case '\r': + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case ' ': + case '!': + case '"': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case ';': + case '<': + case '=': + case '>': + case '?': + case '@': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '[': + case '\\': + case ']': + case '^': + case '_': + case '`': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '{': + case '|': + case '}': + case '~': + case 0x7F: goto yy2; + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: goto yy6; + case 0xE0: goto yy7; + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: goto yy8; + case 0xF0: goto yy9; + case 0xF1: + case 0xF2: + case 0xF3: goto yy10; + case 0xF4: goto yy11; + default: goto yy4; + } +yy2: + ++cur; + { return {EOK, cur}; } +yy4: + ++cur; +yy5: + { return {tok == end && *tok == 0xc0 ? EEOF : ENOLEX, tok}; } +yy6: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy2; + default: goto yy5; + } +yy7: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy12; + default: goto yy5; + } +yy8: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy12; + default: goto yy5; + } +yy9: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy14; + default: goto yy5; + } +yy10: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy14; + default: goto yy5; + } +yy11: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: goto yy14; + default: goto yy5; + } +yy12: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy2; + default: goto yy13; + } +yy13: + cur = YYMARKER; + goto yy5; +yy14: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy12; + default: goto yy13; + } +} + +} + +#define YYMAXFILL 4 + + +static Result lex1_default(uint8_t *cur, uint8_t *end, uint8_t *last) +{ + uint8_t *YYMARKER, *tok = cur; + +{ + uint8_t yych; + if ((end - cur) < 4) assert(false); + yych = *cur; + switch (yych) { + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case '\t': + case '\v': + case '\f': + case '\r': + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case ' ': + case '!': + case '"': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case ';': + case '<': + case '=': + case '>': + case '?': + case '@': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '[': + case '\\': + case ']': + case '^': + case '_': + case '`': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '{': + case '|': + case '}': + case '~': + case 0x7F: goto yy17; + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: goto yy21; + case 0xE0: goto yy22; + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: goto yy23; + case 0xF0: goto yy24; + case 0xF1: + case 0xF2: + case 0xF3: goto yy25; + case 0xF4: goto yy26; + default: goto yy19; + } +yy17: + ++cur; + { return {EOK, cur}; } +yy19: + ++cur; +yy20: + { return {tok == last ? EEOF : ENOLEX, tok}; } +yy21: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy17; + default: goto yy20; + } +yy22: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy27; + default: goto yy20; + } +yy23: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy27; + default: goto yy20; + } +yy24: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy29; + default: goto yy20; + } +yy25: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy29; + default: goto yy20; + } +yy26: + yych = *(YYMARKER = ++cur); + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: goto yy29; + default: goto yy20; + } +yy27: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy17; + default: goto yy28; + } +yy28: + cur = YYMARKER; + goto yy20; +yy29: + yych = *++cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy27; + default: goto yy28; + } +} + +} + +static int fill() { return 1; } // always error + +static Result lex1_simple(uint8_t *cur, uint8_t* end, uint8_t* /* unused */) +{ + uint8_t *YYMARKER, *tok = cur; + +{ + uint8_t yych; +yy31: +yy31_: + yych = *cur; + switch (yych) { + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case '\t': + case '\v': + case '\f': + case '\r': + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case ' ': + case '!': + case '"': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case ';': + case '<': + case '=': + case '>': + case '?': + case '@': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '[': + case '\\': + case ']': + case '^': + case '_': + case '`': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '{': + case '|': + case '}': + case '~': + case 0x7F: + if (end <= cur) { + if (fill () == 0) goto yy31_; + else goto yyeof; + } + goto yy32; + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: goto yy36; + case 0xE0: goto yy37; + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: goto yy38; + case 0xF0: goto yy39; + case 0xF1: + case 0xF2: + case 0xF3: goto yy40; + case 0xF4: goto yy41; + default: goto yy34; + } +yy32: + ++cur; + { return {EOK, cur}; } +yy34: + ++cur; +yy35: + { return {ENOLEX, tok}; } +yy36: + ++cur; +yy36_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy32; + default: + if (end <= cur) { + if (fill () == 0) goto yy36_; + } + goto yy35; + } +yy37: + YYMARKER = ++cur; +yy37_: + yych = *cur; + switch (yych) { + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy42; + default: + if (end <= cur) { + if (fill () == 0) goto yy37_; + } + goto yy35; + } +yy38: + YYMARKER = ++cur; +yy38_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy42; + default: + if (end <= cur) { + if (fill () == 0) goto yy38_; + } + goto yy35; + } +yy39: + YYMARKER = ++cur; +yy39_: + yych = *cur; + switch (yych) { + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy44; + default: + if (end <= cur) { + if (fill () == 0) goto yy39_; + } + goto yy35; + } +yy40: + YYMARKER = ++cur; +yy40_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy44; + default: + if (end <= cur) { + if (fill () == 0) goto yy40_; + } + goto yy35; + } +yy41: + YYMARKER = ++cur; +yy41_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: goto yy44; + default: + if (end <= cur) { + if (fill () == 0) goto yy41_; + } + goto yy35; + } +yy42: + ++cur; +yy42_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy32; + default: + if (end <= cur) { + if (fill () == 0) goto yy42_; + } + goto yy43; + } +yy43: + cur = YYMARKER; + goto yy35; +yy44: + ++cur; +yy44_: + yych = *cur; + switch (yych) { + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: goto yy42; + default: + if (end <= cur) { + if (fill () == 0) goto yy44_; + } + goto yy43; + } +yyeof: + { return {EEOF, cur}; } +} + +} + +static void lex(uint8_t *cur, uint8_t *end, uint8_t *lim, lex1_t *f) +{ + Result res; + for (;;) { + res = f(cur, end, lim); + if (res.err == EEOF) { + printf("done!\n\n"); + break; + } + else if (res.err == ENOLEX) { + printf("lex error:"); + print_bytes(res.pos, end); + printf("skipping one byte ...\n"); + cur = res.pos + 1; + } + else { + assert (res.err == EOK); + if (*cur) printf("%.*s:", (int)(res.pos - cur), cur); + else printf("(nil):"); + print_bytes(cur, res.pos); + cur = res.pos; + } + } +} + +static void lex_sentinel(char *str, size_t len) +{ + str[len] = 0xc0; + uint8_t *cur = (uint8_t*)str, *end = cur + len; + lex(cur, end, NULL, lex1_sentinel); + str[len] = 0; +} + +static void lex_default(char *str, size_t len) +{ + uint8_t *buf = new uint8_t[len + YYMAXFILL]; + uint8_t *cur = buf; + uint8_t *end = buf + len; + uint8_t *lim = end + YYMAXFILL; + memcpy(buf, str, len); + memset(end, 0xff, YYMAXFILL); + lex(cur, lim, end, lex1_default); + delete[] buf; +} + +static void lex_simple(char *str, size_t len) +{ + uint8_t *cur = (uint8_t*)str, *end = cur + len; + lex(cur, end, NULL, lex1_simple); +} + +int main() +{ + char s[] = "zы\x00й1aaaa"; + const size_t l = sizeof(s) - 1; + s[l - 4] = 0xc0; + s[l - 2] = 0xff; + + lex_sentinel(s, l); + lex_default(s, l); + lex_simple(s, l); + + return 0; +} diff --git a/re2c/test/eof/utf8_any.i.re b/re2c/test/eof/utf8_any.i.re new file mode 100644 index 00000000..30c45fb7 --- /dev/null +++ b/re2c/test/eof/utf8_any.i.re @@ -0,0 +1,179 @@ +/* + +assuming filename a.re: + +$ re2c -i a.re -oa.cc -W && g++ a.cc -oa -Wall -Wextra && ./a +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 +skipping one byte ... +a: 61 +lex error: ff 61 +skipping one byte ... +a: 61 +done! + +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 ff ff ff ff +skipping one byte ... +a: 61 +lex error: ff 61 ff ff ff ff +skipping one byte ... +a: 61 +done! + +z: 7a +ы: d1 8b +(nil): 00 +й: d0 b9 +1: 31 +lex error: c0 61 ff 61 +skipping one byte ... +a: 61 +lex error: ff 61 +skipping one byte ... +a: 61 +done! + +*/ + +#include +#include +#include +#include + +/*!re2c + re2c:define:YYCTYPE = uint8_t; + re2c:define:YYCURSOR = cur; + re2c:define:YYLIMIT = end; + re2c:flags:utf-8 = 1; +*/ + +enum Ecode {EOK, EEOF, ENOLEX}; + +struct Result { + Ecode err; + uint8_t *pos; +}; + +static void print_bytes(const uint8_t *s, const uint8_t *e) +{ + for (; s < e; ++s) printf(" %02x", *s); + printf("\n"); +} + +typedef Result (lex1_t)(uint8_t*, uint8_t*, uint8_t*); + +static Result lex1_sentinel(uint8_t *cur, uint8_t *end, uint8_t* /* unused */) +{ + uint8_t *YYMARKER, *tok = cur; + /*!re2c + re2c:yyfill:enable = 0; + + . { return {EOK, cur}; } + * { return {tok == end && *tok == 0xc0 ? EEOF : ENOLEX, tok}; } + */ +} + +/*!max:re2c*/ + +static Result lex1_default(uint8_t *cur, uint8_t *end, uint8_t *last) +{ + uint8_t *YYMARKER, *tok = cur; + /*!re2c + re2c:yyfill:enable = 1; + re2c:define:YYFILL:naked = 1; + re2c:define:YYFILL = 'assert(false);'; + + . { return {EOK, cur}; } + * { return {tok == last ? EEOF : ENOLEX, tok}; } + */ +} + +static int fill() { return 1; } // always error + +static Result lex1_simple(uint8_t *cur, uint8_t* end, uint8_t* /* unused */) +{ + uint8_t *YYMARKER, *tok = cur; + /*!re2c + re2c:yyfill:enable = 1; + re2c:define:YYFILL = fill; + re2c:eof = 0; + + . { return {EOK, cur}; } + $ { return {EEOF, cur}; } + * { return {ENOLEX, tok}; } + */ +} + +static void lex(uint8_t *cur, uint8_t *end, uint8_t *lim, lex1_t *f) +{ + Result res; + for (;;) { + res = f(cur, end, lim); + if (res.err == EEOF) { + printf("done!\n\n"); + break; + } + else if (res.err == ENOLEX) { + printf("lex error:"); + print_bytes(res.pos, end); + printf("skipping one byte ...\n"); + cur = res.pos + 1; + } + else { + assert (res.err == EOK); + if (*cur) printf("%.*s:", (int)(res.pos - cur), cur); + else printf("(nil):"); + print_bytes(cur, res.pos); + cur = res.pos; + } + } +} + +static void lex_sentinel(char *str, size_t len) +{ + str[len] = 0xc0; + uint8_t *cur = (uint8_t*)str, *end = cur + len; + lex(cur, end, NULL, lex1_sentinel); + str[len] = 0; +} + +static void lex_default(char *str, size_t len) +{ + uint8_t *buf = new uint8_t[len + YYMAXFILL]; + uint8_t *cur = buf; + uint8_t *end = buf + len; + uint8_t *lim = end + YYMAXFILL; + memcpy(buf, str, len); + memset(end, 0xff, YYMAXFILL); + lex(cur, lim, end, lex1_default); + delete[] buf; +} + +static void lex_simple(char *str, size_t len) +{ + uint8_t *cur = (uint8_t*)str, *end = cur + len; + lex(cur, end, NULL, lex1_simple); +} + +int main() +{ + char s[] = "zы\x00й1aaaa"; + const size_t l = sizeof(s) - 1; + s[l - 4] = 0xc0; + s[l - 2] = 0xff; + + lex_sentinel(s, l); + lex_default(s, l); + lex_simple(s, l); + + return 0; +} -- 2.40.0