From 8ac9292416c130bb42b2796f6e2575c56adec5db Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 22 Aug 2017 18:06:55 +0100 Subject: [PATCH] Updated examples and added them to 'run_tests.sh' script. --- re2c/examples/001_upn_calculator/README | 83 ----- re2c/examples/001_upn_calculator/calc_001.re | 84 ----- re2c/examples/001_upn_calculator/calc_002.re | 69 ---- re2c/examples/001_upn_calculator/calc_003.re | 61 ---- re2c/examples/001_upn_calculator/calc_004.re | 78 ---- re2c/examples/001_upn_calculator/calc_005.re | 144 -------- .../examples/001_upn_calculator/calc_006.s.re | 162 --------- .../examples/001_upn_calculator/calc_007.b.re | 135 ------- .../examples/001_upn_calculator/calc_008.b.re | 158 -------- .../001_upn_calculator/windows/HiResTimer.h | 54 --- .../001_upn_calculator/windows/main.b.re | 291 --------------- re2c/examples/002_strip_comments/README | 21 -- .../002_strip_comments/strip_001.s.re | 147 -------- .../002_strip_comments/strip_002.s.re | 162 --------- .../002_strip_comments/strip_003.b.re | 179 --------- .../01_recognizing_integers.i--tags.c | 0 .../01_recognizing_integers.i--tags.re | 0 .../examples/02_recognizing_strings.i--tags.c | 0 .../02_recognizing_strings.i--tags.re | 0 .../03_arbitrary_large_input.i--tags.c | 0 .../03_arbitrary_large_input.i--tags.re | 0 .../04_parsing_integers_blocks.i--tags.c | 0 .../04_parsing_integers_blocks.i--tags.re | 0 .../05_parsing_integers_conditions.ci--tags.c | 0 ...05_parsing_integers_conditions.ci--tags.re | 0 .../examples/06_braille.cr8i--tags.c | 0 .../examples/06_braille.cr8i--tags.re | 0 re2c/{test => }/examples/07_cxx98.i--tags.c | 0 re2c/{test => }/examples/07_cxx98.i--tags.re | 0 re2c/{test => }/examples/08_ipv4.i--tags.c | 0 re2c/{test => }/examples/08_ipv4.i--tags.re | 0 .../examples/09_etc_passwd.i--tags.c | 0 .../examples/09_etc_passwd.i--tags.re | 0 .../examples/10_uri_rfc3986.i--tags.c | 0 .../examples/10_uri_rfc3986.i--tags.re | 0 .../examples/11_http_rfc7230.i--tags.c | 0 .../examples/11_http_rfc7230.i--tags.re | 0 re2c/{test => }/examples/12_float.i--tags.c | 0 re2c/{test => }/examples/12_float.i--tags.re | 0 re2c/{test => }/examples/13_records.i--tags.c | 0 .../{test => }/examples/13_records.i--tags.re | 0 re2c/{test => }/examples/14_options.i--tags.c | 0 .../{test => }/examples/14_options.i--tags.re | 0 re2c/examples/15_binsyms.i--input(custom).c | 209 +++++++++++ re2c/examples/15_binsyms.i--input(custom).re | 49 +++ .../16_fake_sentinel.i--input(custom).c | 249 +++++++++++++ .../16_fake_sentinel.i--input(custom).re | 31 ++ re2c/examples/17_ifstream.i--input(custom).c | 49 +++ re2c/examples/17_ifstream.i--input(custom).re | 30 ++ re2c/examples/18_push_model.if.c | 240 +++++++++++++ re2c/examples/18_push_model.if.re | 95 +++++ re2c/examples/input_custom/fixed.re | 35 -- re2c/examples/input_custom/simple/README | 20 -- re2c/examples/input_custom/simple/default.re | 24 -- re2c/examples/input_custom/simple/fgetc.re | 43 --- .../input_custom/simple/istringstream.re | 27 -- re2c/examples/langs/c.re | 272 -------------- re2c/examples/langs/modula.re | 203 ----------- re2c/examples/langs/rexx.re | 319 ---------------- re2c/examples/push_model/push.re | 340 ------------------ re2c/run_tests.sh.in | 2 +- 61 files changed, 953 insertions(+), 3112 deletions(-) delete mode 100644 re2c/examples/001_upn_calculator/README delete mode 100644 re2c/examples/001_upn_calculator/calc_001.re delete mode 100644 re2c/examples/001_upn_calculator/calc_002.re delete mode 100644 re2c/examples/001_upn_calculator/calc_003.re delete mode 100644 re2c/examples/001_upn_calculator/calc_004.re delete mode 100644 re2c/examples/001_upn_calculator/calc_005.re delete mode 100644 re2c/examples/001_upn_calculator/calc_006.s.re delete mode 100644 re2c/examples/001_upn_calculator/calc_007.b.re delete mode 100644 re2c/examples/001_upn_calculator/calc_008.b.re delete mode 100644 re2c/examples/001_upn_calculator/windows/HiResTimer.h delete mode 100644 re2c/examples/001_upn_calculator/windows/main.b.re delete mode 100644 re2c/examples/002_strip_comments/README delete mode 100644 re2c/examples/002_strip_comments/strip_001.s.re delete mode 100644 re2c/examples/002_strip_comments/strip_002.s.re delete mode 100644 re2c/examples/002_strip_comments/strip_003.b.re rename re2c/{test => }/examples/01_recognizing_integers.i--tags.c (100%) rename re2c/{test => }/examples/01_recognizing_integers.i--tags.re (100%) rename re2c/{test => }/examples/02_recognizing_strings.i--tags.c (100%) rename re2c/{test => }/examples/02_recognizing_strings.i--tags.re (100%) rename re2c/{test => }/examples/03_arbitrary_large_input.i--tags.c (100%) rename re2c/{test => }/examples/03_arbitrary_large_input.i--tags.re (100%) rename re2c/{test => }/examples/04_parsing_integers_blocks.i--tags.c (100%) rename re2c/{test => }/examples/04_parsing_integers_blocks.i--tags.re (100%) rename re2c/{test => }/examples/05_parsing_integers_conditions.ci--tags.c (100%) rename re2c/{test => }/examples/05_parsing_integers_conditions.ci--tags.re (100%) rename re2c/{test => }/examples/06_braille.cr8i--tags.c (100%) rename re2c/{test => }/examples/06_braille.cr8i--tags.re (100%) rename re2c/{test => }/examples/07_cxx98.i--tags.c (100%) rename re2c/{test => }/examples/07_cxx98.i--tags.re (100%) rename re2c/{test => }/examples/08_ipv4.i--tags.c (100%) rename re2c/{test => }/examples/08_ipv4.i--tags.re (100%) rename re2c/{test => }/examples/09_etc_passwd.i--tags.c (100%) rename re2c/{test => }/examples/09_etc_passwd.i--tags.re (100%) rename re2c/{test => }/examples/10_uri_rfc3986.i--tags.c (100%) rename re2c/{test => }/examples/10_uri_rfc3986.i--tags.re (100%) rename re2c/{test => }/examples/11_http_rfc7230.i--tags.c (100%) rename re2c/{test => }/examples/11_http_rfc7230.i--tags.re (100%) rename re2c/{test => }/examples/12_float.i--tags.c (100%) rename re2c/{test => }/examples/12_float.i--tags.re (100%) rename re2c/{test => }/examples/13_records.i--tags.c (100%) rename re2c/{test => }/examples/13_records.i--tags.re (100%) rename re2c/{test => }/examples/14_options.i--tags.c (100%) rename re2c/{test => }/examples/14_options.i--tags.re (100%) create mode 100644 re2c/examples/15_binsyms.i--input(custom).c create mode 100644 re2c/examples/15_binsyms.i--input(custom).re create mode 100644 re2c/examples/16_fake_sentinel.i--input(custom).c create mode 100644 re2c/examples/16_fake_sentinel.i--input(custom).re create mode 100644 re2c/examples/17_ifstream.i--input(custom).c create mode 100644 re2c/examples/17_ifstream.i--input(custom).re create mode 100644 re2c/examples/18_push_model.if.c create mode 100644 re2c/examples/18_push_model.if.re delete mode 100644 re2c/examples/input_custom/fixed.re delete mode 100644 re2c/examples/input_custom/simple/README delete mode 100644 re2c/examples/input_custom/simple/default.re delete mode 100644 re2c/examples/input_custom/simple/fgetc.re delete mode 100644 re2c/examples/input_custom/simple/istringstream.re delete mode 100644 re2c/examples/langs/c.re delete mode 100644 re2c/examples/langs/modula.re delete mode 100644 re2c/examples/langs/rexx.re delete mode 100644 re2c/examples/push_model/push.re diff --git a/re2c/examples/001_upn_calculator/README b/re2c/examples/001_upn_calculator/README deleted file mode 100644 index 81377d75..00000000 --- a/re2c/examples/001_upn_calculator/README +++ /dev/null @@ -1,83 +0,0 @@ -re2c lesson 001_upn_calculator, (c) M. Boerger 2006 - -This lesson gets you started with re2c. In the end you will have an easy RPN -(reverse polish notation) calculator for use at command line. - -You will learn about the basic interface of re2c when scanning input strings. -How to detect the end of the input and use that to stop scanning in order to -avoid problems. - -Once you have successfully installed re2c you can use it to generate *.c files -from the *.re files presented in this lesson. Actually the expected *.c files -are already present. So you should name them *.cc or something alike or just -give them a different name like test.c. To do so you simply change into the -directory and execute the following command: - - re2c calc_001.re > test.c - -Then use your compiler to compile that code and run it. If you are using gcc -you simply do the following: - - gcc -o test.o test.c - ./test.o - -If you are using windows you might want to read till the end of this lesson. - -When you want to debug the code it helps to make re2c generate working #line -information. To do so you simply specify the output file using the -o switch -followed by the output filename: - - re2c -o test.c calc_001.re - -The input files *.re each contain basic step by comments that explain what is -going on and what you can see in the examples. - -In order to optimize the generated code we will use the -s command line switch -of re2c. This tells re2c to generate code that uses if statements rather -then endless switch/case expressions where appropriate. Note that the file name -extension is actually '.s.re' to tell the test system to use the -s switch. To -invoke re2 you do the following: - - re2c -s -o test.c calc_006.s.re - -Finally we use the -b switch to have the code use a decision table. The -b -switch also contains the -s behavior. - - re2c -b -o test.c calc_007.b.re - - - -------------------------------------------------------------------------------- - -For windows users Lynn Allan provided some additional stuff to get you started -in the Microsoft world. This addon resides in the windows subdirectory and -gives you something to expereiment with. The code in that directory is based -on the first step and has the following changes: - -* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build -Steps" that can tell when main.re changes, and know how to generate main.c -from main.re. They assume that you unpacked the zip package and have re2c -itself build or installed in Release and Release-2005 directory respectively. -If re2c cannot be found you need to modify the custom build step and correct -the path to re2c. - -* BuildAndRun.bat to do command line rec2 and then cl and then run the -executable (discontinues with message if errors). - -* built-in cppunit-like test to confirm it worked as expected. - -* array of test strings "fed" to scan rather than file contents to facilitate -testing and also reduce the newbie learning curve. - -* HiResTimer output for 10,000 loops and 100,000 loops. While this might be -excessive for this lesson, it illustrates how to do it for subsequent lessons -and your own stuff using windows. Also it shows that Release build is as fast -as strncmp for this test and can probably be made significantly faster. - -* If you want to build the other steps of this lesson using windows tools -simply copy the *.re files into the windows directory as main.re and rebuild. - - -------------------------------------------------------------------------------- -Sidenote: UPN is the german translation of RPN, somehow hardcoded into the -authors brain :-) diff --git a/re2c/examples/001_upn_calculator/calc_001.re b/re2c/examples/001_upn_calculator/calc_001.re deleted file mode 100644 index fe8d3ae1..00000000 --- a/re2c/examples/001_upn_calculator/calc_001.re +++ /dev/null @@ -1,84 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- basic interface for string reading - - . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL - . YYCTYPE is the type re2c operates on or in other words the type that - it generates code for. While it is not a big difference when we were - using 'unsigned char' here we would need to run re2c with option -w - to fully support types with sieof() > 1. - . YYCURSOR is used internally and holds the current scanner position. In - expression handlers, the code blocks after re2c expressions, this can be - used to identify the end of the token. - . YYMARKER is not always being used so we set an initial value to avoid - a compiler warning. Here we could also omit it compleley. - . YYLIMIT stores the end of the input. Unfortunatley we have to use strlen() - in this lesson. In the next example we see one way to get rid of it. - . We use a 'for(;;)'-loop around the scanner block. We could have used a - 'while(1)'-loop instead but some compilers generate a warning for it. - . To make the output more readable we use 're2c:indent:top' scanner - configuration that configures re2c to prepend a single tab (the default) - to the beginning of each output line. - . The following lines are expressions and for each expression we output the - token name and continue the scanner loop. - . The second last token detects the end of our input, the terminating zero in - our input string. In other scanners detecting the end of input may vary. - For example binary code may contain \0 as valid input. - . The last expression accepts any input character. It tells re2c to accept - the opposit of the empty range. This includes numbers and our tokens but - as re2c goes from top to botton when evaluating the expressions this is no - problem. - . The first three rules show that re2c actually prioritizes the expressions - from top to bottom. Octal number require a starting "0" and the actual - number. Normal numbers start with a digit greater 0. And zero is finally a - special case. A single "0" is detected by the last rule of this set. And - valid ocal number is already being detected by the first rule. This even - includes multi "0" sequences that in octal notation also means zero. - Another way would be to only use two rules: - "0" [0-9]+ - "0" | ( [1-9] [0-9]* ) - A full description of re2c rule syntax can be found in the manual. -*/ - -#include -#include -#include - -int scan(char *s, int l) -{ - char *p = s; - char *q = 0; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT (s+l) -#define YYMARKER q -#define YYFILL(n) - - for(;;) - { -/*!re2c - re2c:indent:top = 2; - "0"[0-9]+ { printf("Oct\n"); continue; } - [1-9][0-9]* { printf("Num\n"); continue; } - "0" { printf("Num\n"); continue; } - "+" { printf("+\n"); continue; } - "-" { printf("-\n"); continue; } - "\000" { printf("EOF\n"); return 0; } - [^] { printf("ERR\n"); return 1; } -*/ - } -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(argv[1], strlen(argv[1])); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 1; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_002.re b/re2c/examples/001_upn_calculator/calc_002.re deleted file mode 100644 index 417e9f31..00000000 --- a/re2c/examples/001_upn_calculator/calc_002.re +++ /dev/null @@ -1,69 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- making use of YYFILL - - . Here we modified the scanner to not require strlen() on the call. Instead - we compute limit on the fly. That is whenever more input is needed we - search for the terminating \0 in the next n chars the scanner needs. - . If there is not enough input we quit the scanner. - . Note that in lesson_001 YYLIMIT was a character pointer computed only once. - Here is of course also of type YYCTYPE but a variable that gets reevaluated - by YYFILL(). - . To make the code smaller we take advantage of the fact that our loop has no - break so far. This allows us to use break here and have the code that is - used for YYFILL() not contain the printf in every occurence. That way the - generated code gets smaller. - -*/ - -#include -#include -#include - -int fill(char *p, int n, char **l) -{ - while (*++p && n--) ; - * l = p; - return n <= 0; -} - -int scan(char *s) -{ - char *p = s; - char *l = s; - char *q = 0; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT l -#define YYMARKER q -#define YYFILL(n) { if (!fill(p, n, &l)) break; } - - for(;;) - { -/*!re2c - re2c:indent:top = 2; - "0"[0-9]+ { printf("Oct\n"); continue; } - [1-9][0-9]* { printf("Num\n"); continue; } - "0" { printf("Num\n"); continue; } - "+" { printf("+\n"); continue; } - "-" { printf("+\n"); continue; } - "\000" { printf("EOF\n"); return 0; } - [^] { printf("ERR\n"); return 1; } -*/ - } - printf("OOD\n"); return 2; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(argv[1]); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_003.re b/re2c/examples/001_upn_calculator/calc_003.re deleted file mode 100644 index e48aec92..00000000 --- a/re2c/examples/001_upn_calculator/calc_003.re +++ /dev/null @@ -1,61 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- making use of YYFILL - - . Again provide the length of the input to generate the limit only once. Now - we can use YYFILL() to detect the end and simply return since YYFILL() is - only being used if the next scanner run might use more chars then YYLIMIT - allows. - . Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In - the first lesson we did not quit from YYFILL() and used a special rule to - detect the end of input. Here we use the fact that we know the exact end - of input and that this length does not include the terminating zero. Since - YYLIMIT points to the first character behind the used buffer we use "+ 2". - If we would use "+1" we could drop the "\000" rule but could no longer - distinguish between end of input and out of data. - -*/ - -#include -#include -#include - -int scan(char *s, int l) -{ - char *p = s; - char *q = 0; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT (s+l+2) -#define YYMARKER q -#define YYFILL(n) { printf("OOD\n"); return 2; } - - for(;;) - { -/*!re2c - re2c:indent:top = 2; - "0"[0-9]+ { printf("Oct\n"); continue; } - [1-9][0-9]* { printf("Num\n"); continue; } - "0" { printf("Num\n"); continue; } - "+" { printf("+\n"); continue; } - "-" { printf("+\n"); continue; } - "\000" { printf("EOF\n"); return 0; } - [^] { printf("ERR\n"); return 1; } -*/ - } - return 0; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(argv[1], strlen(argv[1])); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_004.re b/re2c/examples/001_upn_calculator/calc_004.re deleted file mode 100644 index 977e438b..00000000 --- a/re2c/examples/001_upn_calculator/calc_004.re +++ /dev/null @@ -1,78 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- making use of definitions - . We provide complex rules as definitions. We can even have definitions made - up from other definitions. And we could also use definitions as part of - rules and not only as full rules as shown in this lesson. - -- showing the tokens - . re2c does not store the beginning of a token on its own but we can easily - do this by providing variable, in our case t, that is set to YYCURSOR on - every loop. If we were not using a loop here the token, we could have used - s instead of a new variable instead. - . As we use the token for an output function that requires a terminating zero - we copy the token. Alternatively we could store the end of the token, then - replace it with a zero character and replace it after the token has been - used. However that approach is not always acceptable. - -*/ - -#include -#include -#include - -char * tokendup(const char *t, const char *l) -{ - size_t n = l -t + 1; - char *r = (char*)malloc(n); - - memmove(r, t, n-1); - r[n] = '\0'; - return r; -} - -int scan(char *s, int l) -{ - char *p = s; - char *q = 0; - char *t; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT (s+l+2) -#define YYMARKER q -#define YYFILL(n) { printf("OOD\n"); return 2; } - - for(;;) - { - t = p; -/*!re2c - re2c:indent:top = 2; - - DIGIT = [0-9] ; - OCT = "0" DIGIT+ ; - INT = "0" | ( [1-9] DIGIT* ) ; - - OCT { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; } - INT { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; } - "+" { printf("+\n"); continue; } - "-" { printf("+\n"); continue; } - "\000" { printf("EOF\n"); return 0; } - [^] { printf("ERR\n"); return 1; } -*/ - } - return 0; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(argv[1], strlen(argv[1])); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_005.re b/re2c/examples/001_upn_calculator/calc_005.re deleted file mode 100644 index 6ae2a484..00000000 --- a/re2c/examples/001_upn_calculator/calc_005.re +++ /dev/null @@ -1,144 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- turning this lesson into an easy calculator - . We are going to write an UPN calculator so we need an additional rule to - ignore white space. - . Then we need to store the scanned input somewhere and do our math on it. - . Also we need to scan all arguments since the main c code gets the input - split up into chunks. - . In contrast to what we did before we now add a variable res that holds the - scanner state. We initialize that variable to 0 and quit the loop when it - is non zero. This will also be our return value so that we can use it in - function main to generate error information. - . To support operating systems where ' and " get passed in program arguments - we check for them being first and last input character. If so we correct - input pointer and input length. Since now our scanner might not see a - terminating zero we change YYLIMIT again and drop the special zero rule. -*/ - -#include -#include -#include - -#define DEBUG(stmt) stmt - -int stack[4]; -int depth = 0; - -int push_num(const char *t, const char *l, int radix) -{ - int num = 0; - - if (depth >= sizeof(stack)) - { - return 3; - } - - --t; - while(++t < l) - { - num = num * radix + (*t - '0'); - } - DEBUG(printf("Num: %d\n", num)); - - stack[depth++] = num; - return 0; -} - -int stack_add() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] + stack[depth]; - return 0; -} - -int stack_sub() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] - stack[depth]; - return 0; -} - -int scan(char *s, int l) -{ - char *p = s; - char *q = 0; - char *t; - int res = 0; - -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT (s+l+1) -#define YYMARKER q -#define YYFILL(n) { return depth == 1 ? 0 : 2; } - - while(!res) - { - t = p; -/*!re2c - re2c:indent:top = 2; - - DIGIT = [0-9] ; - OCT = "0" DIGIT+ ; - INT = "0" | ( [1-9] DIGIT* ) ; - WS = [ \t]+ ; - - WS { continue; } - OCT { res = push_num(t, p, 8); continue; } - INT { res = push_num(t, p, 10); continue; } - "+" { res = stack_add(); continue; } - "-" { res = stack_sub(); continue; } - [^] { res = 1; continue; } -*/ - } - return res; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - char *inp; - int res = 0, argp = 0, len; - - while(!res && ++argp < argc) - { - inp = argv[argp]; - len = strlen(inp); - if (inp[0] == '\"' && inp[len-1] == '\"') - { - ++inp; - len -=2; - } - res = scan(inp, len); - } - switch(res) - { - case 0: - printf("Result: %d\n", stack[0]); - return 0; - case 1: - fprintf(stderr, "Illegal character in input.\n"); - return 1; - case 2: - fprintf(stderr, "Premature end of input.\n"); - return 2; - case 3: - fprintf(stderr, "Stack overflow.\n"); - return 3; - case 4: - fprintf(stderr, "Stack underflow.\n"); - return 4; - } - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_006.s.re b/re2c/examples/001_upn_calculator/calc_006.s.re deleted file mode 100644 index 10da31cd..00000000 --- a/re2c/examples/001_upn_calculator/calc_006.s.re +++ /dev/null @@ -1,162 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- avoiding YYFILL() - . We use the inplace configuration re2c:yyfill to suppress generation of - YYFILL() blocks. This of course means we no longer have to provide the - macro. - . We also drop the YYMARKER stuff since we know that re2c does not generate - it for this example. - . Since re2c does no longer check for out of data situations we must do this. - For that reason we first reintroduce our zero rule and second we need to - ensure that the scanner does not take more than one bytes in one go. - - In the example suppose "0" is passed. The scanner reads the first "0" and - then is in an undecided state. The scanner can earliest decide on the next - char what the token is. In case of a zero the input ends and it was a - number, 0 to be precise. In case of a digit it is an octal number and the - next character needs to be read. In case of any other character the scanner - will detect an error with the any rule [^]. - - Now the above shows that the scanner may read two characters directly. But - only if the first is a "0". So we could easily check that if the first char - is "0" and the next char is a digit then yet another charcter is present. - But we require our inut to be zero terminated. And that means we do not - have to check anything for this scanner. - - However with other rule sets re2c might read more then one character in a - row. In those cases it is normally hard to impossible to avoid YYFILL. - -- optimizing the generated code by using -s command line switch of re2c - . This tells re2c to generate code that uses if statements rather - then endless switch/case expressions where appropriate. Note that the - generated code now requires the input to be unsigned char rather than char - due to the way comparisons are generated. -*/ - -#include -#include -#include - -#define DEBUG(stmt) stmt - -int stack[4]; -int depth = 0; - -int push_num(const unsigned char *t, const unsigned char *l, int radix) -{ - int num = 0; - - if (depth >= sizeof(stack)) - { - return 3; - } - - --t; - while(++t < l) - { - num = num * radix + (*t - (unsigned char)'0'); - } - DEBUG(printf("Num: %d\n", num)); - - stack[depth++] = num; - return 0; -} - -int stack_add() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] + stack[depth]; - DEBUG(printf("+\n")); - return 0; -} - -int stack_sub() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] - stack[depth]; - DEBUG(printf("-\n")); - return 0; -} - -int scan(char *s) -{ - unsigned char *p = (unsigned char*)s; - unsigned char *t; - int res = 0; - -#define YYCTYPE unsigned char -#define YYCURSOR p - - while(!res) - { - t = p; -/*!re2c - re2c:indent:top = 2; - re2c:yyfill:enable = 0; - - DIGIT = [0-9] ; - OCT = "0" DIGIT+ ; - INT = "0" | ( [1-9] DIGIT* ) ; - WS = [ \t]+ ; - - WS { continue; } - OCT { res = push_num(t, p, 8); continue; } - INT { res = push_num(t, p, 10); continue; } - "+" { res = stack_add(); continue; } - "-" { res = stack_sub(); continue; } - "\000" { res = depth == 1 ? 0 : 2; break; } - [^] { res = 1; continue; } -*/ - } - return res; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - char *inp; - int res = 0, argp = 0, len; - - while(!res && ++argp < argc) - { - inp = strdup(argv[argp]); - len = strlen(inp); - if (inp[0] == '\"' && inp[len-1] == '\"') - { - inp[len - 1] = '\0'; - ++inp; - } - res = scan(inp); - free(inp); - } - switch(res) - { - case 0: - printf("Result: %d\n", stack[0]); - return 0; - case 1: - fprintf(stderr, "Illegal character in input.\n"); - return 1; - case 2: - fprintf(stderr, "Premature end of input.\n"); - return 2; - case 3: - fprintf(stderr, "Stack overflow.\n"); - return 3; - case 4: - fprintf(stderr, "Stack underflow.\n"); - return 4; - } - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_007.b.re b/re2c/examples/001_upn_calculator/calc_007.b.re deleted file mode 100644 index 52381965..00000000 --- a/re2c/examples/001_upn_calculator/calc_007.b.re +++ /dev/null @@ -1,135 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- optimizing the generated code by using -b command line switch of re2c - . This tells re2c to generate code that uses a decision table. The -b switch - also contains the -s behavior. And -b also requires the input to be - unsigned chars. -*/ - -#include -#include -#include - -#define DEBUG(stmt) stmt - -int stack[4]; -int depth = 0; - -int push_num(const unsigned char *t, const unsigned char *l, int radix) -{ - int num = 0; - - if (depth >= sizeof(stack)) - { - return 3; - } - - --t; - while(++t < l) - { - num = num * radix + (*t - (unsigned char)'0'); - } - DEBUG(printf("Num: %d\n", num)); - - stack[depth++] = num; - return 0; -} - -int stack_add() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] + stack[depth]; - DEBUG(printf("+\n")); - return 0; -} - -int stack_sub() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] - stack[depth]; - DEBUG(printf("+\n")); - return 0; -} - -int scan(char *s) -{ - unsigned char *p = (unsigned char*)s; - unsigned char *t; - int res = 0; - -#define YYCTYPE unsigned char -#define YYCURSOR p - - while(!res) - { - t = p; -/*!re2c - re2c:indent:top = 2; - re2c:yyfill:enable = 0; - - DIGIT = [0-9] ; - OCT = "0" DIGIT+ ; - INT = "0" | ( [1-9] DIGIT* ) ; - WS = [ \t]+ ; - - WS { continue; } - OCT { res = push_num(t, p, 8); continue; } - INT { res = push_num(t, p, 10); continue; } - "+" { res = stack_add(); continue; } - "-" { res = stack_sub(); continue; } - "\000" { res = depth == 1 ? 0 : 2; break; } - [^] { res = 1; continue; } -*/ - } - return res; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - char *inp; - int res = 0, argp = 0, len; - - while(!res && ++argp < argc) - { - inp = strdup(argv[argp]); - len = strlen(inp); - if (inp[0] == '\"' && inp[len-1] == '\"') - { - inp[len - 1] = '\0'; - ++inp; - } - res = scan(inp); - free(inp); - } - switch(res) - { - case 0: - printf("Result: %d\n", stack[0]); - return 0; - case 1: - fprintf(stderr, "Illegal character in input.\n"); - return 1; - case 2: - fprintf(stderr, "Premature end of input.\n"); - return 2; - case 3: - fprintf(stderr, "Stack overflow.\n"); - return 3; - case 4: - fprintf(stderr, "Stack underflow.\n"); - return 4; - } - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/calc_008.b.re b/re2c/examples/001_upn_calculator/calc_008.b.re deleted file mode 100644 index ed1a088e..00000000 --- a/re2c/examples/001_upn_calculator/calc_008.b.re +++ /dev/null @@ -1,158 +0,0 @@ -/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- using -b with signed character input - . Since the code is being generated with -b switch re2c requires the internal - character variable yych to use an unsigned character type. For that reason - the previous lessons had a conversion at the beginning of their scan() - function. Other re2c generated code often have the scanners work completely - on unsigned input. Thus requesting a conversion. - - To avoid the conversion on input, re2c allows to do the conversion when - reading the internal yych variable. To enable that conversion you need to - use the implace configuration 're2c:yych:conversion' and set it to 1. This - will change the generated code to insert conversions to YYCTYPE whenever - yych is being read. - -- More inplace configurations for better/nicer code - . re2c allows to overwrite the generation of any define, label or variable - used in the generated code. For example we overwrite the 'yych' variable - name to 'curr' using inplace configuration 're2c:variable:yych = curr;'. - - . We further more use inplace configurations instead of defines. This allows - to use correct conversions to 'unsigned char' instead of having to convert - to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' infront - of 're2c:yych:conversion'. Note that we have to use apostrophies for the - first setting as it contains a space. - - . Last but not least we use 're2c:labelprefix = scan' to change the prefix - of generated labels. -*/ - -#include -#include -#include - -#define DEBUG(stmt) stmt - -int stack[4]; -int depth = 0; - -int push_num(const char *t, const char *l, int radix) -{ - int num = 0; - - if (depth >= sizeof(stack)) - { - return 3; - } - - --t; - while(++t < l) - { - num = num * radix + (*t - '0'); - } - DEBUG(printf("Num: %d\n", num)); - - stack[depth++] = num; - return 0; -} - -int stack_add() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] + stack[depth]; - DEBUG(printf("+\n")); - return 0; -} - -int stack_sub() -{ - if (depth < 2) return 4; - - --depth; - stack[depth-1] = stack[depth-1] - stack[depth]; - DEBUG(printf("+\n")); - return 0; -} - -int scan(char *p) -{ - char *t; - int res = 0; - - while(!res) - { - t = p; -/*!re2c - re2c:define:YYCTYPE = "unsigned char"; - re2c:define:YYCURSOR = p; - re2c:variable:yych = curr; - re2c:indent:top = 2; - re2c:yyfill:enable = 0; - re2c:yych:conversion = 1; - re2c:labelprefix = scan; - - DIGIT = [0-9] ; - OCT = "0" DIGIT+ ; - INT = "0" | ( [1-9] DIGIT* ) ; - WS = [ \t]+ ; - - WS { continue; } - OCT { res = push_num(t, p, 8); continue; } - INT { res = push_num(t, p, 10); continue; } - "+" { res = stack_add(); continue; } - "-" { res = stack_sub(); continue; } - "\000" { res = depth == 1 ? 0 : 2; break; } - [^] { res = 1; continue; } -*/ - } - return res; -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - char *inp; - int res = 0, argp = 0, len; - - while(!res && ++argp < argc) - { - inp = strdup(argv[argp]); - len = strlen(inp); - if (inp[0] == '\"' && inp[len-1] == '\"') - { - inp[len - 1] = '\0'; - ++inp; - } - res = scan(inp); - free(inp); - } - switch(res) - { - case 0: - printf("Result: %d\n", stack[0]); - return 0; - case 1: - fprintf(stderr, "Illegal character in input.\n"); - return 1; - case 2: - fprintf(stderr, "Premature end of input.\n"); - return 2; - case 3: - fprintf(stderr, "Stack overflow.\n"); - return 3; - case 4: - fprintf(stderr, "Stack underflow.\n"); - return 4; - } - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 0; - } -} diff --git a/re2c/examples/001_upn_calculator/windows/HiResTimer.h b/re2c/examples/001_upn_calculator/windows/HiResTimer.h deleted file mode 100644 index 585a1d98..00000000 --- a/re2c/examples/001_upn_calculator/windows/HiResTimer.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @file HiResTimer.h - * @brief - * @note - */ - -#ifndef _HI_RES_TIMER_H_ -#define _HI_RES_TIMER_H_ - -#ifdef WIN32 -#include // probably already done in stdafx.h -static LARGE_INTEGER start; -static LARGE_INTEGER stop; -static LARGE_INTEGER freq; -static _int64 elapsedCounts; -static double elapsedMillis; -static double elapsedMicros; -static HANDLE processHandle; -static DWORD prevPriorityClass; - -void HrtInit() -{ - processHandle = GetCurrentProcess(); - prevPriorityClass = GetPriorityClass(processHandle); - QueryPerformanceFrequency(&freq); -} - -void HrtStart() -{ - QueryPerformanceCounter(&start); -} - -void HrtSetPriority(DWORD priority) -{ - int flag; - prevPriorityClass = GetPriorityClass(processHandle); - flag = SetPriorityClass(processHandle, priority); -} - -void HrtResetPriority(void) -{ - int flag = SetPriorityClass(processHandle, prevPriorityClass); -} - -double HrtElapsedMillis() -{ - QueryPerformanceCounter(&stop); - elapsedCounts = (stop.QuadPart - start.QuadPart); - elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart); - return elapsedMillis; -} - -#endif -#endif \ No newline at end of file diff --git a/re2c/examples/001_upn_calculator/windows/main.b.re b/re2c/examples/001_upn_calculator/windows/main.b.re deleted file mode 100644 index 1600b83d..00000000 --- a/re2c/examples/001_upn_calculator/windows/main.b.re +++ /dev/null @@ -1,291 +0,0 @@ -/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */ -/*!ignore:re2c - -- basic interface for string reading - - . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL - . YYCTYPE is the type re2c operates on or in other words the type that - it generates code for. While it is not a big difference when we were - using 'unsigned char' here we would need to run re2c with option -w - to fully support types with sieof() > 1. - . YYCURSOR is used internally and holds the current scanner position. In - expression handlers, the code blocks after re2c expressions, this can be - used to identify the end of the token. - . YYMARKER is not always being used so we set an initial value to avoid - a compiler warning. - . YYLIMIT stores the end of the input. Unfortunatley we have to use strlen() - in this lesson. In the next example we see one way to get rid of it. - . We use a 'for(;;)'-loop around the scanner block. We could have used a - 'while(1)'-loop instead but some compilers generate a warning for it. - . To make the output more readable we use 're2c:indent:top' scanner - configuration that configures re2c to prepend a single tab (the default) - to the beginning of each output line. - . The following lines are expressions and for each expression we output the - token name and continue the scanner loop. - . The second last token detects the end of our input, the terminating zero in - our input string. In other scanners detecting the end of input may vary. - For example binary code may contain \0 as valid input. - . The last expression accepts any input character. It tells re2c to accept - the opposit of the empty range. This includes numbers and our tokens but - as re2c goes from top to botton when evaluating the expressions this is no - problem. - . The first three rules show that re2c actually prioritizes the expressions - from top to bottom. Octal number require a starting "0" and the actual - number. Normal numbers start with a digit greater 0. And zero is finally a - special case. A single "0" is detected by the last rule of this set. And - valid ocal number is already being detected by the first rule. This even - includes multi "0" sequences that in octal notation also means zero. - Another way would be to only use two rules: - "0" [0-9]+ - "0" | ( [1-9] [0-9]* ) - A full description of re2c rule syntax can be found in the manual. -*/ - -#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers - -#if _MSC_VER > 1200 -#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later. -#endif // Prevents warning from vc7.1 complaining about redefinition - -#include -#include -#include -#include -#include -#include "HiResTimer.h" - -static char gTestBuf[1000] = ""; - -/** - * @brief Setup HiResolution timer and confirm it is working ok - */ -void InitHiResTimerAndVerifyWorking(void) -{ - double elapsed; - HrtInit(); - HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS); - HrtStart(); - Sleep(100); - elapsed = HrtElapsedMillis(); - if ((elapsed < 90) || (elapsed > 110)) { - printf("HiResTimer misbehaving: %f\n", elapsed); - exit(2); - } -} - -/** - * @brief Scan for numbers in different formats - */ -int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan) -{ - unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; - unsigned char *pzBacktrackInfo = 0; -#define YYCTYPE unsigned char -#define YYCURSOR pzCurScanPos -#define YYLIMIT (pzStrToScan+lenStrToScan) -#define YYMARKER pzBacktrackInfo -#define YYFILL(n) - - for(;;) - { -/*!re2c - re2c:indent:top = 2; - [1-9][0-9]* { continue; } - [0][0-9]+ { continue; } - "+" { continue; } - "-" { continue; } - "\000" { return 0; } - [^] { return 1; } -*/ - } -} - -/** - * @brief Scan for numbers in different formats - */ -int scan(char *pzStrToScan, size_t lenStrToScan) -{ - unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; - unsigned char *pzBacktrackInfo = 0; -#define YYCTYPE unsigned char -#define YYCURSOR pzCurScanPos -#define YYLIMIT (pzStrToScan+lenStrToScan) -#define YYMARKER pzBacktrackInfo -#define YYFILL(n) - - for(;;) - { -/*!re2c - re2c:indent:top = 2; - [1-9][0-9]* { printf("Num\n"); strcat(gTestBuf, "Num "); continue; } - [0][0-9]+ { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; } - "+" { printf("+\n"); strcat(gTestBuf, "+ "); continue; } - "-" { printf("-\n"); strcat(gTestBuf, "- "); continue; } - "\000" { printf("EOF\n"); return 0; } - [^] { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; } -*/ - } -} - -/** - * @brief Show high resolution elapsed time for 10,000 and 100,000 loops - */ -void DoTimingsOfStrnCmp(void) -{ - char testStr[] = "Hello, world"; - int totLoops = 10000; - int totFoundCount = 0; - int foundCount = 0; - int loop; - int rc; - const int progressAnd = 0xFFFFF000; - double elapsed; - - printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); - - HrtStart(); - for (loop = 0; loop < totLoops; ++loop) { - foundCount = 0; - rc = strncmp(testStr, "Hello", 5); - if (rc == 0) { - foundCount++; - totFoundCount++; - if ((totFoundCount & progressAnd) == totFoundCount) { - printf("*"); - } - } - } - elapsed = HrtElapsedMillis(); - printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); - printf("FoundCount each loop: %d\n", foundCount); - printf("TotalFoundCount for all loops: %d\n", totFoundCount); - - totLoops = 100000; - HrtStart(); - for (loop = 0; loop < totLoops; ++loop) { - foundCount = 0; - rc = strncmp(testStr, "Hello", 5); - if (rc == 0) { - foundCount++; - totFoundCount++; - if ((totFoundCount & progressAnd) == totFoundCount) { - printf("*"); - } - } - } - elapsed = HrtElapsedMillis(); - printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); - printf("FoundCount each loop: %d\n", foundCount); - printf("TotalFoundCount for all loops: %d\n", totFoundCount); -} - -/** - * @brief Show high resolution elapsed time for 10,000 and 100,000 loops - */ -void DoTimingsOfRe2c(void) -{ - char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; - const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); - int i; - int totLoops = 10000 / testCount; // Doing more than one per loop - int totFoundCount = 0; - int foundCount = 0; - int loop; - int rc; - const int progressAnd = 0xFFFFF000; - double elapsed; - - printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); - - HrtStart(); - for (loop = 0; loop < totLoops; ++loop) { - foundCount = 0; - strcpy(gTestBuf, ""); - for (i = 0; i < testCount; ++i) { - char* pzCurStr = testStrings[i]; - size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? - rc = ScanFullSpeed(pzCurStr, len); - if (rc == 0) { - foundCount++; - totFoundCount++; - if ((totFoundCount & progressAnd) == totFoundCount) { - printf("*"); - } - } - } - } - elapsed = HrtElapsedMillis(); - printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); - printf("FoundCount each loop: %d\n", foundCount); - printf("TotalFoundCount for all loops: %d\n", totFoundCount); - - totLoops = 100000 / testCount; - printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); - - HrtStart(); - for (loop = 0; loop < totLoops; ++loop) { - foundCount = 0; - strcpy(gTestBuf, ""); - for (i = 0; i < testCount; ++i) { - char* pzCurStr = testStrings[i]; - size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? - rc = ScanFullSpeed(pzCurStr, len); - if (rc == 0) { - foundCount++; - totFoundCount++; - if ((totFoundCount & progressAnd) == totFoundCount) { - printf("*"); - } - } - } - } - elapsed = HrtElapsedMillis(); - printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); - printf("FoundCount each loop: %d\n", foundCount); - printf("TotalFoundCount for all loops: %d\n", totFoundCount); -} - -/** - * @brief Entry point for console app - */ -int main(int argc, char **argv) -{ - char testStr_A[] = "123"; - char* testStr_B = "456"; - char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; - const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); - int i; - - int rc = scan(testStr_A, 3); - printf("rc: %d\n", rc); - - rc = scan(testStr_B, 3); - printf("rc: %d\n", rc); - - rc = scan("789", 3); - printf("rc: %d\n", rc); - - strcpy(gTestBuf, ""); - for (i = 0; i < testCount; ++i) { - char* pzCurStr = testStrings[i]; - size_t len = strlen(pzCurStr); - scan(pzCurStr, len); - } - printf("%s\n", gTestBuf); - rc = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR "); - if (rc == 0) { - printf("Success\n"); - } - else { - printf("Failure\n"); - } - assert(0 == rc); // Doesn't work with Release build - - InitHiResTimerAndVerifyWorking(); - - DoTimingsOfStrnCmp(); - - DoTimingsOfRe2c(); - - return 0; -} diff --git a/re2c/examples/002_strip_comments/README b/re2c/examples/002_strip_comments/README deleted file mode 100644 index 353d6690..00000000 --- a/re2c/examples/002_strip_comments/README +++ /dev/null @@ -1,21 +0,0 @@ -re2c lesson 002_strip_comments, (c) M. Boerger 2006 - -In this lesson you will learn how to use multiple scanner blocks and how to -read the input from a file instead of a zero terminated string. In the end you -will have a scanner that filters comments out of c source files but keeps re2c -comments. - -The first scanner can be generated with: - - re2c -s -o t.c strip_001.s.re - -In the second step we will learn about YYMARKER that stores backtracking -information. - - re2c -s -0 t.c strip_002.b.re - -The third step brings trailing contexts that are stored in YYCTXMARKER. We also -change to use -b instead of -s option since the scanner gets more and more -complex. - - re2c -b -0 t.c strip_002.b.re diff --git a/re2c/examples/002_strip_comments/strip_001.s.re b/re2c/examples/002_strip_comments/strip_001.s.re deleted file mode 100644 index 5525ae3c..00000000 --- a/re2c/examples/002_strip_comments/strip_001.s.re +++ /dev/null @@ -1,147 +0,0 @@ -/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- basic interface for file reading - . This scanner will read chunks of input from a file. The easiest way would - be to read the whole file into a memory buffer and use that a zero - terminated string. - . Instead we want to read input chunks of a reasonable size as they are neede - by the scanner. Thus we basically need YYFILL(n) to call fread(n). - . Before we provide a buffer that we constantly reallocate we instead use - one buffer that we get from the stack or global memory just once. When we - reach the end of the buffer we simply move the beginning of our input - that is somewhere in our buffer to the beginning of our buffer and then - append the next chunk of input to the correct end inside our buffer. - . As re2c scanners might read more than one character we need to ensure our - buffer is long enough. We can use re2c to inform about the maximum size - by placing a "!max:re2c" comment somewhere. This gets translated to a - "#define YYMAXFILL " line where is the maximum length value. This - define can be used as precompiler condition. - -- multiple scanner blocks - . We use a main scanner block that outputs every input character unless the - input is two /s or a / followed by a *. In the latter two cases we switch - to a special c++ comment and a comment block respectively. - . Both special blocks simply detect their end ignore any other character. - . The c++ block is a bit special. Since the terminating new line needs to - be output and that can either be a new line or a carridge return followed - by a new line. - . In order to ensure that we do not read behind our buffer we reset the token - pointer to the cursor on every scanner run. -*/ - -#include -#include -#include - -/*!max:re2c */ -#define BSIZE 128 - -#if BSIZE < YYMAXFILL -# error BSIZE must be greater YYMAXFILL -#endif - -#define YYCTYPE unsigned char -#define YYCURSOR s.cur -#define YYLIMIT s.lim -#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } - -typedef struct Scanner -{ - FILE *fp; - unsigned char *cur, *tok, *lim, *eof; - unsigned char buffer[BSIZE]; -} Scanner; - -int fill(Scanner *s, int len) -{ - if (!len) - { - s->cur = s->tok = s->lim = s->buffer; - s->eof = 0; - } - if (!s->eof) - { - int got, cnt = s->tok - s->buffer; - - if (cnt > 0) - { - memcpy(s->buffer, s->tok, s->lim - s->tok); - s->tok -= cnt; - s->cur -= cnt; - s->lim -= cnt; - } - cnt = BSIZE - cnt; - if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) - { - s->eof = &s->lim[got]; - } - s->lim += got; - } - else if (s->cur + len > s->eof) - { - return 0; /* not enough input data */ - } - return -1; -} - -int scan(FILE *fp) -{ - int res = 0; - Scanner s; - - if (!fp) - { - return 1; /* no file was opened */ - } - - s.fp = fp; - - fill(&s, 0); - - for(;;) - { - s.tok = s.cur; -/*!re2c - re2c:indent:top = 2; - - NL = "\r"? "\n" ; - ANY = [^] ; - - "/" "/" { goto cppcomment; } - "/" "*" { goto comment; } - ANY { fputc(*s.tok, stdout); continue; } -*/ -comment: - s.tok = s.cur; -/*!re2c - "*" "/" { continue; } - ANY { goto comment; } -*/ -cppcomment: - s.tok = s.cur; -/*!re2c - NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; } - ANY { goto cppcomment; } -*/ - } - - if (fp != stdin) - { - fclose(fp); /* close only if not stdin */ - } - return res; /* return result */ -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 1; - } -} diff --git a/re2c/examples/002_strip_comments/strip_002.s.re b/re2c/examples/002_strip_comments/strip_002.s.re deleted file mode 100644 index 3c2a6cf8..00000000 --- a/re2c/examples/002_strip_comments/strip_002.s.re +++ /dev/null @@ -1,162 +0,0 @@ -/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- complexity - . When a comment is preceeded by a new line and followed by whitespace and a - new line then we can drop the trailing whitespace and new line. - . Additional to what we strip out already what about two consequtive comment - blocks? When two comments are only separated by whitespace we want to drop - both. In other words when detecting the end of a comment block we need to - check whether it is followed by only whitespace and the a new comment in - which case we continure ignoring the input. If it is followed only by white - space and a new line we strip out the new white space and new line. In any - other case we start outputting all that follows. - But we cannot simply use the following two rules: - "*" "/" WS* "/" "*" { continue; } - "*" "/" WS* NL { continue; } - The main problem is that WS* can get bigger then our buffer, so we need a - new scanner. - . Meanwhile our scanner gets a bit more complex and we have to add two more - things. First the scanner code now uses a YYMARKER to store backtracking - information. - -- backtracking information - . When the scanner has two rules that can have the same beginning but a - different ending then it needs to store the position that identifies the - common part. This is called backtracking. As mentioned above re2c expects - you to provide compiler define YYMARKER and a pointer variable. - . When shifting buffer contents as done in our fill function the marker needs - to be corrected, too. - -*/ - -#include -#include -#include - -/*!max:re2c */ -#define BSIZE 128 - -#if BSIZE < YYMAXFILL -# error BSIZE must be greater YYMAXFILL -#endif - -#define YYCTYPE unsigned char -#define YYCURSOR s.cur -#define YYLIMIT s.lim -#define YYMARKER s.mrk -#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } - -typedef struct Scanner -{ - FILE *fp; - unsigned char *cur, *tok, *lim, *eof, *mrk; - unsigned char buffer[BSIZE]; -} Scanner; - -int fill(Scanner *s, int len) -{ - if (!len) - { - s->cur = s->tok = s->lim = s->mrk = s->buffer; - s->eof = 0; - } - if (!s->eof) - { - int got, cnt = s->tok - s->buffer; - - if (cnt > 0) - { - memcpy(s->buffer, s->tok, s->lim - s->tok); - s->tok -= cnt; - s->cur -= cnt; - s->lim -= cnt; - s->mrk -= cnt; - } - cnt = BSIZE - cnt; - if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) - { - s->eof = &s->lim[got]; - } - s->lim += got; - } - else if (s->cur + len > s->eof) - { - return 0; /* not enough input data */ - } - return -1; -} - -void echo(Scanner *s) -{ - fwrite(s->tok, 1, s->cur - s->tok, stdout); -} - -int scan(FILE *fp) -{ - int res = 0; - Scanner s; - - if (!fp) - { - return 1; /* no file was opened */ - } - - s.fp = fp; - - fill(&s, 0); - - for(;;) - { - s.tok = s.cur; -/*!re2c - re2c:indent:top = 2; - - NL = "\r"? "\n" ; - WS = [\r\n\t ] ; - ANY = [^] ; - - "/" "/" { goto cppcomment; } - "/" "*" { goto comment; } - ANY { fputc(*s.tok, stdout); continue; } -*/ -comment: - s.tok = s.cur; -/*!re2c - "*" "/" { goto commentws; } - ANY { goto comment; } -*/ -commentws: - s.tok = s.cur; -/*!re2c - NL { echo(&s); continue; } - WS { goto commentws; } - ANY { echo(&s); continue; } -*/ -cppcomment: - s.tok = s.cur; -/*!re2c - NL { echo(&s); continue; } - ANY { goto cppcomment; } -*/ - } - - if (fp != stdin) - { - fclose(fp); /* close only if not stdin */ - } - return res; /* return result */ -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 1; - } -} diff --git a/re2c/examples/002_strip_comments/strip_003.b.re b/re2c/examples/002_strip_comments/strip_003.b.re deleted file mode 100644 index a7b1a5c7..00000000 --- a/re2c/examples/002_strip_comments/strip_003.b.re +++ /dev/null @@ -1,179 +0,0 @@ -/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */ -/*!ignore:re2c - -- more complexity - . Right now we strip out trailing white space and new lines after a comment - block. This can be a problem when the comment block was not preceeded by - a new line. - . The solution is to use trailing contexts. - -- trailing contexts - . Re2c allows to check for a portion of input and only recognize it when it - is followed by another portion. This is called a trailing context. - . The trailing context is not part of the identified input. That means that - it follows exactly at the cursor. A consequence is that the scanner has - already read more input and on the next run you need to restore begining - of input, in our case s.tok, from the cursor, here s.cur, rather then - restoring to the beginning of the buffer. This way the scanner can reuse - the portion it has already read. - . The position of the trailing context is stored in YYCTXMARKER for which - a pointer variable needs to be provided. - . As with YYMARKER the corrsponding variable needs to be corrected if we - shift in some buffer. - . Still this is not all we need to solve the problem. What is left is that - the information whether we detected a trailing context was detected has to - be stored somewhere. This is done by the new variable nlcomment. - -- formatting - . Until now we only used single line expression code and we always had the - opening { on the same line as the rule itself. If we have multiline rule - code and care for formatting we can no longer rely on re2c. Now we have - to indent the rule code ourself. Also we need to take care of the opening - {. If we keep it on the same line as the rule then re2c will indent it - correctly and the emitted #line informations will be correct. If we place - it on the next line then the #line directive will also point to that line - and not to the rule. -*/ - -#include -#include -#include - -/*!max:re2c */ -#define BSIZE 128 - -#if BSIZE < YYMAXFILL -# error BSIZE must be greater YYMAXFILL -#endif - -#define YYCTYPE unsigned char -#define YYCURSOR s.cur -#define YYLIMIT s.lim -#define YYMARKER s.mrk -#define YYCTXMARKER s.ctx -#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } - -typedef struct Scanner -{ - FILE *fp; - unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk; - unsigned char buffer[BSIZE]; -} Scanner; - -int fill(Scanner *s, int len) -{ - if (!len) - { - s->cur = s->tok = s->lim = s->mrk = s->buffer; - s->eof = 0; - } - if (!s->eof) - { - int got, cnt = s->tok - s->buffer; - - if (cnt > 0) - { - memcpy(s->buffer, s->tok, s->lim - s->tok); - s->tok -= cnt; - s->cur -= cnt; - s->lim -= cnt; - s->mrk -= cnt; - s->ctx -= cnt; - } - cnt = BSIZE - cnt; - if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) - { - s->eof = &s->lim[got]; - } - s->lim += got; - } - else if (s->cur + len > s->eof) - { - return 0; /* not enough input data */ - } - return -1; -} - -void echo(Scanner *s) -{ - fwrite(s->tok, 1, s->cur - s->tok, stdout); -} - -int scan(FILE *fp) -{ - int res = 0; - int nlcomment = 0; - Scanner s; - - if (!fp) - { - return 1; /* no file was opened */ - } - - s.fp = fp; - - fill(&s, 0); - - for(;;) - { - s.tok = s.cur; -/*!re2c - re2c:indent:top = 2; - - NL = "\r"? "\n" ; - WS = [\r\n\t ] ; - ANY = [^] ; - - "/" "/" { goto cppcomment; } - NL / "/""*" { echo(&s); nlcomment = 1; continue; } - "/" "*" { goto comment; } - ANY { fputc(*s.tok, stdout); continue; } -*/ -comment: - s.tok = s.cur; -/*!re2c - "*" "/" { goto commentws; } - ANY { goto comment; } -*/ -commentws: - s.tok = s.cur; -/*!re2c - NL? "/" "*" { goto comment; } - NL { - if (!nlcomment) - { - echo(&s); - } - nlcomment = 0; - continue; - } - WS { goto commentws; } - ANY { echo(&s); nlcomment = 0; continue; } -*/ -cppcomment: - s.tok = s.cur; -/*!re2c - NL { echo(&s); continue; } - ANY { goto cppcomment; } -*/ - } - - if (fp != stdin) - { - fclose(fp); /* close only if not stdin */ - } - return res; /* return result */ -} - -int main(int argc, char **argv) -{ - if (argc > 1) - { - return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); - } - else - { - fprintf(stderr, "%s \n", argv[0]); - return 1; - } -} diff --git a/re2c/test/examples/01_recognizing_integers.i--tags.c b/re2c/examples/01_recognizing_integers.i--tags.c similarity index 100% rename from re2c/test/examples/01_recognizing_integers.i--tags.c rename to re2c/examples/01_recognizing_integers.i--tags.c diff --git a/re2c/test/examples/01_recognizing_integers.i--tags.re b/re2c/examples/01_recognizing_integers.i--tags.re similarity index 100% rename from re2c/test/examples/01_recognizing_integers.i--tags.re rename to re2c/examples/01_recognizing_integers.i--tags.re diff --git a/re2c/test/examples/02_recognizing_strings.i--tags.c b/re2c/examples/02_recognizing_strings.i--tags.c similarity index 100% rename from re2c/test/examples/02_recognizing_strings.i--tags.c rename to re2c/examples/02_recognizing_strings.i--tags.c diff --git a/re2c/test/examples/02_recognizing_strings.i--tags.re b/re2c/examples/02_recognizing_strings.i--tags.re similarity index 100% rename from re2c/test/examples/02_recognizing_strings.i--tags.re rename to re2c/examples/02_recognizing_strings.i--tags.re diff --git a/re2c/test/examples/03_arbitrary_large_input.i--tags.c b/re2c/examples/03_arbitrary_large_input.i--tags.c similarity index 100% rename from re2c/test/examples/03_arbitrary_large_input.i--tags.c rename to re2c/examples/03_arbitrary_large_input.i--tags.c diff --git a/re2c/test/examples/03_arbitrary_large_input.i--tags.re b/re2c/examples/03_arbitrary_large_input.i--tags.re similarity index 100% rename from re2c/test/examples/03_arbitrary_large_input.i--tags.re rename to re2c/examples/03_arbitrary_large_input.i--tags.re diff --git a/re2c/test/examples/04_parsing_integers_blocks.i--tags.c b/re2c/examples/04_parsing_integers_blocks.i--tags.c similarity index 100% rename from re2c/test/examples/04_parsing_integers_blocks.i--tags.c rename to re2c/examples/04_parsing_integers_blocks.i--tags.c diff --git a/re2c/test/examples/04_parsing_integers_blocks.i--tags.re b/re2c/examples/04_parsing_integers_blocks.i--tags.re similarity index 100% rename from re2c/test/examples/04_parsing_integers_blocks.i--tags.re rename to re2c/examples/04_parsing_integers_blocks.i--tags.re diff --git a/re2c/test/examples/05_parsing_integers_conditions.ci--tags.c b/re2c/examples/05_parsing_integers_conditions.ci--tags.c similarity index 100% rename from re2c/test/examples/05_parsing_integers_conditions.ci--tags.c rename to re2c/examples/05_parsing_integers_conditions.ci--tags.c diff --git a/re2c/test/examples/05_parsing_integers_conditions.ci--tags.re b/re2c/examples/05_parsing_integers_conditions.ci--tags.re similarity index 100% rename from re2c/test/examples/05_parsing_integers_conditions.ci--tags.re rename to re2c/examples/05_parsing_integers_conditions.ci--tags.re diff --git a/re2c/test/examples/06_braille.cr8i--tags.c b/re2c/examples/06_braille.cr8i--tags.c similarity index 100% rename from re2c/test/examples/06_braille.cr8i--tags.c rename to re2c/examples/06_braille.cr8i--tags.c diff --git a/re2c/test/examples/06_braille.cr8i--tags.re b/re2c/examples/06_braille.cr8i--tags.re similarity index 100% rename from re2c/test/examples/06_braille.cr8i--tags.re rename to re2c/examples/06_braille.cr8i--tags.re diff --git a/re2c/test/examples/07_cxx98.i--tags.c b/re2c/examples/07_cxx98.i--tags.c similarity index 100% rename from re2c/test/examples/07_cxx98.i--tags.c rename to re2c/examples/07_cxx98.i--tags.c diff --git a/re2c/test/examples/07_cxx98.i--tags.re b/re2c/examples/07_cxx98.i--tags.re similarity index 100% rename from re2c/test/examples/07_cxx98.i--tags.re rename to re2c/examples/07_cxx98.i--tags.re diff --git a/re2c/test/examples/08_ipv4.i--tags.c b/re2c/examples/08_ipv4.i--tags.c similarity index 100% rename from re2c/test/examples/08_ipv4.i--tags.c rename to re2c/examples/08_ipv4.i--tags.c diff --git a/re2c/test/examples/08_ipv4.i--tags.re b/re2c/examples/08_ipv4.i--tags.re similarity index 100% rename from re2c/test/examples/08_ipv4.i--tags.re rename to re2c/examples/08_ipv4.i--tags.re diff --git a/re2c/test/examples/09_etc_passwd.i--tags.c b/re2c/examples/09_etc_passwd.i--tags.c similarity index 100% rename from re2c/test/examples/09_etc_passwd.i--tags.c rename to re2c/examples/09_etc_passwd.i--tags.c diff --git a/re2c/test/examples/09_etc_passwd.i--tags.re b/re2c/examples/09_etc_passwd.i--tags.re similarity index 100% rename from re2c/test/examples/09_etc_passwd.i--tags.re rename to re2c/examples/09_etc_passwd.i--tags.re diff --git a/re2c/test/examples/10_uri_rfc3986.i--tags.c b/re2c/examples/10_uri_rfc3986.i--tags.c similarity index 100% rename from re2c/test/examples/10_uri_rfc3986.i--tags.c rename to re2c/examples/10_uri_rfc3986.i--tags.c diff --git a/re2c/test/examples/10_uri_rfc3986.i--tags.re b/re2c/examples/10_uri_rfc3986.i--tags.re similarity index 100% rename from re2c/test/examples/10_uri_rfc3986.i--tags.re rename to re2c/examples/10_uri_rfc3986.i--tags.re diff --git a/re2c/test/examples/11_http_rfc7230.i--tags.c b/re2c/examples/11_http_rfc7230.i--tags.c similarity index 100% rename from re2c/test/examples/11_http_rfc7230.i--tags.c rename to re2c/examples/11_http_rfc7230.i--tags.c diff --git a/re2c/test/examples/11_http_rfc7230.i--tags.re b/re2c/examples/11_http_rfc7230.i--tags.re similarity index 100% rename from re2c/test/examples/11_http_rfc7230.i--tags.re rename to re2c/examples/11_http_rfc7230.i--tags.re diff --git a/re2c/test/examples/12_float.i--tags.c b/re2c/examples/12_float.i--tags.c similarity index 100% rename from re2c/test/examples/12_float.i--tags.c rename to re2c/examples/12_float.i--tags.c diff --git a/re2c/test/examples/12_float.i--tags.re b/re2c/examples/12_float.i--tags.re similarity index 100% rename from re2c/test/examples/12_float.i--tags.re rename to re2c/examples/12_float.i--tags.re diff --git a/re2c/test/examples/13_records.i--tags.c b/re2c/examples/13_records.i--tags.c similarity index 100% rename from re2c/test/examples/13_records.i--tags.c rename to re2c/examples/13_records.i--tags.c diff --git a/re2c/test/examples/13_records.i--tags.re b/re2c/examples/13_records.i--tags.re similarity index 100% rename from re2c/test/examples/13_records.i--tags.re rename to re2c/examples/13_records.i--tags.re diff --git a/re2c/test/examples/14_options.i--tags.c b/re2c/examples/14_options.i--tags.c similarity index 100% rename from re2c/test/examples/14_options.i--tags.c rename to re2c/examples/14_options.i--tags.c diff --git a/re2c/test/examples/14_options.i--tags.re b/re2c/examples/14_options.i--tags.re similarity index 100% rename from re2c/test/examples/14_options.i--tags.re rename to re2c/examples/14_options.i--tags.re diff --git a/re2c/examples/15_binsyms.i--input(custom).c b/re2c/examples/15_binsyms.i--input(custom).c new file mode 100644 index 00000000..9a146179 --- /dev/null +++ b/re2c/examples/15_binsyms.i--input(custom).c @@ -0,0 +1,209 @@ +/* Generated by re2c */ +#include +#include + +static void lex(const char *cur, const char *lim) +{ + const char *mar, *tok; +# define YYCTYPE char +# define YYPEEK() *cur +# define YYSKIP() if (++cur == lim) return; +# define YYBACKUP() mar = cur +# define YYRESTORE() cur = mar +loop: + tok = cur; + +{ + YYCTYPE yych; + yych = YYPEEK (); + switch (yych) { + case '_': goto yy4; + default: goto yy2; + } +yy2: + YYSKIP (); +yy3: + { goto loop; } +yy4: + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case '_': goto yy5; + default: goto yy3; + } +yy5: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy7; + default: goto yy6; + } +yy6: + YYRESTORE (); + goto yy3; +yy7: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy7; + default: goto yy9; + } +yy9: + { + printf("%.*s\n", (int) (cur - tok), tok); + goto loop; + } +} + +} + +int main(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "no input files\n"); + return 1; + } + + FILE *file = fopen(argv[1], "rb"); + if (file == NULL) { + fprintf(stderr, "cannot open file\n"); + return 1; + } + + fseek(file, 0, SEEK_END); + const size_t fsize = (size_t) ftell(file); + fseek(file, 0, SEEK_SET); + + char *buffer = (char*) malloc(fsize); + fread(buffer, 1, fsize, file); + lex(buffer, buffer + fsize); + + free(buffer); + fclose(file); + return 0; +} diff --git a/re2c/examples/15_binsyms.i--input(custom).re b/re2c/examples/15_binsyms.i--input(custom).re new file mode 100644 index 00000000..ec1c2d6e --- /dev/null +++ b/re2c/examples/15_binsyms.i--input(custom).re @@ -0,0 +1,49 @@ +#include +#include + +static void lex(const char *cur, const char *lim) +{ + const char *mar, *tok; +# define YYCTYPE char +# define YYPEEK() *cur +# define YYSKIP() if (++cur == lim) return; +# define YYBACKUP() mar = cur +# define YYRESTORE() cur = mar +loop: + tok = cur; + /*!re2c + re2c:yyfill:enable = 0; + + * { goto loop; } + "__" [a-zA-Z0-9_]+ { + printf("%.*s\n", (int) (cur - tok), tok); + goto loop; + } + */ +} + +int main(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "no input files\n"); + return 1; + } + + FILE *file = fopen(argv[1], "rb"); + if (file == NULL) { + fprintf(stderr, "cannot open file\n"); + return 1; + } + + fseek(file, 0, SEEK_END); + const size_t fsize = (size_t) ftell(file); + fseek(file, 0, SEEK_SET); + + char *buffer = (char*) malloc(fsize); + fread(buffer, 1, fsize, file); + lex(buffer, buffer + fsize); + + free(buffer); + fclose(file); + return 0; +} diff --git a/re2c/examples/16_fake_sentinel.i--input(custom).c b/re2c/examples/16_fake_sentinel.i--input(custom).c new file mode 100644 index 00000000..868c31f8 --- /dev/null +++ b/re2c/examples/16_fake_sentinel.i--input(custom).c @@ -0,0 +1,249 @@ +/* Generated by re2c */ +#include +#include + +static int lex(const char *cur, const char *lim) +{ + const char *mar, *tok = cur; +# define YYCTYPE char +# define YYPEEK() (cur < lim ? *cur : 0) +# define YYSKIP() ++cur +# define YYBACKUP() mar = cur +# define YYRESTORE() cur = mar + +{ + YYCTYPE yych; + yych = YYPEEK (); + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy4; + default: goto yy2; + } +yy2: + YYSKIP (); +yy3: + { printf("error\n"); return 1; } +yy4: + YYSKIP (); + YYBACKUP (); + yych = YYPEEK (); + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy5; + case ';': goto yy8; + default: goto yy3; + } +yy5: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy5; + case ';': goto yy8; + default: goto yy7; + } +yy7: + YYRESTORE (); + goto yy3; +yy8: + YYSKIP (); + yych = YYPEEK (); + if (yych >= 0x01) goto yy7; + YYSKIP (); + { + printf("%.*s\n", (int) (cur - tok) - 1, tok); + return 0; + } +} + +} + +int main(int argc, char **argv) +{ + if (argc != 2) return 1; + + char *s = argv[1]; + size_t l = strlen(s); + s[l] = ';'; // overwrite terminating NULL + return lex(s, s + l + 1); +} diff --git a/re2c/examples/16_fake_sentinel.i--input(custom).re b/re2c/examples/16_fake_sentinel.i--input(custom).re new file mode 100644 index 00000000..f06ffefb --- /dev/null +++ b/re2c/examples/16_fake_sentinel.i--input(custom).re @@ -0,0 +1,31 @@ +#include +#include + +static int lex(const char *cur, const char *lim) +{ + const char *mar, *tok = cur; +# define YYCTYPE char +# define YYPEEK() (cur < lim ? *cur : 0) +# define YYSKIP() ++cur +# define YYBACKUP() mar = cur +# define YYRESTORE() cur = mar + /*!re2c + re2c:yyfill:enable = 0; + + * { printf("error\n"); return 1; } + [0-9a-zA-Z]+ [;] [\x00] { + printf("%.*s\n", (int) (cur - tok) - 1, tok); + return 0; + } + */ +} + +int main(int argc, char **argv) +{ + if (argc != 2) return 1; + + char *s = argv[1]; + size_t l = strlen(s); + s[l] = ';'; // overwrite terminating NULL + return lex(s, s + l + 1); +} diff --git a/re2c/examples/17_ifstream.i--input(custom).c b/re2c/examples/17_ifstream.i--input(custom).c new file mode 100644 index 00000000..478e0824 --- /dev/null +++ b/re2c/examples/17_ifstream.i--input(custom).c @@ -0,0 +1,49 @@ +/* Generated by re2c */ +#include + +static void conv(std::ifstream &in, std::ofstream &out) +{ + std::streampos mar; +# define YYCTYPE char +# define YYPEEK() in.peek() +# define YYSKIP() do { in.ignore(); if (in.eof()) return; } while(0) +# define YYBACKUP() mar = in.tellg() +# define YYRESTORE() in.seekg(mar) +loop: + +{ + YYCTYPE yych; + yych = YYPEEK (); + switch (yych) { + case '\r': goto yy4; + default: goto yy2; + } +yy2: + YYSKIP (); +yy3: + { out.put(yych); goto loop; } +yy4: + YYSKIP (); + yych = YYPEEK (); + switch (yych) { + case '\n': goto yy5; + default: goto yy3; + } +yy5: + YYSKIP (); + { out.put('\n'); goto loop; } +} + +} + +int main(int argc, char **argv) +{ + if (argc != 3) return 1; + + std::ifstream in(argv[1], std::ios::binary); + std::ofstream out(argv[2], std::ios::binary); + if (in.fail() || out.fail()) return 2; + + conv(in, out); + return 0; +} diff --git a/re2c/examples/17_ifstream.i--input(custom).re b/re2c/examples/17_ifstream.i--input(custom).re new file mode 100644 index 00000000..69b0f18a --- /dev/null +++ b/re2c/examples/17_ifstream.i--input(custom).re @@ -0,0 +1,30 @@ +#include + +static void conv(std::ifstream &in, std::ofstream &out) +{ + std::streampos mar; +# define YYCTYPE char +# define YYPEEK() in.peek() +# define YYSKIP() do { in.ignore(); if (in.eof()) return; } while(0) +# define YYBACKUP() mar = in.tellg() +# define YYRESTORE() in.seekg(mar) +loop: + /*!re2c + re2c:yyfill:enable = 0; + + * { out.put(yych); goto loop; } + "\r\n" { out.put('\n'); goto loop; } + */ +} + +int main(int argc, char **argv) +{ + if (argc != 3) return 1; + + std::ifstream in(argv[1], std::ios::binary); + std::ofstream out(argv[2], std::ios::binary); + if (in.fail() || out.fail()) return 2; + + conv(in, out); + return 0; +} diff --git a/re2c/examples/18_push_model.if.c b/re2c/examples/18_push_model.if.c new file mode 100644 index 00000000..30335433 --- /dev/null +++ b/re2c/examples/18_push_model.if.c @@ -0,0 +1,240 @@ +/* Generated by re2c */ +#include +#include + +#define YYMAXFILL 1 + +static const size_t SIZE = 4096; + +struct input_t { + char buf[SIZE + YYMAXFILL]; + char *lim; + char *cur; + char *tok; + int state; + unsigned need; + unsigned yyaccept; + char yych; + + input_t() + : buf() + , lim(buf + SIZE) + , cur(lim) + , tok(lim) + , state(-1) + , need(0) + , yyaccept(0) + , yych(0) + {} + + bool fill() + { + const size_t free = tok - buf; + if (free < need) return false; + + memmove(buf, tok, buf - tok + SIZE); + lim -= free; + cur -= free; + tok -= free; + lim += fread(lim, 1, free, stdin); + if (lim < buf + SIZE) { + memset(lim, 0, YYMAXFILL); + lim += YYMAXFILL; + } + return true; + } +}; + +enum status_t { OK, FAIL, NEED_MORE_INPUT }; + +static status_t lex(input_t &in, unsigned &words) +{ +# define YYGETSTATE() in.state +# define YYSETSTATE(s) in.state = s +# define YYFILL(n) do { in.need = n; return NEED_MORE_INPUT; } while (0) + switch (YYGETSTATE()) { +default: goto yy0; +case 0: goto yyFillLabel0; +case 1: goto yyFillLabel1; +case 2: goto yyFillLabel2; +} + +loop: + in.tok = in.cur; + + +yy0: + YYSETSTATE(0); + if (in.lim <= in.cur) YYFILL(1); +yyFillLabel0: + in.yych = *in.cur; + switch (in.yych) { + case 0x00: goto yy2; + case '\n': + case ' ': goto yy6; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy9; + default: goto yy4; + } +yy2: + ++in.cur; + { return OK; } +yy4: + ++in.cur; + { return FAIL; } +yy6: + ++in.cur; + YYSETSTATE(1); + if (in.lim <= in.cur) YYFILL(1); +yyFillLabel1: + in.yych = *in.cur; + switch (in.yych) { + case '\n': + case ' ': goto yy6; + default: goto yy8; + } +yy8: + { goto loop; } +yy9: + ++in.cur; + YYSETSTATE(2); + if (in.lim <= in.cur) YYFILL(1); +yyFillLabel2: + in.yych = *in.cur; + switch (in.yych) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy9; + default: goto yy11; + } +yy11: + { ++words; goto loop; } + +} + +int main() +{ + input_t in; + unsigned words = 0; + + while (true) { + const status_t st = lex(in, words); + + // end of input: print result + if (st == OK) { + printf("\nword count: %u\n", words); + break; + + // unexpected error: abort + } else if (st == FAIL) { + printf("\nerror\n"); + return 1; + + // get more input and continue + } else if (!in.fill()) { + printf("\nsmall buffer\n"); + return 2; + } + } + + return 0; +} diff --git a/re2c/examples/18_push_model.if.re b/re2c/examples/18_push_model.if.re new file mode 100644 index 00000000..2f136730 --- /dev/null +++ b/re2c/examples/18_push_model.if.re @@ -0,0 +1,95 @@ +#include +#include + +/*!max:re2c*/ +static const size_t SIZE = 4096; + +struct input_t { + char buf[SIZE + YYMAXFILL]; + char *lim; + char *cur; + char *tok; + int state; + unsigned need; + unsigned yyaccept; + char yych; + + input_t() + : buf() + , lim(buf + SIZE) + , cur(lim) + , tok(lim) + , state(-1) + , need(0) + , yyaccept(0) + , yych(0) + {} + + bool fill() + { + const size_t free = tok - buf; + if (free < need) return false; + + memmove(buf, tok, buf - tok + SIZE); + lim -= free; + cur -= free; + tok -= free; + lim += fread(lim, 1, free, stdin); + if (lim < buf + SIZE) { + memset(lim, 0, YYMAXFILL); + lim += YYMAXFILL; + } + return true; + } +}; + +enum status_t { OK, FAIL, NEED_MORE_INPUT }; + +static status_t lex(input_t &in, unsigned &words) +{ +# define YYGETSTATE() in.state +# define YYSETSTATE(s) in.state = s +# define YYFILL(n) do { in.need = n; return NEED_MORE_INPUT; } while (0) + /*!getstate:re2c*/ +loop: + in.tok = in.cur; + /*!re2c + re2c:define:YYCTYPE = char; + re2c:define:YYCURSOR = in.cur; + re2c:define:YYLIMIT = in.lim; + re2c:variable:yych = in.yych; + + * { return FAIL; } + [\x00] { return OK; } + [\n ]+ { goto loop; } + [a-zA-Z]+ { ++words; goto loop; } + */ +} + +int main() +{ + input_t in; + unsigned words = 0; + + while (true) { + const status_t st = lex(in, words); + + // end of input: print result + if (st == OK) { + printf("\nword count: %u\n", words); + break; + + // unexpected error: abort + } else if (st == FAIL) { + printf("\nerror\n"); + return 1; + + // get more input and continue + } else if (!in.fill()) { + printf("\nsmall buffer\n"); + return 2; + } + } + + return 0; +} diff --git a/re2c/examples/input_custom/fixed.re b/re2c/examples/input_custom/fixed.re deleted file mode 100644 index 51f3b2b0..00000000 --- a/re2c/examples/input_custom/fixed.re +++ /dev/null @@ -1,35 +0,0 @@ -// Build with "--input custom" re2c switch. -// -// This is an example of handling fixed-length buffer with "--input custom": -// on each YYPEEK we check for the end of input, thus YYFILL generation -// can be safely suppressed. -// -// Note that YYLIMIT points not to terminating NULL, but to the previous -// character: we emulate the case when input has no terminating NULL. -// -// For a real-life example see https://github.com/sopyer/mjson -// or mjson.re from re2c test collection. - -bool lex (const char * cursor, const char * const limit) -{ - const char * marker; - const char * ctxmarker; -# define YYCTYPE char -# define YYPEEK() (cursor >= limit ? 0 : *cursor) -# define YYSKIP() ++cursor -# define YYBACKUP() marker = cursor -# define YYBACKUPCTX() ctxmarker = cursor -# define YYRESTORE() cursor = marker -# define YYRESTORECTX() cursor = ctxmarker - /*!re2c - re2c:yyfill:enable = 0; - "int buffer " / "[" [0-9]+ "]" { return true; } - * { return false; } - */ -} - -int main () -{ - char buffer [] = "int buffer [1024]"; - return !lex (buffer, buffer + sizeof (buffer) - 1); -} diff --git a/re2c/examples/input_custom/simple/README b/re2c/examples/input_custom/simple/README deleted file mode 100644 index c0c4d955..00000000 --- a/re2c/examples/input_custom/simple/README +++ /dev/null @@ -1,20 +0,0 @@ -Build with "--input custom" re2c switch. - -These are three examples of "--input custom" usage: - -- input_custom_default.re: - implements default re2c input model (pointers to plain buffer) - -- input_custom_fgetc: - implements C-style file input (using ) - -- input_custom_fgetc: - implements std::istringstream input - -Note that these examples are very simple and don't need -to implement YYFILL; the only reason they don't use -"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT -(for the sake of example). - -In real-life programs one will need to care for correct -end-of-input handling. diff --git a/re2c/examples/input_custom/simple/default.re b/re2c/examples/input_custom/simple/default.re deleted file mode 100644 index 94cde7cd..00000000 --- a/re2c/examples/input_custom/simple/default.re +++ /dev/null @@ -1,24 +0,0 @@ -bool lex (const char * cursor, const char * const limit) -{ - const char * marker; - const char * ctxmarker; -# define YYCTYPE char -# define YYPEEK() *cursor -# define YYSKIP() ++cursor -# define YYBACKUP() marker = cursor -# define YYBACKUPCTX() ctxmarker = cursor -# define YYRESTORE() cursor = marker -# define YYRESTORECTX() cursor = ctxmarker -# define YYLESSTHAN(n) limit - cursor < n -# define YYFILL(n) {} - /*!re2c - "int buffer " / "[" [0-9]+ "]" { return true; } - * { return false; } - */ -} - -int main () -{ - char buffer [] = "int buffer [1024]"; - return !lex (buffer, buffer + sizeof (buffer)); -} diff --git a/re2c/examples/input_custom/simple/fgetc.re b/re2c/examples/input_custom/simple/fgetc.re deleted file mode 100644 index d2dffd9a..00000000 --- a/re2c/examples/input_custom/simple/fgetc.re +++ /dev/null @@ -1,43 +0,0 @@ -#include - -char peek (FILE * f) -{ - char c = fgetc (f); - ungetc (c, f); - return c; -} - -bool lex (FILE * f, const long limit) -{ - long marker; - long ctxmarker; -# define YYCTYPE char -# define YYPEEK() peek (f) -# define YYSKIP() fgetc (f) -# define YYBACKUP() marker = ftell (f) -# define YYBACKUPCTX() ctxmarker = ftell (f) -# define YYRESTORE() fseek (f, marker, SEEK_SET) -# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET) -# define YYLESSTHAN(n) limit - ftell (f) < n -# define YYFILL(n) {} - /*!re2c - "int buffer " / "[" [0-9]+ "]" { return true; } - * { return false; } - */ -} - -int main () -{ - const char buffer [] = "int buffer [1024]"; - const char fn [] = "input.txt"; - - FILE * f = fopen (fn, "w"); - fwrite (buffer, 1, sizeof (buffer), f); - fclose (f); - - f = fopen (fn, "rb"); - int result = !lex (f, sizeof (buffer)); - fclose (f); - - return result; -} diff --git a/re2c/examples/input_custom/simple/istringstream.re b/re2c/examples/input_custom/simple/istringstream.re deleted file mode 100644 index 5d702291..00000000 --- a/re2c/examples/input_custom/simple/istringstream.re +++ /dev/null @@ -1,27 +0,0 @@ -#include - -bool lex (std::istringstream & is, const std::streampos limit) -{ - std::streampos marker; - std::streampos ctxmarker; -# define YYCTYPE char -# define YYPEEK() is.peek () -# define YYSKIP() is.ignore () -# define YYBACKUP() marker = is.tellg () -# define YYBACKUPCTX() ctxmarker = is.tellg () -# define YYRESTORE() is.seekg (marker) -# define YYRESTORECTX() is.seekg (ctxmarker) -# define YYLESSTHAN(n) limit - is.tellg () < n -# define YYFILL(n) {} - /*!re2c - "int buffer " / "[" [0-9]+ "]" { return true; } - * { return false; } - */ -} - -int main () -{ - const char buffer [] = "int buffer [1024]"; - std::istringstream is (buffer); - return !lex (is, sizeof (buffer)); -} diff --git a/re2c/examples/langs/c.re b/re2c/examples/langs/c.re deleted file mode 100644 index 7e413e2d..00000000 --- a/re2c/examples/langs/c.re +++ /dev/null @@ -1,272 +0,0 @@ -#include -#include -#include - -#define ADDEQ 257 -#define ANDAND 258 -#define ANDEQ 259 -#define ARRAY 260 -#define ASM 261 -#define AUTO 262 -#define BREAK 263 -#define CASE 264 -#define CHAR 265 -#define CONST 266 -#define CONTINUE 267 -#define DECR 268 -#define DEFAULT 269 -#define DEREF 270 -#define DIVEQ 271 -#define DO 272 -#define DOUBLE 273 -#define ELLIPSIS 274 -#define ELSE 275 -#define ENUM 276 -#define EQL 277 -#define EXTERN 278 -#define FCON 279 -#define FLOAT 280 -#define FOR 281 -#define FUNCTION 282 -#define GEQ 283 -#define GOTO 284 -#define ICON 285 -#define ID 286 -#define IF 287 -#define INCR 288 -#define INT 289 -#define LEQ 290 -#define LONG 291 -#define LSHIFT 292 -#define LSHIFTEQ 293 -#define MODEQ 294 -#define MULEQ 295 -#define NEQ 296 -#define OREQ 297 -#define OROR 298 -#define POINTER 299 -#define REGISTER 300 -#define RETURN 301 -#define RSHIFT 302 -#define RSHIFTEQ 303 -#define SCON 304 -#define SHORT 305 -#define SIGNED 306 -#define SIZEOF 307 -#define STATIC 308 -#define STRUCT 309 -#define SUBEQ 310 -#define SWITCH 311 -#define TYPEDEF 312 -#define UNION 313 -#define UNSIGNED 314 -#define VOID 315 -#define VOLATILE 316 -#define WHILE 317 -#define XOREQ 318 -#define EOI 319 - -typedef unsigned int uint; -typedef unsigned char uchar; - -#define BSIZE 8192 - -#define YYCTYPE uchar -#define YYCURSOR cursor -#define YYLIMIT s->lim -#define YYMARKER s->ptr -#define YYFILL(n) {cursor = fill(s, cursor);} - -#define RET(i) {s->cur = cursor; return i;} - -typedef struct Scanner { - int fd; - uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; - uint line; -} Scanner; - -uchar *fill(Scanner *s, uchar *cursor){ - if(!s->eof) { - uint cnt = s->tok - s->bot; - if(cnt){ - memcpy(s->bot, s->tok, s->lim - s->tok); - s->tok = s->bot; - s->ptr -= cnt; - cursor -= cnt; - s->pos -= cnt; - s->lim -= cnt; - } - if((s->top - s->lim) < BSIZE){ - uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); - memcpy(buf, s->tok, s->lim - s->tok); - s->tok = buf; - s->ptr = &buf[s->ptr - s->bot]; - cursor = &buf[cursor - s->bot]; - s->pos = &buf[s->pos - s->bot]; - s->lim = &buf[s->lim - s->bot]; - s->top = &s->lim[BSIZE]; - free(s->bot); - s->bot = buf; - } - if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ - s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; - } - s->lim += cnt; - } - return cursor; -} - -int scan(Scanner *s){ - uchar *cursor = s->cur; -std: - s->tok = cursor; -/*!re2c -any = [\000-\377]; -O = [0-7]; -D = [0-9]; -L = [a-zA-Z_]; -H = [a-fA-F0-9]; -E = [Ee] [+-]? D+; -FS = [fFlL]; -IS = [uUlL]*; -ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); -*/ - -/*!re2c - "/*" { goto comment; } - - "auto" { RET(AUTO); } - "break" { RET(BREAK); } - "case" { RET(CASE); } - "char" { RET(CHAR); } - "const" { RET(CONST); } - "continue" { RET(CONTINUE); } - "default" { RET(DEFAULT); } - "do" { RET(DO); } - "double" { RET(DOUBLE); } - "else" { RET(ELSE); } - "enum" { RET(ENUM); } - "extern" { RET(EXTERN); } - "float" { RET(FLOAT); } - "for" { RET(FOR); } - "goto" { RET(GOTO); } - "if" { RET(IF); } - "int" { RET(INT); } - "long" { RET(LONG); } - "register" { RET(REGISTER); } - "return" { RET(RETURN); } - "short" { RET(SHORT); } - "signed" { RET(SIGNED); } - "sizeof" { RET(SIZEOF); } - "static" { RET(STATIC); } - "struct" { RET(STRUCT); } - "switch" { RET(SWITCH); } - "typedef" { RET(TYPEDEF); } - "union" { RET(UNION); } - "unsigned" { RET(UNSIGNED); } - "void" { RET(VOID); } - "volatile" { RET(VOLATILE); } - "while" { RET(WHILE); } - - L (L|D)* { RET(ID); } - - ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | - (['] (ESC|any\[\n\\'])* [']) - { RET(ICON); } - - (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) - { RET(FCON); } - - (["] (ESC|any\[\n\\"])* ["]) - { RET(SCON); } - - "..." { RET(ELLIPSIS); } - ">>=" { RET(RSHIFTEQ); } - "<<=" { RET(LSHIFTEQ); } - "+=" { RET(ADDEQ); } - "-=" { RET(SUBEQ); } - "*=" { RET(MULEQ); } - "/=" { RET(DIVEQ); } - "%=" { RET(MODEQ); } - "&=" { RET(ANDEQ); } - "^=" { RET(XOREQ); } - "|=" { RET(OREQ); } - ">>" { RET(RSHIFT); } - "<<" { RET(LSHIFT); } - "++" { RET(INCR); } - "--" { RET(DECR); } - "->" { RET(DEREF); } - "&&" { RET(ANDAND); } - "||" { RET(OROR); } - "<=" { RET(LEQ); } - ">=" { RET(GEQ); } - "==" { RET(EQL); } - "!=" { RET(NEQ); } - ";" { RET(';'); } - "{" { RET('{'); } - "}" { RET('}'); } - "," { RET(','); } - ":" { RET(':'); } - "=" { RET('='); } - "(" { RET('('); } - ")" { RET(')'); } - "[" { RET('['); } - "]" { RET(']'); } - "." { RET('.'); } - "&" { RET('&'); } - "!" { RET('!'); } - "~" { RET('~'); } - "-" { RET('-'); } - "+" { RET('+'); } - "*" { RET('*'); } - "/" { RET('/'); } - "%" { RET('%'); } - "<" { RET('<'); } - ">" { RET('>'); } - "^" { RET('^'); } - "|" { RET('|'); } - "?" { RET('?'); } - - - [ \t\v\f]+ { goto std; } - - "\n" - { - if(cursor == s->eof) RET(EOI); - s->pos = cursor; s->line++; - goto std; - } - - any - { - printf("unexpected character: %c\n", *s->tok); - goto std; - } -*/ - -comment: -/*!re2c - "*/" { goto std; } - "\n" - { - if(cursor == s->eof) RET(EOI); - s->tok = s->pos = cursor; s->line++; - goto comment; - } - any { goto comment; } -*/ -} - -main(){ - Scanner in; - int t; - memset((char*) &in, 0, sizeof(in)); - in.fd = 0; - while((t = scan(&in)) != EOI){ -/* - printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); - printf("%d\n", t); -*/ - } - close(in.fd); -} diff --git a/re2c/examples/langs/modula.re b/re2c/examples/langs/modula.re deleted file mode 100644 index 186b0cc1..00000000 --- a/re2c/examples/langs/modula.re +++ /dev/null @@ -1,203 +0,0 @@ -#include -#include -#include - -typedef unsigned int uint; -typedef unsigned char uchar; - -#define BSIZE 8192 - -#define YYCTYPE uchar -#define YYCURSOR cursor -#define YYLIMIT s->lim -#define YYMARKER s->ptr -#define YYCTXMARKER s->ctx -#define YYFILL {cursor = fill(s, cursor);} - -#define RETURN(i) {s->cur = cursor; return i;} - -typedef struct Scanner { - int fd; - uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof; - uint line; -} Scanner; - -uchar *fill(Scanner *s, uchar *cursor){ - if(!s->eof){ - uint cnt = s->tok - s->bot; - if(cnt){ - memcpy(s->bot, s->tok, s->lim - s->tok); - s->tok = s->bot; - s->ptr -= cnt; - cursor -= cnt; - s->pos -= cnt; - s->lim -= cnt; - } - if((s->top - s->lim) < BSIZE){ - uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); - memcpy(buf, s->tok, s->lim - s->tok); - s->tok = buf; - s->ptr = &buf[s->ptr - s->bot]; - cursor = &buf[cursor - s->bot]; - s->pos = &buf[s->pos - s->bot]; - s->lim = &buf[s->lim - s->bot]; - s->top = &s->lim[BSIZE]; - free(s->bot); - s->bot = buf; - } - if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ - s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; - } - s->lim += cnt; - } - return cursor; -} - -int scan(Scanner *s){ - uchar *cursor = s->cur; - uint depth; -std: - s->tok = cursor; -/*!re2c -any = [\000-\377]; -digit = [0-9]; -letter = [a-zA-Z]; -*/ - -/*!re2c - "(*" { depth = 1; goto comment; } - - digit + {RETURN(1);} - digit + / ".." {RETURN(1);} - [0-7] + "B" {RETURN(2);} - [0-7] + "C" {RETURN(3);} - digit [0-9A-F] * "H" {RETURN(4);} - digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);} - ['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);} - - "#" {RETURN(7);} - "&" {RETURN(8);} - "(" {RETURN(9);} - ")" {RETURN(10);} - "*" {RETURN(11);} - "+" {RETURN(12);} - "," {RETURN(13);} - "-" {RETURN(14);} - "." {RETURN(15);} - ".." {RETURN(16);} - "/" {RETURN(17);} - ":" {RETURN(18);} - ":=" {RETURN(19);} - ";" {RETURN(20);} - "<" {RETURN(21);} - "<=" {RETURN(22);} - "<>" {RETURN(23);} - "=" {RETURN(24);} - ">" {RETURN(25);} - ">=" {RETURN(26);} - "[" {RETURN(27);} - "]" {RETURN(28);} - "^" {RETURN(29);} - "{" {RETURN(30);} - "|" {RETURN(31);} - "}" {RETURN(32);} - "~" {RETURN(33);} - - "AND" {RETURN(34);} - "ARRAY" {RETURN(35);} - "BEGIN" {RETURN(36);} - "BY" {RETURN(37);} - "CASE" {RETURN(38);} - "CONST" {RETURN(39);} - "DEFINITION" {RETURN(40);} - "DIV" {RETURN(41);} - "DO" {RETURN(42);} - "ELSE" {RETURN(43);} - "ELSIF" {RETURN(44);} - "END" {RETURN(45);} - "EXIT" {RETURN(46);} - "EXPORT" {RETURN(47);} - "FOR" {RETURN(48);} - "FROM" {RETURN(49);} - "IF" {RETURN(50);} - "IMPLEMENTATION" {RETURN(51);} - "IMPORT" {RETURN(52);} - "IN" {RETURN(53);} - "LOOP" {RETURN(54);} - "MOD" {RETURN(55);} - "MODULE" {RETURN(56);} - "NOT" {RETURN(57);} - "OF" {RETURN(58);} - "OR" {RETURN(59);} - "POINTER" {RETURN(60);} - "PROCEDURE" {RETURN(61);} - "QUALIFIED" {RETURN(62);} - "RECORD" {RETURN(63);} - "REPEAT" {RETURN(64);} - "RETURN" {RETURN(65);} - "SET" {RETURN(66);} - "THEN" {RETURN(67);} - "TO" {RETURN(68);} - "TYPE" {RETURN(69);} - "UNTIL" {RETURN(70);} - "VAR" {RETURN(71);} - "WHILE" {RETURN(72);} - "WITH" {RETURN(73);} - - letter (letter | digit) * {RETURN(74);} - - [ \t]+ { goto std; } - - "\n" - { - if(cursor == s->eof) RETURN(0); - s->pos = cursor; s->line++; - goto std; - } - - any - { - printf("unexpected character: %c\n", *s->tok); - goto std; - } -*/ -comment: -/*!re2c - "*)" - { - if(--depth == 0) - goto std; - else - goto comment; - } - "(*" { ++depth; goto comment; } - "\n" - { - if(cursor == s->eof) RETURN(0); - s->tok = s->pos = cursor; s->line++; - goto comment; - } - any { goto comment; } -*/ -} - -/* -void putStr(FILE *o, char *s, uint l){ - while(l-- > 0) - putc(*s++, o); -} -*/ - -main(){ - Scanner in; - memset((char*) &in, 0, sizeof(in)); - in.fd = 0; - while(scan(&in)){ -/* - putc('<', stdout); - putStr(stdout, (char*) in.tok, in.cur - in.tok); - putc('>', stdout); - putc('\n', stdout); -*/ - } -} diff --git a/re2c/examples/langs/rexx.re b/re2c/examples/langs/rexx.re deleted file mode 100644 index b74741da..00000000 --- a/re2c/examples/langs/rexx.re +++ /dev/null @@ -1,319 +0,0 @@ -#include "scanio.h" -#include "scanner.h" - -#define CURSOR ch -#define LOADCURSOR ch = *cursor; -#define ADVANCE cursor++; -#define BACK(n) cursor -= (n); -#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);} -#define MARK(n) ScanCB.ptr = cursor; sel = (n); -#define REVERT cursor = ScanCB.ptr; -#define MARKER sel - -#define RETURN(i) {ScanCB.cur = cursor; return i;} - -int ScanToken(){ - uchar *cursor = ScanCB.cur; - unsigned sel; - uchar ch; - ScanCB.tok = cursor; - ScanCB.eot = NULL; -/*!re2c -all = [\000-\377]; -eof = [\000]; -any = all\eof; -letter = [a-z]|[A-Z]; -digit = [0-9]; -symchr = letter|digit|[.!?_]; -const = (digit|[.])symchr*([eE][+-]?digit+)?; -simple = (symchr\(digit|[.]))(symchr\[.])*; -stem = simple [.]; -symbol = symchr*; -sqstr = ['] ((any\['\n])|(['][']))* [']; -dqstr = ["] ((any\["\n])|(["]["]))* ["]; -str = sqstr|dqstr; -ob = [ \t]*; -not = [\\~]; -A = [aA]; -B = [bB]; -C = [cC]; -D = [dD]; -E = [eE]; -F = [fF]; -G = [gG]; -H = [hH]; -I = [iI]; -J = [jJ]; -K = [kK]; -L = [lL]; -M = [mM]; -N = [nN]; -O = [oO]; -P = [pP]; -Q = [qQ]; -R = [rR]; -S = [sS]; -T = [tT]; -U = [uU]; -V = [vV]; -W = [wW]; -X = [xX]; -Y = [yY]; -Z = [zZ]; -*/ - -scan: -/*!re2c -"\n" - { - ++(ScanCB.lineNum); - ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); - RETURN(SU_EOL); - } -"|" ob "|" - { RETURN(OP_CONCAT); } -"+" - { RETURN(OP_PLUS); } -"-" - { RETURN(OP_MINUS); } -"*" - { RETURN(OP_MULT); } -"/" - { RETURN(OP_DIV); } -"%" - { RETURN(OP_IDIV); } -"/" ob "/" - { RETURN(OP_REMAIN); } -"*" ob "*" - { RETURN(OP_POWER); } -"=" - { RETURN(OP_EQUAL); } -not ob "=" | "<" ob ">" | ">" ob "<" - { RETURN(OP_EQUAL_N); } -">" - { RETURN(OP_GT); } -"<" - { RETURN(OP_LT); } -">" ob "=" | not ob "<" - { RETURN(OP_GE); } -"<" ob "=" | not ob ">" - { RETURN(OP_LE); } -"=" ob "=" - { RETURN(OP_EQUAL_EQ); } -not ob "=" ob "=" - { RETURN(OP_EQUAL_EQ_N); } -">" ob ">" - { RETURN(OP_GT_STRICT); } -"<" ob "<" - { RETURN(OP_LT_STRICT); } -">" ob ">" ob "=" | not ob "<" ob "<" - { RETURN(OP_GE_STRICT); } -"<" ob "<" ob "=" | not ob ">" ob ">" - { RETURN(OP_LE_STRICT); } -"&" - { RETURN(OP_AND); } -"|" - { RETURN(OP_OR); } -"&" ob "&" - { RETURN(OP_XOR); } -not - { RETURN(OP_NOT); } - -":" - { RETURN(SU_COLON); } -"," - { RETURN(SU_COMMA); } -"(" - { RETURN(SU_POPEN); } -")" - { RETURN(SU_PCLOSE); } -";" - { RETURN(SU_EOC); } - -A D D R E S S - { RETURN(RX_ADDRESS); } -A R G - { RETURN(RX_ARG); } -C A L L - { RETURN(RX_CALL); } -D O - { RETURN(RX_DO); } -D R O P - { RETURN(RX_DROP); } -E L S E - { RETURN(RX_ELSE); } -E N D - { RETURN(RX_END); } -E X I T - { RETURN(RX_EXIT); } -I F - { RETURN(RX_IF); } -I N T E R P R E T - { RETURN(RX_INTERPRET); } -I T E R A T E - { RETURN(RX_ITERATE); } -L E A V E - { RETURN(RX_LEAVE); } -N O P - { RETURN(RX_NOP); } -N U M E R I C - { RETURN(RX_NUMERIC); } -O P T I O N S - { RETURN(RX_OPTIONS); } -O T H E R W I S E - { RETURN(RX_OTHERWISE); } -P A R S E - { RETURN(RX_PARSE); } -P R O C E D U R E - { RETURN(RX_PROCEDURE); } -P U L L - { RETURN(RX_PULL); } -P U S H - { RETURN(RX_PUSH); } -Q U E U E - { RETURN(RX_QUEUE); } -R E T U R N - { RETURN(RX_RETURN); } -S A Y - { RETURN(RX_SAY); } -S E L E C T - { RETURN(RX_SELECT); } -S I G N A L - { RETURN(RX_SIGNAL); } -T H E N - { RETURN(RX_THEN); } -T R A C E - { RETURN(RX_TRACE); } -W H E N - { RETURN(RX_WHEN); } -O F F - { RETURN(RXS_OFF); } -O N - { RETURN(RXS_ON); } -B Y - { RETURN(RXS_BY); } -D I G I T S - { RETURN(RXS_DIGITS); } -E N G I N E E R I N G - { RETURN(RXS_ENGINEERING); } -E R R O R - { RETURN(RXS_ERROR); } -E X P O S E - { RETURN(RXS_EXPOSE); } -F A I L U R E - { RETURN(RXS_FAILURE); } -F O R - { RETURN(RXS_FOR); } -F O R E V E R - { RETURN(RXS_FOREVER); } -F O R M - { RETURN(RXS_FORM); } -F U Z Z - { RETURN(RXS_FUZZ); } -H A L T - { RETURN(RXS_HALT); } -L I N E I N - { RETURN(RXS_LINEIN); } -N A M E - { RETURN(RXS_NAME); } -N O T R E A D Y - { RETURN(RXS_NOTREADY); } -N O V A L U E - { RETURN(RXS_NOVALUE); } -S C I E N T I F I C - { RETURN(RXS_SCIENTIFIC); } -S O U R C E - { RETURN(RXS_SOURCE); } -S Y N T A X - { RETURN(RXS_SYNTAX); } -T O - { RETURN(RXS_TO); } -U N T I L - { RETURN(RXS_UNTIL); } -U P P E R - { RETURN(RXS_UPPER); } -V A L U E - { RETURN(RXS_VALUE); } -V A R - { RETURN(RXS_VAR); } -V E R S I O N - { RETURN(RXS_VERSION); } -W H I L E - { RETURN(RXS_WHILE); } -W I T H - { RETURN(RXS_WITH); } - -const - { RETURN(SU_CONST); } -simple - { RETURN(SU_SYMBOL); } -stem - { RETURN(SU_SYMBOL_STEM); } -symbol - { RETURN(SU_SYMBOL_COMPOUND); } -str - { RETURN(SU_LITERAL); } -str [bB] / (all\symchr) - { RETURN(SU_LITERAL_BIN); } -str [xX] / (all\symchr) - { RETURN(SU_LITERAL_HEX); } - -eof - { RETURN(SU_EOF); } -any - { RETURN(SU_ERROR); } -*/ -} - -bool StripToken(){ - uchar *cursor = ScanCB.cur; - unsigned depth; - uchar ch; - bool blanks = FALSE; - ScanCB.eot = cursor; -strip: -/*!re2c -"/*" - { - depth = 1; - goto comment; - } -"\r" - { goto strip; } -[ \t] - { - blanks = TRUE; - goto strip; - } -[] / all - { RETURN(blanks); } -*/ - -comment: -/*!re2c -"*/" - { - if(--depth == 0) - goto strip; - else - goto comment; - } -"\n" - { - ++(ScanCB.lineNum); - ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); - goto comment; - } -"/*" - { - ++depth; - goto comment; - } -eof - { RETURN(blanks); } -any - { - goto comment; - } -*/ -} diff --git a/re2c/examples/push_model/push.re b/re2c/examples/push_model/push.re deleted file mode 100644 index 5ad6e7ac..00000000 --- a/re2c/examples/push_model/push.re +++ /dev/null @@ -1,340 +0,0 @@ -/* - * A push-model scanner example for re2c -f - * Written Mon Apr 11 2005 by mgix@mgix.com - * This file is in the public domain. - * - */ - -// ---------------------------------------------------------------------- - -#include -#include -#include -#include -#include - -#if defined(WIN32) - - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - -#else - - #include - #include - - #ifndef O_BINARY - #define O_BINARY 0 - #endif - -#endif - -// ---------------------------------------------------------------------- -#define TOKENS \ - \ - TOK(kEOF) \ - TOK(kEOL) \ - TOK(kUnknown) \ - TOK(kIdentifier) \ - TOK(kDecimalConstant) \ - \ - TOK(kEqual) \ - TOK(kLeftParen) \ - TOK(kRightParen) \ - TOK(kMinus) \ - TOK(kPlus) \ - TOK(kStar) \ - TOK(kSlash) \ - \ - TOK(kIf) \ - TOK(kFor) \ - TOK(kElse) \ - TOK(kGoto) \ - TOK(kBreak) \ - TOK(kWhile) \ - TOK(kReturn) \ - - -// ---------------------------------------------------------------------- -static const char *tokenNames[] = -{ - #define TOK(x) #x, - TOKENS - #undef TOK -}; - -// ---------------------------------------------------------------------- -class PushScanner -{ -public: - - enum Token - { - #define TOK(x) x, - TOKENS - #undef TOK - }; - -private: - - bool eof; - int32_t state; - - uint8_t *limit; - uint8_t *start; - uint8_t *cursor; - uint8_t *marker; - - uint8_t *buffer; - uint8_t *bufferEnd; - - uint8_t yych; - uint32_t yyaccept; - -public: - - // ---------------------------------------------------------------------- - PushScanner() - { - limit = 0; - start = 0; - state = -1; - cursor = 0; - marker = 0; - buffer = 0; - eof = false; - bufferEnd = 0; - } - - // ---------------------------------------------------------------------- - ~PushScanner() - { - } - - // ---------------------------------------------------------------------- - void send( - Token token - ) - { - size_t tokenSize = cursor-start; - const char *tokenName = tokenNames[token]; - printf( - "scanner is pushing out a token of type %d (%s)", - token, - tokenName - ); - - if(token==kEOF) putchar('\n'); - else - { - size_t tokenNameSize = strlen(tokenNames[token]); - size_t padSize = 20-(20"); - - fwrite( - start, - tokenSize, - 1, - stdout - ); - - printf("<----\n"); - } - } - - // ---------------------------------------------------------------------- - uint32_t push( - const void *input, - ssize_t inputSize - ) - { - printf( - "scanner is receiving a new data batch of length %d\n" - "scanner continues with saved state = %d\n", - inputSize, - state - ); - - /* - * Data source is signaling end of file when batch size - * is less than maxFill. This is slightly annoying because - * maxFill is a value that can only be known after re2c does - * its thing. Practically though, maxFill is never bigger than - * the longest keyword, so given our grammar, 32 is a safe bet. - */ - uint8_t null[64]; - const ssize_t maxFill = 32; - if(inputSize