From 1365e5434d6178d0335c5b2d8ddcfc5ba79931f9 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 22 Aug 2017 17:37:44 +0100 Subject: [PATCH] Added example of a push-model lexer. --- src/examples/18_push_model.re.txt | 95 +++++++++++++++++++++++++++++++ src/examples/example_18.rst | 42 ++++++++++++++ src/examples/examples.rst | 7 +++ 3 files changed, 144 insertions(+) create mode 100644 src/examples/18_push_model.re.txt create mode 100644 src/examples/example_18.rst diff --git a/src/examples/18_push_model.re.txt b/src/examples/18_push_model.re.txt new file mode 100644 index 00000000..2f136730 --- /dev/null +++ b/src/examples/18_push_model.re.txt @@ -0,0 +1,95 @@ +#include <stdio.h> +#include <string.h> + +/*!max:re2c*/ +static const size_t SIZE = 4096; + +struct input_t { + char buf[SIZE + YYMAXFILL]; + char *lim; + char *cur; + char *tok; + int state; + unsigned need; + unsigned yyaccept; + char yych; + + input_t() + : buf() + , lim(buf + SIZE) + , cur(lim) + , tok(lim) + , state(-1) + , need(0) + , yyaccept(0) + , yych(0) + {} + + bool fill() + { + const size_t free = tok - buf; + if (free < need) return false; + + memmove(buf, tok, buf - tok + SIZE); + lim -= free; + cur -= free; + tok -= free; + lim += fread(lim, 1, free, stdin); + if (lim < buf + SIZE) { + memset(lim, 0, YYMAXFILL); + lim += YYMAXFILL; + } + return true; + } +}; + +enum status_t { OK, FAIL, NEED_MORE_INPUT }; + +static status_t lex(input_t &in, unsigned &words) +{ +# define YYGETSTATE() in.state +# define YYSETSTATE(s) in.state = s +# define YYFILL(n) do { in.need = n; return NEED_MORE_INPUT; } while (0) + /*!getstate:re2c*/ +loop: + in.tok = in.cur; + /*!re2c + re2c:define:YYCTYPE = char; + re2c:define:YYCURSOR = in.cur; + re2c:define:YYLIMIT = in.lim; + re2c:variable:yych = in.yych; + + * { return FAIL; } + [\x00] { return OK; } + [\n ]+ { goto loop; } + [a-zA-Z]+ { ++words; goto loop; } + */ +} + +int main() +{ + input_t in; + unsigned words = 0; + + while (true) { + const status_t st = lex(in, words);
+ // end of input: print result + if (st == OK) { + printf("\nword count: %u\n", words); + break; + + // unexpected error: abort + } else if (st == FAIL) { + printf("\nerror\n"); + return 1; + + // get more input and continue + } else if (!in.fill()) { + printf("\nsmall buffer\n"); + return 2; + } + } + + return 0; +} diff --git a/src/examples/example_18.rst b/src/examples/example_18.rst new file mode 100644 index 00000000..fef7d342 --- /dev/null +++ b/src/examples/example_18.rst @@ -0,0 +1,42 @@ +Push model +---------- + +By default re2c generates *pull-model* lexers: +it assumes that the lexer runs without interrupts and calls ``YYFILL`` to "pull" more input. +In some cases it might be necessary to generate a *push-model* lexer +that stops when it runs out of input and returns control to the outer program. +Later, when the outer program obtains more input, it resumes the lexer and continues lexing from the point where it stopped. + +In order to function in this manner, the lexer must be able to store its inner state before returning to the caller. +This can be done with the re2c ``-f`` (``--storable-state``) option `described here `_. +The example below reads chunks of input from ``stdin`` and counts the number of words in it. +Note that the parsing loop is located in the ``main`` function, +and ``YYFILL`` merely returns instead of refilling the buffer. +The lexer state is represented by the variables ``state``, ``yych`` and ``yyaccept``. +Dispatch on ``state`` is generated with the help of the ``/*!getstate:re2c*/`` directive. +In this example explicit use of the directive is necessary, because we need to put entry code between the state dispatch and the lexer start. +If the directive is omitted, re2c emits the state dispatch right before the lexer start +(in this case ``yy0`` should be used as the start label). + +:download:`[push_model.re] <18_push_model.re.txt>` + +.. literalinclude:: 18_push_model.re.txt + :language: cpp + :linenos: + +Compile: + +..
code-block:: bash + + $ re2c -f -o push_model.cc push_model.re + $ g++ -o push_model push_model.cc + +Run: + +.. code-block:: bash + + $ ./push_model + Lorem ipsum dolor sit amet^D + word count: 5 + + diff --git a/src/examples/examples.rst b/src/examples/examples.rst index 0dad5fca..7d0da27f 100644 --- a/src/examples/examples.rst +++ b/src/examples/examples.rst @@ -22,6 +22,7 @@ Examples Strings in binaries Fake sentinel std::ifstream + Push model Examples have been written with two goals in mind. First, they are practical: each example solves a distinct real-world problem, @@ -46,6 +47,12 @@ Second, examples show various aspects of using re2c API: Some additional details of handling *tags* in ``YYFILL`` are illustrated in parsing `URI `_ and parsing `HTTP messages `_. + Using the `storable state `_ feature + to write `push-model `_ lexers: + it is necessary when the input comes in chunks that are controlled by the outside program. + In such a case the lexer must be stopped when there is not enough input + and later resumed from the same point. + `Submatch extraction <../manual/features/submatch/submatch.html>`_: using *s-tags* to store input positions corresponding to various parts of the regular expression in variables (outlined by parsing `IPv4 address `_ and also used in -- 2.40.0