]> granicus.if.org Git - re2c/commitdiff
Added [-Wcondition-order] description.
authorUlya Trofimovich <skvadrik@gmail.com>
Sat, 7 Nov 2015 22:48:42 +0000 (22:48 +0000)
committerUlya Trofimovich <skvadrik@gmail.com>
Sat, 7 Nov 2015 22:48:42 +0000 (22:48 +0000)
src/manual/warnings/condition_order/how_it_works.rst [new file with mode: 0644]
src/manual/warnings/condition_order/real_world.rst [new file with mode: 0644]
src/manual/warnings/condition_order/simple_example.rst [new file with mode: 0644]
src/manual/warnings/condition_order/wcondition_order.re [new file with mode: 0644]
src/manual/warnings/condition_order/wcondition_order.rst [new file with mode: 0644]
src/manual/warnings/warnings.rst
src/manual/warnings/wcondition_order.rst [deleted file]

diff --git a/src/manual/warnings/condition_order/how_it_works.rst b/src/manual/warnings/condition_order/how_it_works.rst
new file mode 100644 (file)
index 0000000..bd820c7
--- /dev/null
@@ -0,0 +1,12 @@
+How it works
+~~~~~~~~~~~~
+
+The warning is triggered if at the same time:
+
+* Conditions are enabled with ``-c``.
+* Neither ``/*!types:re2c*/``, nor ``-t, --type-header`` is used.
+* Initial dispatch on conditions in sensitive to condition order:
+  it is either an ``if`` statement or a jump table.
+  Note that the number of conditions must be greater than one,
+  otherwise dispatch shrinks to a simple unconditional jump.
+
diff --git a/src/manual/warnings/condition_order/real_world.rst b/src/manual/warnings/condition_order/real_world.rst
new file mode 100644 (file)
index 0000000..11f663d
--- /dev/null
@@ -0,0 +1,89 @@
+Real-world examples
+~~~~~~~~~~~~~~~~~~~
+
+The  best real-world example is a story of how ``[-Wcondition-order]`` was added to re2c.
+
+One day I decided to change condition numbering scheme.
+It was only natural: re2c assigns numbers to conditions in the order they appear in code.
+This is not very convenient, because elsewhere in the code condition names (not numbers) are used as unique identifiers.
+Names are sorted lexicographically, so the original condition order is not preserved.
+It takes extra care to remember the mapping of names to numbers.
+So why not just drop numbers and sort conditions by their names?
+No one cares for condition order anyway: since re2c-generated code uses condition names,
+they must be defined, and why would anyone define them by hand if they can do it automatically.
+
+Or wait.
+Almost occasionally I found a real-world program that does use fixed condition numbers.
+Here is the carcass of this program:
+
+.. code-block:: cpp
+    :number-lines:
+
+    #define NORMAL 0
+    #define PRE_RAW 1
+    #define RAW 2
+    #define INITIAL 3
+    
+    /*!re2c
+        re2c:yyfill:check = 0;
+
+        T_TRUE      'true'
+        T_YES       'yes'
+        T_ON        'on'
+        T_ENABLED   'enabled'
+        T_FALSE     'false'
+        T_NO        'no'
+        T_OFF       'off'
+        T_DISABLED  'disabled'
+        T_EVAL      'ev'
+        T_SHELL     'sh'
+        T_IF        'if'
+        T_RUN       'run'
+        T_RUN_SHORT "r"
+        WS          [ \r\n\t]+
+        DIGITS      [-]?[0-9\.]+
+        ID          [^ \r\n\t:#\000]+
+        ADDR        [0][x][a-fA-F0-9]+
+        OPCODE      (ZEND_|zend_)([A-Za-z])+
+        INPUT       [^\n\000]+
+
+        <!*> {}
+        <*>{WS}?[\n\000] {}
+        <PRE_RAW, NORMAL>[-][r]{WS}?{DIGITS} {}
+        <NORMAL>{T_IF}{WS} {}
+        <NORMAL>{ID}[:]{1}[//]{2} {}
+        <NORMAL>[#]{1} {}
+        <NORMAL>[:]{2} {}
+        <NORMAL>[:]{1} {}
+        <NORMAL>({T_YES}|{T_ON}|{T_ENABLED}|{T_TRUE}){WS} {}
+        <NORMAL>({T_NO}|{T_OFF}|{T_DISABLED}|{T_FALSE}){WS} {}
+        <NORMAL>{DIGITS} {}
+        <NORMAL>{ADDR} {}
+        <NORMAL>{OPCODE} {}
+        <NORMAL>{ID} {}
+        <RAW>{INPUT} {}
+        <*>{WS} {}
+        <INITIAL>{T_EVAL}{WS} {}
+        <INITIAL>{T_SHELL}{WS} {}
+        <INITIAL>({T_RUN}|{T_RUN_SHORT}){WS} {}
+        <PRE_RAW>. {}
+        <INITIAL>. {}
+    */
+
+Curiously, the program somehow avoids using re2c-generated condition names:
+even ``YYSETCONDITION`` is called manually instead of using ``=>`` in rules.
+The program uses ``-g`` option, so the initial dispatch does depend on condition order:
+
+.. code-block:: cpp
+
+    static void *yyctable[4] = {
+        &&yyc_NORMAL,
+        &&yyc_PRE_RAW,
+        &&yyc_RAW,
+        &&yyc_INITIAL,
+    };
+    goto *yyctable[YYGETCONDITION()];
+
+Of course, re2c will keep to the old condition numbering scheme (at least in the nearest future).
+Who wants to break the old code? ``:)``
+
diff --git a/src/manual/warnings/condition_order/simple_example.rst b/src/manual/warnings/condition_order/simple_example.rst
new file mode 100644 (file)
index 0000000..10da472
--- /dev/null
@@ -0,0 +1,86 @@
+A simple example
+~~~~~~~~~~~~~~~~
+
+The following lexer consists of two conditions: ``a`` and ``b``.
+It starts in condition ``a``, which expects a sequence of letters ``'a'`` followed by a comma.
+Comma causes transition to condition ``b``, which expects a sequence of letters ``'b'`` followed by an exclamation.
+Anything else is an error.
+Nothing special, except that instead of generating condition names with ``/*!types:re2c*/`` directive
+or ``-t, --type-header`` option we hardcoded them manually:
+
+`[wcondition_order.re] <wcondition_order.re>`_
+
+.. include:: wcondition_order.re
+    :code: cpp
+    :number-lines:
+
+Condition order is controlled by ``REVERSED_CONDITION_ORDER`` define.
+Let's compile and run it:
+
+.. code-block::
+
+    $ re2c -c -o example.c -Wcondition-order wcondition_order.re
+    $
+    $ gcc -o example example.c
+    $ ./example
+    aaaa,bbb!
+    $
+    $ gcc -o example -DREVERSED_CONDITION_ORDER example.c
+    $ ./example
+    aaaa,bbb!
+
+Everything works fine: we get ``aaaa,bbb!`` in both cases.
+However, if we use ``-s`` re2c option, lexer becomes sensitive to condition order:
+
+.. code-block::
+
+    $ re2c -cs -o example.c -Wcondition-order wcondition_order.re
+    re2c: warning: line 31: looks like you use hardcoded numbers instead of autogenerated condition names: better add
+    '/*!types:re2c*/' directive or '-t, --type-header' option and don't rely on fixed condition order. [-Wcondition-order]
+    $
+    $ gcc -o example example.c
+    $ ./example
+    aaaa,bbb!
+    $
+    $ gcc -o example -DREVERSED_CONDITION_ORDER example.c
+    $ ./example
+    error
+
+And we also get a warning from re2c.
+The same behaviour (re2c warning and error with ``-DREVERSED_CONDITION_ORDER``) remains if we use ``-g`` option
+(or any option that implies ``-s`` or ``-g``).
+Why is that?
+A look at the generated code explains everything.
+Normally the inital dispatch on conditions is a ``switch`` statement:
+
+.. code-block:: cpp
+
+       switch (c) {
+       case yyca: goto yyc_a;
+       case yycb: goto yyc_b;
+       }
+
+Dispatch uses explicit condition names and works no matter what numbers are assigned to them.
+However, with ``-s`` option re2c generates an ``if`` statement instead of a ``switch``:
+
+.. code-block:: cpp
+
+       if (c < 1) {
+               goto yyc_a;
+       } else {
+               goto yyc_b;
+       }
+
+And with ``-g`` option it uses jump table (computed ``goto``):
+
+.. code-block:: cpp
+
+       static void *yyctable[2] = {
+               &&yyc_a,
+               &&yyc_b,
+       };
+       goto *yyctable[c];
+
+Clearly, the last two cases are sensitive to condition order.
+The fix is easy: as the warning suggests, use ``/*!types:re2c*/`` directive or ``-t, --type-header`` option.
+
diff --git a/src/manual/warnings/condition_order/wcondition_order.re b/src/manual/warnings/condition_order/wcondition_order.re
new file mode 100644 (file)
index 0000000..40a3c73
--- /dev/null
@@ -0,0 +1,34 @@
+#include <stdio.h>
+
+#ifdef REVERSED_CONDITION_ORDER
+#    define yyca 1
+#    define yycb 0
+#else
+#    define yyca 0
+#    define yycb 1
+#endif
+
+int main()
+{
+    const char * YYCURSOR = "aaaa,bbb!";
+    int c = yyca;
+    for (;;) {
+    /*!re2c
+        re2c:define:YYCTYPE = char;
+        re2c:yyfill:enable = 0;
+        re2c:define:YYSETCONDITION = "c = @@;";
+        re2c:define:YYSETCONDITION:naked = 1;
+        re2c:define:YYGETCONDITION = c;
+        re2c:define:YYGETCONDITION:naked = 1;
+
+        <*> * { printf ("error\n"); break; }
+
+        <a> "a"      { printf ("a"); continue; }
+        <a> "," => b { printf (","); continue; }
+
+        <b> "!" { printf ("!\n"); break; }
+        <b> "b" { printf ("b"); continue; }
+    */
+    }
+    return 0;
+}
diff --git a/src/manual/warnings/condition_order/wcondition_order.rst b/src/manual/warnings/condition_order/wcondition_order.rst
new file mode 100644 (file)
index 0000000..1889830
--- /dev/null
@@ -0,0 +1,10 @@
+[-Wcondition-order]
+--------------------------
+
+.. include:: ../home.rst
+.. include:: ../../../contents.rst
+
+.. include:: simple_example.rst
+.. include:: how_it_works.rst
+.. include:: real_world.rst
+
index e59fc5db178a364cb033ba676087e5c1dfdcc163..3577d2bdb2e1eb472c89ca070a19f54f7bdcb2a6 100644 (file)
@@ -8,7 +8,7 @@ Warnings
 
 * `[-Wundefined-control-flow] <undefined_control_flow/wundefined_control_flow.html>`_
 * `[-Wunreachable-rules]      <unreachable_rules/wunreachable_rules.html>`_
-* `[-Wcondition-order]        <wcondition_order.html>`_
+* `[-Wcondition-order]        <condition_order/wcondition_order.html>`_
 * `[-Wuseless-escape]         <wuseless_escape.html>`_
 * `[-Wswapped-range]          <wswapped_range.html>`_
 * `[-Wempty-character-class]  <wempty_character_class.html>`_
diff --git a/src/manual/warnings/wcondition_order.rst b/src/manual/warnings/wcondition_order.rst
deleted file mode 100644 (file)
index 5d6e7cd..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-[-Wcondition-order]
---------------------------
-
-.. include:: home.rst
-