From 6e4cfd4623a9b8602a1da7bc7ac514468d0c0d2d Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 10 Nov 2015 17:36:27 +0000 Subject: [PATCH] Added [-Wuseless-escape] description. --- .../warnings/useless_escape/how_it_works.rst | 25 ++++++++++ .../warnings/useless_escape/real_world.rst | 24 +++++++++ .../useless_escape/simple_example.rst | 49 +++++++++++++++++++ .../useless_escape/wuseless_escape.rst | 10 ++++ src/manual/warnings/warnings.rst | 2 +- src/manual/warnings/wuseless_escape.rst | 5 -- 6 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 src/manual/warnings/useless_escape/how_it_works.rst create mode 100644 src/manual/warnings/useless_escape/real_world.rst create mode 100644 src/manual/warnings/useless_escape/simple_example.rst create mode 100644 src/manual/warnings/useless_escape/wuseless_escape.rst delete mode 100644 src/manual/warnings/wuseless_escape.rst diff --git a/src/manual/warnings/useless_escape/how_it_works.rst b/src/manual/warnings/useless_escape/how_it_works.rst new file mode 100644 index 00000000..84cd66c9 --- /dev/null +++ b/src/manual/warnings/useless_escape/how_it_works.rst @@ -0,0 +1,25 @@ +How it works +~~~~~~~~~~~~ + +re2c recognizes escapes in the following lexemes: + +* double-quoted strings ``" ... "`` +* single-quoted strings ``' ... '`` +* character classes ``[ ... ]`` and ``[^ ... ]`` + +The following escapes are recognized: + +* Closing quotes (``\"`` for double-quoted strings, ``\'`` for single-quoted strings and ``\]`` for character classes). +* Dash ``\-`` in character classes. +* Octal escapes: ``\ooo``, where ``o`` is in range ``[0 - 7]`` + (maximal octal escape is ``\377``, which equals ``0xFF``). +* Hexadecimal escapes: ``\xhh``, ``\Xhhhh``, ``\uhhhh`` and ``\Uhhhhhhhh``, + where ``h`` is in range ``[0 - 9]``, ``[a - f]`` or ``[A - F]``. +* Miscellaneous escapes: ``\a``, ``\b``, ``\f``, ``\n``, ``\r``, ``\t``, ``\v``, ``\\``. + +Ill-formed octal and hexadecimal escapes are treated as errors. 
+An escape followed by a newline is also an error: multiline strings and classes are not allowed +(this is very inconvenient; hopefully it will be fixed in the future). +Any other ill-formed escapes are ignored. +If ``[-Wuseless-escape]`` is enabled, re2c warns about ignored escapes. + diff --git a/src/manual/warnings/useless_escape/real_world.rst b/src/manual/warnings/useless_escape/real_world.rst new file mode 100644 index 00000000..7596af62 --- /dev/null +++ b/src/manual/warnings/useless_escape/real_world.rst @@ -0,0 +1,24 @@ +Real-world examples +~~~~~~~~~~~~~~~~~~~ + +I found many useless escapes in real-world programs: + +* A very strange escape ``\*`` in a regular expression like ``"*\*"``: + either someone wanted to write ``"*\\*"`` (with backslash in the middle), + or I have no explanation at all (considering that the first ``*`` is not escaped). + As far as I know re2c always treated ``"*\*"`` as ``"**"``. + +* ``\h`` in character classes (e.g. ``[ \h\t\v\f\r]``): + perhaps someone confused ``\h`` with horizontal tab + (or even hostname ``:)``). + +* ``\[`` in character classes; this one is very common. + +* ``\/`` in character classes (e.g. ``[^\/\000]``) and strings (e.g. ``"\/*"``). + However, there is one interesting case: ``"/**** State @@ ***\/"``: + here an unescaped slash would end the multiline comment. + Perhaps ``[-Wuseless-escape]`` should be fixed to recognize such cases. + +* ``\.`` in character classes (e.g. ``[\.]``). + + diff --git a/src/manual/warnings/useless_escape/simple_example.rst b/src/manual/warnings/useless_escape/simple_example.rst new file mode 100644 index 00000000..91f01eed --- /dev/null +++ b/src/manual/warnings/useless_escape/simple_example.rst @@ -0,0 +1,49 @@ +A simple example +~~~~~~~~~~~~~~~~ + +.. code-block:: cpp + :number-lines: + + /*!re2c + * {} + "\a\A\"\'\[\]\-\x5d\377" {} + '\a\A\"\'\[\]\-\x5d\377' {} + [\a\A\"\'\[\]\-\x5d\377] {} + */ + +Given this code, ``re2c -Wuseless-escape`` reports a bunch of warnings: + +.. 
code-block:: + + re2c: warning: line 3: column 11: escape has no effect: '\A' [-Wuseless-escape] + re2c: warning: line 3: column 15: escape has no effect: '\'' [-Wuseless-escape] + re2c: warning: line 3: column 17: escape has no effect: '\[' [-Wuseless-escape] + re2c: warning: line 3: column 19: escape has no effect: '\]' [-Wuseless-escape] + re2c: warning: line 3: column 21: escape has no effect: '\-' [-Wuseless-escape] + re2c: warning: line 4: column 11: escape has no effect: '\A' [-Wuseless-escape] + re2c: warning: line 4: column 13: escape has no effect: '\"' [-Wuseless-escape] + re2c: warning: line 4: column 17: escape has no effect: '\[' [-Wuseless-escape] + re2c: warning: line 4: column 19: escape has no effect: '\]' [-Wuseless-escape] + re2c: warning: line 4: column 21: escape has no effect: '\-' [-Wuseless-escape] + re2c: warning: line 5: column 11: escape has no effect: '\A' [-Wuseless-escape] + re2c: warning: line 5: column 13: escape has no effect: '\"' [-Wuseless-escape] + re2c: warning: line 5: column 15: escape has no effect: '\'' [-Wuseless-escape] + re2c: warning: line 5: column 17: escape has no effect: '\[' [-Wuseless-escape] + +It says that ``\A`` and ``\[`` escapes are meaningless in all rules, +``\-`` makes sense only in character class +and each type of closing quotes (``"``, ``'`` and ``]``) should only be escaped inside of same-quoted string. +Useless escapes are ignored: the escaped symbol is treated as not escaped (``\A`` becomes ``A``, etc.). +The above example should be fixed as follows: + +.. 
code-block:: cpp + :number-lines: + + /*!re2c + * {} + "\aA\"'[]-\x5d\377" {} + '\aA"\'[]-\x5d\377' {} + [\aA"'[\]\-\x5d\377] {} + */ + + diff --git a/src/manual/warnings/useless_escape/wuseless_escape.rst b/src/manual/warnings/useless_escape/wuseless_escape.rst new file mode 100644 index 00000000..56d2a4d0 --- /dev/null +++ b/src/manual/warnings/useless_escape/wuseless_escape.rst @@ -0,0 +1,10 @@ +[-Wuseless-escape] +-------------------------- + +.. include:: ../home.rst +.. include:: ../../../contents.rst + +.. include:: simple_example.rst +.. include:: how_it_works.rst +.. include:: real_world.rst + diff --git a/src/manual/warnings/warnings.rst b/src/manual/warnings/warnings.rst index 3577d2bd..e8886170 100644 --- a/src/manual/warnings/warnings.rst +++ b/src/manual/warnings/warnings.rst @@ -9,7 +9,7 @@ Warnings * `[-Wundefined-control-flow] `_ * `[-Wunreachable-rules] `_ * `[-Wcondition-order] `_ -* `[-Wuseless-escape] `_ +* `[-Wuseless-escape] `_ * `[-Wswapped-range] `_ * `[-Wempty-character-class] `_ * `[-Wmatch-empty-string] `_ diff --git a/src/manual/warnings/wuseless_escape.rst b/src/manual/warnings/wuseless_escape.rst deleted file mode 100644 index d7437e0d..00000000 --- a/src/manual/warnings/wuseless_escape.rst +++ /dev/null @@ -1,5 +0,0 @@ -[-Wuseless-escape] --------------------------- - -.. include:: home.rst - -- 2.50.1