Version 0.10.0 (????-??-??)
---------------------------
+- Added support for c/c++ compatible \u and \U unicode notation.
- Added ability to control indentation.
- Made scanner error out in case an ambiguous /* is found.
- Fixed indentation of generated code.
}
}
+ case 'U':
+ {
+ if (s.len < 8)
+ {
+ fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required");
+ return ~0;
+ }
+
+ uint l = 0;
+
+ if (s.str[0] == '0')
+ {
+ l++;
+ if (s.str[1] == '0')
+ {
+ l++;
+ if (s.str[2] == '0')
+ {
+ l++;
+ if (s.str[3] == '0')
+ {
+ l++;
+ }
+ }
+ }
+ }
+
+ if (l != 4)
+ {
+ fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required");
+ }
+
+ s.len -= 4;
+ s.str += 4;
+
+ // no break;
+ }
case 'X':
+ case 'u':
{
if (s.len < 4)
{
- fatal(s.ofs()+s.len, "Illegal hexadecimal character code, four hexadecimal digits are required");
+ fatal(s.ofs()+s.len,
+ c == 'X'
+ ? "Illegal hexadecimal character code, four hexadecimal digits are required"
+ : "Illegal unicode character, four hexadecimal digits are required");
return ~0;
}
if (!p1 || !p2 || !p3 || !p4)
{
- fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), "Illegal hexadecimal character code");
+ fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0),
+ c == 'X'
+ ? "Illegal hexadecimal character code, non hexxdecimal digit found"
+ : "Illegal unicode character, non hexadecimal digit found");
return ~0;
}
else
if (v >= nRealChars)
{
- fatal(s.ofs(), "Illegal hexadecimal character code, out of range");
+ fatal(s.ofs(),
+ c == 'X'
+ ? "Illegal hexadecimal character code, out of range"
+ : "Illegal unicode character, out of range");
}
return v;
if (!p0 || !p1 || !p2)
{
- fatal(s.ofs()+(p1?1:0), "Illegal octal character code");
+ fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found");
return ~0;
}
else
.ds rx regular expression
.ds lx \fIl\fP-expression
\"$Log$
+\"Revision 1.33 2006/01/01 17:13:56 helly
+\"- Added support for c/c++ compatible \u and \U unicode notation.
+\"
\"Revision 1.32 2005/12/31 00:54:55 helly
\"- Update docu
\"
character definitions and the following set of escape sequences (\fB\\n\fP,
\fB\\t\fP, \fB\\v\fP, \fB\\b\fP, \fB\\r\fP, \fB\\f\fP, \fB\\a\fP, \fB\\\\\fP).
An octal character is defined by a backslash followed by its three octal digits
-and a hexadecimal character is defined by backslash, a lower cased 'x' and its
-two hexadecimal digits or a backslash, an upper cased 'X' and its four
-hexadecimal digits. Since characters greater \fB0X00FF\fP are not allowed in
-non unicode mode the only portable "\fBany\fP" rule is \fB(.|"\\n")\fP.
+and a hexadecimal character is defined by a backslash, a lowercase \fBx\fP
+and its two hexadecimal digits, or a backslash, an uppercase \fBX\fP and its
+four hexadecimal digits. Since characters greater than \fB\\X00FF\fP are not
+allowed in non-unicode mode, the only portable "\fBany\fP" rule is
+\fB(.|"\\n")\fP. re2c furthermore supports the C/C++ unicode notation, that is,
+a backslash followed by either a lowercase \fBu\fP and its four hexadecimal
+digits or an uppercase \fBU\fP and its eight hexadecimal digits. However, with
+the \fBU\fP notation it is not possible to support characters greater than
+\fB\\U0000FFFF\fP due to an internal limitation of re2c.
.LP
The regular expressions listed above are grouped according to
precedence, from highest precedence at the top to lowest at the bottom.
--- /dev/null
+line 2, column 6: Illegal unicode character, eight hexadecimal digits are required
+/* Generated by re2c */
+#line 1 "error10.re"
--- /dev/null
+/*!re2c
+[\U00900000YY] {}
+*/
--- /dev/null
+line 2, column 9: Illegal unicode character, non hexadecimal digit found
+/* Generated by re2c */
+#line 1 "error11.re"
--- /dev/null
+/*!re2c
+[\U00000ZZZYY] {}
+*/
/*!re2c
-[\x0Z] {}
+[\x0ZYY] {}
*/
/*!re2c
-[\400] {}
+[\400YY] {}
*/
-line 2, column 4: Illegal octal character code
+line 2, column 4: Illegal octal character code, non octal digit found
/* Generated by re2c */
#line 1 "error8.re"
/*!re2c
-[\090] {}
+[\090YY] {}
*/
-line 2, column 5: Illegal octal character code
+line 2, column 5: Illegal octal character code, non octal digit found
/* Generated by re2c */
#line 1 "error9.re"
/*!re2c
-[\009] {}
+[\009YY] {}
*/