From: Steven R. Loomis Date: Thu, 16 Mar 2017 21:49:19 +0000 (+0000) Subject: ICU-12766 support escaping on ebcdic (z) X-Git-Tag: release-59-rc~71^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=02cf91d3e6a2f739a8b7c3fd3ac1df211082aef5;p=icu ICU-12766 support escaping on ebcdic (z) X-SVN-Rev: 39842 --- diff --git a/icu4c/source/common/unicode/platform.h b/icu4c/source/common/unicode/platform.h index 41b363093cc..dc7d386281a 100644 --- a/icu4c/source/common/unicode/platform.h +++ b/icu4c/source/common/unicode/platform.h @@ -486,7 +486,7 @@ # define U_CPLUSPLUS_VERSION 1 #endif -#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) // add in std::nullptr_t namespace std { typedef decltype(nullptr) nullptr_t; diff --git a/icu4c/source/tools/escapesrc/Makefile.in b/icu4c/source/tools/escapesrc/Makefile.in index 8a0e2c1f803..7580ccdc315 100644 --- a/icu4c/source/tools/escapesrc/Makefile.in +++ b/icu4c/source/tools/escapesrc/Makefile.in @@ -95,6 +95,12 @@ $(TARGET) : $(OBJECTS) cd $(top_builddir) \ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status +# depends on ICU being built +gen-table: tblgen$(EXEEXT) + $(INVOKE) ./tblgen$(EXEEXT) > $(srcdir)/cptbl.h + +tblgen$(EXEEXT): tblgen.o + $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) $(LIBICUUC) ifeq (,$(MAKECMDGOALS)) -include $(DEPS) diff --git a/icu4c/source/tools/escapesrc/cptbl.h b/icu4c/source/tools/escapesrc/cptbl.h new file mode 100644 index 00000000000..898e16c925d --- /dev/null +++ b/icu4c/source/tools/escapesrc/cptbl.h @@ -0,0 +1,521 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html +// generated by tblgen. You weren't going to edit it by hand, were you? + +static const char cp1047_8859_1[256] = { + (char)0x00, /* 00 */ + (char)0x01, /* 01 */ + (char)0x02, /* 02 */ + (char)0x03, /* 03 */ + (char)0x9C, /* 04 */ + (char)0x09, /* 05 */ + (char)0x86, /* 06 */ + (char)0x7F, /* 07 */ + (char)0x97, /* 08 */ + (char)0x8D, /* 09 */ + (char)0x8E, /* 0A */ + (char)0x0B, /* 0B */ + (char)0x0C, /* 0C */ + (char)0x0D, /* 0D */ + (char)0x0E, /* 0E */ + (char)0x0F, /* 0F */ + (char)0x10, /* 10 */ + (char)0x11, /* 11 */ + (char)0x12, /* 12 */ + (char)0x13, /* 13 */ + (char)0x9D, /* 14 */ + (char)0x85, /* 15 */ + (char)0x08, /* 16 */ + (char)0x87, /* 17 */ + (char)0x18, /* 18 */ + (char)0x19, /* 19 */ + (char)0x92, /* 1A */ + (char)0x8F, /* 1B */ + (char)0x1C, /* 1C */ + (char)0x1D, /* 1D */ + (char)0x1E, /* 1E */ + (char)0x1F, /* 1F */ + (char)0x80, /* 20 */ + (char)0x81, /* 21 */ + (char)0x82, /* 22 */ + (char)0x83, /* 23 */ + (char)0x84, /* 24 */ + (char)0x0A, /* 25 */ + (char)0x17, /* 26 */ + (char)0x1B, /* 27 */ + (char)0x88, /* 28 */ + (char)0x89, /* 29 */ + (char)0x8A, /* 2A */ + (char)0x8B, /* 2B */ + (char)0x8C, /* 2C */ + (char)0x05, /* 2D */ + (char)0x06, /* 2E */ + (char)0x07, /* 2F */ + (char)0x90, /* 30 */ + (char)0x91, /* 31 */ + (char)0x16, /* 32 */ + (char)0x93, /* 33 */ + (char)0x94, /* 34 */ + (char)0x95, /* 35 */ + (char)0x96, /* 36 */ + (char)0x04, /* 37 */ + (char)0x98, /* 38 */ + (char)0x99, /* 39 */ + (char)0x9A, /* 3A */ + (char)0x9B, /* 3B */ + (char)0x14, /* 3C */ + (char)0x15, /* 3D */ + (char)0x9E, /* 3E */ + (char)0x1A, /* 3F */ + (char)0x20, /* 40 */ + (char)0xA0, /* 41 */ + (char)0xE2, /* 42 */ + (char)0xE4, /* 43 */ + (char)0xE0, /* 44 */ + (char)0xE1, /* 45 */ + (char)0xE3, /* 46 */ + (char)0xE5, /* 47 */ + (char)0xE7, /* 48 */ + (char)0xF1, /* 49 */ + (char)0xA2, /* 4A */ + (char)0x2E, /* 4B */ + (char)0x3C, /* 4C */ + (char)0x28, /* 4D */ + (char)0x2B, /* 4E */ + (char)0x7C, /* 4F */ + (char)0x26, /* 50 */ + (char)0xE9, /* 51 */ + (char)0xEA, /* 52 */ + (char)0xEB, /* 53 */ + (char)0xE8, /* 54 */ + (char)0xED, /* 55 */ + (char)0xEE, /* 56 */ + (char)0xEF, /* 57 */ + (char)0xEC, /* 58 */ + (char)0xDF, /* 59 */ + (char)0x21, /* 5A */ + (char)0x24, /* 5B */ + (char)0x2A, /* 5C */ + (char)0x29, /* 5D */ + (char)0x3B, /* 5E */ + (char)0x5E, /* 5F */ + (char)0x2D, /* 60 */ + (char)0x2F, /* 61 */ + (char)0xC2, /* 62 */ + (char)0xC4, /* 63 */ + (char)0xC0, /* 64 */ + (char)0xC1, /* 65 */ + (char)0xC3, /* 66 */ + (char)0xC5, /* 67 */ + (char)0xC7, /* 68 */ + (char)0xD1, /* 69 */ + (char)0xA6, /* 6A */ + (char)0x2C, /* 6B */ + (char)0x25, /* 6C */ + (char)0x5F, /* 6D */ + (char)0x3E, /* 6E */ + (char)0x3F, /* 6F */ + (char)0xF8, /* 70 */ + (char)0xC9, /* 71 */ + (char)0xCA, /* 72 */ + (char)0xCB, /* 73 */ + (char)0xC8, /* 74 */ + (char)0xCD, /* 75 */ + (char)0xCE, /* 76 */ + (char)0xCF, /* 77 */ + (char)0xCC, /* 78 */ + (char)0x60, /* 79 */ + (char)0x3A, /* 7A */ + (char)0x23, /* 7B */ + (char)0x40, /* 7C */ + (char)0x27, /* 7D */ + (char)0x3D, /* 7E */ + (char)0x22, /* 7F */ + (char)0xD8, /* 80 */ + (char)0x61, /* 81 */ + (char)0x62, /* 82 */ + (char)0x63, /* 83 */ + (char)0x64, /* 84 */ + (char)0x65, /* 85 */ + (char)0x66, /* 86 */ + (char)0x67, /* 87 */ + (char)0x68, /* 88 */ + (char)0x69, /* 89 */ + (char)0xAB, /* 8A */ + (char)0xBB, /* 8B */ + (char)0xF0, /* 8C */ + (char)0xFD, /* 8D */ + (char)0xFE, /* 8E */ + (char)0xB1, /* 8F */ + (char)0xB0, /* 90 */ + (char)0x6A, /* 91 */ + (char)0x6B, /* 92 */ + (char)0x6C, /* 93 */ + (char)0x6D, /* 94 */ + (char)0x6E, /* 95 */ + (char)0x6F, /* 96 */ + (char)0x70, /* 97 */ + (char)0x71, /* 98 */ + (char)0x72, /* 99 */ + (char)0xAA, /* 9A */ + (char)0xBA, /* 9B */ + (char)0xE6, /* 9C */ + (char)0xB8, /* 9D */ + (char)0xC6, /* 9E */ + (char)0xA4, /* 9F */ + (char)0xB5, /* A0 */ + (char)0x7E, /* A1 */ + (char)0x73, /* A2 */ + (char)0x74, /* A3 */ + (char)0x75, /* A4 */ + (char)0x76, /* A5 */ + (char)0x77, /* A6 */ + (char)0x78, /* A7 */ + (char)0x79, /* A8 */ + (char)0x7A, /* A9 */ + (char)0xA1, /* AA */ + (char)0xBF, /* AB */ + (char)0xD0, /* AC */ + (char)0x5B, /* AD */ + (char)0xDE, /* AE */ + (char)0xAE, /* AF */ + (char)0xAC, /* B0 */ + (char)0xA3, /* B1 */ + (char)0xA5, /* B2 */ + (char)0xB7, /* B3 */ + (char)0xA9, /* B4 */ + (char)0xA7, /* B5 */ + (char)0xB6, /* B6 */ + (char)0xBC, /* B7 */ + (char)0xBD, /* B8 */ + (char)0xBE, /* B9 */ + (char)0xDD, /* BA */ + (char)0xA8, /* BB */ + (char)0xAF, /* BC */ + (char)0x5D, /* BD */ + (char)0xB4, /* BE */ + (char)0xD7, /* BF */ + (char)0x7B, /* C0 */ + (char)0x41, /* C1 */ + (char)0x42, /* C2 */ + (char)0x43, /* C3 */ + (char)0x44, /* C4 */ + (char)0x45, /* C5 */ + (char)0x46, /* C6 */ + (char)0x47, /* C7 */ + (char)0x48, /* C8 */ + (char)0x49, /* C9 */ + (char)0xAD, /* CA */ + (char)0xF4, /* CB */ + (char)0xF6, /* CC */ + (char)0xF2, /* CD */ + (char)0xF3, /* CE */ + (char)0xF5, /* CF */ + (char)0x7D, /* D0 */ + (char)0x4A, /* D1 */ + (char)0x4B, /* D2 */ + (char)0x4C, /* D3 */ + (char)0x4D, /* D4 */ + (char)0x4E, /* D5 */ + (char)0x4F, /* D6 */ + (char)0x50, /* D7 */ + (char)0x51, /* D8 */ + (char)0x52, /* D9 */ + (char)0xB9, /* DA */ + (char)0xFB, /* DB */ + (char)0xFC, /* DC */ + (char)0xF9, /* DD */ + (char)0xFA, /* DE */ + (char)0xFF, /* DF */ + (char)0x5C, /* E0 */ + (char)0xF7, /* E1 */ + (char)0x53, /* E2 */ + (char)0x54, /* E3 */ + (char)0x55, /* E4 */ + (char)0x56, /* E5 */ + (char)0x57, /* E6 */ + (char)0x58, /* E7 */ + (char)0x59, /* E8 */ + (char)0x5A, /* E9 */ + (char)0xB2, /* EA */ + (char)0xD4, /* EB */ + (char)0xD6, /* EC */ + (char)0xD2, /* ED */ + (char)0xD3, /* EE */ + (char)0xD5, /* EF */ + (char)0x30, /* F0 */ + (char)0x31, /* F1 */ + (char)0x32, /* F2 */ + (char)0x33, /* F3 */ + (char)0x34, /* F4 */ + (char)0x35, /* F5 */ + (char)0x36, /* F6 */ + (char)0x37, /* F7 */ + (char)0x38, /* F8 */ + (char)0x39, /* F9 */ + (char)0xB3, /* FA */ + (char)0xDB, /* FB */ + (char)0xDC, /* FC */ + (char)0xD9, /* FD */ + (char)0xDA, /* FE */ + (char)0x9F, /* FF */ +}; + +static const bool oldIllegal[256] = { + false, /* U+0000 */ + false, /* U+0001 */ + false, /* U+0002 */ + false, /* U+0003 */ + false, /* U+0004 */ + false, /* U+0005 */ + false, /* U+0006 */ + false, /* U+0007 */ + false, /* U+0008 */ + false, /* U+0009 */ + false, /* U+000A */ + false, /* U+000B */ + false, /* U+000C */ + false, /* U+000D */ + false, /* U+000E */ + false, /* U+000F */ + false, /* U+0010 */ + false, /* U+0011 */ + false, /* U+0012 */ + false, /* U+0013 */ + false, /* U+0014 */ + false, /* U+0015 */ + false, /* U+0016 */ + false, /* U+0017 */ + false, /* U+0018 */ + false, /* U+0019 */ + false, /* U+001A */ + false, /* U+001B */ + false, /* U+001C */ + false, /* U+001D */ + false, /* U+001E */ + false, /* U+001F */ + true, /* U+0020 */ + true, /* U+0021 */ + true, /* U+0022 */ + true, /* U+0023 */ + false, /* U+0024 */ + true, /* U+0025 */ + true, /* U+0026 */ + true, /* U+0027 */ + true, /* U+0028 */ + true, /* U+0029 */ + true, /* U+002A */ + true, /* U+002B */ + true, /* U+002C */ + true, /* U+002D */ + true, /* U+002E */ + true, /* U+002F */ + true, /* U+0030 */ + true, /* U+0031 */ + true, /* U+0032 */ + true, /* U+0033 */ + true, /* U+0034 */ + true, /* U+0035 */ + true, /* U+0036 */ + true, /* U+0037 */ + true, /* U+0038 */ + true, /* U+0039 */ + true, /* U+003A */ + true, /* U+003B */ + true, /* U+003C */ + true, /* U+003D */ + true, /* U+003E */ + true, /* U+003F */ + false, /* U+0040 */ + true, /* U+0041 */ + true, /* U+0042 */ + true, /* U+0043 */ + true, /* U+0044 */ + true, /* U+0045 */ + true, /* U+0046 */ + true, /* U+0047 */ + true, /* U+0048 */ + true, /* U+0049 */ + true, /* U+004A */ + true, /* U+004B */ + true, /* U+004C */ + true, /* U+004D */ + true, /* U+004E */ + true, /* U+004F */ + true, /* U+0050 */ + true, /* U+0051 */ + true, /* U+0052 */ + true, /* U+0053 */ + true, /* U+0054 */ + true, /* U+0055 */ + true, /* U+0056 */ + true, /* U+0057 */ + true, /* U+0058 */ + true, /* U+0059 */ + true, /* U+005A */ + true, /* U+005B */ + false, /* U+005C */ + true, /* U+005D */ + true, /* U+005E */ + true, /* U+005F */ + false, /* U+0060 */ + true, /* U+0061 */ + true, /* U+0062 */ + true, /* U+0063 */ + true, /* U+0064 */ + true, /* U+0065 */ + true, /* U+0066 */ + true, /* U+0067 */ + true, /* U+0068 */ + true, /* U+0069 */ + true, /* U+006A */ + true, /* U+006B */ + true, /* U+006C */ + true, /* U+006D */ + true, /* U+006E */ + true, /* U+006F */ + true, /* U+0070 */ + true, /* U+0071 */ + true, /* U+0072 */ + true, /* U+0073 */ + true, /* U+0074 */ + true, /* U+0075 */ + true, /* U+0076 */ + true, /* U+0077 */ + true, /* U+0078 */ + true, /* U+0079 */ + true, /* U+007A */ + true, /* U+007B */ + true, /* U+007C */ + true, /* U+007D */ + true, /* U+007E */ + false, /* U+007F */ + false, /* U+0080 */ + false, /* U+0081 */ + false, /* U+0082 */ + false, /* U+0083 */ + false, /* U+0084 */ + false, /* U+0085 */ + false, /* U+0086 */ + false, /* U+0087 */ + false, /* U+0088 */ + false, /* U+0089 */ + false, /* U+008A */ + false, /* U+008B */ + false, /* U+008C */ + false, /* U+008D */ + false, /* U+008E */ + false, /* U+008F */ + false, /* U+0090 */ + false, /* U+0091 */ + false, /* U+0092 */ + false, /* U+0093 */ + false, /* U+0094 */ + false, /* U+0095 */ + false, /* U+0096 */ + false, /* U+0097 */ + false, /* U+0098 */ + false, /* U+0099 */ + false, /* U+009A */ + false, /* U+009B */ + false, /* U+009C */ + false, /* U+009D */ + false, /* U+009E */ + false, /* U+009F */ + false, /* U+00A0 */ + false, /* U+00A1 */ + false, /* U+00A2 */ + false, /* U+00A3 */ + false, /* U+00A4 */ + false, /* U+00A5 */ + false, /* U+00A6 */ + false, /* U+00A7 */ + false, /* U+00A8 */ + false, /* U+00A9 */ + false, /* U+00AA */ + false, /* U+00AB */ + false, /* U+00AC */ + false, /* U+00AD */ + false, /* U+00AE */ + false, /* U+00AF */ + false, /* U+00B0 */ + false, /* U+00B1 */ + false, /* U+00B2 */ + false, /* U+00B3 */ + false, /* U+00B4 */ + false, /* U+00B5 */ + false, /* U+00B6 */ + false, /* U+00B7 */ + false, /* U+00B8 */ + false, /* U+00B9 */ + false, /* U+00BA */ + false, /* U+00BB */ + false, /* U+00BC */ + false, /* U+00BD */ + false, /* U+00BE */ + false, /* U+00BF */ + false, /* U+00C0 */ + false, /* U+00C1 */ + false, /* U+00C2 */ + false, /* U+00C3 */ + false, /* U+00C4 */ + false, /* U+00C5 */ + false, /* U+00C6 */ + false, /* U+00C7 */ + false, /* U+00C8 */ + false, /* U+00C9 */ + false, /* U+00CA */ + false, /* U+00CB */ + false, /* U+00CC */ + false, /* U+00CD */ + false, /* U+00CE */ + false, /* U+00CF */ + false, /* U+00D0 */ + false, /* U+00D1 */ + false, /* U+00D2 */ + false, /* U+00D3 */ + false, /* U+00D4 */ + false, /* U+00D5 */ + false, /* U+00D6 */ + false, /* U+00D7 */ + false, /* U+00D8 */ + false, /* U+00D9 */ + false, /* U+00DA */ + false, /* U+00DB */ + false, /* U+00DC */ + false, /* U+00DD */ + false, /* U+00DE */ + false, /* U+00DF */ + false, /* U+00E0 */ + false, /* U+00E1 */ + false, /* U+00E2 */ + false, /* U+00E3 */ + false, /* U+00E4 */ + false, /* U+00E5 */ + false, /* U+00E6 */ + false, /* U+00E7 */ + false, /* U+00E8 */ + false, /* U+00E9 */ + false, /* U+00EA */ + false, /* U+00EB */ + false, /* U+00EC */ + false, /* U+00ED */ + false, /* U+00EE */ + false, /* U+00EF */ + false, /* U+00F0 */ + false, /* U+00F1 */ + false, /* U+00F2 */ + false, /* U+00F3 */ + false, /* U+00F4 */ + false, /* U+00F5 */ + false, /* U+00F6 */ + false, /* U+00F7 */ + false, /* U+00F8 */ + false, /* U+00F9 */ + false, /* U+00FA */ + false, /* U+00FB */ + false, /* U+00FC */ + false, /* U+00FD */ + false, /* U+00FE */ + false, /* U+00FF */ +}; + diff --git a/icu4c/source/tools/escapesrc/escapesrc.cpp b/icu4c/source/tools/escapesrc/escapesrc.cpp index e28c5808cf9..4bca5025bd1 100644 --- a/icu4c/source/tools/escapesrc/escapesrc.cpp +++ b/icu4c/source/tools/escapesrc/escapesrc.cpp @@ -27,6 +27,10 @@ static const char kQUOT = 0x27, kDBLQ = 0x22; +# include "cptbl.h" + +# define cp1047_to_8859(c) cp1047_8859_1[c] + std::string prog; void usage() { @@ -150,7 +154,7 @@ bool appendUtf8(std::string &outstr, bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) { size_t pos = origpos + 3; std::string outstr; - outstr += (kDBLQ); + outstr += '\"'; // local encoding for(;pos0; pos++,trail--) { + linestr[pos2] = cp1047_to_8859(linestr[pos2]); + } +#endif + // Proceed to decode utf-8 const uint8_t *s = (const uint8_t*) (linestr.c_str()); int32_t i = pos; int32_t length = linestr.size(); UChar32 c; + if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) { +#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) + linestr[pos] = old_byte; // put it back +#endif + continue; // single code point not previously legal for \u escaping + } - if(U8_IS_SINGLE((uint8_t)s[i])) continue; // single code point - + // otherwise, convert it to \u / \U { U8_NEXT(s, i, length, c); } if(c<0) { fprintf(stderr, "Illegal utf-8 sequence\n"); + fprintf(stderr, "Line: >>%s<<\n", linestr.c_str()); return true; } diff --git a/icu4c/source/tools/escapesrc/tblgen.cpp b/icu4c/source/tools/escapesrc/tblgen.cpp new file mode 100644 index 00000000000..e94fa9036bc --- /dev/null +++ b/icu4c/source/tools/escapesrc/tblgen.cpp @@ -0,0 +1,80 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" +#include "unicode/uniset.h" +#include + +static const char *kConverter = "ibm-1047"; + +int main(int argc, const char *argv[]) { + printf("// %s\n", U_COPYRIGHT_STRING); + printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n"); + printf("\n"); + + UErrorCode status = U_ZERO_ERROR; + LocalUConverterPointer cnv(ucnv_open(kConverter, &status)); + + if(U_FAILURE(status)) { + fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status)); + return 1; + } + + printf("static const char cp1047_8859_1[256] = { \n"); + for(int i=0x00; i<0x100; i++) { + char cp1047[1]; + cp1047[0] = i; + UChar u[1]; + UChar *target = u; + const char *source = cp1047; + ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status); + if(U_FAILURE(status)) { + fprintf(stderr, "Conversion failure at #%X: %s\n", i, u_errorName(status)); + return 2; + } + printf(" (char)0x%02X, /* %02X */\n", u[0], i); + } + printf("};\n\n"); + + // + // UnicodeSet oldIllegal("[:print:]", status); // [a-zA-Z0-9_}{#)(><%:;.?*+-/^&|~!=,\\u005b\\u005d\\u005c]", status); + UnicodeSet oldIllegal("[0-9 a-z A-Z " + "_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . " + "? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status); + + /* + +http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 ) page 10, section 2.2 says: + +1 The basic source character set consists of 96 characters: the space character, the control characters repre- 15) +senting horizontal tab, vertical tab, form feed, and new-line, plus the following 91 graphical characters: +a b c d e f g h i j k l m n opqrstuvwxyz +A B C D E F G H I J K L M N OPQRSTUVWXYZ +0 12 3 4 5 6 7 8 9 + _ { } [ ] # ( ) < > % : ; . ?*+-/^&|~!=,\" +2 The universal-character-name construct provides a way to name other characters. hex-quad: +hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit +universal-character-name: \u hex-quad +\U hex-quad hex-quad +The character designated by the universal-character-name \UNNNNNNNN is that character whose character short name in ISO/IEC 10646 is NNNNNNNN; the character designated by the universal-character-name \uNNNN is that character whose character short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value for a universal character name is less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the uni- versal character name designates a character in the basic source character set, then the program is ill- formed. + + +So basically: printable ASCII plus 0x00-0x1F, 0x7F-0x9F, was all illegal. + +Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html + + */ + + + + printf("static const bool oldIllegal[256] = { \n"); + for(UChar i=0x00; i<0x100;i++) { + printf(" %s, /* U+%04X */\n", + (oldIllegal.contains(i))?" true":"false", + i); + } + printf("};\n\n"); + + return 0; +}