From: Steven R. Loomis Date: Wed, 7 Feb 2018 23:31:40 +0000 (+0000) Subject: ICU-13083 cleanup unescaper, use portable calls X-Git-Tag: release-61-rc~123 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a16ecdad924fb7dd6986bd1afbb82b6d623be20f;p=icu ICU-13083 cleanup unescaper, use portable calls X-SVN-Rev: 40853 --- diff --git a/icu4c/source/tools/escapesrc/escapesrc.cpp b/icu4c/source/tools/escapesrc/escapesrc.cpp index 5e9648476e9..53f6a40db48 100644 --- a/icu4c/source/tools/escapesrc/escapesrc.cpp +++ b/icu4c/source/tools/escapesrc/escapesrc.cpp @@ -4,39 +4,76 @@ #include #include #include -#include #include #include #include #include -// with caution: +// Include this even though we aren't linking against it. #include "unicode/utf8.h" +// Include this here, to avoid needing to compile and link part of common lib +// (bootstrapping problem) +#include "utf_impl.cpp" + +/** + * What is this? + * or even: + * what IS this?? + * + * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code + * in utf-8 into.. something else. Something consumable by certain compilers (Solaris, xlC) + * which aren't quite there. + * + * - u"" or u'' gets converted to u"\uNNNN" or u'\uNNNN' + * - u8"" gets converted to "\xAA\xBB\xCC\xDD" etc. + * - if the system is EBCDIC-based, well, that's taken into account. + * + * Usage: + * escapesrc infile.cpp outfile.cpp + * Normally this is invoked by the build stage, with a rule such as: + * + * _%.cpp: $(srcdir)/%.cpp + * @$(BINDIR)/escapesrc$(EXEEXT) $< $@ + * %.o: _%.cpp + * $(COMPILE.cc) ... $@ $< + * + * Naturally, 'escapesrc' has to be excluded from said build rule. + + */ + + static const char kSPACE = 0x20, kTAB = 0x09, kLF = 0x0A, kCR = 0x0D; - // kHASH = 0x23, - // kSLASH = 0x2f, - // kSTAR = 0x2A, +// This contains a codepage and ISO 14882:1998 illegality table. +// Use "make gen-table" to rebuild it. # include "cptbl.h" +// For convenience # define cp1047_to_8859(c) cp1047_8859_1[c] +// Our app's name std::string prog; +/** + * Give the usual 1-line documentation and exit + */ void usage() { fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); } - +/** + * Delete the output file (if any) + * We want to delete even if we didn't generate, because it might be stale. + */ int cleanup(const std::string &outfile) { const char *outstr = outfile.c_str(); if(outstr && *outstr) { - int rc = unlink(outstr); + int rc = std::remove(outstr); if(rc == 0) { fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); return 0; @@ -44,7 +81,7 @@ int cleanup(const std::string &outfile) { if( errno == ENOENT ) { return 0; // File did not exist - no error. } else { - perror("unlink"); + perror("std::remove"); return 1; } } @@ -52,16 +89,12 @@ int cleanup(const std::string &outfile) { return 0; } -// inline bool hasNonAscii(const char *line, size_t len) { -// const unsigned char *uline = reinterpret_cast(line); -// for(size_t i=0;i 0x7F) { -// return true; -// } -// } -// return false; -// } - +/** + * Skip across any known whitespace. + * @param p startpoint + * @param e limit + * @return first non-whitespace char + */ inline const char *skipws(const char *p, const char *e) { for(;p0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { @@ -345,6 +360,12 @@ bool fixLine(int /*no*/, std::string &linestr) { return false; } +/** + * Convert a whole file + * @param infile + * @param outfile + * @return 1 on err, 0 otherwise + */ int convert(const std::string &infile, const std::string &outfile) { fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); @@ -386,6 +407,9 @@ int convert(const std::string &infile, const std::string &outfile) { return 0; } +/** + * Main function + */ int main(int argc, const char *argv[]) { prog = argv[0]; @@ -399,6 +423,3 @@ int main(int argc, const char *argv[]) { return convert(infile, outfile); } - - -#include "utf_impl.cpp"