From bf17d52293a69e08c06d69351bd36706b62444f3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 8 Mar 2017 22:08:12 +0000 Subject: [PATCH] ICU-12992 add OldUChar type to match ICU 58 UChar; add public conversion functions from char16_t * to UChar * and OldUChar * with aliasing barrier X-SVN-Rev: 39747 --- icu4c/source/common/uinvchar.h | 6 +-- icu4c/source/common/unicode/char16ptr.h | 57 +++++++++++++++++++++++ icu4c/source/common/unicode/normlzr.h | 10 +--- icu4c/source/common/unicode/umachine.h | 29 ++++++++++++ icu4c/source/common/unicode/unistr.h | 15 ++---- icu4c/source/test/iotest/stream.cpp | 11 +---- icu4c/source/tools/ctestfw/datamap.cpp | 4 +- icu4c/source/tools/gennorm2/n2builder.cpp | 10 ++-- icu4c/source/tools/genrb/reslist.h | 3 +- icu4c/source/tools/genrb/wrtxml.cpp | 5 +- icu4c/source/tools/toolutil/dbgutil.cpp | 5 +- icu4c/source/tools/toolutil/ppucd.cpp | 5 +- icu4c/source/tools/toolutil/toolutil.h | 14 ------ icu4c/source/tools/toolutil/xmlparser.cpp | 5 +- 14 files changed, 111 insertions(+), 68 deletions(-) diff --git a/icu4c/source/common/uinvchar.h b/icu4c/source/common/uinvchar.h index 2a960bdfca4..c4f9f88b9ad 100644 --- a/icu4c/source/common/uinvchar.h +++ b/icu4c/source/common/uinvchar.h @@ -64,11 +64,7 @@ uprv_isInvariantUString(const UChar *s, int32_t length); */ U_INTERNAL inline UBool U_EXPORT2 uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - const char16_t *p = s.getBuffer(); -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return uprv_isInvariantUString(reinterpret_cast<const UChar *>(p), s.length()); + return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length()); } #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/char16ptr.h b/icu4c/source/common/unicode/char16ptr.h index d484cfdf8ac..2e85aa71aba 100644 --- a/icu4c/source/common/unicode/char16ptr.h +++ b/icu4c/source/common/unicode/char16ptr.h @@ -14,6 +14,7 @@ * \file * \brief C++ API: char16_t pointer wrappers with * implicit 
conversion to/from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. */ U_NAMESPACE_BEGIN @@ -227,6 +228,62 @@ const char16_t *ConstChar16Ptr::get() const { return u.cp; } #endif +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const UChar * + * @draft ICU 59 + */ +inline U_COMMON_API const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const UChar *>(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @draft ICU 59 + */ +inline U_COMMON_API UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<UChar *>(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const OldUChar *>(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. 
+ * @param p pointer + * @return p as OldUChar * + * @draft ICU 59 + */ +inline U_COMMON_API OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<OldUChar *>(p); +} + U_NAMESPACE_END #endif // __CHAR16PTR_H__ diff --git a/icu4c/source/common/unicode/normlzr.h b/icu4c/source/common/unicode/normlzr.h index 02ef13f2fd3..0141058fb00 100644 --- a/icu4c/source/common/unicode/normlzr.h +++ b/icu4c/source/common/unicode/normlzr.h @@ -795,15 +795,9 @@ inline int32_t Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { - const char16_t *p1 = s1.getBuffer(); - const char16_t *p2 = s2.getBuffer(); -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p1); - U_ALIASING_BARRIER(p2); -#endif // all argument checking is done in unorm_compare - return unorm_compare(reinterpret_cast<const UChar *>(p1), s1.length(), - reinterpret_cast<const UChar *>(p2), s2.length(), + return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), + toUCharPtr(s2.getBuffer()), s2.length(), options, &errorCode); } diff --git a/icu4c/source/common/unicode/umachine.h b/icu4c/source/common/unicode/umachine.h index 4c8b5c1f1e2..8c96c15cfb6 100644 --- a/icu4c/source/common/unicode/umachine.h +++ b/icu4c/source/common/unicode/umachine.h @@ -313,6 +313,7 @@ typedef int8_t UBool; * * @stable ICU 4.4 */ + #if 1 // #if 1 is normal. UChar defaults to char16_t in C++. // For configuration testing of UChar=uint16_t temporarily change this to #if 0. @@ -321,6 +322,7 @@ typedef int8_t UBool; #elif !defined(UCHAR_TYPE) # define UCHAR_TYPE uint16_t #endif + #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) // Inside the ICU library code, never configurable. @@ -333,6 +335,33 @@ typedef int8_t UBool; typedef uint16_t UChar; #endif +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. 
+ * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if available, otherwise uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. + * + * @draft ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + /** * Define UChar32 as a type for single Unicode code points. * UChar32 is a signed 32-bit integer (same as int32_t). diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 835e64ff774..2aa5d8e3a34 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3495,13 +3495,6 @@ protected: virtual UChar32 getChar32At(int32_t offset) const; private: - static inline const UChar *constUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast<const UChar *>(p); - } - // For char* constructors. Could be made public. UnicodeString &setToUTF8(StringPiece utf8); // For extract(char*). 
@@ -4367,7 +4360,7 @@ UnicodeString::startsWith(const UnicodeString& srcText, inline UBool UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } @@ -4375,7 +4368,7 @@ UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { inline UBool UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4398,7 +4391,7 @@ inline UBool UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars)); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; @@ -4409,7 +4402,7 @@ UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(constUCharPtr(srcChars + srcStart)); + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; diff --git a/icu4c/source/test/iotest/stream.cpp b/icu4c/source/test/iotest/stream.cpp index 892e0d7d58a..424d4e33af5 100644 --- a/icu4c/source/test/iotest/stream.cpp +++ b/icu4c/source/test/iotest/stream.cpp @@ -49,13 +49,6 @@ const char C_NEW_LINE[] = {'\n',0}; #endif U_CDECL_END -inline const UChar *constUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast<const UChar *>(p); -} - U_CDECL_BEGIN static void U_CALLCONV TestStream(void) { @@ -113,12 +106,12 @@ static void U_CALLCONV TestStream(void) inTestStream >> inStr >> inStr2; if 
(inStr.compare(thisMu) != 0) { - u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"tHis\\u03BC\"\n", inStrC); } if (inStr2.compare(mu) != 0) { - u_austrncpy(inStrC, constUCharPtr(inStr.getBuffer()), inStr.length()); + u_austrncpy(inStrC, toUCharPtr(inStr.getBuffer()), inStr.length()); inStrC[inStr.length()] = 0; log_err("Got: \"%s\", Expected: \"mu\"\n", inStrC); } diff --git a/icu4c/source/tools/ctestfw/datamap.cpp b/icu4c/source/tools/ctestfw/datamap.cpp index d85341ac40c..96241a0657b 100644 --- a/icu4c/source/tools/ctestfw/datamap.cpp +++ b/icu4c/source/tools/ctestfw/datamap.cpp @@ -10,8 +10,8 @@ #include "unicode/datamap.h" #include "unicode/resbund.h" +#include "unicode/unistr.h" #include "hash.h" -#include "toolutil.h" #include DataMap::~DataMap() {} @@ -21,7 +21,7 @@ int32_t DataMap::utoi(const UnicodeString &s) const { char ch[256]; - const UChar *u = constUCharPtr(s.getBuffer()); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index 5c7c9c0a016..98b10166743 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -282,7 +282,7 @@ uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { static UBool isWellFormed(const UnicodeString &s) { UErrorCode errorCode=U_ZERO_ERROR; - u_strToUTF8(NULL, 0, NULL, constUCharPtr(s.getBuffer()), s.length(), &errorCode); + u_strToUTF8(NULL, 0, NULL, toUCharPtr(s.getBuffer()), s.length(), &errorCode); return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; } @@ -315,7 +315,7 @@ void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString (int)phase, (long)c); exit(U_INVALID_FORMAT_ERROR); } - int32_t 
numCP=u_countChar32(constUCharPtr(m.getBuffer()), m.length()); + int32_t numCP=u_countChar32(toUCharPtr(m.getBuffer()), m.length()); if(numCP!=2) { fprintf(stderr, "error in gennorm2 phase %d: " @@ -452,7 +452,7 @@ Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { Norm &norm=norms[value]; const UnicodeString &m=*norm.mapping; UnicodeString *decomposed=NULL; - const UChar *s=constUCharPtr(m.getBuffer()); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t length=m.length(); int32_t prev, i=0; UChar32 c; @@ -607,7 +607,7 @@ Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) { if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { return; // writeMapping() will complain about it and print the code point. } - const UChar *s=constUCharPtr(m.getBuffer()); + const UChar *s=toUCharPtr(m.getBuffer()); int32_t i=0; UChar32 c; while(i(p); -} - -inline UChar *UCharPtr(char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - /** * ErrorCode subclass for use in ICU command-line tools. * The destructor calls handleFailure() which calls exit(errorCode) when isFailure(). 
diff --git a/icu4c/source/tools/toolutil/xmlparser.cpp b/icu4c/source/tools/toolutil/xmlparser.cpp index baf9a73bc3e..1ca111c0756 100644 --- a/icu4c/source/tools/toolutil/xmlparser.cpp +++ b/icu4c/source/tools/toolutil/xmlparser.cpp @@ -21,7 +21,6 @@ #include "unicode/ucnv.h" #include "unicode/regex.h" #include "filestrm.h" -#include "toolutil.h" #include "xmlparser.h" #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION @@ -210,7 +209,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { goto exit; } - buffer=UCharPtr(src.getBuffer(bytesLength)); + buffer=toUCharPtr(src.getBuffer(bytesLength)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; @@ -279,7 +278,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { pb=bytes; for(;;) { length=src.length(); - buffer=UCharPtr(src.getBuffer(capacity)); + buffer=toUCharPtr(src.getBuffer(capacity)); if(buffer==NULL) { // unexpected failure to reserve some string capacity errorCode=U_MEMORY_ALLOCATION_ERROR; -- 2.40.0