From: Markus Scherer Date: Sat, 25 Feb 2017 01:08:35 +0000 (+0000) Subject: ICU-12992 experimental UnicodeString with Char16Ptr and ConstChar16Ptr in constructor... X-Git-Tag: release-59-rc~119^2~18 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7dfefa55f5247e3827ccde43082e9670b390ded2;p=icu ICU-12992 experimental UnicodeString with Char16Ptr and ConstChar16Ptr in constructors and extract() X-SVN-Rev: 39706 --- diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 270229b849a..70b87ac553e 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -28,6 +28,7 @@ * \brief C++ API: Unicode String */ +#include <cstddef> #include "unicode/utypes.h" #include "unicode/rep.h" #include "unicode/std_string.h" @@ -56,6 +57,173 @@ u_strlen(const UChar *s); U_NAMESPACE_BEGIN +// TODO begin experiment --------------- + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif defined(__clang__) || defined(__GNUC__) +# define U_ALIASING_BARRIER(ptr) asm volatile("" : "+rm"(ptr)) +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API Char16Ptr { +public: + /** + * Copies the pointer. + * @draft ICU 59 + */ + inline Char16Ptr(char16_t *p); + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 + /** + * Converts the pointer to char16_t *. + * @draft ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Pointer access. 
+ * @draft ICU 59 + */ + inline operator char16_t *(); + +private: +#ifdef U_ALIASING_BARRIER + template<typename T> char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<char16_t *>(t); + } + + char16_t *p; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} + +Char16Ptr::operator char16_t *() { return p; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } + +Char16Ptr::operator char16_t *() { return u.cp; } + +#endif + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types, + * and from NULL. + * @draft ICU 59 + */ +class U_COMMON_API ConstChar16Ptr { +public: + /** + * Copies the pointer. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); + /** + * Converts the pointer to const char16_t *. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#if U_SIZEOF_WCHAR_T==2 + /** + * Converts the pointer to const char16_t *. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @draft ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + /** + * Pointer access. 
+ * @draft ICU 59 + */ + inline operator const char16_t *() const; + +private: +#ifdef U_ALIASING_BARRIER + template<typename T> const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<const char16_t *>(t); + } + + const char16_t *p; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} + +ConstChar16Ptr::operator const char16_t *() const { return p; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } + +ConstChar16Ptr::operator const char16_t *() const { return u.cp; } + +#endif + +// TODO end experiment ----------------- + #if !UCONFIG_NO_BREAK_ITERATION class BreakIterator; // unicode/brkiter.h #endif @@ -1454,7 +1622,7 @@ public: */ inline void extract(int32_t start, int32_t length, - UChar *dst, + Char16Ptr dst, int32_t dstStart = 0) const; /** @@ -1479,7 +1647,7 @@ public: * @stable ICU 2.0 */ int32_t - extract(UChar *dest, int32_t destCapacity, + extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const; /** @@ -2070,7 +2238,7 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. 
- * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. @@ -3004,6 +3172,46 @@ public: */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const UChar *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(static_cast<const UChar *>(ConstChar16Ptr(text))) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * wchar_t * constructor. + * Delegates to UnicodeString(const UChar *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(static_cast<const UChar *>(ConstChar16Ptr(text))) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text nullptr + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); + /** * UChar* constructor. * @param text The characters to place in the UnicodeString. @@ -3014,6 +3222,37 @@ public: UnicodeString(const UChar *text, int32_t textLength); + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t). 
+ * @param text UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(static_cast<const UChar *>(ConstChar16Ptr(text)), length) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * wchar_t * constructor. + * Delegates to UnicodeString(const UChar *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(static_cast<const UChar *>(ConstChar16Ptr(text)), length) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @draft ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + /** * Readonly-aliasing UChar* constructor. * The text will be used for the UnicodeString object, but @@ -3037,7 +3276,7 @@ public: * @stable ICU 2.0 */ UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** @@ -3050,7 +3289,7 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. @@ -3060,6 +3299,40 @@ public: */ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(UChar *, int32_t, int32_t). 
+ * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(static_cast<UChar *>(Char16Ptr(buffer)), buffLength, buffCapacity) {} + +#if U_SIZEOF_WCHAR_T==2 + /** + * Writable-aliasing wchar_t * constructor. + * Delegates to UnicodeString(UChar *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(static_cast<UChar *>(Char16Ptr(buffer)), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @draft ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); + #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION /** @@ -3772,6 +4045,18 @@ UnicodeString::UnicodeString() { fUnion.fStackFields.fLengthAndFlags=kShortString; } +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + //======================================== // Read-only implementation methods //======================================== @@ -4364,7 +4649,7 @@ UnicodeString::doExtract(int32_t start, inline void UnicodeString::extract(int32_t start, int32_t 
_length, - UChar *target, + Char16Ptr target, int32_t targetStart) const { doExtract(start, _length, target, targetStart); } diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index bb4de3afa7e..a4d921948f5 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -218,7 +218,7 @@ UnicodeString::UnicodeString(const UChar *text, } UnicodeString::UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kReadonlyAlias; if(text == NULL) { @@ -234,7 +234,8 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); + setArray(const_cast<UChar *>(static_cast<const UChar *>(text)), textLength, + isTerminated ? textLength + 1 : textLength); } } @@ -873,7 +874,7 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(UChar *dest, int32_t destCapacity, +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) {