From: Fredrik Roubert Date: Wed, 20 Feb 2019 23:23:02 +0000 (+0100) Subject: ICU-20158 Pass ByteSink all the way to _uloc_(addLikely|minimize)Subtags(). X-Git-Tag: release-64-rc~44 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=01c69cfa8503a954de65cc4c0a9e8447f1689f10;p=icu ICU-20158 Pass ByteSink all the way to _uloc_(addLikely|minimize)Subtags(). This eliminates the need for scratch buffers in any code path that ends with these functions and also eliminates the need for counting bytes, something that ByteSink will now handle correctly when needed. Existing calls to uloc_addLikelySubtags() and uloc_minimizeSubtags() throughout ICU4C implementation code are also updated to instead use either the Locale or ulocimp_* functions with the new API. None of this should have any externally visible effect, it's all about cleaning up implementation internals. --- diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index f1b25f4600b..a6a518201c2 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -740,46 +740,10 @@ Locale::addLikelySubtags(UErrorCode& status) { return; } - // The maximized locale ID string is often longer, but there is no good - // heuristic to estimate just how much longer. Leave that to CharString. 
CharString maximizedLocaleID; - int32_t maximizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName)); - - char* buffer; - int32_t reslen; - - for (;;) { - buffer = maximizedLocaleID.getAppendBuffer( - /*minCapacity=*/maximizedLocaleIDCapacity, - /*desiredCapacityHint=*/maximizedLocaleIDCapacity, - maximizedLocaleIDCapacity, - status); - - if (U_FAILURE(status)) { - return; - } - - reslen = uloc_addLikelySubtags( - fullName, - buffer, - maximizedLocaleIDCapacity, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - maximizedLocaleIDCapacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - maximizedLocaleID.append(buffer, reslen, status); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators provided by CharString. + { + CharStringByteSink sink(&maximizedLocaleID); + ulocimp_addLikelySubtags(fullName, sink, &status); } if (U_FAILURE(status)) { @@ -798,50 +762,10 @@ Locale::minimizeSubtags(UErrorCode& status) { return; } - // Except for a few edge cases (like the empty string, that is minimized to - // "en__POSIX"), minimized locale ID strings will be either the same length - // or shorter than their input. CharString minimizedLocaleID; - int32_t minimizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName)); - - char* buffer; - int32_t reslen; - - for (;;) { - buffer = minimizedLocaleID.getAppendBuffer( - /*minCapacity=*/minimizedLocaleIDCapacity, - /*desiredCapacityHint=*/minimizedLocaleIDCapacity, - minimizedLocaleIDCapacity, - status); - - if (U_FAILURE(status)) { - return; - } - - reslen = uloc_minimizeSubtags( - fullName, - buffer, - minimizedLocaleIDCapacity, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - // Because of the internal minimal buffer size of CharString, I can't - // think of any input data for which this could possibly ever happen. - // Maybe it would be better replaced with an assertion instead? 
- minimizedLocaleIDCapacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - minimizedLocaleID.append(buffer, reslen, status); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators provided by CharString. + { + CharStringByteSink sink(&minimizedLocaleID); + ulocimp_minimizeSubtags(fullName, sink, &status); } if (U_FAILURE(status)) { diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index 8ceb352bd37..d4df914a9bb 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -19,6 +19,7 @@ * that then do not depend on resource bundle code and likely-subtags data. */ +#include "unicode/bytestream.h" #include "unicode/utypes.h" #include "unicode/locid.h" #include "unicode/putil.h" @@ -26,6 +27,8 @@ #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/uscript.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "ulocimp.h" @@ -145,12 +148,10 @@ static const char* const unknownRegion = "ZZ"; * @param trailing Any trailing data to append to the new tag. * @param trailingLength The length of the trailing data. * @param alternateTags A string containing any alternate tags. - * @param tag The output buffer. - * @param tagCapacity The capacity of the output buffer. + * @param sink The output sink receiving the tag string. * @param err A pointer to a UErrorCode for error reporting. - * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 
**/ -static int32_t U_CALLCONV +static void U_CALLCONV createTagStringWithAlternates( const char* lang, int32_t langLength, @@ -161,16 +162,13 @@ createTagStringWithAlternates( const char* trailing, int32_t trailingLength, const char* alternateTags, - char* tag, - int32_t tagCapacity, + icu::ByteSink& sink, UErrorCode* err) { if (U_FAILURE(*err)) { goto error; } - else if (tag == NULL || - tagCapacity <= 0 || - langLength >= ULOC_LANG_CAPACITY || + else if (langLength >= ULOC_LANG_CAPACITY || scriptLength >= ULOC_SCRIPT_CAPACITY || regionLength >= ULOC_COUNTRY_CAPACITY) { goto error; @@ -184,7 +182,6 @@ createTagStringWithAlternates( **/ char tagBuffer[ULOC_FULLNAME_CAPACITY]; int32_t tagLength = 0; - int32_t capacityRemaining = tagCapacity; UBool regionAppended = FALSE; if (langLength > 0) { @@ -311,55 +308,28 @@ createTagStringWithAlternates( } } - { - const int32_t toCopy = - tagLength >= tagCapacity ? tagCapacity : tagLength; - - /** - * Copy the partial tag from our internal buffer to the supplied - * target. - **/ - uprv_memcpy( - tag, - tagBuffer, - toCopy); - - capacityRemaining -= toCopy; - } + /** + * Copy the partial tag from our internal buffer to the supplied + * target. + **/ + sink.Append(tagBuffer, tagLength); if (trailingLength > 0) { - if (*trailing != '@' && capacityRemaining > 0) { - tag[tagLength++] = '_'; - --capacityRemaining; - if (capacityRemaining > 0 && !regionAppended) { + if (*trailing != '@') { + sink.Append("_", 1); + if (!regionAppended) { /* extra separator is required */ - tag[tagLength++] = '_'; - --capacityRemaining; + sink.Append("_", 1); } } - if (capacityRemaining > 0) { - /* - * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we - * don't know if the user-supplied buffers overlap. - */ - const int32_t toCopy = - trailingLength >= capacityRemaining ? 
capacityRemaining : trailingLength; - - uprv_memmove( - &tag[tagLength], - trailing, - toCopy); - } + /* + * Copy the trailing data into the supplied buffer. + */ + sink.Append(trailing, trailingLength); } - tagLength += trailingLength; - - return u_terminateChars( - tag, - tagCapacity, - tagLength, - err); + return; } error: @@ -373,8 +343,6 @@ error: U_SUCCESS(*err)) { *err = U_ILLEGAL_ARGUMENT_ERROR; } - - return -1; } /** @@ -398,12 +366,10 @@ error: * @param regionLength The length of the region tag. * @param trailing Any trailing data to append to the new tag. * @param trailingLength The length of the trailing data. - * @param tag The output buffer. - * @param tagCapacity The capacity of the output buffer. + * @param sink The output sink receiving the tag string. * @param err A pointer to a UErrorCode for error reporting. - * @return The length of the tag string, which may be greater than tagCapacity. **/ -static int32_t U_CALLCONV +static void U_CALLCONV createTagString( const char* lang, int32_t langLength, @@ -413,11 +379,10 @@ createTagString( int32_t regionLength, const char* trailing, int32_t trailingLength, - char* tag, - int32_t tagCapacity, + icu::ByteSink& sink, UErrorCode* err) { - return createTagStringWithAlternates( + createTagStringWithAlternates( lang, langLength, script, @@ -427,8 +392,7 @@ createTagString( trailing, trailingLength, NULL, - tag, - tagCapacity, + sink, err); } @@ -576,7 +540,7 @@ error: goto exit; } -static int32_t U_CALLCONV +static UBool U_CALLCONV createLikelySubtagsString( const char* lang, int32_t langLength, @@ -586,17 +550,14 @@ createLikelySubtagsString( int32_t regionLength, const char* variants, int32_t variantsLength, - char* tag, - int32_t tagCapacity, - UErrorCode* err) -{ + icu::ByteSink& sink, + UErrorCode* err) { /** * ULOC_FULLNAME_CAPACITY will provide enough capacity * that we can build a string that contains the language, * script and region code without worrying about overrunning * the user-supplied 
buffer. **/ - char tagBuffer[ULOC_FULLNAME_CAPACITY]; char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; if(U_FAILURE(*err)) { @@ -610,25 +571,28 @@ createLikelySubtagsString( const char* likelySubtags = NULL; - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + sink, + err); + } if(U_FAILURE(*err)) { goto error; } likelySubtags = findLikelySubtags( - tagBuffer, + tagBuffer.data(), likelySubtagsBuffer, sizeof(likelySubtagsBuffer), err); @@ -640,7 +604,7 @@ createLikelySubtagsString( /* Always use the language tag from the maximal string, since it may be more specific than the one provided. */ - return createTagStringWithAlternates( + createTagStringWithAlternates( NULL, 0, NULL, @@ -650,9 +614,9 @@ createLikelySubtagsString( variants, variantsLength, likelySubtags, - tag, - tagCapacity, + sink, err); + return TRUE; } } @@ -663,25 +627,28 @@ createLikelySubtagsString( const char* likelySubtags = NULL; - createTagString( - lang, - langLength, - script, - scriptLength, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + NULL, + 0, + sink, + err); + } if(U_FAILURE(*err)) { goto error; } likelySubtags = findLikelySubtags( - tagBuffer, + tagBuffer.data(), likelySubtagsBuffer, sizeof(likelySubtagsBuffer), err); @@ -693,7 +660,7 @@ createLikelySubtagsString( /* Always use the language tag from the maximal string, since it may be more specific than the one provided. 
*/ - return createTagStringWithAlternates( + createTagStringWithAlternates( NULL, 0, NULL, @@ -703,9 +670,9 @@ createLikelySubtagsString( variants, variantsLength, likelySubtags, - tag, - tagCapacity, + sink, err); + return TRUE; } } @@ -716,25 +683,28 @@ createLikelySubtagsString( const char* likelySubtags = NULL; - createTagString( - lang, - langLength, - NULL, - 0, - region, - regionLength, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + NULL, + 0, + sink, + err); + } if(U_FAILURE(*err)) { goto error; } likelySubtags = findLikelySubtags( - tagBuffer, + tagBuffer.data(), likelySubtagsBuffer, sizeof(likelySubtagsBuffer), err); @@ -746,7 +716,7 @@ createLikelySubtagsString( /* Always use the language tag from the maximal string, since it may be more specific than the one provided. */ - return createTagStringWithAlternates( + createTagStringWithAlternates( NULL, 0, script, @@ -756,9 +726,9 @@ createLikelySubtagsString( variants, variantsLength, likelySubtags, - tag, - tagCapacity, + sink, err); + return TRUE; } } @@ -768,25 +738,28 @@ createLikelySubtagsString( { const char* likelySubtags = NULL; - createTagString( - lang, - langLength, - NULL, - 0, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + NULL, + 0, + sink, + err); + } if(U_FAILURE(*err)) { goto error; } likelySubtags = findLikelySubtags( - tagBuffer, + tagBuffer.data(), likelySubtagsBuffer, sizeof(likelySubtagsBuffer), err); @@ -798,7 +771,7 @@ createLikelySubtagsString( /* Always use the language tag from the maximal string, since it may be more specific than the one provided. 
*/ - return createTagStringWithAlternates( + createTagStringWithAlternates( NULL, 0, script, @@ -808,17 +781,13 @@ createLikelySubtagsString( variants, variantsLength, likelySubtags, - tag, - tagCapacity, + sink, err); + return TRUE; } } - return u_terminateChars( - tag, - tagCapacity, - 0, - err); + return FALSE; error: @@ -826,7 +795,7 @@ error: *err = U_ILLEGAL_ARGUMENT_ERROR; } - return -1; + return FALSE; } #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ @@ -848,12 +817,10 @@ error: } \ } -static int32_t -_uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err) -{ +static void +_uloc_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err) { char lang[ULOC_LANG_CAPACITY]; int32_t langLength = sizeof(lang); char script[ULOC_SCRIPT_CAPACITY]; @@ -863,14 +830,12 @@ _uloc_addLikelySubtags(const char* localeID, const char* trailing = ""; int32_t trailingLength = 0; int32_t trailingIndex = 0; - int32_t resultLength = 0; + UBool success = FALSE; if(U_FAILURE(*err)) { goto error; } - else if (localeID == NULL || - maximizedLocaleID == NULL || - maximizedLocaleIDCapacity <= 0) { + if (localeID == NULL) { goto error; } @@ -901,7 +866,7 @@ _uloc_addLikelySubtags(const char* localeID, CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); - resultLength = + success = createLikelySubtagsString( lang, langLength, @@ -911,55 +876,32 @@ _uloc_addLikelySubtags(const char* localeID, regionLength, trailing, trailingLength, - maximizedLocaleID, - maximizedLocaleIDCapacity, + sink, err); - if (resultLength == 0) { + if (!success) { const int32_t localIDLength = (int32_t)uprv_strlen(localeID); /* * If we get here, we need to return localeID. */ - uprv_memcpy( - maximizedLocaleID, - localeID, - localIDLength <= maximizedLocaleIDCapacity ? 
- localIDLength : maximizedLocaleIDCapacity); - - resultLength = - u_terminateChars( - maximizedLocaleID, - maximizedLocaleIDCapacity, - localIDLength, - err); + sink.Append(localeID, localIDLength); } - return resultLength; + return; error: if (!U_FAILURE(*err)) { *err = U_ILLEGAL_ARGUMENT_ERROR; } - - return -1; } -static int32_t -_uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err) -{ - /** - * ULOC_FULLNAME_CAPACITY will provide enough capacity - * that we can build a string that contains the language, - * script and region code without worrying about overrunning - * the user-supplied buffer. - **/ - char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; - int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); +static void +_uloc_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err) { + icu::CharString maximizedTagBuffer; char lang[ULOC_LANG_CAPACITY]; int32_t langLength = sizeof(lang); @@ -974,9 +916,7 @@ _uloc_minimizeSubtags(const char* localeID, if(U_FAILURE(*err)) { goto error; } - else if (localeID == NULL || - minimizedLocaleID == NULL || - minimizedLocaleIDCapacity <= 0) { + else if (localeID == NULL) { goto error; } @@ -1009,32 +949,32 @@ _uloc_minimizeSubtags(const char* localeID, CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - NULL, - 0, - maximizedTagBuffer, - maximizedTagBufferLength, - err); - if(U_FAILURE(*err)) { - goto error; - } + { + icu::CharString base; + { + icu::CharStringByteSink sink(&base); + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + sink, + err); + } - /** - * First, we need to first get the maximization - * from AddLikelySubtags. 
- **/ - maximizedTagBufferLength = - uloc_addLikelySubtags( - maximizedTagBuffer, - maximizedTagBuffer, - maximizedTagBufferLength, - err); + /** + * First, we need to first get the maximization + * from AddLikelySubtags. + **/ + { + icu::CharStringByteSink sink(&maximizedTagBuffer); + ulocimp_addLikelySubtags(base.data(), sink, err); + } + } if(U_FAILURE(*err)) { goto error; @@ -1044,9 +984,9 @@ _uloc_minimizeSubtags(const char* localeID, * Start first with just the language. **/ { - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); createLikelySubtagsString( lang, langLength, @@ -1056,19 +996,19 @@ _uloc_minimizeSubtags(const char* localeID, 0, NULL, 0, - tagBuffer, - sizeof(tagBuffer), + sink, err); + } if(U_FAILURE(*err)) { goto error; } else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { - return createTagString( + createTagString( lang, langLength, NULL, @@ -1077,9 +1017,9 @@ _uloc_minimizeSubtags(const char* localeID, 0, trailing, trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, + sink, err); + return; } } @@ -1088,9 +1028,9 @@ _uloc_minimizeSubtags(const char* localeID, **/ if (regionLength > 0) { - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); createLikelySubtagsString( lang, langLength, @@ -1100,19 +1040,19 @@ _uloc_minimizeSubtags(const char* localeID, regionLength, NULL, 0, - tagBuffer, - sizeof(tagBuffer), + sink, err); + } if(U_FAILURE(*err)) { goto error; } else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { - return createTagString( + createTagString( lang, langLength, NULL, @@ -1121,9 +1061,9 @@ 
_uloc_minimizeSubtags(const char* localeID, regionLength, trailing, trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, + sink, err); + return; } } @@ -1133,9 +1073,9 @@ _uloc_minimizeSubtags(const char* localeID, * maximal version that we already have. **/ if (scriptLength > 0 && regionLength > 0) { - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); createLikelySubtagsString( lang, langLength, @@ -1145,19 +1085,19 @@ _uloc_minimizeSubtags(const char* localeID, 0, NULL, 0, - tagBuffer, - sizeof(tagBuffer), + sink, err); + } if(U_FAILURE(*err)) { goto error; } else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { - return createTagString( + createTagString( lang, langLength, script, @@ -1166,9 +1106,9 @@ _uloc_minimizeSubtags(const char* localeID, 0, trailing, trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, + sink, err); + return; } } @@ -1177,18 +1117,8 @@ _uloc_minimizeSubtags(const char* localeID, * If we got here, return the locale ID parameter. **/ const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); - - uprv_memcpy( - minimizedLocaleID, - localeID, - localeIDLength <= minimizedLocaleIDCapacity ? 
- localeIDLength : minimizedLocaleIDCapacity); - - return u_terminateChars( - minimizedLocaleID, - minimizedLocaleIDCapacity, - localeIDLength, - err); + sink.Append(localeID, localeIDLength); + return; } error: @@ -1196,10 +1126,6 @@ error: if (!U_FAILURE(*err)) { *err = U_ILLEGAL_ARGUMENT_ERROR; } - - return -1; - - } static UBool @@ -1230,51 +1156,83 @@ do_canonicalize(const char* localeID, } U_CAPI int32_t U_EXPORT2 -uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err) -{ +uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink( + maximizedLocaleID, maximizedLocaleIDCapacity); + + ulocimp_addLikelySubtags(localeID, sink, status); + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return sink.Overflowed() ? reslen : -1; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars( + maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* status) { char localeBuffer[ULOC_FULLNAME_CAPACITY]; - if (!do_canonicalize( - localeID, - localeBuffer, - sizeof(localeBuffer), - err)) { - return -1; + if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { + _uloc_addLikelySubtags(localeBuffer, sink, status); } - else { - return _uloc_addLikelySubtags( - localeBuffer, - maximizedLocaleID, - maximizedLocaleIDCapacity, - err); - } } U_CAPI int32_t U_EXPORT2 -uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err) -{ +uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* status) { + if 
(U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink( + minimizedLocaleID, minimizedLocaleIDCapacity); + + ulocimp_minimizeSubtags(localeID, sink, status); + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return sink.Overflowed() ? reslen : -1; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars( + minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* status) { char localeBuffer[ULOC_FULLNAME_CAPACITY]; - if (!do_canonicalize( - localeID, - localeBuffer, - sizeof(localeBuffer), - err)) { - return -1; + if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { + _uloc_minimizeSubtags(localeBuffer, sink, status); } - else { - return _uloc_minimizeSubtags( - localeBuffer, - minimizedLocaleID, - minimizedLocaleIDCapacity, - err); - } } // Pairs of (language subtag, + or -) for finding out fast if common languages @@ -1282,7 +1240,7 @@ uloc_minimizeSubtags(const char* localeID, static const char LANG_DIR_STRING[] = "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; -// Implemented here because this calls uloc_addLikelySubtags(). +// Implemented here because this calls ulocimp_addLikelySubtags(). U_CAPI UBool U_EXPORT2 uloc_isRightToLeft(const char *locale) { UErrorCode errorCode = U_ZERO_ERROR; @@ -1309,12 +1267,15 @@ uloc_isRightToLeft(const char *locale) { } // Otherwise, find the likely script. 
errorCode = U_ZERO_ERROR; - char likely[ULOC_FULLNAME_CAPACITY]; - (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); + icu::CharString likely; + { + icu::CharStringByteSink sink(&likely); + ulocimp_addLikelySubtags(locale, sink, &errorCode); + } if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { return FALSE; } - scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); + scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode); if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || scriptLength == 0) { return FALSE; @@ -1365,11 +1326,14 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, rgLen = 0; } else if (rgLen == 0 && inferRegion) { // no unicode_region_subtag but inferRegion TRUE, try likely subtags - char locBuf[ULOC_FULLNAME_CAPACITY]; rgStatus = U_ZERO_ERROR; - (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus); + icu::CharString locBuf; + { + icu::CharStringByteSink sink(&locBuf); + ulocimp_addLikelySubtags(localeID, sink, &rgStatus); + } if (U_SUCCESS(rgStatus)) { - rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status); + rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); if (U_FAILURE(*status)) { rgLen = 0; } diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index fd16af5ae52..31d3c44d0be 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -145,6 +145,74 @@ U_CAPI int32_t U_EXPORT2 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, char *region, int32_t regionCapacity, UErrorCode* status); +/** + * Add the likely subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the maximal form, or there is no data available 
+ * for maximization, it will be copied to the output sink. For example, + * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_DE" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param localeID The locale to maximize + * @param sink The output sink receiving the maximized locale + * @param err Error information if maximizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @internal ICU 64 + */ +U_STABLE void U_EXPORT2 +ulocimp_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +/** + * Minimize the subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the minimal form, or there is no data available + * for minimization, it will be copied to the output sink. Since the + * minimization algorithm relies on proper maximization, see the comments + * for ulocimp_addLikelySubtags for reasons why there might not be any data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_DE" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param localeID The locale to minimize + * @param sink The output sink receiving the minimized locale + * @param err Error information if minimizing the locale failed. 
If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @internal ICU 64 + */ +U_STABLE void U_EXPORT2 +ulocimp_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + U_CAPI const char * U_EXPORT2 locale_getKeywordsStart(const char *localeID); diff --git a/icu4c/source/common/uscript.cpp b/icu4c/source/common/uscript.cpp index 83b5f7ef168..98528c158b4 100644 --- a/icu4c/source/common/uscript.cpp +++ b/icu4c/source/common/uscript.cpp @@ -18,8 +18,11 @@ #include "unicode/uchar.h" #include "unicode/uscript.h" #include "unicode/uloc.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" +#include "ulocimp.h" static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; @@ -98,7 +101,6 @@ uscript_getCode(const char* nameOrAbbrOrLocale, int32_t capacity, UErrorCode* err){ UBool triedCode; - char likely[ULOC_FULLNAME_CAPACITY]; UErrorCode internalErrorCode; int32_t length; @@ -125,10 +127,13 @@ uscript_getCode(const char* nameOrAbbrOrLocale, if(U_FAILURE(*err) || length != 0) { return length; } - (void)uloc_addLikelySubtags(nameOrAbbrOrLocale, - likely, UPRV_LENGTHOF(likely), &internalErrorCode); + icu::CharString likely; + { + icu::CharStringByteSink sink(&likely); + ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode); + } if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { - length = getCodesFromLocale(likely, fillIn, capacity, err); + length = getCodesFromLocale(likely.data(), fillIn, capacity, err); if(U_FAILURE(*err) || length != 0) { return length; } diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index 63197bd27c0..4f451ee099c 100644 --- a/icu4c/source/i18n/calendar.cpp +++ 
b/icu4c/source/i18n/calendar.cpp @@ -3796,18 +3796,16 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode& // 2). If the locale has a script designation then we ignore it, // then remove it ( i.e. "en_Latn_US" becomes "en_US" ) - char minLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 }; UErrorCode myStatus = U_ZERO_ERROR; - uloc_minimizeSubtags(desiredLocale.getName(),minLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus); - Locale min = Locale::createFromName(minLocaleID); + Locale min(desiredLocale); + min.minimizeSubtags(myStatus); Locale useLocale; if ( uprv_strlen(desiredLocale.getCountry()) == 0 || (uprv_strlen(desiredLocale.getScript()) > 0 && uprv_strlen(min.getScript()) == 0) ) { - char maxLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 }; myStatus = U_ZERO_ERROR; - uloc_addLikelySubtags(desiredLocale.getName(),maxLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus); - Locale max = Locale::createFromName(maxLocaleID); + Locale max(desiredLocale); + max.addLikelySubtags(myStatus); useLocale = Locale(max.getLanguage(),max.getCountry()); } else { useLocale = desiredLocale; diff --git a/icu4c/source/i18n/dtptngen.cpp b/icu4c/source/i18n/dtptngen.cpp index e0f19f068af..fcc5977c56d 100644 --- a/icu4c/source/i18n/dtptngen.cpp +++ b/icu4c/source/i18n/dtptngen.cpp @@ -615,16 +615,11 @@ U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UEr void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } - const char *localeID = locale.getName(); - char maxLocaleID[ULOC_FULLNAME_CAPACITY]; - int32_t length = uloc_addLikelySubtags(localeID, maxLocaleID, ULOC_FULLNAME_CAPACITY, &status); + Locale maxLocale(locale); + maxLocale.addLikelySubtags(status); if (U_FAILURE(status)) { return; - } else if (length == ULOC_FULLNAME_CAPACITY) { // no room for NUL - status = U_BUFFER_OVERFLOW_ERROR; - return; } - Locale maxLocale = Locale(maxLocaleID); const char *country = 
maxLocale.getCountry(); if (*country == '\0') { country = "001"; }