From: Fredrik Roubert Date: Tue, 18 Sep 2018 23:53:22 +0000 (-0700) Subject: ICU-13417 Pass length from Locale::forLanguageTag() to ultag_parse(). X-Git-Tag: release-63-rc~47 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cc6c31432ebd32c0f5d2d07e23a20a542a432cd2;p=icu ICU-13417 Pass length from Locale::forLanguageTag() to ultag_parse(). If not passed a length, ultag_parse() will call uprv_strlen(), requiring the input buffer to be NUL terminated. This is unnecessary. --- diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index d83c9158220..5e1942912e0 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -46,6 +46,7 @@ #include "cstring.h" #include "uassert.h" #include "uhash.h" +#include "ulocimp.h" #include "ucln_cmn.h" #include "ustr_imp.h" #include "charstr.h" @@ -858,12 +859,6 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status) return result; } - // TODO: Remove the need for a const char* to a NUL terminated buffer. 
- const CharString tag_nul(tag, status); - if (U_FAILURE(status)) { - return result; - } - // If a BCP-47 language tag is passed as the language parameter to the // normal Locale constructor, it will actually fall back to invoking // uloc_forLanguageTag() to parse it if it somehow is able to detect that @@ -893,8 +888,9 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status) return result; } - reslen = uloc_forLanguageTag( - tag_nul.data(), + reslen = ulocimp_forLanguageTag( + tag.data(), + tag.length(), buffer, resultCapacity, &parsedLength, diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp index 84e06d40f54..b0647e97a2a 100644 --- a/icu4c/source/common/uloc_tag.cpp +++ b/icu4c/source/common/uloc_tag.cpp @@ -2416,6 +2416,23 @@ uloc_forLanguageTag(const char* langtag, int32_t localeIDCapacity, int32_t* parsedLength, UErrorCode* status) { + return ulocimp_forLanguageTag( + langtag, + -1, + localeID, + localeIDCapacity, + parsedLength, + status); +} + + +U_CAPI int32_t U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* status) { ULanguageTag *lt; int32_t reslen = 0; const char *subtag, *p; @@ -2423,7 +2440,7 @@ uloc_forLanguageTag(const char* langtag, int32_t i, n; UBool noRegion = TRUE; - lt = ultag_parse(langtag, -1, parsedLength, status); + lt = ultag_parse(langtag, tagLen, parsedLength, status); if (U_FAILURE(*status)) { return 0; } diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index 869f296d022..e068362ed6e 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -61,6 +61,38 @@ ulocimp_getCountry(const char *localeID, char *country, int32_t countryCapacity, const char **pEnd); +/** + * Returns a locale ID for the specified BCP47 language tag string. 
+ * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + * <p>
+ * This implements the 'Language-Tag' production of BCP47, and so + * supports grandfathered (regular and irregular) as well as private + * use language tags. Private use tags are represented as 'x-whatever', + * and grandfathered tags are converted to their canonical replacements + * where they exist. Note that a few grandfathered tags have no modern + * replacement; these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * @param langtag the input BCP47 language tag. + * @param tagLen the length of langtag, or -1 to call uprv_strlen(). + * @param localeID the output buffer receiving a locale ID for the + * specified BCP47 language tag. + * @param localeIDCapacity the size of the locale ID output buffer. + * @param parsedLength if not NULL, successfully parsed length + * for the input language tag is set. + * @param err error information if receiving the locale ID + * failed. + * @return the length of the locale ID. + * @internal ICU 63 + */ +U_STABLE int32_t U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* err); + /** * Get the region to use for supplemental data lookup. Uses * (1) any region specified by locale tag "rg"; if none then