From: Peter Edberg Date: Wed, 8 Sep 2021 22:20:48 +0000 (-0700) Subject: ICU-20870 If locale/lang name lookup fails, canonicalize lang and try again X-Git-Tag: cldr/2021-09-15~8 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0da0fabfaefc479ffde61b1ed8c413803362915a;p=icu ICU-20870 If locale/lang name lookup fails, canonicalize lang and try again --- diff --git a/icu4c/source/common/locdispnames.cpp b/icu4c/source/common/locdispnames.cpp index 96af3f9aa84..c512a0164c2 100644 --- a/icu4c/source/common/locdispnames.cpp +++ b/icu4c/source/common/locdispnames.cpp @@ -316,17 +316,29 @@ _getStringOrCopyKey(const char *path, const char *locale, /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */ } } else { + bool isLanguageCode = (uprv_strncmp(tableKey, _kLanguages, 9) == 0); /* Language code should not be a number. If it is, set the error code. */ - if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) { + if (isLanguageCode && uprv_strtol(itemKey, NULL, 10)) { *pErrorCode = U_MISSING_RESOURCE_ERROR; } else { /* second-level item, use special fallback */ s=uloc_getTableStringWithFallback(path, locale, - tableKey, + tableKey, subTableKey, itemKey, &length, pErrorCode); + if (U_FAILURE(*pErrorCode) && isLanguageCode && itemKey != nullptr) { + // convert itemKey locale code to canonical form and try again, ICU-20870 + *pErrorCode = U_ZERO_ERROR; + Locale canonKey = Locale::createCanonical(itemKey); + s=uloc_getTableStringWithFallback(path, locale, + tableKey, + subTableKey, + canonKey.getName(), + &length, + pErrorCode); + } } } diff --git a/icu4c/source/common/locdspnm.cpp b/icu4c/source/common/locdspnm.cpp index c4c04702e64..f73cedd7286 100644 --- a/icu4c/source/common/locdspnm.cpp +++ b/icu4c/source/common/locdspnm.cpp @@ -723,11 +723,25 @@ LocaleDisplayNamesImpl::localeIdName(const char* localeId, return result; } } - if (substitute) { - return langData.get("Languages", localeId, result); - } else { - return langData.getNoFallback("Languages", localeId, result); + langData.getNoFallback("Languages", localeId, result); + if (result.isBogus() && uprv_strchr(localeId, '_') == NULL) { + // Canonicalize lang and try again, ICU-20870 + // (only for language codes without script or region) + Locale canonLocale = Locale::createCanonical(localeId); + const char* canonLocId = canonLocale.getName(); + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Languages%short", canonLocId, result); + if (!result.isBogus()) { + return result; + } + } + langData.getNoFallback("Languages", canonLocId, result); } + if (result.isBogus() && substitute) { + // use key, this is what langData.get (with fallback) falls back to. + result.setTo(UnicodeString(localeId, -1, US_INV)); // use key ( + } + return result; } UnicodeString& @@ -742,10 +756,22 @@ LocaleDisplayNamesImpl::languageDisplayName(const char* lang, return adjustForUsageAndContext(kCapContextUsageLanguage, result); } } - if (substitute == UDISPCTX_SUBSTITUTE) { - langData.get("Languages", lang, result); - } else { - langData.getNoFallback("Languages", lang, result); + langData.getNoFallback("Languages", lang, result); + if (result.isBogus()) { + // Canonicalize lang and try again, ICU-20870 + Locale canonLocale = Locale::createCanonical(lang); + const char* canonLocId = canonLocale.getName(); + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Languages%short", canonLocId, result); + if (!result.isBogus()) { + return adjustForUsageAndContext(kCapContextUsageLanguage, result); + } + } + langData.getNoFallback("Languages", canonLocId, result); + } + if (result.isBogus() && substitute == UDISPCTX_SUBSTITUTE) { + // use key, this is what langData.get (with fallback) falls back to. + result.setTo(UnicodeString(lang, -1, US_INV)); // use key ( } return adjustForUsageAndContext(kCapContextUsageLanguage, result); } diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index 92f8af49915..90e7f34e3aa 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -59,6 +59,7 @@ static void TestBug20321UnicodeLocaleKey(void); static void TestUsingDefaultWarning(void); static void TestExcessivelyLongIDs(void); +static void TestUldnNameVariants(void); void PrintDataTable(); @@ -283,6 +284,7 @@ void addLocaleTest(TestNode** root) TESTCASE(TestUsingDefaultWarning); TESTCASE(TestBug21449InfiniteLoop); TESTCASE(TestExcessivelyLongIDs); + TESTCASE(TestUldnNameVariants); } @@ -6917,6 +6919,185 @@ static void TestBug20149() { } } +typedef enum UldnNameType { + TEST_ULDN_LOCALE, + TEST_ULDN_LANGUAGE, + TEST_ULDN_SCRIPT, + TEST_ULDN_REGION, + TEST_ULOC_LOCALE, // only valid with optStdMidLong + TEST_ULOC_LANGUAGE, // only valid with optStdMidLong + TEST_ULOC_SCRIPT, // only valid with optStdMidLong + TEST_ULOC_REGION, // only valid with optStdMidLong +} UldnNameType; + +typedef struct { + const char * localeToName; // NULL to terminate a list of these + UldnNameType nameType; + const UChar * expectResult; +} UldnItem; + +typedef struct { + const char * displayLocale; + const UDisplayContext * displayOptions; // set of 3 UDisplayContext items + const UldnItem * testItems; + int32_t countItems; +} UldnLocAndOpts; + +static const UDisplayContext optStdMidLong[3] = {UDISPCTX_STANDARD_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_LENGTH_FULL}; +static const UDisplayContext optStdMidShrt[3] = {UDISPCTX_STANDARD_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_LENGTH_SHORT}; +static const UDisplayContext optDiaMidLong[3] = {UDISPCTX_DIALECT_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_LENGTH_FULL}; +static const UDisplayContext optDiaMidShrt[3] = {UDISPCTX_DIALECT_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_LENGTH_SHORT}; + +static const UldnItem en_StdMidLong[] = { + { "en_US", TEST_ULDN_LOCALE, u"English (United States)" }, + { "en", TEST_ULDN_LANGUAGE, u"English" }, + { "en_US", TEST_ULOC_LOCALE, u"English (United States)" }, + { "en_US", TEST_ULOC_LANGUAGE, u"English" }, + { "en", TEST_ULOC_LANGUAGE, u"English" }, + // https://unicode-org.atlassian.net/browse/ICU-20870 + { "fa_AF", TEST_ULDN_LOCALE, u"Persian (Afghanistan)" }, + { "prs", TEST_ULDN_LOCALE, u"Dari" }, + { "prs_AF", TEST_ULDN_LOCALE, u"Dari (Afghanistan)" }, + { "prs_TJ", TEST_ULDN_LOCALE, u"Dari (Tajikistan)" }, + { "prs", TEST_ULDN_LANGUAGE, u"Dari" }, + { "prs", TEST_ULOC_LANGUAGE, u"Dari" }, + // https://unicode-org.atlassian.net/browse/ICU-21742 + { "ji", TEST_ULDN_LOCALE, u"Yiddish" }, + { "ji_US", TEST_ULDN_LOCALE, u"Yiddish (United States)" }, + { "ji", TEST_ULDN_LANGUAGE, u"Yiddish" }, + { "ji_US", TEST_ULOC_LOCALE, u"Yiddish (United States)" }, + { "ji", TEST_ULOC_LANGUAGE, u"Yiddish" }, + // https://unicode-org.atlassian.net/browse/ICU-11563 + { "mo", TEST_ULDN_LOCALE, u"Romanian" }, + { "mo_MD", TEST_ULDN_LOCALE, u"Romanian (Moldova)" }, + { "mo", TEST_ULDN_LANGUAGE, u"Romanian" }, + { "mo_MD", TEST_ULOC_LOCALE, u"Romanian (Moldova)" }, + { "mo", TEST_ULOC_LANGUAGE, u"Romanian" }, +}; + +static const UldnItem en_StdMidShrt[] = { + { "en_US", TEST_ULDN_LOCALE, u"English (US)" }, + { "en", TEST_ULDN_LANGUAGE, u"English" }, +}; + +static const UldnItem en_DiaMidLong[] = { + { "en_US", TEST_ULDN_LOCALE, u"American English" }, + { "fa_AF", TEST_ULDN_LOCALE, u"Dari" }, + { "prs", TEST_ULDN_LOCALE, u"Dari" }, + { "prs_AF", TEST_ULDN_LOCALE, u"Dari (Afghanistan)" }, + { "prs_TJ", TEST_ULDN_LOCALE, u"Dari (Tajikistan)" }, + { "prs", TEST_ULDN_LANGUAGE, u"Dari" }, + { "mo", TEST_ULDN_LOCALE, u"Romanian" }, + { "mo", TEST_ULDN_LANGUAGE, u"Romanian" }, +}; + +static const UldnItem en_DiaMidShrt[] = { + { "en_US", TEST_ULDN_LOCALE, u"US English" }, +}; + +static const UldnItem ro_StdMidLong[] = { // https://unicode-org.atlassian.net/browse/ICU-11563 + { "mo", TEST_ULDN_LOCALE, u"română" }, + { "mo_MD", TEST_ULDN_LOCALE, u"română (Republica Moldova)" }, + { "mo", TEST_ULDN_LANGUAGE, u"română" }, + { "mo_MD", TEST_ULOC_LOCALE, u"română (Republica Moldova)" }, + { "mo", TEST_ULOC_LANGUAGE, u"română" }, +}; + +static const UldnItem yi_StdMidLong[] = { // https://unicode-org.atlassian.net/browse/ICU-21742 + { "ji", TEST_ULDN_LOCALE, u"ייִדיש" }, + { "ji_US", TEST_ULDN_LOCALE, u"ייִדיש (פֿאַראייניגטע שטאַטן)" }, + { "ji", TEST_ULDN_LANGUAGE, u"ייִדיש" }, + { "ji_US", TEST_ULOC_LOCALE, u"ייִדיש (פֿאַראייניגטע שטאַטן)" }, + { "ji", TEST_ULOC_LANGUAGE, u"ייִדיש" }, +}; + +static const UldnLocAndOpts uldnLocAndOpts[] = { + { "en", optStdMidLong, en_StdMidLong, UPRV_LENGTHOF(en_StdMidLong) }, + { "en", optStdMidShrt, en_StdMidShrt, UPRV_LENGTHOF(en_StdMidShrt) }, + { "en", optDiaMidLong, en_DiaMidLong, UPRV_LENGTHOF(en_DiaMidLong) }, + { "en", optDiaMidShrt, en_DiaMidShrt, UPRV_LENGTHOF(en_DiaMidShrt) }, + { "ro", optStdMidLong, ro_StdMidLong, UPRV_LENGTHOF(ro_StdMidLong) }, + { "yi", optStdMidLong, yi_StdMidLong, UPRV_LENGTHOF(yi_StdMidLong) }, + { NULL, NULL, NULL, 0 } +}; + +enum { kUNameBuf = 128, kBNameBuf = 256 }; + +static void TestUldnNameVariants() { + const UldnLocAndOpts * uloPtr; + for (uloPtr = uldnLocAndOpts; uloPtr->displayLocale != NULL; uloPtr++) { + UErrorCode status = U_ZERO_ERROR; + ULocaleDisplayNames * uldn = uldn_openForContext(uloPtr->displayLocale, (UDisplayContext*)uloPtr->displayOptions, 3, &status); + if (U_FAILURE(status)) { + log_data_err("uldn_openForContext fails, displayLocale %s, contexts %03X %03X %03X: %s - Are you missing data?\n", + uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2], + u_errorName(status) ); + continue; + } + const UldnItem * itemPtr = uloPtr->testItems; + int32_t itemCount = uloPtr->countItems; + for (; itemCount-- > 0; itemPtr++) { + UChar uget[kUNameBuf]; + int32_t ulenget, ulenexp; + const char* typeString; + status = U_ZERO_ERROR; + switch (itemPtr->nameType) { + case TEST_ULDN_LOCALE: + ulenget = uldn_localeDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status); + typeString = "uldn_localeDisplayName"; + break; + case TEST_ULDN_LANGUAGE: + ulenget = uldn_languageDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status); + typeString = "uldn_languageDisplayName"; + break; + case TEST_ULDN_SCRIPT: + ulenget = uldn_scriptDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status); + typeString = "uldn_scriptDisplayName"; + break; + case TEST_ULDN_REGION: + ulenget = uldn_regionDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status); + typeString = "uldn_regionDisplayName"; + break; + case TEST_ULOC_LOCALE: + ulenget = uloc_getDisplayName(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status); + typeString = "uloc_getDisplayName"; + break; + case TEST_ULOC_LANGUAGE: + ulenget = uloc_getDisplayLanguage(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status); + typeString = "uloc_getDisplayLanguage"; + break; + case TEST_ULOC_SCRIPT: + ulenget = uloc_getDisplayScript(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status); + typeString = "uloc_getDisplayScript"; + break; + case TEST_ULOC_REGION: + ulenget = uloc_getDisplayCountry(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status); + typeString = "uloc_getDisplayCountry"; + break; + default: + continue; + } + if (U_FAILURE(status)) { + log_data_err("%s fails, displayLocale %s, contexts %03X %03X %03X, localeToName %s: %s\n", + typeString, uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2], + itemPtr->localeToName, u_errorName(status) ); + continue; + } + ulenexp = u_strlen(itemPtr->expectResult); + if (ulenget != ulenexp || u_strncmp(uget, itemPtr->expectResult, ulenexp) != 0) { + char bexp[kBNameBuf], bget[kBNameBuf]; + u_strToUTF8(bexp, kBNameBuf, NULL, itemPtr->expectResult, ulenexp, &status); + u_strToUTF8(bget, kBNameBuf, NULL, uget, ulenget, &status); + log_data_err("%s fails, displayLocale %s, contexts %03X %03X %03X, localeToName %s:\n expect %2d: %s\n get %2d: %s\n", + typeString, uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2], + itemPtr->localeToName, ulenexp, bexp, ulenget, bget ); + } + } + + uldn_close(uldn); + } +} + static void TestUsingDefaultWarning() { UChar buff[256]; char errorOutputBuff[256]; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java index cd56d4b60f7..d370fab22aa 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java @@ -426,13 +426,28 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { } private String localeIdName(String localeId) { + String locIdName; if (nameLength == DisplayContext.LENGTH_SHORT) { - String locIdName = langData.get("Languages%short", localeId); + locIdName = langData.get("Languages%short", localeId); if (locIdName != null && !locIdName.equals(localeId)) { return locIdName; } } - return langData.get("Languages", localeId); + locIdName = langData.get("Languages", localeId); + if ((locIdName == null || locIdName.equals(localeId)) && localeId.indexOf('_') < 0) { + // Canonicalize lang and try again, ICU-20870 + // (only for language codes without script or region) + ULocale canonLocale = ULocale.createCanonical(localeId); + String canonLocId = canonLocale.getName(); + if (nameLength == DisplayContext.LENGTH_SHORT) { + locIdName = langData.get("Languages%short", canonLocId); + if (locIdName != null && !locIdName.equals(canonLocId)) { + return locIdName; + } + } + locIdName = langData.get("Languages", canonLocId); + } + return locIdName; } @Override @@ -441,13 +456,27 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { if (lang.equals("root") || lang.indexOf('_') != -1) { return substituteHandling == DisplayContext.SUBSTITUTE ? lang : null; } + String langName; if (nameLength == DisplayContext.LENGTH_SHORT) { - String langName = langData.get("Languages%short", lang); + langName = langData.get("Languages%short", lang); if (langName != null && !langName.equals(lang)) { return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, langName); } } - return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, langData.get("Languages", lang)); + langName = langData.get("Languages", lang); + if (langName == null || langName.equals(lang)) { + // Canonicalize lang and try again, ICU-20870 + ULocale canonLocale = ULocale.createCanonical(lang); + String canonLocId = canonLocale.getName(); + if (nameLength == DisplayContext.LENGTH_SHORT) { + langName = langData.get("Languages%short", canonLocId); + if (langName != null && !langName.equals(canonLocId)) { + return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, langName); + } + } + langName = langData.get("Languages", canonLocId); + } + return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, langName); } @Override diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java index 2e3e3b070d9..e6314b6dd81 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -1189,22 +1189,22 @@ public class ULocaleTest extends TestFmwk { new Item("da", NM_DIA, CAP_BEG, LEN_FU, SUB_SU, "en_GB", "Britisk engelsk"), new Item("da", NM_DIA, CAP_UIL, LEN_FU, SUB_SU, "en_GB", "Britisk engelsk"), new Item("da", NM_DIA, CAP_STA, LEN_FU, SUB_SU, "en_GB", "britisk engelsk"), - new Item("es", NM_STD, CAP_MID, LEN_FU, SUB_SU, "en", "ingl\u00E9s"), - new Item("es", NM_STD, CAP_BEG, LEN_FU, SUB_SU, "en", "Ingl\u00E9s"), - new Item("es", NM_STD, CAP_UIL, LEN_FU, SUB_SU, "en", "Ingl\u00E9s"), - new Item("es", NM_STD, CAP_STA, LEN_FU, SUB_SU, "en", "Ingl\u00E9s"), - new Item("es", NM_STD, CAP_MID, LEN_FU, SUB_SU, "en_GB", "ingl\u00E9s (Reino Unido)"), - new Item("es", NM_STD, CAP_BEG, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s (Reino Unido)"), - new Item("es", NM_STD, CAP_UIL, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s (Reino Unido)"), - new Item("es", NM_STD, CAP_STA, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s (Reino Unido)"), - new Item("es", NM_STD, CAP_MID, LEN_SH, SUB_SU, "en_GB", "ingl\u00E9s (RU)"), - new Item("es", NM_STD, CAP_BEG, LEN_SH, SUB_SU, "en_GB", "Ingl\u00E9s (RU)"), - new Item("es", NM_STD, CAP_UIL, LEN_SH, SUB_SU, "en_GB", "Ingl\u00E9s (RU)"), - new Item("es", NM_STD, CAP_STA, LEN_SH, SUB_SU, "en_GB", "Ingl\u00E9s (RU)"), - new Item("es", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "en_GB", "ingl\u00E9s brit\u00E1nico"), - new Item("es", NM_DIA, CAP_BEG, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s brit\u00E1nico"), - new Item("es", NM_DIA, CAP_UIL, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s brit\u00E1nico"), - new Item("es", NM_DIA, CAP_STA, LEN_FU, SUB_SU, "en_GB", "Ingl\u00E9s brit\u00E1nico"), + new Item("es", NM_STD, CAP_MID, LEN_FU, SUB_SU, "en", "inglés"), + new Item("es", NM_STD, CAP_BEG, LEN_FU, SUB_SU, "en", "Inglés"), + new Item("es", NM_STD, CAP_UIL, LEN_FU, SUB_SU, "en", "Inglés"), + new Item("es", NM_STD, CAP_STA, LEN_FU, SUB_SU, "en", "Inglés"), + new Item("es", NM_STD, CAP_MID, LEN_FU, SUB_SU, "en_GB", "inglés (Reino Unido)"), + new Item("es", NM_STD, CAP_BEG, LEN_FU, SUB_SU, "en_GB", "Inglés (Reino Unido)"), + new Item("es", NM_STD, CAP_UIL, LEN_FU, SUB_SU, "en_GB", "Inglés (Reino Unido)"), + new Item("es", NM_STD, CAP_STA, LEN_FU, SUB_SU, "en_GB", "Inglés (Reino Unido)"), + new Item("es", NM_STD, CAP_MID, LEN_SH, SUB_SU, "en_GB", "inglés (RU)"), + new Item("es", NM_STD, CAP_BEG, LEN_SH, SUB_SU, "en_GB", "Inglés (RU)"), + new Item("es", NM_STD, CAP_UIL, LEN_SH, SUB_SU, "en_GB", "Inglés (RU)"), + new Item("es", NM_STD, CAP_STA, LEN_SH, SUB_SU, "en_GB", "Inglés (RU)"), + new Item("es", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "en_GB", "inglés británico"), + new Item("es", NM_DIA, CAP_BEG, LEN_FU, SUB_SU, "en_GB", "Inglés británico"), + new Item("es", NM_DIA, CAP_UIL, LEN_FU, SUB_SU, "en_GB", "Inglés británico"), + new Item("es", NM_DIA, CAP_STA, LEN_FU, SUB_SU, "en_GB", "Inglés británico"), new Item("ru", NM_STD, CAP_MID, LEN_FU, SUB_SU, "uz_Latn", "\u0443\u0437\u0431\u0435\u043A\u0441\u043A\u0438\u0439 (\u043B\u0430\u0442\u0438\u043D\u0438\u0446\u0430)"), new Item("ru", NM_STD, CAP_BEG, LEN_FU, SUB_SU, "uz_Latn", "\u0423\u0437\u0431\u0435\u043A\u0441\u043A\u0438\u0439 (\u043B\u0430\u0442\u0438\u043D\u0438\u0446\u0430)"), new Item("ru", NM_STD, CAP_UIL, LEN_FU, SUB_SU, "uz_Latn", "\u0423\u0437\u0431\u0435\u043A\u0441\u043A\u0438\u0439 (\u043B\u0430\u0442\u0438\u043D\u0438\u0446\u0430)"), @@ -1214,6 +1214,26 @@ public class ULocaleTest extends TestFmwk { new Item("en", NM_STD, CAP_MID, LEN_SH, SUB_SU, "ur@numbers=arabext", "Urdu (X Arabic-Indic Digits)"), new Item("af", NM_STD, CAP_NON, LEN_FU, SUB_NO, "aa", "Afar"), new Item("cs", NM_STD, CAP_NON, LEN_FU, SUB_NO, "vai", "vai"), + // https://unicode-org.atlassian.net/browse/ICU-20870: + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "fa_AF", "Persian (Afghanistan)" ), + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "prs", "Dari" ), + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "prs_AF", "Dari (Afghanistan)" ), + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "prs_TJ", "Dari (Tajikistan)" ), + new Item("en", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "fa_AF", "Dari" ), + new Item("en", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "prs", "Dari" ), + new Item("en", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "prs_AF", "Dari (Afghanistan)" ), + new Item("en", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "prs_TJ", "Dari (Tajikistan)" ), + // https://unicode-org.atlassian.net/browse/ICU-21742: + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "ji", "Yiddish" ), + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "ji_US", "Yiddish (United States)" ), + new Item("yi", NM_STD, CAP_MID, LEN_FU, SUB_SU, "ji", "ייִדיש" ), + new Item("yi", NM_STD, CAP_MID, LEN_FU, SUB_SU, "ji_US", "ייִדיש (פֿאַראייניגטע שטאַטן)" ), + // https://unicode-org.atlassian.net/browse/ICU-11563: + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "mo", "Romanian" ), + new Item("en", NM_STD, CAP_MID, LEN_FU, SUB_SU, "mo_MD", "Romanian (Moldova)" ), + new Item("en", NM_DIA, CAP_MID, LEN_FU, SUB_SU, "mo", "Romanian" ), + new Item("ro", NM_STD, CAP_MID, LEN_FU, SUB_SU, "mo", "română" ), + new Item("ro", NM_STD, CAP_MID, LEN_FU, SUB_SU, "mo_MD", "română (Republica Moldova)" ), }; for (Item item: items) { ULocale locale = new ULocale(item.displayLocale); @@ -1227,14 +1247,44 @@ public class ULocaleTest extends TestFmwk { DisplayContext substituteHandling = ldn.getContext(DisplayContext.Type.SUBSTITUTE_HANDLING); if (dialectHandling != item.dialectHandling || capitalization != item.capitalization || nameLength != item.nameLength || substituteHandling != item.substituteHandling) { errln("FAIL: displayLoc: " + item.displayLocale + ", dialectNam?: " + item.dialectHandling + - ", capitalize: " + item.capitalization + ", nameLen: " + item.nameLength + ", substituteHandling: " + item.substituteHandling + ", locToName: " + item.localeToBeNamed + - ", => read back dialectNam?: " + dialectHandling + ", capitalize: " + capitalization + ", nameLen: " + nameLength + ", substituteHandling: " + substituteHandling); + ", capitalize: " + item.capitalization + ", nameLen: " + item.nameLength + + ", substituteHandling: " + item.substituteHandling + ", locToName: " + item.localeToBeNamed + + ", => read back dialectNam?: " + dialectHandling + ", capitalize: " + capitalization + + ", nameLen: " + nameLength + ", substituteHandling: " + substituteHandling); } else { + boolean checkULocaleDisplay = (dialectHandling==NM_STD && capitalization==CAP_MID && nameLength==LEN_FU && substituteHandling==SUB_SU); + boolean checkJustLanguage = item.localeToBeNamed.matches("[a-z]{2,3}"); String result = ldn.localeDisplayName(item.localeToBeNamed); if (!(item.result == null && result == null) && !(result != null && result.equals(item.result))) { errln("FAIL: displayLoc: " + item.displayLocale + ", dialectNam?: " + item.dialectHandling + - ", capitalize: " + item.capitalization + ", nameLen: " + item.nameLength + ", substituteHandling: " + item.substituteHandling + ", locToName: " + item.localeToBeNamed + - ", => expected result: " + item.result + ", got: " + result); + ", capitalize: " + item.capitalization + ", nameLen: " + item.nameLength + + ", substituteHandling: " + item.substituteHandling + "; locToName: " + item.localeToBeNamed + + ", => expected result: " + item.result + ", got LDN name: " + result); + } + if (checkULocaleDisplay) { + result = ULocale.getDisplayName(item.localeToBeNamed, locale); + if (!(item.result == null && result == null) && !(result != null && result.equals(item.result))) { + errln("FAIL: displayLoc: " + item.displayLocale + + ", dialectNam: std, capitalize: mid, nameLen: full, substitute: yes; locToName: " + item.localeToBeNamed + + ", => expected result: " + item.result + ", got ULoc name: " + result); + } + } + if (checkJustLanguage) { + result = ldn.languageDisplayName(item.localeToBeNamed); + if (!(item.result == null && result == null) && !(result != null && result.equals(item.result))) { + errln("FAIL: displayLoc: " + item.displayLocale + ", dialectNam?: " + item.dialectHandling + + ", capitalize: " + item.capitalization + ", nameLen: " + item.nameLength + + ", substituteHandling: " + item.substituteHandling + "; locToName: " + item.localeToBeNamed + + ", => expected result: " + item.result + ", got LDN lang: " + result); + } + if (checkULocaleDisplay) { + result = ULocale.getDisplayLanguage(item.localeToBeNamed, locale); + if (!(item.result == null && result == null) && !(result != null && result.equals(item.result))) { + errln("FAIL: displayLoc: " + item.displayLocale + + ", dialectNam: std, capitalize: mid, nameLen: full, substitute: yes; locToName: " + item.localeToBeNamed + + ", => expected result: " + item.result + ", got ULoc lang: " + result); + } + } } } }