From: Yoshito Umaoka Date: Wed, 15 Mar 2017 00:32:31 +0000 (+0000) Subject: ICU-12615 LocaleValidityChecker to handle the special case '-u-va-posix' X-Git-Tag: release-59-rc~107 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8263cc5d756d7d0502f16bca4d432635f89bae59;p=icu ICU-12615 LocaleValidityChecker to handle the special case '-u-va-posix' X-SVN-Rev: 39806 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java index fbe826149fa..a08b0684ff9 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java @@ -75,7 +75,7 @@ public class LocaleValidityChecker { final Set extensionKeys = locale.getExtensionKeys(); // if (language.isEmpty()) { // // the only case where this is valid is if there is only an 'x' extension string - // if (!script.isEmpty() || !region.isEmpty() || variantString.isEmpty() + // if (!script.isEmpty() || !region.isEmpty() || variantString.isEmpty() // || extensionKeys.size() != 1 || !extensionKeys.contains('x')) { // return where.set(Datatype.x, "Null language only with x-..."); // } @@ -135,8 +135,8 @@ public class LocaleValidityChecker { } /** - * @param locale - * @param datatype + * @param locale + * @param datatype * @param extension * @param where * @return @@ -154,7 +154,7 @@ public class LocaleValidityChecker { // TODO: is empty -u- valid? 
for (String subtag : SEPARATOR.split(extensionString)) { - if (subtag.length() == 2 + if (subtag.length() == 2 && (tBuffer == null || subtag.charAt(1) <= '9')) { // if we have accumulated a t buffer, check that first if (tBuffer != null) { @@ -182,7 +182,7 @@ public class LocaleValidityChecker { } else { ++typeCount; switch (valueType) { - case single: + case single: if (typeCount > 1) { return where.set(datatype, key+"-"+subtag); } @@ -203,9 +203,9 @@ public class LocaleValidityChecker { break; } switch (specialCase) { - case anything: + case anything: continue; - case codepoints: + case codepoints: try { if (Integer.parseInt(subtag,16) > 0x10FFFF) { return where.set(datatype, key+"-"+subtag); @@ -324,14 +324,30 @@ public class LocaleValidityChecker { } /** - * @param language - * @param language2 + * @param datatype + * @param code + * @param where * @return */ private boolean isValid(Datatype datatype, String code, Where where) { - return code.isEmpty() ? true : - ValidIdentifiers.isValid(datatype, datasubtypes, code) != null ? true : - where == null ? false - : where.set(datatype, code); + if (code.isEmpty()) { + return true; + } + + // Note: + // BCP 47 -u- locale extension '-u-va-posix' is mapped to variant 'posix' automatically. + // For example, ULocale.forLanguageTag("en-u-va-posix").getVariant() returns "posix". + // This is the only exceptional case in which a -u- locale extension is mapped to a subtag type + // other than keyword. + // + // The locale validity data is based on IANA language subtag registry data and "posix" + // is not a valid variant. So we need to handle this specific case here. There are no + // other exceptions. + if (datatype == Datatype.variant && "posix".equalsIgnoreCase(code)) { + return true; + } + + return ValidIdentifiers.isValid(datatype, datasubtypes, code) != null ? + true : (where == null ? 
false : where.set(datatype, code)); } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java index 4b0e821cac4..929ef67a589 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java @@ -73,7 +73,7 @@ public class TestLocaleValidity extends TestFmwk { {"OK", "en-u-sd-usny"}, {"OK", "en-u-tz-adalv"}, {"OK", "en-u-va-posix"}, - + {"OK", "en-t-d0-accents"}, {"OK", "en-u-em-default"}, {"OK", "en-t-i0-handwrit"}, @@ -100,9 +100,9 @@ public class TestLocaleValidity extends TestFmwk { {"OK", "en-u-sd-uszzzz"}, // really long case - + {"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"}, - + // bad case (for language tag) {"{language, root}", "root"}, @@ -120,16 +120,16 @@ public class TestLocaleValidity extends TestFmwk { {"Incomplete privateuse [at index 0]", "x-abc$defg"}, {"Invalid subtag: $ [at index 3]", "EN-$"}, {"Invalid subtag: $ [at index 0]", "$"}, - + // bad extension - + {"{illegal, q}", "en-q-abcdefg"}, - + {"Incomplete privateuse [at index 3]", "en-x-123456789"}, {"Empty subtag [at index 14]", "en-x-12345678--a"}, // bad subtags - + {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"}, {"{region, AB}", "zh-Hant-AB"}, {"{language, ex}", "ex"}, @@ -137,11 +137,11 @@ public class TestLocaleValidity extends TestFmwk { {"{language, qaa}", "qaa"}, // bad types for keys - + {"{u, ca-chinesx}", "en-u-ca-chinesx"}, {"{script, Latx}", "und-Cyrl-t-und-latx"}, {"{u, sd-usca}", "en-AQ-u-sd-usca"}, - + {"{u, ca-buddhisx}", "en-u-ca-buddhisx"}, {"{u, ca-islamic-umalqurx}", "en-u-ca-islamic-umalqurx"}, // additive {"{u, cf-accounx}", "en-u-cf-accounx"}, @@ -159,16 +159,16 @@ 
public class TestLocaleValidity extends TestFmwk { {"{u, kr-latn}", "en-u-kr-latn-digit-latn"}, // reorder codes, duplicat {"{u, kr-zzzz}", "en-u-kr-latn-others-digit-Zzzz"}, // reorder codes, duplicat {"{u, kr-zsym}", "en-u-kr-Zsym"}, // reorder codes, duplicat - {"{u, kr-qaai}", "en-u-kr-Qaai"}, // reorder codes, duplicat + {"{u, kr-qaai}", "en-u-kr-Qaai"}, // reorder codes, duplicat {"{u, ks-identix}", "en-u-ks-identix"}, {"{u, kv-currencx}", "en-u-kv-currencx"}, {"{u, nu-ahox}", "en-u-nu-ahox"}, {"{u, sd-usnx}", "en-u-sd-usnx"}, {"{u, tz-adalx}", "en-u-tz-adalx"}, {"{u, va-posit}", "en-u-va-posit"}, - + // too many items - + {"{u, cu-usd}", "en-u-cu-adp-usd"}, // use deprecated subtags. testDeprecated checks if they work when Datasubtype.deprecated is added @@ -180,7 +180,7 @@ public class TestLocaleValidity extends TestFmwk { }; final LinkedHashSet foundKeys = new LinkedHashSet(); check(tests, foundKeys, Datasubtype.regular, Datasubtype.unknown); - + LinkedHashSet missing = new LinkedHashSet(KeyTypeData.getBcp47Keys()); missing.removeAll(foundKeys); if (!assertEquals("Missing keys", Collections.EMPTY_SET, missing)) { @@ -243,8 +243,6 @@ public class TestLocaleValidity extends TestFmwk { int count = 0; LocaleValidityChecker localeValidityChecker = new LocaleValidityChecker(datasubtypes); for (String[] test : tests) { - if (test[1].endsWith("-va-posix") && logKnownIssue("12615","Validity check wrong for -va-posix?")) - continue; check(++count, localeValidityChecker, test[0], test[1], keys); } } @@ -279,8 +277,8 @@ public class TestLocaleValidity extends TestFmwk { private void addKeys(ULocale ulocale, Set keys) { for (char cp : ulocale.getExtensionKeys()) { switch (cp) { - case 't': - case 'u': + case 't': + case 'u': String extensionString = ulocale.getExtension(cp); String[] parts = extensionString.split("-"); for (String part : parts) { @@ -321,11 +319,11 @@ public class TestLocaleValidity extends TestFmwk { } private void showValid(Datasubtype expected, 
Datatype datatype, Set datasubtypes, String code) { - Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code); + Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code); assertEquals(datatype + ", " + datasubtypes + ", " + code, expected, value); } private void showValid(Datasubtype expected, Datatype datatype, Set datasubtypes, String code, String code2) { - Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code, code2); + Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code, code2); assertEquals(datatype + ", " + datasubtypes + ", " + code + ", " + code2, expected, value); } }