granicus.if.org Git - icu/commitdiff
ICU-4229 Enhanced the checking further.
author Mark Davis <mark@macchiato.com>
Mon, 14 Dec 2015 16:14:40 +0000 (16:14 +0000)
committer Mark Davis <mark@macchiato.com>
Mon, 14 Dec 2015 16:14:40 +0000 (16:14 +0000)
X-SVN-Rev: 38128

icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java

index 4e9598d0afaec07998c616cac0e33b213ff21b09..b51a48f8ca1fa6344ab1c264c4426da20079cdd0 100644 (file)
@@ -93,7 +93,7 @@ public class LocaleValidityChecker {
                     if (!isValidT(locale.getExtension(c), where)) return false;
                     break;
                 case u:
-                    if (!isValidU(locale.getExtension(c), where)) return false;
+                    if (!isValidU(locale, locale.getExtension(c), where)) return false;
                     break;
                 }
             } catch (Exception e) {
@@ -104,12 +104,14 @@ public class LocaleValidityChecker {
     }
 
     enum SpecialCase {
-        normal, anything, reorder, codepoints;
+        normal, anything, reorder, codepoints, subdivision;
         static SpecialCase get(String key) {
             if (key.equals("kr")) {
                 return SpecialCase.reorder;
             } else if (key.equals("vt")) {
                 return SpecialCase.codepoints;
+            } else if (key.equals("sd")) {
+                return subdivision;
             } else if (key.equals("x0")) {
                 return anything;
             } else {
@@ -118,15 +120,17 @@ public class LocaleValidityChecker {
         }
     }
     /**
+     * @param locale 
      * @param extension
      * @param where
      * @return
      */
-    private boolean isValidU(String extensionString, Where where) {
+    private boolean isValidU(ULocale locale, String extensionString, Where where) {
         String key = "";
         int typeCount = 0;
         ValueType valueType = null;
         SpecialCase specialCase = null;
+        StringBuilder prefix = new StringBuilder();
         // TODO: is empty -u- valid?
         for (String subtag : SEPARATOR.split(extensionString)) {
             if (subtag.length() == 2) {
@@ -142,8 +146,20 @@ public class LocaleValidityChecker {
                 typeCount = 0;
             } else {
                 ++typeCount;
-                if (valueType == ValueType.single && typeCount > 1) {
-                    return where.set(Datatype.u, key+"-"+subtag);
+                switch (valueType) {
+                case single: 
+                    if (typeCount > 1) {
+                        return where.set(Datatype.u, key+"-"+subtag);
+                    }
+                    break;
+                case incremental:
+                    if (typeCount == 1) {
+                        prefix.setLength(0);
+                        prefix.append(subtag);
+                    } else {
+                        prefix.append('-').append(subtag);
+                        subtag = prefix.toString();
+                    }
                 }
                 switch (specialCase) {
                 case anything: 
@@ -162,8 +178,13 @@ public class LocaleValidityChecker {
                         return where.set(Datatype.u, key+"-"+subtag);
                     }
                     continue;
+                case subdivision:
+                    if (!isSubdivision(locale, subtag)) {
+                        return where.set(Datatype.u, key+"-"+subtag);
+                    }
+                    continue;
                 }
-                
+
                 // en-u-sd-usca
                 // en-US-u-sd-usca
                 Output<Boolean> isKnownKey = new Output<Boolean>();
@@ -180,6 +201,33 @@ public class LocaleValidityChecker {
         return true;
     }
 
+    /** Checks that a -u-sd- subdivision value (e.g. "usca") is a known subdivision and that its region prefix matches the locale's region.
+     * @param locale locale being validated; its country is used, or the country of ULocale.addLikelySubtags(locale) when the locale has none
+     * @param subtag subdivision value: a region prefix (3 chars if the first char is '9' or lower in char order, else 2) followed by the subdivision code
+     * @return true only if subtag has at least 3 chars, ValidIdentifiers.isValid returns non-null for it, and its region prefix equals the locale's region ignoring case
+     */
+    private boolean isSubdivision(ULocale locale, String subtag) {
+        // First check if the subtag is valid on its own: long enough to split, then looked up as (region, subdivision) under the checker's datasubtypes
+        if (subtag.length() < 3) {
+            return false;
+        }
+        String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2);
+        String subdivision = subtag.substring(region.length());
+        if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) {
+            return false;
+        }
+        // Then check for consistency with the locale's region; a locale with no explicit region falls back to its likely-subtags region
+        String localeRegion = locale.getCountry();
+        if (localeRegion.isEmpty()) {
+            ULocale max = ULocale.addLikelySubtags(locale);
+            localeRegion = max.getCountry();
+        }
+        if (!region.equalsIgnoreCase(localeRegion)) {
+            return false;
+        }
+        return true;
+    }
+
     static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others"));
     static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
     /**
@@ -194,12 +242,12 @@ public class LocaleValidityChecker {
             return false;
         }
         return ValidIdentifiers.isValid(Datatype.script, datasubtypes, subtag) != null;
-//        space, punct, symbol, currency, digit - core groups of characters below 'a'
-//        any script code except Common and Inherited.
-//      sc ; Zinh                             ; Inherited                        ; Qaai
-//      sc ; Zyyy                             ; Common
-//        Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are are always reordered with Hiragana.
-//        others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others.        return false;
+        //        space, punct, symbol, currency, digit - core groups of characters below 'a'
+        //        any script code except Common and Inherited.
+        //      sc ; Zinh                             ; Inherited                        ; Qaai
+        //      sc ; Zyyy                             ; Common
+        //        Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are always reordered with Hiragana.
+        //        others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others.        return false;
     }
 
     /**
@@ -235,14 +283,14 @@ public class LocaleValidityChecker {
     }
 
     public enum ValueType {
-        single, multiple, specific;
+        single, multiple, incremental;
         private static Set<String> multipleValueTypes = new HashSet<String>(Arrays.asList("x0", "kr", "vt"));
         private static Set<String> specificValueTypes = new HashSet<String>(Arrays.asList("ca"));
         static ValueType get(String key) {
             if (multipleValueTypes.contains(key)) {
                 return multiple;
             } else if (specificValueTypes.contains(key)) {
-                return specific;
+                return incremental;
             } else {
                 return single;
             }
index 1f2803d181a5d1dee36e4286b05c896baab8f6a7..b7ba2b52f7d14a5c7f3c5a96764916ac4772d674 100644 (file)
@@ -34,15 +34,11 @@ public class TestLocaleValidity extends TestFmwk {
 
     public void testBasic() {
         String[][] tests = {
-                {"OK", "en-u-kr-latn-digit"},
-                {"Incomplete extension 'u' [at index 3]", "en-u"},
-                {"Incomplete extension 't' [at index 3]", "en-t"},
                 {"OK", "en-u-ca-chinese"},
                 {"OK", "en-x-abcdefg"},
                 {"OK", "x-abcdefg"},
                 {"OK", "en-u-sd-usca"},
                 {"OK", "en-US-u-sd-usca"},
-                {"OK", "en-AQ-u-sd-usca"},
                 {"OK", "en-t-it"},
                 {"OK", "und-Cyrl-t-und-latn"},
                 {"OK", "root"},
@@ -53,22 +49,9 @@ public class TestLocaleValidity extends TestFmwk {
                 {"OK", "zh-Hant"},
                 {"OK", "zh-Hant-AQ"},
                 {"OK", "x-abcdefg-g-foobar"},
-                {"Empty subtag [at index 0]", ""},
-                {"{u, ca-chinesx}", "en-u-ca-chinesx"},
-                {"{illegal, q}", "en-q-abcdefg"},
-                {"Incomplete privateuse [at index 0]", "x-abc$defg"},
-                {"{script, Latx}", "und-Cyrl-t-und-latx"},
-                {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
-                {"{region, AB}", "zh-Hant-AB"},
-                {"{language, ex}", "ex"},
-                {"{script, Hanx}", "zh-Hanx"},
-                {"{language, qaa}", "qaa"},
-                {"Invalid subtag: $ [at index 3]", "EN-$"},
-                {"Invalid subtag: $ [at index 0]", "$"},
-                // too many items
-                {"{u, cu-usd}", "en-u-cu-adp-usd"},
 
                 {"OK", "en-u-ca-buddhist"},
+                {"OK", "en-u-ca-islamic-umalqura"}, // additive
                 {"OK", "en-u-cf-account"},
                 {"OK", "en-u-co-big5han"},
                 {"OK", "en-u-cu-adp"},
@@ -80,17 +63,80 @@ public class TestLocaleValidity extends TestFmwk {
                 {"OK", "en-u-kf-false"},
                 {"OK", "en-u-kk-false"},
                 {"OK", "en-u-kn-false"},
-                {"OK", "en-u-kr-latn-digit-symbol"},
+                {"OK", "en-u-kr-latn-digit-symbol"}, // reorder codes, multiple
                 {"OK", "en-u-ks-identic"},
                 {"OK", "en-u-kv-currency"},
                 {"OK", "en-u-nu-ahom"},
                 {"OK", "en-u-sd-usny"},
                 {"OK", "en-u-tz-adalv"},
                 {"OK", "en-u-va-posix"},
-                {"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated
+                
+                // really long case
+                
+                {"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"},
+                
+                // deprecated, but turned into valid by ULocale.Builder()
+                {"OK", "en-u-ca-islamicc"}, // deprecated
+                {"OK", "en-u-tz-aqams"}, // deprecated
+
+                // Bad syntax (caught by ULocale.Builder())
+                
+                {"Incomplete extension 'u' [at index 3]", "en-u"},
+                {"Incomplete extension 't' [at index 3]", "en-t"},
+                {"Empty subtag [at index 0]", ""},
+                {"Incomplete privateuse [at index 0]", "x-abc$defg"},
+                {"Invalid subtag: $ [at index 3]", "EN-$"},
+                {"Invalid subtag: $ [at index 0]", "$"},
+                
+                // bad extension
+                
+                {"{illegal, q}", "en-q-abcdefg"},
+
+                // bad subtags
+                
+                {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
+                {"{region, AB}", "zh-Hant-AB"},
+                {"{language, ex}", "ex"},
+                {"{script, Hanx}", "zh-Hanx"},
+                {"{language, qaa}", "qaa"},
+
+                // bad types for keys
+                
+                {"{u, ca-chinesx}", "en-u-ca-chinesx"},
+                {"{script, Latx}", "und-Cyrl-t-und-latx"},
+                {"{u, sd-usca}", "en-AQ-u-sd-usca"},
+                
+                {"{u, ca-buddhisx}", "en-u-ca-buddhisx"},
+                {"{u, ca-islamic-umalqurx}", "en-u-ca-islamic-umalqurx"}, // additive
+                {"{u, cf-accounx}", "en-u-cf-accounx"},
+                {"{u, co-big5hax}", "en-u-co-big5hax"},
+                {"{u, cu-adx}", "en-u-cu-adx"},
+                {"{u, fw-frx}", "en-u-fw-frx"},
+                {"{u, hc-h1x}", "en-u-hc-h1x"},
+                {"{u, ka-noignorx}", "en-u-ka-noignorx"},
+                {"{u, kb-falsx}", "en-u-kb-falsx"},
+                {"{u, kc-falsx}", "en-u-kc-falsx"},
+                {"{u, kf-falsx}", "en-u-kf-falsx"},
+                {"{u, kk-falsx}", "en-u-kk-falsx"},
+                {"{u, kn-falsx}", "en-u-kn-falsx"},
+                {"{u, kr-symbox}", "en-u-kr-latn-digit-symbox"}, // reorder codes, multiple
+                {"{u, ks-identix}", "en-u-ks-identix"},
+                {"{u, kv-currencx}", "en-u-kv-currencx"},
+                {"{u, nu-ahox}", "en-u-nu-ahox"},
+                {"{u, sd-usnx}", "en-u-sd-usnx"},
+                {"{u, tz-adalx}", "en-u-tz-adalx"},
+                {"{u, va-posit}", "en-u-va-posit"},
+
+                
+                // too many items
+                
+                {"{u, cu-usd}", "en-u-cu-adp-usd"},
+
+                // use deprecated subtags. testDeprecated checks if they work when Datasubtype.deprecated is added
+                //{"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated, but turns into valid
                 {"{u, co-direct}", "en-u-co-direct"}, // deprecated
                 {"{u, kh}", "en-u-kh-false"}, // deprecated
-                {"{u, tz-aqams}", "en-u-tz-aqams"}, // deprecated
+                {"{u, tz-camtr}", "en-u-tz-camtr"}, // deprecated
                 {"{u, vt}", "en-u-vt-0020-0041"}, // deprecated
         };
         check(tests, Datasubtype.regular, Datasubtype.unknown);
@@ -120,12 +166,10 @@ public class TestLocaleValidity extends TestFmwk {
     }
 
     public void testDeprecated() {
-        LocaleValidityChecker regularAndDeprecated = new LocaleValidityChecker(EnumSet.of(Datasubtype.regular, Datasubtype.deprecated));
         String[][] tests = {
-                {"OK", "en-u-ca-islamicc"}, // deprecated
                 {"OK", "en-u-co-direct"}, // deprecated
                 {"OK", "en-u-kh-false"}, // deprecated
-                {"OK", "en-u-tz-aqams"}, // deprecated
+                {"OK", "en-u-tz-camtr"}, // deprecated
                 {"OK", "en-u-vt-0020"}, // deprecated
         };
         check(tests, Datasubtype.regular, Datasubtype.unknown, Datasubtype.deprecated);
@@ -133,9 +177,9 @@ public class TestLocaleValidity extends TestFmwk {
 
     private void check(String[][] tests, Datasubtype... datasubtypes) {
         int count = 0;
-        LocaleValidityChecker regularAndUnknown = new LocaleValidityChecker(datasubtypes);
+        LocaleValidityChecker localeValidityChecker = new LocaleValidityChecker(datasubtypes);
         for (String[] test : tests) {
-            check(++count, regularAndUnknown, test[0], test[1]);
+            check(++count, localeValidityChecker, test[0], test[1]);
         }
     }