granicus.if.org Git - icu/commitdiff
ICU-4229 Enhanced the checking further.
author Mark Davis <mark@macchiato.com>
Sun, 13 Dec 2015 21:37:37 +0000 (21:37 +0000)
committer Mark Davis <mark@macchiato.com>
Sun, 13 Dec 2015 21:37:37 +0000 (21:37 +0000)
X-SVN-Rev: 38125

icu4j/main/classes/core/src/com/ibm/icu/impl/ValidIdentifiers.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java [new file with mode: 0644]
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java [new file with mode: 0644]

index deb75dd1b34f0d2da2749cc553c7a8f215be0776..f66ab6fee71d12ee0edf6b1c24a08609c07c06d4 100644 (file)
@@ -8,13 +8,13 @@ package com.ibm.icu.impl;
 
 import java.util.Collections;
 import java.util.EnumMap;
-import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 
+import com.ibm.icu.impl.locale.AsciiUtil;
 import com.ibm.icu.util.UResourceBundle;
 import com.ibm.icu.util.UResourceBundleIterator;
 
@@ -32,6 +32,10 @@ public class ValidIdentifiers {
         subdivision,
         unit,
         variant,
+        u,
+        t,
+        x,
+        illegal
     }
 
     public enum Datasubtype {
@@ -43,7 +47,7 @@ public class ValidIdentifiers {
         macroregion,
     }
 
-    static class ValiditySet {
+    public static class ValiditySet {
         public final Set<String> regularData;
         public final Map<String,Set<String>> subdivisionData;
         public ValiditySet(Set<String> plainData, boolean makeMap) {
@@ -105,7 +109,7 @@ public class ValidIdentifiers {
         }
     }
 
-    static class ValidityData {
+    private static class ValidityData {
         static final Map<Datatype,Map<Datasubtype,ValiditySet>> data;
         static {
             Map<Datatype, Map<Datasubtype, ValiditySet>> _data = new EnumMap<Datatype,Map<Datasubtype,ValiditySet>>(Datatype.class);
@@ -141,6 +145,7 @@ public class ValidIdentifiers {
             data = Collections.unmodifiableMap(_data);
         }
         private static void addRange(String string, Set<String> subvalues) {
+            string = AsciiUtil.toLowerString(string);
             int pos = string.indexOf('~');
             if (pos < 0) {
                 subvalues.add(string);
@@ -148,84 +153,44 @@ public class ValidIdentifiers {
                 StringRange.expand(string.substring(0,pos), string.substring(pos+1), false, subvalues);
             }
         }
-        static Map<Datatype, Map<Datasubtype, ValiditySet>> getData() {
-            return data;
-        }
+    }
+    
+    public static Map<Datatype, Map<Datasubtype, ValiditySet>> getData() {
+        return ValidityData.data;
+    }
 
-        /**
-         * Returns the Datasubtype containing the code, or null if there is none.
-         * @param datatype
-         * @param datasubtypes
-         * @param code
-         * @return
-         */
-        static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code) {
-            Map<Datasubtype, ValiditySet> subtable = data.get(datatype);
-            if (subtable != null) {
-                for (Datasubtype datasubtype : datasubtypes) {
-                    ValiditySet validitySet = subtable.get(datasubtype);
-                    if (validitySet != null) {
-                        if (validitySet.contains(code)) {
-                            return datasubtype;
-                        }
-                    }
-                }
-            }
-            return null;
-        }
-        
-        static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code, String value) {
-            Map<Datasubtype, ValiditySet> subtable = data.get(datatype);
-            if (subtable != null) {
-                for (Datasubtype datasubtype : datasubtypes) {
-                    ValiditySet validitySet = subtable.get(datasubtype);
-                    if (validitySet != null) {
-                        if (validitySet.contains(code, value)) {
-                            return datasubtype;
-                        }
+    /**
+     * Returns the Datasubtype containing the code, or null if there is none.
+     */
+    public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code) {
+        Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype);
+        if (subtable != null) {
+            for (Datasubtype datasubtype : datasubtypes) {
+                ValiditySet validitySet = subtable.get(datasubtype);
+                if (validitySet != null) {
+                    if (validitySet.contains(AsciiUtil.toLowerString(code))) {
+                        return datasubtype;
                     }
                 }
             }
-            return null;
         }
-
+        return null;
     }
-
-    // Quick testing for now
     
-    public static void main(String[] args) {
-        showValid(Datatype.script, EnumSet.of(Datasubtype.regular, Datasubtype.unknown), "Zzzz");
-        showValid(Datatype.script, EnumSet.of(Datasubtype.regular), "Zzzz");
-        showValid(Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US-CA");
-        showValid(Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US", "CA");
-        showValid(Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US-?");
-        showValid(Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US", "?");
-        showAll();
-    }
-
-    private static void showAll() {
-        Map<Datatype, Map<Datasubtype, ValiditySet>> data = ValidityData.getData();
-        for (Entry<Datatype, Map<Datasubtype, ValiditySet>> e1 : data.entrySet()) {
-            System.out.println(e1.getKey());
-            for (Entry<Datasubtype, ValiditySet> e2 : e1.getValue().entrySet()) {
-                System.out.println("\t" + e2.getKey());
-                System.out.println("\t\t" + e2.getValue());
+    public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code, String value) {
+        Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype);
+        if (subtable != null) {
+            code = AsciiUtil.toLowerString(code);
+            value = AsciiUtil.toLowerString(value);
+            for (Datasubtype datasubtype : datasubtypes) {
+                ValiditySet validitySet = subtable.get(datasubtype);
+                if (validitySet != null) {
+                    if (validitySet.contains(code, value)) {
+                        return datasubtype;
+                    }
+                }
             }
         }
+        return null;
     }
-
-    /**
-     * @param script
-     * @param of
-     * @param string
-     */
-    private static void showValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code) {
-        Datasubtype value = ValidityData.isValid(datatype, datasubtypes, code);   
-        System.out.println(datatype + ", " + datasubtypes + ", " + code + " => " + value);
-    }
-    private static void showValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code, String value2) {
-        Datasubtype value = ValidityData.isValid(datatype, datasubtypes, code, value2);   
-        System.out.println(datatype + ", " + datasubtypes + ", " + code + ", " + value + " => " + value);
-    }
-
 }
index 714e822b942e4cd2d2e60c4bc5fa72b594908de4..0ff0b3454d0d7749546cfe76f9e1b3b9a3d09102 100644 (file)
@@ -1,11 +1,13 @@
 /*
  *******************************************************************************
- * Copyright (C) 2014, International Business Machines Corporation and
+ * Copyright (C) 2014-2015, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */
 package com.ibm.icu.impl.locale;
 
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -88,6 +90,15 @@ public class KeyTypeData {
         return null;
     }
 
+//    public static boolean isValid(String key, String type) {
+//        key = AsciiUtil.toLowerString(key);
+//        KeyData keyData = KEYMAP.get(key);
+//        if (keyData != null) {
+//            return keyData.bcpId;
+//        }
+//        return false;
+//    }
+
     public static String toLegacyKey(String key) {
         key = AsciiUtil.toLowerString(key);
         KeyData keyData = KEYMAP.get(key);
@@ -539,4 +550,22 @@ public class KeyTypeData {
         initFromResourceBundle();
     }
 
+    public static boolean isDeprecated(String key) {
+        return DEPRECATED_HACK_SET.contains(key);
+    }
+    
+    public static boolean isDeprecated(String key, String type) {
+        Set<String> set = DEPRECATED_HACK.get(key);
+        return set != null && set.contains(type);
+    }
+
+    // Until LDML2ICU is updated
+    static Map<String,Set<String>> DEPRECATED_HACK = new HashMap<String,Set<String>>();
+    static Set<String> DEPRECATED_HACK_SET = new HashSet<String>();
+    static {
+        DEPRECATED_HACK.put("ca", Collections.singleton("islamicc"));
+        DEPRECATED_HACK.put("co", Collections.singleton("direct"));
+        DEPRECATED_HACK.put("tz", new HashSet<String>(Arrays.asList("aqams", "camtr", "cnckg", "cnhrb", "cnkhg", "usnavajo")));
+        DEPRECATED_HACK_SET.addAll(Arrays.asList("kh", "vt"));
+    };
 }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleValidityChecker.java
new file mode 100644 (file)
index 0000000..4e9598d
--- /dev/null
@@ -0,0 +1,261 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.impl.locale;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.impl.ValidIdentifiers;
+import com.ibm.icu.impl.ValidIdentifiers.Datasubtype;
+import com.ibm.icu.impl.ValidIdentifiers.Datatype;
+import com.ibm.icu.util.IllformedLocaleException;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Checks whether the language, script, region, variants and extensions of a ULocale are valid.
+ * @author markdavis
+ */
+public class LocaleValidityChecker {
+    private final Set<Datasubtype> datasubtypes;
+    private final boolean allowsDeprecated;
+    public static class Where {
+        public Datatype fieldFailure;
+        public String codeFailure;
+
+        public boolean set(Datatype datatype, String code) {
+            fieldFailure = datatype;
+            codeFailure = code;
+            return false;
+        }
+        @Override
+        public String toString() {
+            return fieldFailure == null ? "OK" : "{" + fieldFailure + ", " + codeFailure + "}";
+        }
+    }
+
+    public LocaleValidityChecker(Set<Datasubtype> datasubtypes) {
+        this.datasubtypes = EnumSet.copyOf(datasubtypes);
+        allowsDeprecated = datasubtypes.contains(Datasubtype.deprecated);
+    }
+
+    public LocaleValidityChecker(Datasubtype... datasubtypes) {
+        this.datasubtypes = EnumSet.copyOf(Arrays.asList(datasubtypes));
+        allowsDeprecated = this.datasubtypes.contains(Datasubtype.deprecated);
+    }
+
+    /**
+     * @return a copy of the datasubtypes this checker was constructed with
+     */
+    public Set<Datasubtype> getDatasubtypes() {
+        return EnumSet.copyOf(datasubtypes);
+    }
+
+    static Pattern SEPARATOR = Pattern.compile("[-_]");
+
+    public boolean isValid(ULocale locale, Where where) {
+        where.set(null, null);
+        if (!isValid(Datatype.language, locale.getLanguage(), where)) {
+            // special case x
+            if (locale.getLanguage().equals("x")) {
+                where.set(null, null);
+                // TODO check syntax is ok, only alphanum{1,8}
+                return true;
+            }
+            return false;
+        }
+        if (!isValid(Datatype.script, locale.getScript(), where)) return false;
+        if (!isValid(Datatype.region, locale.getCountry(), where)) return false;
+        String variantString = locale.getVariant();
+        if (!variantString.isEmpty()) {
+            for (String variant : SEPARATOR.split(variantString)) {
+                if (!isValid(Datatype.variant, variant, where)) return false;
+            }
+        }
+        for (Character c : locale.getExtensionKeys()) {
+            try {
+                Datatype datatype = Datatype.valueOf(c+"");
+                switch (datatype) {
+                case x:
+                    // TODO : check that the rest is syntactic
+                    return true;
+                case t:
+                    if (!isValidT(locale.getExtension(c), where)) return false;
+                    break;
+                case u:
+                    if (!isValidU(locale.getExtension(c), where)) return false;
+                    break;
+                }
+            } catch (Exception e) {
+                return where.set(Datatype.illegal, c+"");
+            }
+        }
+        return true;
+    }
+
+    enum SpecialCase {
+        normal, anything, reorder, codepoints;
+        static SpecialCase get(String key) {
+            if (key.equals("kr")) {
+                return SpecialCase.reorder;
+            } else if (key.equals("vt")) {
+                return SpecialCase.codepoints;
+            } else if (key.equals("x0")) {
+                return anything;
+            } else {
+                return normal;
+            }
+        }
+    }
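+    /**
+     * @param extensionString the -u- extension subtags, separated by '-' or '_'; 2-letter subtags are keys
+     * @param where receives Datatype.u and the failing key or key-type when a subtag is rejected
+     * @return true if no subtag is rejected, otherwise false
+     */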
+    private boolean isValidU(String extensionString, Where where) {
+        String key = "";
+        int typeCount = 0;
+        ValueType valueType = null;
+        SpecialCase specialCase = null;
+        // TODO: is empty -u- valid?
+        for (String subtag : SEPARATOR.split(extensionString)) {
+            if (subtag.length() == 2) {
+                key = KeyTypeData.toBcpKey(subtag);
+                if (key == null) {
+                    return where.set(Datatype.u, subtag);
+                }
+                if (!allowsDeprecated && KeyTypeData.isDeprecated(key)) {
+                    return where.set(Datatype.u, key);
+                }
+                valueType = ValueType.get(key);
+                specialCase = SpecialCase.get(key);
+                typeCount = 0;
+            } else {
+                ++typeCount;
+                if (valueType == ValueType.single && typeCount > 1) {
+                    return where.set(Datatype.u, key+"-"+subtag);
+                }
+                switch (specialCase) {
+                case anything: 
+                    continue;
+                case codepoints: 
+                    try {
+                        if (Integer.parseInt(subtag,16) > 0x10FFFF) {
+                            return where.set(Datatype.u, key+"-"+subtag);
+                        }
+                    } catch (NumberFormatException e) {
+                        return where.set(Datatype.u, key+"-"+subtag);
+                    }
+                    continue;
+                case reorder:
+                    if (!isScriptReorder(subtag)) {
+                        return where.set(Datatype.u, key+"-"+subtag);
+                    }
+                    continue;
+                }
+                
+                // en-u-sd-usca
+                // en-US-u-sd-usca
+                Output<Boolean> isKnownKey = new Output<Boolean>();
+                Output<Boolean> isSpecialType = new Output<Boolean>();
+                String type = KeyTypeData.toBcpType(key, subtag, isKnownKey, isSpecialType);
+                if (type == null) {
+                    return where.set(Datatype.u, key+"-"+subtag);
+                }
+                if (!allowsDeprecated && KeyTypeData.isDeprecated(key, subtag)) {
+                    return where.set(Datatype.u, key+"-"+subtag);
+                }
+            }
+        }
+        return true;
+    }
+
+    static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others"));
+    static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
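+    /**
+     * @param subtag a reorder code; it is ASCII-lowercased before being compared
+     * @return true if subtag is in REORDERING_INCLUDE, or is a valid script other than zinh and zyyy
+     */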
+    private boolean isScriptReorder(String subtag) {
+        subtag = AsciiUtil.toLowerString(subtag);
+        if (REORDERING_INCLUDE.contains(subtag)) {
+            return true;
+        } else if (REORDERING_EXCLUDE.contains(subtag)) {
+            return false;
+        }
+        return ValidIdentifiers.isValid(Datatype.script, datasubtypes, subtag) != null;
+//        space, punct, symbol, currency, digit - core groups of characters below 'a'
+//        any script code except Common and Inherited.
+//      sc ; Zinh                             ; Inherited                        ; Qaai
+//      sc ; Zyyy                             ; Common
+//        Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are always reordered with Hiragana.
+//        others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others.        return false;
+    }
+
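+    /**
+     * @param extensionString the -t- extension content, parsed here as a language tag
+     * @param where receives the failure; Datatype.t is used when an exception is caught
+     * @return the result of validating the parsed locale, or false if an exception is caught
+     */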
+    private boolean isValidT(String extensionString, Where where) {
+        // TODO: is empty -t- valid?
+        // TODO stop at first tag ([a-z][0-9]) and check their validity separately
+        try {
+            ULocale locale = new ULocale.Builder().setLanguageTag(extensionString).build();
+            return isValid(locale, where);
+        } catch (IllformedLocaleException e) {
+            int startIndex = e.getErrorIndex();
+            String[] list = SEPARATOR.split(extensionString.substring(startIndex));
+            return where.set(Datatype.t, list[0]);
+        } catch (Exception e) {
+            return where.set(Datatype.t, e.getMessage());
+        }
+    }
+
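+    /**
+     * @param datatype the field being checked
+     * @param code the field's value; empty values, and "root" as a language, are always accepted
+     * @return true if accepted; otherwise false, after recording the failure in where (if non-null)
+     */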
+    private boolean isValid(Datatype datatype, String code, Where where) {
+        return datatype == Datatype.language && code.equalsIgnoreCase("root") ? true
+                : code.isEmpty() ? true
+                        : ValidIdentifiers.isValid(datatype, datasubtypes, code) != null ? true 
+                                : where == null ? false : where.set(datatype, code);
+    }
+
+    public enum ValueType {
+        single, multiple, specific;
+        private static Set<String> multipleValueTypes = new HashSet<String>(Arrays.asList("x0", "kr", "vt"));
+        private static Set<String> specificValueTypes = new HashSet<String>(Arrays.asList("ca"));
+        static ValueType get(String key) {
+            if (multipleValueTypes.contains(key)) {
+                return multiple;
+            } else if (specificValueTypes.contains(key)) {
+                return specific;
+            } else {
+                return single;
+            }
+        }
+    }
+    /*
+Type: any multiple
+{"OK", "en-t-x0-SPECIAL"}
+{"OK", "en-u-kr-REORDER_CODE"}, // Collation reorder codes; One or more collation reorder codes, see LDML Part 5: Collation
+{"OK", "en-u-vt-CODEPOINTS"}, // deprecated Collation parameter key for variable top; The variable top (one or more Unicode code points: LDML Appendix Q)
+
+Multiple-values, specific sequences
+<type name="islamic-umalqura" description="Islamic calendar, Umm al-Qura" since="24"/>
+     */
+
+}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java
new file mode 100644 (file)
index 0000000..1f2803d
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2015, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import java.util.EnumSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.ValidIdentifiers;
+import com.ibm.icu.impl.ValidIdentifiers.Datasubtype;
+import com.ibm.icu.impl.ValidIdentifiers.Datatype;
+import com.ibm.icu.impl.ValidIdentifiers.ValiditySet;
+import com.ibm.icu.impl.locale.LocaleValidityChecker;
+import com.ibm.icu.impl.locale.LocaleValidityChecker.Where;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Tests LocaleValidityChecker and the ValidIdentifiers lookup methods.
+ * @author markdavis
+ */
+public class TestLocaleValidity extends TestFmwk {
+    /**
+     * Quick check
+     */
+    public static void main(String[] args) {
+        new TestLocaleValidity().run(args);
+    }
+
+    public void testBasic() {
+        String[][] tests = {
+                {"OK", "en-u-kr-latn-digit"},
+                {"Incomplete extension 'u' [at index 3]", "en-u"},
+                {"Incomplete extension 't' [at index 3]", "en-t"},
+                {"OK", "en-u-ca-chinese"},
+                {"OK", "en-x-abcdefg"},
+                {"OK", "x-abcdefg"},
+                {"OK", "en-u-sd-usca"},
+                {"OK", "en-US-u-sd-usca"},
+                {"OK", "en-AQ-u-sd-usca"},
+                {"OK", "en-t-it"},
+                {"OK", "und-Cyrl-t-und-latn"},
+                {"OK", "root"},
+                {"OK", "und"},
+                {"OK", "en"},
+                {"OK", "en-Hant"},
+                {"OK", "zh-Hant-1606nict-1694acad"},
+                {"OK", "zh-Hant"},
+                {"OK", "zh-Hant-AQ"},
+                {"OK", "x-abcdefg-g-foobar"},
+                {"Empty subtag [at index 0]", ""},
+                {"{u, ca-chinesx}", "en-u-ca-chinesx"},
+                {"{illegal, q}", "en-q-abcdefg"},
+                {"Incomplete privateuse [at index 0]", "x-abc$defg"},
+                {"{script, Latx}", "und-Cyrl-t-und-latx"},
+                {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
+                {"{region, AB}", "zh-Hant-AB"},
+                {"{language, ex}", "ex"},
+                {"{script, Hanx}", "zh-Hanx"},
+                {"{language, qaa}", "qaa"},
+                {"Invalid subtag: $ [at index 3]", "EN-$"},
+                {"Invalid subtag: $ [at index 0]", "$"},
+                // too many items
+                {"{u, cu-usd}", "en-u-cu-adp-usd"},
+
+                {"OK", "en-u-ca-buddhist"},
+                {"OK", "en-u-cf-account"},
+                {"OK", "en-u-co-big5han"},
+                {"OK", "en-u-cu-adp"},
+                {"OK", "en-u-fw-fri"},
+                {"OK", "en-u-hc-h11"},
+                {"OK", "en-u-ka-noignore"},
+                {"OK", "en-u-kb-false"},
+                {"OK", "en-u-kc-false"},
+                {"OK", "en-u-kf-false"},
+                {"OK", "en-u-kk-false"},
+                {"OK", "en-u-kn-false"},
+                {"OK", "en-u-kr-latn-digit-symbol"},
+                {"OK", "en-u-ks-identic"},
+                {"OK", "en-u-kv-currency"},
+                {"OK", "en-u-nu-ahom"},
+                {"OK", "en-u-sd-usny"},
+                {"OK", "en-u-tz-adalv"},
+                {"OK", "en-u-va-posix"},
+                {"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated
+                {"{u, co-direct}", "en-u-co-direct"}, // deprecated
+                {"{u, kh}", "en-u-kh-false"}, // deprecated
+                {"{u, tz-aqams}", "en-u-tz-aqams"}, // deprecated
+                {"{u, vt}", "en-u-vt-0020-0041"}, // deprecated
+        };
+        check(tests, Datasubtype.regular, Datasubtype.unknown);
+    }
+
+    public void testMissing() {
+        String[][] tests = {
+                {"OK", "en-u-lb-loose"},
+                {"OK", "en-u-lw-breakall"},
+                {"OK", "en-u-ms-metric"},
+                {"OK", "en-u-ss-none"},
+        };
+        check(tests, Datasubtype.regular, Datasubtype.unknown);
+    }
+
+    public void testTSubtags() {
+        String[][] tests = {
+                //                {"OK", "und-Cyrl-t-und-latn-m0-ungegn-2007"},
+                //                {"{t, ungegg}", "und-Cyrl-t-und-latn-m0-ungegg-2007"},
+                //                {"OK", "en-t-i0-handwrit"},
+                //                {"OK", "en-t-k0-101key"},
+                //                {"OK", "en-t-m0-alaloc"},
+                //                {"OK", "en-t-t0-und"},
+                //                {"OK", "en-t-x0-anythin"},
+        };
+        check(tests, Datasubtype.regular, Datasubtype.unknown);
+    }
+
+    public void testDeprecated() {
+        LocaleValidityChecker regularAndDeprecated = new LocaleValidityChecker(EnumSet.of(Datasubtype.regular, Datasubtype.deprecated));
+        String[][] tests = {
+                {"OK", "en-u-ca-islamicc"}, // deprecated
+                {"OK", "en-u-co-direct"}, // deprecated
+                {"OK", "en-u-kh-false"}, // deprecated
+                {"OK", "en-u-tz-aqams"}, // deprecated
+                {"OK", "en-u-vt-0020"}, // deprecated
+        };
+        check(tests, Datasubtype.regular, Datasubtype.unknown, Datasubtype.deprecated);
+    }
+
+    private void check(String[][] tests, Datasubtype... datasubtypes) {
+        int count = 0;
+        LocaleValidityChecker regularAndUnknown = new LocaleValidityChecker(datasubtypes);
+        for (String[] test : tests) {
+            check(++count, regularAndUnknown, test[0], test[1]);
+        }
+    }
+
+    private void check(int count, LocaleValidityChecker all, String expected, String locale) {
+        ULocale ulocale;
+        try {
+            ulocale = new ULocale.Builder().setLanguageTag(locale).build();
+        } catch (Exception e) {
+            assertEquals(count + ". " + locale, expected, e.getMessage());
+            return;
+        }
+        Where where = new Where();
+        all.isValid(ulocale, where);
+        assertEquals(count + ". " + locale, expected, where.toString());
+
+        //        ULocale ulocale2 = ULocale.forLanguageTag(locale);
+        //        final String languageTag2 = ulocale2.toLanguageTag();
+        //
+        //        if (languageTag.equals(languageTag2)) {
+        //            return;
+        //        }
+        //        all.isValid(ulocale2, where);
+        //        assertEquals(ulocale2 + ", " + ulocale2.toLanguageTag(), expected, where.toString());
+
+        // problem: ULocale("$").toLanguageTag() becomes valid
+    }
+
+
+    // Quick testing for now
+
+    public void testValidIdentifierData() {
+        showValid(Datasubtype.unknown, Datatype.script, EnumSet.of(Datasubtype.regular, Datasubtype.unknown), "Zzzz");
+        showValid(null, Datatype.script, EnumSet.of(Datasubtype.regular), "Zzzz");
+        showValid(Datasubtype.regular, Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US-CA");
+        showValid(Datasubtype.regular, Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US", "CA");
+        showValid(null, Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US-?");
+        showValid(null, Datatype.subdivision, EnumSet.of(Datasubtype.regular), "US", "?");
+        if (isVerbose()) {
+            showAll();
+        }
+    }
+
+    private static void showAll() {
+        Map<Datatype, Map<Datasubtype, ValiditySet>> data = ValidIdentifiers.getData();
+        for (Entry<Datatype, Map<Datasubtype, ValiditySet>> e1 : data.entrySet()) {
+            System.out.println(e1.getKey());
+            for (Entry<Datasubtype, ValiditySet> e2 : e1.getValue().entrySet()) {
+                System.out.println("\t" + e2.getKey());
+                System.out.println("\t\t" + e2.getValue());
+            }
+        }
+    }
+
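+    /**
+     * @param expected the Datasubtype that isValid should return, or null if the code should be rejected
+     * @param datatype the kind of identifier to look up
+     * @param datasubtypes the subtypes to search
+     * @param code the identifier to check
+     */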
+    private void showValid(Datasubtype expected, Datatype datatype, Set<Datasubtype> datasubtypes, String code) {
+        Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code);   
+        assertEquals(datatype + ", " + datasubtypes + ", " + code, expected, value);
+    }
+    private void showValid(Datasubtype expected, Datatype datatype, Set<Datasubtype> datasubtypes, String code, String code2) {
+        Datasubtype value = ValidIdentifiers.isValid(datatype, datasubtypes, code, code2);   
+        assertEquals(datatype + ", " + datasubtypes + ", " + code + ", " + code2, expected, value);
+    }
+
+
+}