ICU-12812 Add new implementation, but only expose a limited API as tech preview....

author Mark Davis <mark@macchiato.com>

Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)

committer Mark Davis <mark@macchiato.com>

Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)
author Mark Davis <mark@macchiato.com>
Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)
committer Mark Davis <mark@macchiato.com>
Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XCldrStub.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XCldrStub.java

new file mode 100644 (file)

index 0000000..8518491
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XCldrStub.java
@@ -0,0 +1,390 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ICUUncheckedIOException;
+
+/**
+ * Stub class to make migration easier until we get either Guava or a higher level of Java.
+ */
+public class XCldrStub {
+
+    public static class Multimap<K, V> {
+        private final Map<K,Set<V>> map;
+        private final Class<Set<V>> setClass;
+
+        @SuppressWarnings("unchecked")
+        private Multimap(Map<K,Set<V>> map, Class<?> setClass) {
+            this.map = map;
+            this.setClass = (Class<Set<V>>) (setClass != null
+                    ? setClass
+                            : HashSet.class);
+        }
+        public Multimap<K, V> putAll(K key, V... values) {
+            if (values.length != 0) {
+                createSetIfMissing(key).addAll(Arrays.asList(values));
+            }
+            return this;
+        }
+        public void putAll(K key, Collection<V> values) {
+            if (!values.isEmpty()) {
+                createSetIfMissing(key).addAll(values);
+            }
+        }
+        public void putAll(Collection<K> keys, V value) {
+            for (K key : keys) {
+                put(key, value);
+            }
+        }
+        public void putAll(Multimap<K, V> source) {
+            for (Entry<K, Set<V>> entry : source.map.entrySet()) {
+                putAll(entry.getKey(), entry.getValue());
+            }
+        }
+        public void put(K key, V value) {
+            createSetIfMissing(key).add(value);
+        }
+        private Set<V> createSetIfMissing(K key) {
+            Set<V> old = map.get(key);
+            if (old == null) {
+                map.put(key, old = getInstance());
+            }
+            return old;
+        }
+        private Set<V> getInstance() {
+            try {
+                return setClass.newInstance();
+            } catch (Exception e) {
+                throw new ICUException(e);
+            }
+        }
+        public Set<V> get(K key) {
+            Set<V> result = map.get(key);
+            return result; //  == null ? Collections.<V>emptySet() : result;
+        }
+        public Set<K> keySet() {
+            return map.keySet();
+        }
+        public Map<K, Set<V>> asMap() {
+            return map;
+        }
+        public Set<V> values() {
+            Collection<Set<V>> values = map.values();
+            if (values.size() == 0) {
+                return Collections.<V>emptySet();
+            }
+            Set<V> result = getInstance();
+            for ( Set<V> valueSet : values) {
+                result.addAll(valueSet);
+            }
+            return result;
+        }
+        public int size() {
+            return map.size();
+        }
+        public Iterable<Entry<K, V>> entries() {
+            return new MultimapIterator<K, V>(map);
+        }
+        @Override
+        public boolean equals(Object obj) {
+            Multimap<K,V> other = (Multimap) obj;
+            return map.equals(other.map);
+        }
+    }
+
+    public static class Multimaps {
+        public static <K, V, R extends Multimap<K, V>> R invertFrom(Multimap<V, K> source, R target) {
+            for (Entry<V, Set<K>> entry : source.asMap().entrySet()) {
+                target.putAll(entry.getValue(), entry.getKey());
+            }
+            return target;
+        }
+        public static <K, V, R extends Multimap<K, V>> R invertFrom(Map<V, K> source, R target) {
+            for (Entry<V, K> entry : source.entrySet()) {
+                target.put(entry.getValue(), entry.getKey());
+            }
+            return target;
+        }
+        /**
+         * Warning, not functionally the same as Guava; only for use in invertFrom.
+         */
+        public static <K, V> Map<K,V> forMap(Map<K,V> map) {
+            return map;
+        }
+    }
+
+    private static class MultimapIterator<K,V> implements Iterator<Entry<K,V>>, Iterable<Entry<K,V>> {
+        private final Iterator<Entry<K, Set<V>>> it1;
+        private Iterator<V> it2 = null;
+        private final ReusableEntry<K,V> entry = new ReusableEntry<K,V>();
+
+        private MultimapIterator(Map<K,Set<V>> map) {
+            it1 = map.entrySet().iterator();
+        }
+        @Override
+        public boolean hasNext() {
+            return it1.hasNext() || it2 != null && it2.hasNext();
+        }
+        @Override
+        public Entry<K, V> next() {
+            if (it2 != null && it2.hasNext()) {
+                entry.value = it2.next();
+            } else {
+                Entry<K, Set<V>> e = it1.next();
+                entry.key = e.getKey();
+                it2 = e.getValue().iterator();
+            }
+            return entry;
+        }
+        @Override
+        public Iterator<Entry<K, V>> iterator() {
+            return this;
+        }
+    }
+
+    /**
+     * Mutable {@link Entry} whose fields are overwritten in place by its owner.
+     * {@code setValue} is unsupported, and {@code equals}/{@code hashCode} are not overridden,
+     * so instances compare by identity.
+     */
+    private static class ReusableEntry<K,V> implements Entry<K,V> {
+        // Written directly by the enclosing file's iterator; not private on purpose.
+        K key;
+        V value;
+        @Override
+        public K getKey() {
+            return key;
+        }
+        @Override
+        public V getValue() {
+            return value;
+        }
+        /** Always throws; entries are read-only for callers. */
+        @Override
+        public V setValue(V value) {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    /** {@link Multimap} backed by a {@link HashMap} whose value sets are {@link HashSet}s. */
+    public static class HashMultimap<K, V> extends Multimap<K, V> {
+        private HashMultimap() {
+            super(new HashMap<K, Set<V>>(), HashSet.class);
+        }
+        /** Creates a new, empty instance. */
+        public static <K, V> HashMultimap<K, V> create() {
+            return new HashMultimap<K, V>();
+        }
+    }
+
+    /**
+     * {@link Multimap} backed by a {@link TreeMap} whose value sets are {@link TreeSet}s.
+     * No comparator is supplied, so keys and values are ordered by their natural ordering.
+     */
+    public static class TreeMultimap<K, V> extends Multimap<K, V> {
+        private TreeMultimap() {
+            super(new TreeMap<K, Set<V>>(), TreeSet.class);
+        }
+        /** Creates a new, empty instance. */
+        public static <K, V> TreeMultimap<K, V> create() {
+            return new TreeMultimap<K, V>();
+        }
+    }
+
+    /**
+     * {@link Multimap} backed by a {@link LinkedHashMap} whose value sets are
+     * {@link LinkedHashSet}s, so keys and values iterate in insertion order.
+     */
+    public static class LinkedHashMultimap<K, V> extends Multimap<K, V> {
+        private LinkedHashMultimap() {
+            super(new LinkedHashMap<K, Set<V>>(), LinkedHashSet.class);
+        }
+        /** Creates a new, empty instance. */
+        public static <K, V> LinkedHashMultimap<K, V> create() {
+            return new LinkedHashMultimap<K, V>();
+        }
+    }
+
+
+    public static class Counter<T> implements Iterable<T>{
+        private Map<T,Long> data;
+        @Override
+        public Iterator<T> iterator() {
+            return data.keySet().iterator();
+        }
+        public long get(T s) {
+            Long result = data.get(s);
+            return result != null ? result : 0L;
+        }
+        public void add(T item, int count) {
+            Long result = data.get(item);
+            data.put(item, result == null ? count : result + count);
+        }
+    }
+
+    public static <T> String join(T[] source, String separator) {
+        StringBuilder result = new StringBuilder();
+        for (int i = 0; i < source.length; ++i) {
+            if (i != 0) result.append(separator);
+            result.append(source[i]);
+        }
+        return result.toString();
+    }
+
+    public static <T> String join(Iterable<T> source, String separator) {
+        StringBuilder result = new StringBuilder();
+        boolean first = true;
+        for (T item : source) {
+            if (!first) result.append(separator);
+            else first = false;
+            result.append(item.toString());
+        }
+        return result.toString();
+    }
+
+    /** Namesake of the CLDR utility class; exposes only {@code join}. */
+    public static class CollectionUtilities {
+        /** Delegates to {@link XCldrStub#join(Iterable, String)}. */
+        public static <T, U extends Iterable<T>> String join(U source, String separator) {
+            return XCldrStub.join(source, separator);
+        }
+    }
+
+    /** Small subset of Guava's {@code Joiner}: joins elements with a fixed separator. */
+    public static class Joiner {
+        private final String separator;
+        private Joiner(String separator) {
+            this.separator = separator;
+        }
+        /** Creates a joiner that places {@code separator} between consecutive elements. */
+        public static final Joiner on(String separator) {
+            return new Joiner(separator);
+        }
+        /** Joins the array elements; delegates to {@code XCldrStub.join}. */
+        public <T> String join(T[] source) {
+            return XCldrStub.join(source, separator);
+        }
+        /** Joins the iterable's items; delegates to {@code XCldrStub.join}. */
+        public <T> String join(Iterable<T> source) {
+            return XCldrStub.join(source, separator);
+        }
+    }
+
+    public static class Splitter {
+        Pattern pattern;
+        boolean trimResults = false;
+        public Splitter(char c) {
+            this(Pattern.compile("\\Q" + c + "\\E"));
+        }
+        public Splitter(Pattern p) {
+            pattern = p;
+        }
+        public static Splitter on(char c) {
+            return new Splitter(c);
+        }
+        public static Splitter on(Pattern p) {
+            return new Splitter(p);
+        }
+        public List<String> splitToList(String input) {
+            String[] items = pattern.split(input);
+            if (trimResults) {
+                for (int i = 0; i < items.length; ++i) {
+                    items[i] = items[i].trim();
+                }
+            }
+            return Arrays.asList(items);
+        }
+        public Splitter trimResults() {
+            trimResults = true;
+            return this;
+        }
+        public Iterable<String> split(String input) {
+            return splitToList(input);
+        }
+    }
+
+    public static class ImmutableSet {
+        public static <T> Set<T> copyOf(Set<T> values) {
+            return Collections.unmodifiableSet(new LinkedHashSet<T>(values)); // copy set for safety, preserve order
+        }
+    }
+    public static class ImmutableMap {
+        public static <K,V> Map<K,V> copyOf(Map<K,V> values) {
+            return Collections.unmodifiableMap(new LinkedHashMap<K,V>(values)); // copy set for safety, preserve order
+        }
+    }
+    public static class ImmutableMultimap {
+        public static <K,V> Multimap<K,V> copyOf(Multimap<K,V> values) {
+            LinkedHashMap<K, Set<V>> temp = new LinkedHashMap<K,Set<V>>(); // semi-deep copy, preserve order
+            for (Entry<K, Set<V>> entry : values.asMap().entrySet()) {
+                Set<V> value = entry.getValue();
+                temp.put(entry.getKey(), value.size() == 1
+                        ? Collections.singleton(value.iterator().next())
+                                : Collections.unmodifiableSet(new LinkedHashSet<V>(value)));
+            }
+            return new Multimap<K,V>(Collections.unmodifiableMap(temp), null);
+        }
+    }
+
+    public static class FileUtilities {
+        public static final Charset UTF8 = Charset.forName("utf-8");
+
+        public static BufferedReader openFile(Class<?> class1, String file) {
+            return openFile(class1, file, UTF8);
+        }
+
+        public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
+            // URL path = null;
+            // String externalForm = null;
+            try {
+                final InputStream resourceAsStream = class1.getResourceAsStream(file);
+                if (charset == null) {
+                    charset = UTF8;
+                }
+                InputStreamReader reader = new InputStreamReader(resourceAsStream, charset);
+                BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
+                return bufferedReader;
+            } catch (Exception e) {
+                String className = class1 == null ? null : class1.getCanonicalName();
+                String canonicalName = null;
+                try {
+                    String relativeFileName = getRelativeFileName(class1, "../util/");
+                    canonicalName = new File(relativeFileName).getCanonicalPath();
+                } catch (Exception e1) {
+                    throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
+                        + className, e);
+                }
+                throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + canonicalName + "; relative to class: "
+                    + className, e);
+            }
+        }
+        public static String getRelativeFileName(Class<?> class1, String filename) {
+            URL resource = class1.getResource(filename);
+            String resourceString = resource.toString();
+            if (resourceString.startsWith("file:")) {
+                return resourceString.substring(5);
+            } else if (resourceString.startsWith("jar:file:")) {
+                return resourceString.substring(9);
+            } else {
+                throw new ICUUncheckedIOException("File not found: " + resourceString);
+            }
+        }
+    }
+
+    static public class RegexUtilities {
+        public static int findMismatch(Matcher m, CharSequence s) {
+            int i;
+            for (i = 1; i < s.length(); ++i) {
+                boolean matches = m.reset(s.subSequence(0, i)).matches();
+                if (!matches && !m.hitEnd()) {
+                    break;
+                }
+            }
+            return i - 1;
+        }
+        public static String showMismatch(Matcher m, CharSequence s) {
+            int failPoint = findMismatch(m, s);
+            String show = s.subSequence(0, failPoint) + "☹" + s.subSequence(failPoint, s.length());
+            return show;
+        }
+    }
+}
+\ No newline at end of file
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java

new file mode 100644 (file)

index 0000000..28c406e
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
@@ -0,0 +1,667 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.ULocale.Minimize;
+import com.ibm.icu.util.UResourceBundle;
+
+public class XLikelySubtags {
+
+    private static final XLikelySubtags DEFAULT = new XLikelySubtags();
+
+    /** Returns the shared instance held in {@code DEFAULT}, which is built by the no-argument constructor. */
+    public static final XLikelySubtags getDefault() {
+        return DEFAULT;
+    }
+
+    static abstract class Maker {
+        abstract <V> V make();
+
+        @SuppressWarnings("unchecked")
+        public <K,V> V getSubtable(Map<K, V> langTable, final K language) {
+            V scriptTable = langTable.get(language);
+            if (scriptTable == null) {
+                langTable.put(language, scriptTable = (V) make());
+            }
+            return scriptTable;
+        }
+
+        static final Maker HASHMAP = new Maker() {
+            @Override
+            @SuppressWarnings("unchecked")
+            public Map<Object,Object> make() {
+                return new HashMap<Object,Object>();
+            }
+        };
+
+        static final Maker TREEMAP = new Maker() {
+            @Override
+            @SuppressWarnings("unchecked")
+            public Map<Object,Object> make() {
+                return new TreeMap<Object,Object>();
+            }
+        };
+    }
+
+    public static class Aliases {
+        final Map<String, String> toCanonical;
+        final Multimap<String, String> toAliases;
+        public String getCanonical(String alias) {
+            String canonical = toCanonical.get(alias);
+            return canonical == null ? alias : canonical;
+        }
+        public Set<String> getAliases(String canonical) {
+            Set<String> aliases = toAliases.get(canonical);
+            return aliases == null ? Collections.singleton(canonical) : aliases;
+        }
+        public Aliases(String key) {
+            UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+            UResourceBundle metadataAlias = metadata.get("alias");
+            UResourceBundle territoryAlias = metadataAlias.get(key);
+            Map<String, String> toCanonical1 = new HashMap<String, String>();
+            for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
+                UResourceBundle res = territoryAlias.get(i);
+                String aliasFrom = res.getKey();
+                if (aliasFrom.contains("_")) {
+                    continue; // only simple aliasing
+                }
+                String aliasReason = res.get("reason").getString();
+                if (aliasReason.equals("overlong")) {
+                    continue;
+                }
+                String aliasTo = res.get("replacement").getString();
+                int spacePos = aliasTo.indexOf(' ');
+                String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
+                if (aliasFirst.contains("_")) {
+                    continue; // only simple aliasing
+                }
+                toCanonical1.put(aliasFrom, aliasFirst);
+            }
+            if (key.equals("language")) {
+                toCanonical1.put("mo", "ro"); // special case
+            }
+            toCanonical = Collections.unmodifiableMap(toCanonical1);
+            toAliases = Multimaps.invertFrom(toCanonical1, HashMultimap.<String,String>create());
+        }
+    }
+
+    public static class LSR {
+        public final String language;
+        public final String script;
+        public final String region;
+
+        public static Aliases LANGUAGE_ALIASES = new Aliases("language");
+        public static Aliases REGION_ALIASES = new Aliases("territory");
+
+        public static LSR from(String language, String script, String region) {
+            return new LSR(language, script, region);
+        }
+
+        // from http://unicode.org/reports/tr35/#Unicode_language_identifier
+        // but simplified to requiring language subtag, and nothing beyond region
+        // #1 is language
+        // #2 is script
+        // #3 is region
+//        static final String pat =
+//                "language_id = (unicode_language_subtag)"
+//                        + "(?:sep(unicode_script_subtag))?"
+//                        + "(?:sep(unicode_region_subtag))?;\n"
+//                        + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
+//                        + "unicode_script_subtag = alpha{4};\n"
+//                        + "unicode_region_subtag  = alpha{2}|digit{3};\n"
+//                        + "sep    = [-_];\n"
+//                        + "digit  = [0-9];\n"
+//                        + "alpha   = [A-Za-z];\n"
+//                        ;
+//        static {
+//            System.out.println(pat);
+//            System.out.println(new UnicodeRegex().compileBnf(pat));
+//        }
+//        static final Pattern LANGUAGE_PATTERN = Pattern.compile(
+//                "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
+//                //new UnicodeRegex().compileBnf(pat)
+//                );
+//
+        // TODO: fix this to check for format. Not required, since this is only called internally, but safer for the future.
+        static LSR from(String languageIdentifier) {
+            String[] parts = languageIdentifier.split("[-_]");
+            if (parts.length < 1 || parts.length > 3) {
+                throw new ICUException("too many subtags");
+            }
+            String lang = parts[0].toLowerCase();
+            String p2 = parts.length < 2 ? "": parts[1];
+            String p3 = parts.length < 3 ? "": parts[2];
+            return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
+
+            //            Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
+            //            if (!matcher.matches()) {
+            //                return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
+            //            }
+            //            System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
+            //            throw new ICUException("invalid language id");
+        }
+
+        public static LSR from(ULocale locale) {
+            return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
+        }
+
+        public static LSR fromMaximalized(ULocale locale) {
+            return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
+        }
+
+        public static LSR fromMaximalized(String language, String script, String region) {
+            String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
+            // script is ok
+            String canonicalRegion = REGION_ALIASES.getCanonical(region); // getCanonical(REGION_ALIASES.get(region));
+
+            return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
+        }
+
+        public LSR(String language, String script, String region) {
+            this.language = language;
+            this.script = script;
+            this.region = region;
+        }
+
+        @Override
+        public String toString() {
+            StringBuilder result = new StringBuilder(language);
+            if (!script.isEmpty()) {
+                result.append('-').append(script);
+            }
+            if (!region.isEmpty()) {
+                result.append('-').append(region);
+            }
+            return result.toString();
+        }
+        public LSR replace(String language2, String script2, String region2) {
+            if (language2 == null && script2 == null && region2 == null) return this;
+            return new LSR(
+                    language2 == null ? language: language2,
+                            script2 == null ? script : script2,
+                                    region2 == null ? region : region2);
+        }
+        @Override
+        public boolean equals(Object obj) {
+            LSR other = (LSR) obj;
+            return language.equals(other.language)
+                    && script.equals(other.script)
+                    && region.equals(other.region);
+        }
+        @Override
+        public int hashCode() {
+            return Objects.hash(language, script, region);
+        }
+    }
+
+    final Map<String, Map<String, Map<String, LSR>>> langTable;
+
+    /** Builds the table from the data returned by {@code getDefaultRawData()}. */
+    public XLikelySubtags() {
+        this(getDefaultRawData(), true);
+    }
+
+    private static Map<String, String> getDefaultRawData() {
+        Map<String, String> rawData = new TreeMap<String, String>();
+        UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
+        for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
+            String key = enumer.nextElement();
+            rawData.put(key, bundle.getString(key));
+        }
+        return rawData;
+    }
+
+    /**
+     * Builds the lookup table from explicit data.
+     *
+     * @param rawData source identifier to target identifier, e.g. {@code und_GH} to {@code ak_Latn_GH}
+     * @param skipNoncanonical passed on to {@code init}
+     */
+    public XLikelySubtags(Map<String, String> rawData, boolean skipNoncanonical) {
+        this.langTable = init(rawData, skipNoncanonical);
+    }
+
+    private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData, boolean skipNoncanonical) {
+        // prepare alias info. We want a mapping from the canonical form to all aliases
+
+        //Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
+        //        getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
+
+        // Don't bother with script; there are none
+
+        //Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
+        //        getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
+
+        Maker maker = Maker.TREEMAP;
+        Map<String, Map<String, Map<String, LSR>>> result = maker.make();
+//        Splitter bar = Splitter.on('_');
+//        int last = -1;
+        // set the base data
+        Map<LSR,LSR> internCache = new HashMap<LSR,LSR>();
+        for (Entry<String, String> sourceTarget : rawData.entrySet()) {
+            LSR ltp = LSR.from(sourceTarget.getKey());
+            final String language = ltp.language;
+            final String script = ltp.script;
+            final String region = ltp.region;
+
+            ltp = LSR.from(sourceTarget.getValue());
+            String languageTarget = ltp.language;
+            final String scriptTarget = ltp.script;
+            final String regionTarget = ltp.region;
+
+            set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
+            // now add aliases
+            Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
+//            if (languageAliases.isEmpty()) {
+//                languageAliases = Collections.singleton(language);
+//            }
+            Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
+//            if (regionAliases.isEmpty()) {
+//                regionAliases = Collections.singleton(region);
+//            }
+            for (String languageAlias : languageAliases) {
+                for (String regionAlias : regionAliases) {
+                    if (languageAlias.equals(language) && regionAlias.equals(region)) {
+                        continue;
+                    }
+                    set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
+                }
+            }
+        }
+        // hack
+        set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
+
+        // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
+        // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
+
+        // so und-Latn-GH   =>  ak-Latn-GH
+        Map<String, Map<String, LSR>> undScriptMap = result.get("und");
+        Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
+        for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
+            final LSR value = regionEntry.getValue();
+            set(result, "und", value.script, value.region, value);
+        }
+        //
+        // check that every level has "" (or "und")
+        if (!result.containsKey("und")) {
+            throw new IllegalArgumentException("failure: base");
+        }
+        for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
+            String lang = langEntry.getKey();
+            final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
+            if (!scriptMap.containsKey("")) {
+                throw new IllegalArgumentException("failure: " + lang);
+            }
+            for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
+                String script = scriptEntry.getKey();
+                final Map<String, LSR> regionMap = scriptEntry.getValue();
+                if (!regionMap.containsKey("")) {
+                    throw new IllegalArgumentException("failure: " + lang + "-" + script);
+                }
+                //                for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
+                //                    String region = regionEntry.getKey();
+                //                    LSR value = regionEntry.getValue();
+                //                }
+            }
+        }
+        return result;
+    }
+
+//    private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
+//        for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
+//            final String alias = e.getKey();
+//            if (alias.contains("_")) {
+//                continue; // only do simple aliasing
+//            }
+//            String canonical = getCanonical(e.getValue());
+//            canonicalToAlias.put(canonical, alias);
+//        }
+//    }
+
+//    private static String getCanonical(R2<List<String>, String> aliasAndReason) {
+//        if (aliasAndReason == null) {
+//            return null;
+//        }
+//        if (aliasAndReason.get1().equals("overlong")) {
+//            return null;
+//        }
+//        List<String> value = aliasAndReason.get0();
+//        if (value.size() != 1) {
+//            return null;
+//        }
+//        final String canonical = value.iterator().next();
+//        if (canonical.contains("_")) {
+//            return null; // only do simple aliasing
+//        }
+//        return canonical;
+//    }
+
+    private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
+            final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
+        LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
+        LSR oldValue = internCache.get(newValue);
+        if (oldValue == null) {
+            internCache.put(newValue, newValue);
+            oldValue = newValue;
+        }
+        set(langTable, language, script, region, oldValue);
+    }
+
+    /**
+     * Stores {@code newValue} under language/script/region, creating the intermediate
+     * script and region tables as needed. An existing value for the same key is overwritten.
+     */
+    private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
+        Map<String, Map<String, LSR>> scriptTable = Maker.TREEMAP.getSubtable(langTable, language);
+        Map<String, LSR> regionTable = Maker.TREEMAP.getSubtable(scriptTable, script);
+        regionTable.put(region, newValue);
+    }
+
+    /**
+     * Convenience method: parses {@code source} with {@code ULocale.forLanguageTag} and
+     * maximizes the resulting locale.
+     * @param source language tag to parse
+     * @return the result of {@link #maximize(ULocale)} for the parsed locale
+     */
+    public LSR maximize(String source) {
+        return maximize(ULocale.forLanguageTag(source));
+    }
+
+    /** Convenience method: maximizes the locale's language, script and country. */
+    public LSR maximize(ULocale source) {
+        return maximize(source.getLanguage(), source.getScript(), source.getCountry());
+    }
+
+    /** Convenience method: maximizes the fields of {@code source}. */
+    public LSR maximize(LSR source) {
+        return maximize(source.language, source.script, source.region);
+    }
+
+    //    public static ULocale addLikelySubtags(ULocale loc) {
+    //
+    //    }
+
+    /**
+     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+     */
+    public LSR maximize(String language, String script, String region) {
+        int retainOldMask = 0;
+        Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
+        if (scriptTable == null) { // cannot happen if language == "und"
+            retainOldMask |= 4;
+            scriptTable = langTable.get("und");
+        } else if (!language.equals("und")) {
+            retainOldMask |= 4;
+        }
+
+        if (script.equals("Zzzz")) {
+            script = "";
+        }
+        Map<String, LSR> regionTable = scriptTable.get(script);
+        if (regionTable == null) { // cannot happen if script == ""
+            retainOldMask |= 2;
+            regionTable = scriptTable.get("");
+        } else if (!script.isEmpty()) {
+            retainOldMask |= 2;
+        }
+
+        if (region.equals("ZZ")) {
+            region = "";
+        }
+        LSR result = regionTable.get(region);
+        if (result == null) { // cannot happen if region == ""
+            retainOldMask |= 1;
+            result = regionTable.get("");
+            if (result == null) {
+                return null;
+            }
+        } else if (!region.isEmpty()) {
+            retainOldMask |= 1;
+        }
+
+        switch (retainOldMask) {
+        default:
+        case 0: return result;
+        case 1: return result.replace(null, null, region);
+        case 2: return result.replace(null, script, null);
+        case 3: return result.replace(null, script, region);
+        case 4: return result.replace(language, null, null);
+        case 5: return result.replace(language, null, region);
+        case 6: return result.replace(language, script, null);
+        case 7: return result.replace(language, script, region);
+        }
+    }
+
+    /**
+     * Maximizes the input and then tries to drop the script and/or region from the result when they
+     * match what maximization would produce anyway.
+     * <p>
+     * The maximized result is compared with the default entry of its language (script "" and
+     * region ""), and, failing that, with the maximization of (languageIn, scriptIn, "").
+     * The arguments passed to LSR.replace are null or "" per field; presumably null keeps the
+     * field and "" clears it -- confirm against LSR.replace.
+     * <p>
+     * NOTE(review): maximize can return null; neither call here is null-checked.
+     *
+     * @param fieldToFavor when the script is the default but the region is not, FAVOR_REGION
+     *        returns immediately with the script dropped; other values try dropping the region first
+     */
+    private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
+        LSR result = maximize(languageIn, scriptIn, regionIn);
+
+        // We could try just a series of checks, like:
+        // LSR result2 = addLikelySubtags(languageIn, "", "");
+        // if result.equals(result2) return result2;
+        // However, we can optimize 2 of the cases:
+        //   (languageIn, "", "")
+        //   (languageIn, "", regionIn)
+
+        Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
+
+        Map<String, LSR> regionTable0 = scriptTable.get("");
+        LSR value00 = regionTable0.get("");
+        boolean favorRegionOk = false;
+        if (result.script.equals(value00.script)) { //script is default
+            if (result.region.equals(value00.region)) {
+                return result.replace(null, "", "");
+            } else if (fieldToFavor == Minimize.FAVOR_REGION) {
+                // The constant is referenced through its type, not through the parameter instance.
+                return result.replace(null, "", null);
+            } else {
+                favorRegionOk = true;
+            }
+        }
+
+        // The last case is not as easy to optimize.
+        // Maybe do later, but for now use the straightforward code.
+        LSR result2 = maximize(languageIn, scriptIn, "");
+        if (result2.equals(result)) {
+            return result.replace(null, null, "");
+        } else if (favorRegionOk) {
+            return result.replace(null, "", null);
+        }
+        return result;
+    }
+
+    /**
+     * Appends a tab-separated dump of a string-keyed map to output and returns output.
+     * Empty keys are printed as "∅". A value that is itself a Map is dumped recursively with one
+     * more tab of indent; any other value is printed after a tab and followed by a newline.
+     * The first key of a nested map continues the current line; later keys start with the indent.
+     *
+     * @param map the map to print; nested maps are assumed to be string-keyed as well
+     * @param indent prefix for every key after the first; "" at the top level
+     * @param output the buffer to append to
+     * @return output, for chaining
+     */
+    @SuppressWarnings("unchecked") // nested tables are built with String keys only
+    private static <V> StringBuilder show(Map<String,V> map, String indent, StringBuilder output) {
+        String first = indent.isEmpty() ? "" : "\t";
+        for (Entry<String,V> e : map.entrySet()) {
+            String key = e.getKey();
+            V value = e.getValue();
+            output.append(first).append(key.isEmpty() ? "∅" : key);
+            if (value instanceof Map) {
+                show((Map<String, Object>) value, indent + "\t", output);
+            } else {
+                output.append("\t").append(Objects.toString(value)).append("\n");
+            }
+            first = indent;
+        }
+        return output;
+    }
+
+    @Override
+    public String toString() {
+        return show(langTable, "", new StringBuilder()).toString();
+    }
+
+    //    public static void main(String[] args) {
+    //        System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
+    //
+    //        final Map<String, String> rawData = sdi.getLikelySubtags();
+    //        XLikelySubtags ls = XLikelySubtags.getDefault();
+    //        System.out.println(ls);
+    //        ls.maximize(new ULocale("iw"));
+    //        if (true) return;
+    //
+    //        LanguageTagParser ltp = new LanguageTagParser();
+    //
+    //        // get all the languages, scripts, and regions
+    //        Set<String> languages = new TreeSet<String>();
+    //        Set<String> scripts = new TreeSet<String>();
+    //        Set<String> regions = new TreeSet<String>();
+    //        Counter<String> languageCounter = new Counter<String>();
+    //        Counter<String> scriptCounter = new Counter<String>();
+    //        Counter<String> regionCounter = new Counter<String>();
+    //
+    //        for (Entry<String, String> sourceTarget : rawData.entrySet()) {
+    //            final String source = sourceTarget.getKey();
+    //            ltp.set(source);
+    //            languages.add(ltp.getLanguage());
+    //            scripts.add(ltp.getScript());
+    //            regions.add(ltp.getRegion());
+    //            final String target = sourceTarget.getValue();
+    //            ltp.set(target);
+    //            add(target, languageCounter, ltp.getLanguage(), 1);
+    //            add(target, scriptCounter, ltp.getScript(), 1);
+    //            add(target, regionCounter, ltp.getRegion(), 1);
+    //        }
+    //        ltp.set("und-Zzzz-ZZ");
+    //        languageCounter.add(ltp.getLanguage(), 1);
+    //        scriptCounter.add(ltp.getScript(), 1);
+    //        regionCounter.add(ltp.getRegion(), 1);
+    //
+    //        if (SHORT) {
+    //            removeSingletons(languages, languageCounter);
+    //            removeSingletons(scripts, scriptCounter);
+    //            removeSingletons(regions, regionCounter);
+    //        }
+    //
+    //        System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
+    //        System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
+    //        System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
+    //
+    //        int maxCount = Integer.MAX_VALUE;
+    //
+    //        int counter = maxCount;
+    //        long tempTime = System.nanoTime();
+    //        newMax:
+    //            for (String language : languages) {
+    //                for (String script : scripts) {
+    //                    for (String region : regions) {
+    //                        if (--counter < 0) break newMax;
+    //                        LSR result = ls.maximize(language, script, region);
+    //                    }
+    //                }
+    //            }
+    //        long newMaxTime = System.nanoTime() - tempTime;
+    //        System.out.println("newMaxTime: " + newMaxTime);
+    //
+    //        counter = maxCount;
+    //        tempTime = System.nanoTime();
+    //        newMin:
+    //            for (String language : languages) {
+    //                for (String script : scripts) {
+    //                    for (String region : regions) {
+    //                        if (--counter < 0) break newMin;
+    //                        LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
+    //                    }
+    //                }
+    //            }
+    //        long newMinTime = System.nanoTime() - tempTime;
+    //        System.out.println("newMinTime: " + newMinTime);
+    //
+    //        // *****
+    //
+    //        tempTime = System.nanoTime();
+    //        counter = maxCount;
+    //        oldMax:
+    //            for (String language : languages) {
+    //                for (String script : scripts) {
+    //                    for (String region : regions) {
+    //                        if (--counter < 0) break oldMax;
+    //                        ULocale tempLocale = new ULocale(language, script, region);
+    //                        ULocale max = ULocale.addLikelySubtags(tempLocale);
+    //                    }
+    //                }
+    //            }
+    //        long oldMaxTime = System.nanoTime() - tempTime;
+    //        System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
+    //
+    //        counter = maxCount;
+    //        tempTime = System.nanoTime();
+    //        oldMin:
+    //            for (String language : languages) {
+    //                for (String script : scripts) {
+    //                    for (String region : regions) {
+    //                        if (--counter < 0) break oldMin;
+    //                        ULocale tempLocale = new ULocale(language, script, region);
+    //                        ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
+    //                    }
+    //                }
+    //            }
+    //        long oldMinTime = System.nanoTime() - tempTime;
+    //        System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
+    //
+    //        counter = maxCount;
+    //        testMain:
+    //            for (String language : languages) {
+    //                System.out.println(language);
+    //                int tests = 0;
+    //                for (String script : scripts) {
+    //                    for (String region : regions) {
+    //                        ++tests;
+    //                        if (--counter < 0) break testMain;
+    //                        LSR maxNew = ls.maximize(language, script, region);
+    //                        LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
+    //                        LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
+    //
+    //                        ULocale tempLocale = new ULocale(language, script, region);
+    //                        ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
+    //                        ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
+    //                        ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
+    //
+    //                        // check values
+    //                        final String maxNewS = String.valueOf(maxNew);
+    //                        final String maxOldS = maxOld.toLanguageTag();
+    //                        boolean sameMax = maxOldS.equals(maxNewS);
+    //
+    //                        final String minNewSS = String.valueOf(minNewS);
+    //                        final String minOldSS = minOldS.toLanguageTag();
+    //                        boolean sameMinS = minNewSS.equals(minOldSS);
+    //
+    //                        final String minNewRS = String.valueOf(minNewR);
+    //                        final String minOldRS = minOldS.toLanguageTag();
+    //                        boolean sameMinR = minNewRS.equals(minOldRS);
+    //
+    //                        if (sameMax && sameMinS && sameMinR) continue;
+    //                        System.out.println(new LSR(language, script, region)
+    //                                + "\tmax: " + maxNew
+    //                                + (sameMax ? "" : "≠" + maxOldS)
+    //                                + "\tminS: " + minNewS
+    //                                + (sameMinS ? "" : "≠" + minOldS)
+    //                                + "\tminR: " + minNewR
+    //                                + (sameMinR ? "" : "≠" + minOldR)
+    //                                );
+    //                    }
+    //                }
+    //                System.out.println(language + ": " + tests);
+    //            }
+    //    }
+    //
+    //    private static void add(String target, Counter<String> languageCounter, String language, int count) {
+    //        if (language.equals("aa")) {
+    //            int debug = 0;
+    //        }
+    //        languageCounter.add(language, count);
+    //    }
+    //
+    //    private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
+    //        for (String s : languageCounter) {
+    //            final long count = languageCounter.get(s);
+    //            if (count <= 1) {
+    //                languages.remove(s);
+    //            }
+    //        }
+    //    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java

new file mode 100644 (file)

index 0000000..6f15a28
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java
@@ -0,0 +1,1338 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.Predicate;
+
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.Row;
+import com.ibm.icu.impl.Row.R4;
+import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
+import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder;
+import com.ibm.icu.text.LocaleDisplayNames;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundleIterator;
+
+public class XLocaleDistance {
+
+    /** Compile-time debug switch; off in this revision. NOTE(review): its readers are not in this part of the file. */
+    static final boolean PRINT_OVERRIDES = false;
+
+    /** Distance constant of 100. NOTE(review): the name suggests "no acceptable match"; confirm against callers. */
+    public static final int ABOVE_THRESHOLD = 100;
+
+    @Deprecated
+    public static final String ANY = "�"; // matches any character. Uses value above any subtag.
+
+    /**
+     * Maps the wildcard "*" to the internal ANY key; every other value (including null) is
+     * returned unchanged.
+     */
+    private static String fixAny(String string) {
+        if ("*".equals(string)) {
+            return ANY;
+        }
+        return string;
+    }
+
+    /** English display names. NOTE(review): presumably used for debug/diagnostic output; confirm against callers. */
+    static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);
+
+    /**
+     * Reads the "written" table under "languageMatchingNew" in the ICU supplemental data.
+     * Each item becomes a frozen row of (string 0, string 1, string 2 parsed as an int, oneway),
+     * where oneway is true only when the item has more than three elements and element 3 is "1".
+     *
+     * @return an unmodifiable list of the rows, in iteration order
+     */
+    private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
+        List<R4<String, String, Integer, Boolean>> rows = new ArrayList<R4<String, String, Integer, Boolean>>();
+
+        ICUResourceBundle supplemental = LocaleMatcher.getICUSupplementalData();
+        ICUResourceBundle matching = supplemental.findTopLevel("languageMatchingNew");
+        ICUResourceBundle written = (ICUResourceBundle) matching.get("written");
+
+        UResourceBundleIterator items = written.getIterator();
+        while (items.hasNext()) {
+            ICUResourceBundle item = (ICUResourceBundle) items.next();
+            boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
+            String first = item.getString(0);
+            String second = item.getString(1);
+            int distance = Integer.parseInt(item.getString(2));
+            // note: .freeze returning wrong type, so casting.
+            R4<String, String, Integer, Boolean> row =
+                    (R4<String, String, Integer, Boolean>) Row.of(first, second, distance, oneway).freeze();
+            rows.add(row);
+        }
+        return Collections.unmodifiableList(rows);
+    }
+
+    /**
+     * Reads languageMatchingInfo/written/paradigmLocales from the ICU supplemental data,
+     * e.g. paradigmLocales{ "en", "en-GB",... }.
+     *
+     * @return an unmodifiable set of the listed locale strings
+     */
+    private static Set<String> xGetParadigmLocales() {
+        ICUResourceBundle info = LocaleMatcher.getICUSupplementalData().findTopLevel("languageMatchingInfo");
+        ICUResourceBundle paradigmBundle = (ICUResourceBundle) info.get("written")
+                .get("paradigmLocales");
+        String[] locales = paradigmBundle.getStringArray();
+        HashSet<String> result = new HashSet<String>(Arrays.asList(locales));
+        return Collections.unmodifiableSet(result);
+    }
+
+    /**
+     * Reads languageMatchingInfo/written/matchVariable from the ICU supplemental data,
+     * e.g. matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}.
+     *
+     * @return an unmodifiable map from each key of that table to its string value
+     */
+    private static Map<String, String> xGetMatchVariables() {
+        ICUResourceBundle info = LocaleMatcher.getICUSupplementalData().findTopLevel("languageMatchingInfo");
+        ICUResourceBundle variables = (ICUResourceBundle) info.get("written")
+                .get("matchVariable");
+
+        HashMap<String,String> result = new HashMap<String,String>();
+        Enumeration<String> keys = variables.getKeys();
+        while (keys.hasMoreElements()) {
+            String name = keys.nextElement();
+            result.put(name, variables.getString(name));
+        }
+        return Collections.unmodifiableMap(result);
+    }
+
+    /**
+     * Builds the region containment data from the hard-coded table below (container code followed
+     * by the codes it directly contains), then resolves it with fill starting at "001" so that each
+     * container reached from "001" also maps to everything contained indirectly.
+     *
+     * @return an immutable copy of the resolved container-to-contained multimap
+     */
+    private static Multimap<String, String> xGetContainment() {
+        TreeMultimap<String,String> containment = TreeMultimap.create();
+        // Direct containment only: first argument is the container, the rest are its members.
+        containment
+        .putAll("001", "019", "002", "150", "142", "009")
+        .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
+        .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
+        .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
+        .putAll("142", "145", "143", "030", "034", "035")
+        .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
+        .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
+        .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
+        .putAll("150", "154", "155", "151", "039")
+        .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
+        .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
+        .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
+        .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
+        .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
+        .putAll("019", "021", "013", "029", "005", "003", "419")
+        .putAll("002", "015", "011", "017", "014", "018")
+        .putAll("021", "BM", "CA", "GL", "PM", "US")
+        .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
+        .putAll("003", "021", "013", "029")
+        .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
+        .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
+        .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
+        .putAll("419", "013", "029", "005")
+        .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
+        .putAll("053", "AU", "NF", "NZ")
+        .putAll("054", "FJ", "NC", "PG", "SB", "VU")
+        .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
+        .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
+        .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
+        .putAll("009", "053", "054", "057", "061", "QO")
+        .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
+        ;
+        //Can't use following, because data from CLDR is discarded
+//        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
+//        UResourceBundle territoryContainment = suppData.get("territoryContainment");
+//        for (int i = 0 ; i < territoryContainment.getSize(); i++) {
+//            UResourceBundle mapping = territoryContainment.get(i);
+//            String parent = mapping.getKey();
+//            for (int j = 0 ; j < mapping.getSize(); j++) {
+//                String child = mapping.getString(j);
+//                containment.put(parent,child);
+//                System.out.println(parent + " => " + child);
+//            }
+//        }
+        // Expand direct containment into full (transitive) containment, starting from the world region.
+        TreeMultimap<String,String> containmentResolved = TreeMultimap.create();
+        fill("001", containment, containmentResolved);
+        return ImmutableMultimap.copyOf(containmentResolved);
+    }
+
+    private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
+        Set<String> contained = containment.get(region);
+        if (contained == null) {
+            return Collections.emptySet();
+        }
+        toAddTo.putAll(region, contained); // do top level
+        // then recursively
+        for (String subregion : contained) {
+            toAddTo.putAll(region, fill(subregion, containment, toAddTo));
+        }
+        return toAddTo.get(region);
+    }
+
+
+    static final Multimap<String,String> CONTAINER_TO_CONTAINED;
+    static final Multimap<String,String> CONTAINER_TO_CONTAINED_FINAL;
+    static {
+//         Multimap<String, String> containerToContainedTemp = xGetContainment();
+//         fill(Region.getInstance("001"), containerToContainedTemp);
+
+        CONTAINER_TO_CONTAINED = xGetContainment();
+        Multimap<String, String> containerToFinalContainedBuilder = TreeMultimap.create();
+        for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+            String container = entry.getKey();
+            for (String contained : entry.getValue()) {
+                if (CONTAINER_TO_CONTAINED.get(contained) == null) {
+                    containerToFinalContainedBuilder.put(container, contained);
+                }
+            }
+        }
+        CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(containerToFinalContainedBuilder);
+    }
+
+    /** Immutable copy of the CONTAINER_TO_CONTAINED_FINAL entry for "001". */
+    final static private Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));
+
+    // end of data from CLDR
+
+    // Top-level (language) distance table, set by the constructor.
+    private final DistanceTable languageDesired2Supported;
+    // Region mapper supplied to the constructor.
+    private final RegionMapper regionMapper;
+    // Distances of the ANY/ANY node at the language, script and region level of the table,
+    // read once by the constructor.
+    private final int defaultLanguageDistance;
+    private final int defaultScriptDistance;
+    private final int defaultRegionDistance;
+
+    @Deprecated
+    public static abstract class DistanceTable {
+        abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);
+        abstract Set<String> getCloser(int threshold);
+        abstract String toString(boolean abbreviate);
+        public DistanceTable compact() {
+            return this;
+        }
+        //        public Integer getInternalDistance(String a, String b) {
+        //            return null;
+        //        }
+        public DistanceNode getInternalNode(String any, String any2) {
+            return null;
+        }
+        public Map<String, Set<String>> getInternalMatches() {
+            return null;
+        }
+        public boolean isEmpty() {
+            return true;
+        }
+    }
+
+    @Deprecated
+    public static class DistanceNode {
+        final int distance;
+
+        public DistanceNode(int distance) {
+            this.distance = distance;
+        }
+
+        public DistanceTable getDistanceTable() {
+            return null;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof DistanceNode)) {
+                return false;
+            }
+            DistanceNode other = (DistanceNode) obj;
+            return distance == other.distance;
+        }
+        @Override
+        public int hashCode() {
+            return distance;
+        }
+        @Override
+        public String toString() {
+            return "\ndistance: " + distance;
+        }
+    }
+
+    private interface IdMapper<K,V> {
+        public V toId(K source);
+    }
+
+    static class IdMakerFull<T> implements IdMapper<T,Integer> {
+        private final Map<T, Integer> objectToInt = new HashMap<T, Integer>();
+        private final List<T> intToObject = new ArrayList<T>();
+        final String name; // for debugging
+
+        IdMakerFull(String name) {
+            this.name = name;
+        }
+
+        IdMakerFull() {
+            this("unnamed");
+        }
+
+        IdMakerFull(String name, T zeroValue) {
+            this(name);
+            add(zeroValue);
+        }
+
+        /**
+         * Return an id, making one if there wasn't one already.
+         */
+        public Integer add(T source) {
+            Integer result = objectToInt.get(source);
+            if (result == null) {
+                Integer newResult = intToObject.size();
+                objectToInt.put(source, newResult);
+                intToObject.add(source);
+                return newResult;
+            } else {
+                return result;
+            }
+        }
+
+        /**
+         * Return an id, or null if there is none.
+         */
+        @Override
+        public Integer toId(T source) {
+            return objectToInt.get(source);
+            //            return value == null ? 0 : value;
+        }
+
+        /**
+         * Return the object for the id, or null if there is none.
+         */
+        public T fromId(int id) {
+            return intToObject.get(id);
+        }
+
+        /**
+         * Return interned object
+         */
+        public T intern(T source) {
+            return fromId(add(source));
+        }
+
+        public int size() {
+            return intToObject.size();
+        }
+        /**
+         * Same as add, except if the object didn't have an id, return null;
+         */
+        public Integer getOldAndAdd(T source) {
+            Integer result = objectToInt.get(source);
+            if (result == null) {
+                Integer newResult = intToObject.size();
+                objectToInt.put(source, newResult);
+                intToObject.add(source);
+            }
+            return result;
+        }
+
+        @Override
+        public String toString() {
+            return size() + ": " + intToObject;
+        }
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof IdMakerFull)) {
+                return false;
+            }
+            IdMakerFull<T> other = (IdMakerFull) obj;
+            return intToObject.equals(other.intToObject);
+        }
+        @Override
+        public int hashCode() {
+            return intToObject.hashCode();
+        }
+    }
+
+    static class StringDistanceNode extends DistanceNode {
+        final DistanceTable distanceTable;
+
+        public StringDistanceNode(int distance, DistanceTable distanceTable) {
+            super(distance);
+            this.distanceTable = distanceTable;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof StringDistanceNode)) {
+                return false;
+            }
+            StringDistanceNode other = (StringDistanceNode) obj;
+            return distance == other.distance && Objects.equals(distanceTable, other.distanceTable);
+        }
+        @Override
+        public int hashCode() {
+            return distance ^ Objects.hashCode(distanceTable);
+        }
+
+        StringDistanceNode(int distance) {
+            this(distance, new StringDistanceTable());
+        }
+
+        public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
+            ((StringDistanceTable) distanceTable).addSubtables(desiredSub, supportedSub, r);
+        }
+        @Override
+        public String toString() {
+            return "distance: " + distance + "\n" + distanceTable;
+        }
+
+        public void copyTables(StringDistanceTable value) {
+            if (value != null) {
+                ((StringDistanceTable)distanceTable).copy(value);
+            }
+        }
+
+        @Override
+        public DistanceTable getDistanceTable() {
+            return distanceTable;
+        }
+    }
+
+    public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
+        languageDesired2Supported = datadistancetable2;
+        this.regionMapper = regionMapper;
+
+        StringDistanceNode languageNode = (StringDistanceNode) ((StringDistanceTable) languageDesired2Supported).subtables.get(ANY).get(ANY);
+        defaultLanguageDistance = languageNode.distance;
+        StringDistanceNode scriptNode = (StringDistanceNode) ((StringDistanceTable)languageNode.distanceTable).subtables.get(ANY).get(ANY);
+        defaultScriptDistance = scriptNode.distance;
+        DistanceNode regionNode = ((StringDistanceTable)scriptNode.distanceTable).subtables.get(ANY).get(ANY);
+        defaultRegionDistance = regionNode.distance;
+    }
+
+    private static Map newMap() { // for debugging
+        return new TreeMap();
+    }
+
+    /**
+     * Internal class
+     */
+    @Deprecated
+    public static class StringDistanceTable extends DistanceTable {
+        final Map<String, Map<String, DistanceNode>> subtables;
+
+        /** Wraps the given desired -> supported -> node map without copying it. */
+        StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
+            this.subtables = tables;
+        }
+        /** Creates a table backed by a fresh map from newMap(). */
+        StringDistanceTable() {
+            this(XLocaleDistance.newMap());
+        }
+
+        @Override
+        public boolean isEmpty() {
+            return subtables.isEmpty();
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof StringDistanceTable)) {
+                return false;
+            }
+            StringDistanceTable other = (StringDistanceTable) obj;
+            return subtables.equals(other.subtables);
+        }
+        @Override
+        public int hashCode() {
+            return subtables.hashCode();
+        }
+
+        /**
+         * Looks up the distance between desired and supported at this level, falling back to the
+         * ANY wildcard for either key when there is no exact entry.
+         *
+         * @param desired the desired key
+         * @param supported the supported key
+         * @param distanceTable if non-null, receives the nested table of the node that was found
+         *        (the node is cast to StringDistanceNode)
+         * @param starEquals if true, a match that needed a wildcard fallback yields 0 when
+         *        desired equals supported
+         * @return 0 in the starEquals case above, otherwise the distance of the node found
+         */
+        @Override
+        public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
+            // star records whether any wildcard fallback was needed.
+            boolean star = false;
+            Map<String, DistanceNode> sub2 = subtables.get(desired);
+            if (sub2 == null) {
+                sub2 = subtables.get(ANY); // <*, supported>
+                star = true;
+            }
+            // NOTE(review): sub2 is dereferenced without a null check; assumes an ANY row exists.
+            DistanceNode value = sub2.get(supported);   // <*/desired, supported>
+            if (value == null) {
+                value = sub2.get(ANY);  // <*/desired, *>
+                // Only retry in the ANY row if we were not already in it.
+                if (value == null && !star) {
+                    sub2 = subtables.get(ANY);   // <*, supported>
+                    value = sub2.get(supported);
+                    if (value == null) {
+                        value = sub2.get(ANY);   // <*, *>
+                    }
+                }
+                star = true;
+            }
+            if (distanceTable != null) {
+                distanceTable.value = ((StringDistanceNode) value).distanceTable;
+            }
+            return starEquals && star && desired.equals(supported) ? 0 : value.distance;
+        }
+
+        /**
+         * Adds every (desired, supported) pair of {@code other} to this table with the same
+         * distance. Pairs already present here are left untouched. New nodes are built from the
+         * distance alone; the nested tables of {@code other}'s nodes are not copied.
+         *
+         * @param other table whose first-level entries are merged into this one
+         */
+        public void copy(StringDistanceTable other) {
+            for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
+                for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                    // the node returned by addSubtable is not needed here
+                    addSubtable(e1.getKey(), e2.getKey(), e2.getValue().distance);
+                }
+            }
+        }
+
+        /**
+         * Returns the node for (desired, supported), creating it with the given distance if it
+         * does not exist yet. An existing node is returned unchanged; its distance is not updated.
+         */
+        DistanceNode addSubtable(String desired, String supported, int distance) {
+            Map<String, DistanceNode> row = subtables.get(desired);
+            if (row == null) {
+                row = newMap();
+                subtables.put(desired, row);
+            }
+
+            DistanceNode node = row.get(supported);
+            if (node == null) {
+                node = new StringDistanceNode(distance);
+                row.put(supported, node);
+            }
+            return node;
+        }
+
+        /**
+         * Looks up the node stored for exactly (desired, supported), with no wildcard fallback.
+         * @return the node, or null if either level has no entry
+         */
+        private DistanceNode getNode(String desired, String supported) {
+            final Map<String, DistanceNode> row = subtables.get(desired);
+            return row == null ? null : row.get(supported);
+        }
+
+
+        /**
+         * Ensures a node exists for (desired, supported) and then runs {@code action} on it.
+         * <p>When the node is missing, it is created with the distance that
+         * {@code getDistance} currently resolves for the pair (wildcard fallback included), and
+         * the table found by that lookup, if any, is copied into the new node.
+         *
+         * @param desired   first-level key
+         * @param supported second-level key
+         * @param action    callback applied to the existing or newly created node; its boolean
+         *                  result is ignored
+         */
+        public void addSubtables(
+                String desired, String supported,
+                Predicate<DistanceNode> action) {
+            DistanceNode node = getNode(desired, supported);
+            if (node == null) {
+                // get the distance it would have
+                Output<DistanceTable> fallbackTable = new Output<DistanceTable>();
+                int distance = getDistance(desired, supported, fallbackTable, true);
+                // now add it
+                node = addSubtable(desired, supported, distance);
+                if (fallbackTable.value != null) {
+                    ((StringDistanceNode)node).copyTables((StringDistanceTable)(fallbackTable.value));
+                }
+            }
+            action.test(node);
+        }
+
+        /**
+         * Registers a script-level distance beneath a language pair.
+         * <p>First, every existing entry of this table that matches (desiredLang, supportedLang)
+         * -- an ANY argument matches every key on its side -- gets the script pair added to its
+         * nested table; script pairs already present there are kept. Then the exact language pair
+         * is created if needed, and the script pair is copied into its nested table if that table
+         * is still empty.
+         *
+         * @param percentage value stored as the distance of the script-level node
+         */
+        public void addSubtables(String desiredLang, String supportedLang,
+                String desiredScript, String supportedScript,
+                int percentage) {
+
+            // add to all the values that have the matching desiredLang and supportedLang
+            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+                if (desiredLang.equals(e1.getKey()) || desiredLang.equals(ANY)) {
+                    for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                        if (supportedLang.equals(e2.getKey()) || supportedLang.equals(ANY)) {
+                            DistanceNode value = e2.getValue();
+                            ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
+                        }
+                    }
+                }
+            }
+            // now add the sequence explicitly
+            StringDistanceTable dt = new StringDistanceTable();
+            dt.addSubtable(desiredScript, supportedScript, percentage);
+            CopyIfEmpty r = new CopyIfEmpty(dt);
+            addSubtables(desiredLang, supportedLang, r);
+        }
+
+        /**
+         * Registers a region-level distance beneath a language pair and a script pair.
+         * <p>First, every existing entry of this table that matches (desiredLang, supportedLang)
+         * -- an ANY argument matches every key on its side -- forwards the script and region
+         * pairs to its nested table through the script-level overload. Then the exact language
+         * pair is created if needed and an {@code AddSub} action adds the script pair beneath
+         * it, seeding that script node with the region pair.
+         *
+         * @param percentage value stored as the distance of the region-level node
+         */
+        public void addSubtables(String desiredLang, String supportedLang,
+                String desiredScript, String supportedScript,
+                String desiredRegion, String supportedRegion,
+                int percentage) {
+
+            // add to all the values that have the matching desiredLang and supportedLang
+            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+                if (desiredLang.equals(e1.getKey()) || desiredLang.equals(ANY)) {
+                    for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                        if (supportedLang.equals(e2.getKey()) || supportedLang.equals(ANY)) {
+                            StringDistanceNode value = (StringDistanceNode) e2.getValue();
+                            ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+                        }
+                    }
+                }
+            }
+            // now add the sequence explicitly
+
+            StringDistanceTable dt = new StringDistanceTable();
+            dt.addSubtable(desiredRegion, supportedRegion, percentage);
+            AddSub r = new AddSub(desiredScript, supportedScript, dt);
+            addSubtables(desiredLang,  supportedLang,  r);
+        }
+
+        /** Full (non-abbreviated) dump of this table; same as {@code toString(false)}. */
+        @Override
+        public String toString() {
+            return toString(false);
+        }
+
+        /**
+         * Dumps this table starting with an empty indent, a fresh {@code IdMakerFull} named
+         * "interner" and a new buffer.
+         * @param abbreviate passed through to the four-argument overload
+         */
+        @Override
+        public String toString(boolean abbreviate) {
+            return toString(abbreviate, "", new IdMakerFull<Object>("interner"), new StringBuilder()).toString();
+        }
+
+        /**
+         * Appends a tab-separated dump of this table, and recursively of the nested tables, to
+         * {@code buffer}: one line per (desired, supported) pair with its distance.
+         * <p>When {@code abbreviate} is true, each map, node and nested table is passed to
+         * {@code intern.getOldAndAdd}; a non-null result is written as {@code #id} instead of the
+         * content. NOTE(review): presumably getOldAndAdd returns the id of an object seen before
+         * and null for a new one -- confirm in IdMakerFull.
+         *
+         * @param abbreviate whether to replace repeated structures by {@code #id} references
+         * @param indent     prefix used for continuation lines; empty at the top level
+         * @param intern     id registry shared across the recursion
+         * @param buffer     output buffer; also the return value
+         */
+        public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
+            // first key: no prefix at top level, otherwise a single tab after the parent's columns
+            String indent2 = indent.isEmpty() ? "" : "\t";
+            Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
+            if (id != null) {
+                buffer.append(indent2).append('#').append(id).append('\n');
+            } else for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+                final Map<String, DistanceNode> subsubtable = e1.getValue();
+                buffer.append(indent2).append(e1.getKey());
+                // first supported key continues the desired key's line
+                String indent3 = "\t";
+                id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
+                if (id != null) {
+                    buffer.append(indent3).append('#').append(id).append('\n');
+                } else for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
+                    DistanceNode value = e2.getValue();
+                    buffer.append(indent3).append(e2.getKey());
+                    id = abbreviate ? intern.getOldAndAdd(value) : null;
+                    if (id != null) {
+                        buffer.append('\t').append('#').append(id).append('\n');
+                    } else {
+                        buffer.append('\t').append(value.distance);
+                        final DistanceTable distanceTable = value.getDistanceTable();
+                        if (distanceTable != null) {
+                            id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
+                            if (id != null) {
+                                buffer.append('\t').append('#').append(id).append('\n');
+                            } else {
+                                // nested table continues on the same line, three columns further in
+                                ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
+                            }
+                        } else {
+                            buffer.append('\n');
+                        }
+                    }
+                    // later supported keys start on their own line
+                    indent3 = indent+'\t';
+                }
+                // later desired keys start on their own line
+                indent2 = indent;
+            }
+            return buffer;
+        }
+
+        /** Returns the result of running a new {@code CompactAndImmutablizer} over this table. */
+        @Override
+        public StringDistanceTable compact() {
+            return new CompactAndImmutablizer().compact(this);
+        }
+
+        /**
+         * Collects the desired-side keys that have at least one supported-side entry whose
+         * distance is strictly below {@code threshold}.
+         */
+        @Override
+        public Set<String> getCloser(int threshold) {
+            final Set<String> closer = new HashSet<String>();
+            for (Entry<String, Map<String, DistanceNode>> row : subtables.entrySet()) {
+                for (DistanceNode node : row.getValue().values()) {
+                    if (node.distance < threshold) {
+                        // one close entry is enough for this desired key
+                        closer.add(row.getKey());
+                        break;
+                    }
+                }
+            }
+            return closer;
+        }
+
+        /**
+         * Returns the distance stored for exactly (a, b), with no wildcard fallback.
+         * @return the stored distance, or null if there is no such entry
+         */
+        public Integer getInternalDistance(String a, String b) {
+            final Map<String, DistanceNode> row = subtables.get(a);
+            if (row != null) {
+                final DistanceNode node = row.get(b);
+                if (node != null) {
+                    return node.distance;
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Returns the node stored for exactly (a, b), with no wildcard fallback.
+         * @return the node, or null if there is no such entry
+         */
+        @Override
+        public DistanceNode getInternalNode(String a, String b) {
+            // same exact-match lookup as the private helper
+            return getNode(a, b);
+        }
+
+        /**
+         * Returns a snapshot mapping each desired-side key to a copy of its supported-side keys,
+         * both in the iteration order of the underlying maps.
+         */
+        @Override
+        public Map<String, Set<String>> getInternalMatches() {
+            final Map<String, Set<String>> matches = new LinkedHashMap<String, Set<String>>();
+            for (Entry<String, Map<String, DistanceNode>> row : subtables.entrySet()) {
+                final Set<String> supportedKeys = new LinkedHashSet<String>(row.getValue().keySet());
+                matches.put(row.getKey(), supportedKeys);
+            }
+            return matches;
+        }
+    }
+
+    /**
+     * Action that fills a node's nested table from a template table, but only when the nested
+     * table has no entries yet. Always reports true.
+     */
+    static class CopyIfEmpty implements Predicate<DistanceNode> {
+        private final StringDistanceTable toCopy;
+
+        CopyIfEmpty(StringDistanceTable template) {
+            toCopy = template;
+        }
+
+        @Override
+        public boolean test(DistanceNode node) {
+            final StringDistanceTable target = (StringDistanceTable) node.getDistanceTable();
+            final boolean nothingThereYet = target.subtables.isEmpty();
+            if (nothingThereYet) {
+                target.copy(toCopy);
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Action that adds a (desiredSub, supportedSub) entry beneath a node and applies a
+     * {@link CopyIfEmpty} for the given template table to it. Always reports true.
+     */
+    static class AddSub implements Predicate<DistanceNode> {
+        private final String desiredSub;
+        private final String supportedSub;
+        private final CopyIfEmpty r;
+
+        AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
+            this.desiredSub = desiredSub;
+            this.supportedSub = supportedSub;
+            this.r = new CopyIfEmpty(distanceTableToCopy);
+        }
+
+        /** @throws IllegalArgumentException if {@code node} is null */
+        @Override
+        public boolean test(DistanceNode node) {
+            if (node == null) {
+                throw new IllegalArgumentException("bad structure");
+            }
+            final StringDistanceNode stringNode = (StringDistanceNode) node;
+            stringNode.addSubtables(desiredSub, supportedSub, r);
+            return true;
+        }
+    }
+
+    /**
+     * Converts both locales with {@code LSR.fromMaximalized} and returns the raw distance
+     * between the resulting language/script/region triples.
+     *
+     * @return the distance, or ABOVE_THRESHOLD once it reaches {@code threshold}
+     */
+    public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) {
+        // supported is converted first, then desired
+        final LSR supportedTriple = LSR.fromMaximalized(supported);
+        final LSR desiredTriple = LSR.fromMaximalized(desired);
+        return distanceRaw(desiredTriple, supportedTriple, threshold, distanceOption);
+    }
+
+    /**
+     * Returns distance, from 0 to ABOVE_THRESHOLD, between two language/script/region triples.
+     * The triples must come from locales in canonical, addLikelySubtags format.
+     * Unpacks the fields and delegates to the String-based overload.
+     *
+     * @param desired        the desired language, script and region
+     * @param supported      the supported language, script and region
+     * @param threshold      accumulated distances at or above this value yield ABOVE_THRESHOLD
+     * @param distanceOption NORMAL, or SCRIPT_FIRST to weight the components differently
+     * @return the distance, or ABOVE_THRESHOLD
+     */
+    public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) {
+        return distanceRaw(desired.language, supported.language,
+                desired.script, supported.script,
+                desired.region, supported.region,
+                threshold, distanceOption);
+    }
+
+    /**
+     * How distanceRaw weights the components: with SCRIPT_FIRST the language distance is
+     * shifted right by 2 and the script distance by 1 before they are added up; with NORMAL
+     * they are used unchanged.
+     */
+    public enum DistanceOption {NORMAL, SCRIPT_FIRST}
+
+    /**
+     * Returns distance, from 0 to ABOVE_THRESHOLD.
+     * ULocales must be in canonical, addLikelySubtags format.
+     * <p>The result is the sum of a language distance, a script distance and, when the regions
+     * differ, a region distance looked up by region partition. The method returns
+     * ABOVE_THRESHOLD as soon as the running sum reaches {@code threshold}.
+     *
+     * @param threshold      running sums at or above this value yield ABOVE_THRESHOLD
+     * @param distanceOption SCRIPT_FIRST scales down the language and script distances
+     * @return the accumulated distance, or ABOVE_THRESHOLD
+     */
+    public int distanceRaw(
+            String desiredLang, String supportedlang,
+            String desiredScript, String supportedScript,
+            String desiredRegion, String supportedRegion,
+            int threshold,
+            DistanceOption distanceOption) {
+
+        // receives the next-level table after each lookup
+        Output<DistanceTable> subtable = new Output<DistanceTable>();
+
+        int distance = languageDesired2Supported.getDistance(desiredLang, supportedlang, subtable, true);
+        boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
+        if (scriptFirst) {
+            // language counts for a quarter in script-first mode
+            distance >>= 2;
+        }
+        if (distance < 0) {
+            // negative language distances are clamped to 0
+            distance = 0;
+        } else if (distance >= threshold) {
+            return ABOVE_THRESHOLD;
+        }
+
+        // the script lookup replaces subtable.value with the region-level table
+        int scriptDistance = subtable.value.getDistance(desiredScript, supportedScript, subtable, true);
+        if (scriptFirst) {
+            // script counts for half in script-first mode
+            scriptDistance >>= 1;
+        }
+        distance += scriptDistance;
+        if (distance >= threshold) {
+            return ABOVE_THRESHOLD;
+        }
+
+        if (desiredRegion.equals(supportedRegion)) {
+            return distance;
+        }
+
+        // From here on we know the regions are not equal
+
+        // toId yields "" for a region that has no partition
+        final String desiredPartition = regionMapper.toId(desiredRegion);
+        final String supportedPartition = regionMapper.toId(supportedRegion);
+        int subdistance;
+
+        // check for macros. If one is found, we take the maximum distance
+        // this could be optimized by adding some more structure, but probably not worth it.
+
+        Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null;
+        Collection<String> supportedPartitions = supportedPartition.isEmpty() ? regionMapper.macroToPartitions.get(supportedRegion) : null;
+        if (desiredPartitions != null || supportedPartitions != null) {
+            subdistance = 0;
+            // make the code simple for now
+            if (desiredPartitions == null) {
+                desiredPartitions = Collections.singleton(desiredPartition);
+            }
+            if (supportedPartitions == null) {
+                supportedPartitions = Collections.singleton(supportedPartition);
+            }
+
+            // maximum over every combination of partitions
+            for (String desiredPartition2 : desiredPartitions) {
+                for (String supportedPartition2 : supportedPartitions) {
+                    int tempSubdistance = subtable.value.getDistance(desiredPartition2, supportedPartition2, null, false);
+                    if (subdistance < tempSubdistance) {
+                        subdistance = tempSubdistance;
+                    }
+                }
+            }
+        } else {
+            subdistance = subtable.value.getDistance(desiredPartition, supportedPartition, null, false);
+        }
+        distance += subdistance;
+        return distance >= threshold ? ABOVE_THRESHOLD : distance;
+    }
+
+
+    private static final XLocaleDistance DEFAULT;
+
+    /** Returns the shared instance that is built once in the static initializer. */
+    public static XLocaleDistance getDefault() {
+        return DEFAULT;
+    }
+
+    /*
+     * Builds DEFAULT: a region mapper from the variable and paradigm overrides below, and a
+     * distance table from the language matcher data (language-only and language-script rules)
+     * plus the region rule overrides. When PRINT_OVERRIDES is set, the rules are also printed
+     * to System.out as languageMatches XML.
+     */
+    static {
+        // match variable -> '+'-separated region list
+        String[][] variableOverrides = {
+                {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"},
+
+                {"$cnsar", "HK+MO"},
+
+                {"$americas", "019"},
+
+                {"$maghreb", "MA+DZ+TN+LY+MR+EH"},
+        };
+        String[] paradigmRegions = {
+                "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT"
+        };
+        // {desired, supported, value}; the stored distance is 100 - value
+        String[][] regionRuleOverrides = {
+                {"ar_*_$maghreb", "ar_*_$maghreb", "96"},
+                {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"},
+                {"ar_*_*", "ar_*_*", "95"},
+
+                {"en_*_$enUS", "en_*_$enUS", "96"},
+                {"en_*_$!enUS", "en_*_$!enUS", "96"},
+                {"en_*_*", "en_*_*", "95"},
+
+                {"es_*_$americas", "es_*_$americas", "96"},
+                {"es_*_$!americas", "es_*_$!americas", "96"},
+                {"es_*_*", "es_*_*", "95"},
+
+                {"pt_*_$americas", "pt_*_$americas", "96"},
+                {"pt_*_$!americas", "pt_*_$!americas", "96"},
+                {"pt_*_*", "pt_*_*", "95"},
+
+                {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"},
+                {"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"},
+                {"zh_Hant_*", "zh_Hant_*", "95"},
+
+                {"*_*_*", "*_*_*", "96"},
+        };
+
+        Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions);
+        for (String[] variableRule : variableOverrides) {
+            rmb.add(variableRule[0], variableRule[1]);
+        }
+        if (PRINT_OVERRIDES) {
+            System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">");
+            System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ")
+            + "\"/>");
+            for (String[] variableRule : variableOverrides) {
+                System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0]
+                        + "\" value=\""
+                        + variableRule[1]
+                                + "\"/>");
+            }
+        }
+
+        final StringDistanceTable defaultDistanceTable = new StringDistanceTable();
+        final RegionMapper defaultRegionMapper = rmb.build();
+
+        Splitter bar = Splitter.on('_');
+
+        // generic array creation is unavoidable here; only the three typed lists below are stored
+        @SuppressWarnings("unchecked")
+        List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3];
+        sorted[0] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+        sorted[1] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+        sorted[2] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+
+        // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region.
+        for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) {
+            String desiredRaw = info.get0();
+            String supportedRaw = info.get1();
+            List<String> desired = bar.splitToList(desiredRaw);
+            List<String> supported = bar.splitToList(supportedRaw);
+            Boolean oneway = info.get3();
+            // the "*_*" rule gets a fixed distance of 50 regardless of the data
+            int distance = desiredRaw.equals("*_*") ? 50 : info.get2();
+            int size = desired.size();
+
+            // for now, skip size == 3
+            if (size == 3) continue;
+
+            sorted[size-1].add(Row.of(desired, supported, distance, oneway));
+        }
+
+        for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) {
+            for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) {
+                List<String> desired = item2.get0();
+                List<String> supported = item2.get1();
+                Integer distance = item2.get2();
+                Boolean oneway = item2.get3();
+                add(defaultDistanceTable, desired, supported, distance);
+                // rules not marked one-way also apply in the reverse direction
+                if (oneway != Boolean.TRUE && !desired.equals(supported)) {
+                    add(defaultDistanceTable, supported, desired, distance);
+                }
+                printMatchXml(desired, supported, distance, oneway);
+            }
+        }
+
+        // add new size=3
+        for (String[] rule : regionRuleOverrides) {
+            List<String> desiredBase = new ArrayList<String>(bar.splitToList(rule[0]));
+            List<String> supportedBase = new ArrayList<String>(bar.splitToList(rule[1]));
+            Integer distance = 100-Integer.parseInt(rule[2]);
+            printMatchXml(desiredBase, supportedBase, distance, false);
+
+            Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2));
+            if (desiredRegions.isEmpty()) {
+                throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2));
+            }
+            Collection<String> supportedRegions = defaultRegionMapper.getIdsFromVariable(supportedBase.get(2));
+            if (supportedRegions.isEmpty()) {
+                throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2));
+            }
+            // expand the rule into one rule per pair of partition ids, in both directions
+            for (String desiredRegion2 : desiredRegions) {
+                desiredBase.set(2, desiredRegion2); // fix later
+                for (String supportedRegion2 : supportedRegions) {
+                    supportedBase.set(2, supportedRegion2); // fix later
+                    add(defaultDistanceTable, desiredBase, supportedBase, distance);
+                    add(defaultDistanceTable, supportedBase, desiredBase, distance);
+                }
+            }
+        }
+        if (PRINT_OVERRIDES) {
+            System.out.println("\t\t</languageMatches>");
+        }
+
+        DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper);
+    }
+
+    /**
+     * When PRINT_OVERRIDES is set, prints one {@code <languageMatch>} XML element for the rule
+     * to System.out, followed by a comment built from {@code fixedName}; otherwise does
+     * nothing.
+     *
+     * @param oneway adds {@code oneway="true"} when TRUE; null is treated like FALSE
+     */
+    private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) {
+        if (PRINT_OVERRIDES) {
+            String desiredStr = CollectionUtilities.join(desired, "_");
+            String supportedStr = CollectionUtilities.join(supported, "_");
+            String desiredName = fixedName(desired);
+            String supportedName = fixedName(supported);
+            // Boolean.TRUE.equals avoids unboxing a null oneway flag
+            System.out.println("\t\t\t<languageMatch"
+                    + " desired=\"" + desiredStr
+                    + "\"\tsupported=\"" + supportedStr
+                    + "\"\tdistance=\"" + distance
+                    + (Boolean.TRUE.equals(oneway) ? "\"\toneway=\"true" : "")
+                    + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->");
+        }
+    }
+
+    /**
+     * Builds a readable name for a rule of one to three fields (language, script, region):
+     * each field is replaced by its English display name, except that "*" is kept as-is and a
+     * region starting with "$" (a match variable) is kept as-is. The fields are then joined
+     * with "; ". Lists of any other size are joined unchanged.
+     *
+     * @param match the rule fields; not modified
+     */
+    private static String fixedName(List<String> match) {
+        List<String> alt = new ArrayList<String>(match);
+        switch(alt.size()) {
+        case 3:
+            String region = alt.get(2);
+            if (!region.equals("*") && !region.startsWith("$")) {
+                alt.set(2, english.regionDisplayName(region));
+            }
+            // fall through: a 3-field rule also has a script and a language
+        case 2:
+            String script = alt.get(1);
+            if (!script.equals("*")) {
+                alt.set(1, english.scriptDisplayName(script));
+            }
+            // fall through: a 2-field rule also has a language
+        case 1:
+            String language = alt.get(0);
+            if (!language.equals("*")) {
+                alt.set(0, english.languageDisplayName(language));
+            }
+            break;
+        default:
+            break;
+        }
+        return CollectionUtilities.join(alt, "; ");
+    }
+
+    /**
+     * Adds one rule to the table. {@code desired} and {@code supported} must have the same
+     * size, between 1 and 3: language, then optionally script, then optionally region. Each
+     * field used is first passed through {@code fixAny}.
+     *
+     * @param percentage value stored as the distance of the deepest level
+     * @throws IllegalArgumentException if the sizes differ or are not in 1..3
+     */
+    static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) {
+        final int size = desired.size();
+        final boolean sizesAgree = size == supported.size();
+        if (!sizesAgree || size < 1 || size > 3) {
+            throw new IllegalArgumentException();
+        }
+
+        final String desiredLang = fixAny(desired.get(0));
+        final String supportedLang = fixAny(supported.get(0));
+        if (size == 1) {
+            languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
+            return;
+        }
+
+        final String desiredScript = fixAny(desired.get(1));
+        final String supportedScript = fixAny(supported.get(1));
+        if (size == 2) {
+            languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
+            return;
+        }
+
+        final String desiredRegion = fixAny(desired.get(2));
+        final String supportedRegion = fixAny(supported.get(2));
+        languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+    }
+
+    /** Full (non-abbreviated) description; same as {@code toString(false)}. */
+    @Override
+    public String toString() {
+        return toString(false);
+    }
+
+    /**
+     * Returns the region mapper's description, a newline, and the dump of the distance table.
+     * @param abbreviate passed through to the distance table's {@code toString(boolean)}
+     */
+    public String toString(boolean abbreviate) {
+        return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate);
+    }
+
+
+    //    public static XLocaleDistance createDefaultInt() {
+    //        IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE);
+    //        return new XLocaleDistance(d, DEFAULT_REGION_MAPPER);
+    //    }
+
+    /**
+     * Clears {@code output} and fills it with every key of CONTAINER_TO_CONTAINED whose
+     * contained values are all present in {@code input}.
+     *
+     * @return {@code output}, for chaining
+     */
+    static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) {
+        output.clear();
+        for (Entry<String, Set<String>> containment : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+            final Set<String> contained = containment.getValue();
+            // example; if all southern Europe are contained, then add S. Europe
+            if (!input.containsAll(contained)) {
+                continue;
+            }
+            output.add(containment.getKey());
+        }
+        return output;
+    }
+
+    static class RegionMapper implements IdMapper<String,String> {
+        /**
+         * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX. We generate a mapping from $A1 to a set of partitions {P1, P2}
+         * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions.
+         */
+        final Multimap<String,String> variableToPartition;
+        /**
+         * Used for executing the rules. We map a region to a partition before processing.
+         */
+        final Map<String,String> regionToPartition;
+        /**
+         * Used to support es_419 compared to es_AR, etc.
+         * Maps a macroregion to the partitions it intersects with.
+         */
+        final Multimap<String,String> macroToPartitions;
+        /**
+         * Used to get the paradigm region for a cluster, if there is one
+         */
+        final Set<ULocale> paradigms;
+
+        /**
+         * Stores immutable copies of the given collections; instances are created through
+         * {@code Builder.build()}.
+         */
+        private RegionMapper(
+                Multimap<String, String> variableToPartitionIn,
+                Map<String, String> regionToPartitionIn,
+                Multimap<String,String> macroToPartitionsIn,
+                Set<ULocale> paradigmsIn) {
+            variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn);
+            regionToPartition = ImmutableMap.copyOf(regionToPartitionIn);
+            macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn);
+            paradigms = ImmutableSet.copyOf(paradigmsIn);
+        }
+
+        /**
+         * Maps a region to its partition id.
+         * @return the partition id, or the empty string if the region has no partition
+         */
+        @Override
+        public String toId(String region) {
+            final String partition = regionToPartition.get(region);
+            if (partition == null) {
+                return "";
+            }
+            return partition;
+        }
+
+        /**
+         * Resolves a rule's region field to partition ids: "*" maps to the singleton "*", any
+         * other value is looked up as a variable.
+         *
+         * @throws IllegalArgumentException if the variable has no partitions
+         */
+        public Collection<String> getIdsFromVariable(String variable) {
+            if (variable.equals("*")) {
+                return Collections.singleton("*");
+            }
+            final Collection<String> partitions = variableToPartition.get(variable);
+            if (partitions != null && !partitions.isEmpty()) {
+                return partitions;
+            }
+            throw new IllegalArgumentException("Variable not defined: " + variable);
+        }
+
+        /** Returns the regions that have a partition (the keys of regionToPartition). */
+        public Set<String> regions() {
+            return regionToPartition.keySet();
+        }
+
+        /** Returns the variables that have partitions (the keys of variableToPartition). */
+        public Set<String> variables() {
+            return variableToPartition.keySet();
+        }
+
+        /**
+         * Lists, one line per partition, the partition with its variables and regions, and
+         * then one line per macroregion with its partitions. Columns are tab-separated.
+         */
+        @Override
+        public String toString() {
+            TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition,
+                    TreeMultimap.<String, String>create());
+            TreeMultimap<String, String> partitionToRegions = TreeMultimap.create();
+            for (Entry<String, String> regionAndPartition : regionToPartition.entrySet()) {
+                partitionToRegions.put(regionAndPartition.getValue(), regionAndPartition.getKey());
+            }
+
+            StringBuilder out = new StringBuilder("Partition ➠ Variables ➠ Regions (final)");
+            for (Entry<String, Set<String>> row : partitionToVariables.asMap().entrySet()) {
+                final String partition = row.getKey();
+                out.append('\n')
+                        .append(partition)
+                        .append('\t')
+                        .append(row.getValue())
+                        .append('\t')
+                        .append(partitionToRegions.get(partition));
+            }
+
+            out.append("\nMacro ➠ Partitions");
+            for (Entry<String, Set<String>> row : macroToPartitions.asMap().entrySet()) {
+                out.append('\n').append(row.getKey()).append('\t').append(row.getValue());
+            }
+            return out.toString();
+        }
+
+        static class Builder {
+            final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
+            final private RegionSet regionSet = new RegionSet();
+            final private Set<ULocale> paradigms = new LinkedHashSet<ULocale>();
+
+            /**
+             * Registers a variable for every region in {@code barString}, and the inverse
+             * variable ("$!" plus the variable name without its first character) for every
+             * region returned by {@code regionSet.inverse()}.
+             */
+            void add(String variable, String barString) {
+                for (String member : regionSet.parseSet(barString)) {
+                    regionToRawPartition.put(member, variable);
+                }
+
+                // inverse() is called after parseSet, as it works on the set just parsed
+                final Set<String> complement = regionSet.inverse();
+                final String negatedVariable = "$!" + variable.substring(1);
+                for (String outsider : complement) {
+                    regionToRawPartition.put(outsider, negatedVariable);
+                }
+            }
+
+            /**
+             * Adds each given locale id, wrapped in a ULocale, to the paradigm set.
+             * @return this builder, for chaining
+             */
+            public Builder addParadigms(String... paradigmRegions) {
+                for (String paradigm : paradigmRegions) {
+                    paradigms.add(new ULocale(paradigm));
+                }
+                return this;
+            }
+
+            /**
+             * Turns the collected region-to-variables data into a RegionMapper.
+             * <p>Each region's set of variables is given a one-character partition name; the
+             * method then derives variable-to-partitions, region-to-partition and, from
+             * CONTAINER_TO_CONTAINED, macroregion-to-partitions.
+             */
+            RegionMapper build() {
+                final IdMakerFull<Collection<String>> id = new IdMakerFull<Collection<String>>("partition");
+                Multimap<String,String> variableToPartitions = TreeMultimap.create();
+                Map<String,String> regionToPartition = new TreeMap<String,String>();
+                Multimap<String,String> partitionToRegions = TreeMultimap.create();
+
+                for (Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
+                    final String region = e.getKey();
+                    final Collection<String> rawPartition = e.getValue();
+                    // partition name is a single character offset from Greek alpha by the id
+                    // NOTE(review): presumably id.add returns the same small int for equal variable sets -- confirm in IdMakerFull
+                    String partition = String.valueOf((char)('α' + id.add(rawPartition)));
+
+                    regionToPartition.put(region, partition);
+                    partitionToRegions.put(partition, region);
+
+                    for (String variable : rawPartition) {
+                        variableToPartitions.put(variable, partition);
+                    }
+                }
+
+                // we get a mapping of each macro to the partitions it intersects with
+                Multimap<String,String> macroToPartitions = TreeMultimap.create();
+                for (Entry<String, Set<String>> e : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+                    String macro = e.getKey();
+                    for (Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
+                        String partition = e2.getKey();
+                        // any shared region means the macro intersects this partition
+                        if (!Collections.disjoint(e.getValue(), e2.getValue())) {
+                            macroToPartitions.put(macro, partition);
+                        }
+                    }
+                }
+
+                return new RegionMapper(
+                        variableToPartitions,
+                        regionToPartition,
+                        macroToPartitions,
+                        paradigms);
+            }
+        }
+    }
+
+    /**
+     * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
+     * All macroregions are fully resolved to sets of non-macro regions.
+     * <br>Syntax is simple for now:
+     * <pre>regionSet := region ([-+] region)*</pre>
+     * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
+     */
+    private static class RegionSet {
+        private enum Operation {add, remove}
+        // temporaries used in processing
+        final private Set<String> tempRegions = new TreeSet<String>();
+        private Operation operation = null;
+
+        private Set<String> parseSet(String barString) {
+            operation = Operation.add;
+            int last = 0;
+            tempRegions.clear();
+            int i = 0;
+            for (; i < barString.length(); ++i) {
+                char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii
+                switch(c) {
+                case '+':
+                    add(barString, last, i);
+                    last = i+1;
+                    operation = Operation.add;
+                    break;
+                case '-':
+                    add(barString, last, i);
+                    last = i+1;
+                    operation = Operation.remove;
+                    break;
+                }
+            }
+            add(barString, last, i);
+            return tempRegions;
+        }
+
+        private Set<String> inverse() {
+            TreeSet<String> result = new TreeSet<String>(ALL_FINAL_REGIONS);
+            result.removeAll(tempRegions);
+            return result;
+        }
+
+        private void add(String barString, int last, int i) {
+            if (i > last) {
+                String region = barString.substring(last,i);
+                changeSet(operation, region);
+            }
+        }
+
+        private void changeSet(Operation operation, String region) {
+            Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region);
+            if (contained != null && !contained.isEmpty()) {
+                if (Operation.add == operation) {
+                    tempRegions.addAll(contained);
+                } else {
+                    tempRegions.removeAll(contained);
+                }
+            } else if (Operation.add == operation) {
+                tempRegions.add(region);
+            } else {
+                tempRegions.remove(region);
+            }
+        }
+    }
+
+    /**
+     * Inverts a map: passes {@code Multimaps.forMap(map)} to {@code Multimaps.invertFrom},
+     * collecting the result into a newly created LinkedHashMultimap.
+     * @param map the map to invert
+     * @return the multimap produced by the inversion
+     */
+    public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) {
+        return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create());
+    }
+
+    /** Returns the paradigm locales held by the region mapper (the mapper's own set, not a copy). */
+    public Set<ULocale> getParadigms() {
+        return regionMapper.paradigms;
+    }
+
+    /** Returns the stored default language distance. */
+    public int getDefaultLanguageDistance() {
+        return defaultLanguageDistance;
+    }
+
+    /** Returns the stored default script distance. */
+    public int getDefaultScriptDistance() {
+        return defaultScriptDistance;
+    }
+
+    /** Returns the stored default region distance. */
+    public int getDefaultRegionDistance() {
+        return defaultRegionDistance;
+    }
+
+    /**
+     * Rebuilds a distance table as a fresh structure whose maps are wrapped with ImmutableMap.copyOf.
+     * Each compact method first asks the inherited id maker whether the item is already known
+     * ({@code toId(item) != null}) and, if so, returns {@code intern(item)} instead of rebuilding.
+     * NOTE(review): nothing in this class registers items with the id maker; whether the shortcut
+     * can ever be taken depends on IdMakerFull, which is defined elsewhere - confirm.
+     */
+    static class CompactAndImmutablizer extends IdMakerFull<Object> {
+        /** Returns the interned table if known, otherwise a new table around the compacted subtables. */
+        StringDistanceTable compact(StringDistanceTable item) {
+            if (toId(item) != null) {
+                return (StringDistanceTable) intern(item);
+            }
+            return new StringDistanceTable(compact(item.subtables, 0));
+        }
+        /**
+         * Copies the map entry by entry, compacting each value, and returns an ImmutableMap copy.
+         * Values that are Maps are compacted recursively; every other value is cast to DistanceNode.
+         * The level argument is only incremented and passed along; it is not otherwise read here.
+         */
+        <K,T> Map<K,T> compact(Map<K,T> item, int level) {
+            if (toId(item) != null) {
+                return (Map<K,T>)intern(item);
+            }
+            Map<K,T> copy = new LinkedHashMap<K,T>();
+            for (Entry<K,T> entry : item.entrySet()) {
+                T value = entry.getValue();
+                if (value instanceof Map) {
+                    copy.put(entry.getKey(), (T)compact((Map)value, level+1));
+                } else {
+                    copy.put(entry.getKey(), (T)compact((DistanceNode)value));
+                }
+            }
+            return ImmutableMap.copyOf(copy);
+        }
+        /**
+         * Returns the interned node if known. Otherwise a node with a null or empty distance table
+         * becomes a plain DistanceNode carrying only the distance; any other node is cast to
+         * StringDistanceNode and rebuilt around its compacted table.
+         */
+        DistanceNode compact(DistanceNode item) {
+            if (toId(item) != null) {
+                return (DistanceNode) intern(item);
+            }
+            final DistanceTable distanceTable = item.getDistanceTable();
+            if (distanceTable == null || distanceTable.isEmpty()) {
+                return new DistanceNode(item.distance);
+            } else {
+                return new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable));
+            }
+        }
+    }
+
+    /**
+     * Returns {@code languageDesired2Supported} cast to StringDistanceTable.
+     * @deprecated NOTE(review): presumably intended for internal or test use only - confirm.
+     */
+    @Deprecated
+    public StringDistanceTable internalGetDistanceTable() {
+        return (StringDistanceTable) languageDesired2Supported;
+    }
+
+    /**
+     * Ad hoc self-check. Prints the default instance when PRINT_OVERRIDES is set, then compacts the
+     * default instance's distance table and verifies the compacted table equals the original.
+     * @param args unused
+     * @throws IllegalArgumentException if the compacted table is not equal to the original table
+     */
+    public static void main(String[] args) {
+        //      for (Entry<String, Collection<String>> entry : containerToContained.asMap().entrySet()) {
+        //          System.out.println(entry.getKey() + "\t⥢" + entry.getValue() + "; " + containerToFinalContained.get(entry.getKey()));
+        //      }
+        //      final Multimap<String,String> regionToMacros = ImmutableMultimap.copyOf(Multimaps.invertFrom(containerToContained, TreeMultimap.create()));
+        //      for (Entry<String, Collection<String>> entry : regionToMacros.asMap().entrySet()) {
+        //          System.out.println(entry.getKey() + "\t⥤ " + entry.getValue());
+        //      }
+        if (PRINT_OVERRIDES) {
+            System.out.println(getDefault().toString(true));
+        }
+        DistanceTable table = getDefault().languageDesired2Supported;
+        DistanceTable compactedTable = table.compact();
+        if (!table.equals(compactedTable)) {
+            throw new IllegalArgumentException("Compaction isn't equal");
+        }
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java

new file mode 100644 (file)

index 0000000..3bd8a16
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
@@ -0,0 +1,473 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
+import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Immutable class that picks best match between user's desired locales and application's supported locales.
+ * @author markdavis
+ */
+public class XLocaleMatcher {
+    private static final LSR UND = new LSR("und","","");
+    private static final ULocale UND_LOCALE = new ULocale("und");
+
+    // normally the default values, but can be set via constructor
+
+    private final XLocaleDistance localeDistance;
+    private final int thresholdDistance;
+    private final int demotionPerAdditionalDesiredLocale;
+    private final DistanceOption distanceOption;
+
+    // built based on application's supported languages in constructor
+
+    private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
+    private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
+    private final ULocale defaultLanguage;
+
+
+    public static class Builder {
+        private Set<ULocale> supportedLanguagesList;
+        private int thresholdDistance = -1;
+        private int demotionPerAdditionalDesiredLocale = -1;;
+        private ULocale defaultLanguage;
+        private XLocaleDistance localeDistance;
+        private DistanceOption distanceOption;
+        /**
+         * @param languagePriorityList the languagePriorityList to set
+         * @return
+         */
+        public Builder setSupportedLocales(String languagePriorityList) {
+            this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
+            return this;
+        }
+        public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
+            this.supportedLanguagesList = asSet(languagePriorityList);
+            return this;
+        }
+        public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
+            this.supportedLanguagesList = languagePriorityList;
+            return this;
+        }
+
+        /**
+         * @param thresholdDistance the thresholdDistance to set, with -1 = default
+         * @return
+         */
+        public Builder setThresholdDistance(int thresholdDistance) {
+            this.thresholdDistance = thresholdDistance;
+            return this;
+        }
+        /**
+         * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
+         * @return
+         */
+        public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
+            this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
+            return this;
+        }
+
+        /**
+         * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
+         * @return
+         */
+        public Builder setLocaleDistance(XLocaleDistance localeDistance) {
+            this.localeDistance = localeDistance;
+            return this;
+        }
+
+        /**
+         * Set the default language, with null = default = first supported language
+         * @param defaultLanguage
+         * @return
+         */
+        public Builder setDefaultLanguage(ULocale defaultLanguage) {
+            this.defaultLanguage = defaultLanguage;
+            return this;
+        }
+
+        /**
+         * If true, then the language differences are smaller than than script differences.
+         * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
+         * @param distanceOption
+         * @return
+         */
+        public Builder setDistanceOption(DistanceOption distanceOption) {
+            this.distanceOption = distanceOption;
+            return this;
+        }
+
+        public XLocaleMatcher build() {
+            return new XLocaleMatcher(this);
+        }
+    }
+
+    /**
+     * Returns a builder used in chaining parameters for building a Locale Matcher.
+     * @return a new, empty Builder
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Convenience constructor: supported locales given as a string, every other setting left at its default.
+     * @param supportedLocales the supported locales, in the form accepted by the builder's String setter
+     */
+    public XLocaleMatcher(String supportedLocales) {
+        this(builder().setSupportedLocales(supportedLocales));
+    }
+    /**
+     * Convenience constructor: supported locales given as a priority list, every other setting left at its default.
+     * @param supportedLocales the supported locales
+     */
+    public XLocaleMatcher(LocalePriorityList supportedLocales) {
+        this(builder().setSupportedLocales(supportedLocales));
+    }
+    /**
+     * Convenience constructor: supported locales given as a set, every other setting left at its default.
+     * @param supportedLocales the supported locales, in the set's iteration order
+     */
+    public XLocaleMatcher(Set<ULocale> supportedLocales) {
+        this(builder().setSupportedLocales(supportedLocales));
+    }
+
+    /**
+     * Create a locale matcher with the given parameters.
+     * @param supportedLocales
+     * @param thresholdDistance
+     * @param demotionPerAdditionalDesiredLocale
+     * @param localeDistance
+     * @param likelySubtags
+     */
+    private XLocaleMatcher(Builder builder) {
+        localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
+            : builder.localeDistance;
+        thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
+            : builder.thresholdDistance;
+        // only do AFTER above are set
+        Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
+        final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
+        supportedLanguages = temp2.asMap();
+        exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
+        defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
+            : supportedLanguages.isEmpty() ? null
+                : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
+        demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
+            : builder.demotionPerAdditionalDesiredLocale;
+        distanceOption = builder.distanceOption;
+    }
+
+    /**
+     * Converts each locale to an LSR: UND for the "und" locale, otherwise LSR.fromMaximalized(locale).
+     * Result is not immutable!
+     * @param languagePriorityList the locales to convert
+     * @return a new LinkedHashSet holding the LSRs in encounter order
+     */
+    private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
+        Set<LSR> result = new LinkedHashSet<LSR>();
+        for (ULocale item : languagePriorityList) {
+            final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
+            result.add(max);
+        }
+        return result;
+    }
+
+    private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
+        Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
+        for (ULocale item : languagePriorityList) {
+            final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
+            builder.put(max, item);
+        }
+        if (builder.size() > 1 && priorities != null) {
+            // for the supported list, we put any priorities before all others, except for the first.
+            Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
+
+            // copy the long way so the priorities are in the same order as in the original
+            boolean first = true;
+            for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
+                final LSR key = entry.getKey();
+                if (first || priorities.contains(key)) {
+                    builder2.putAll(key, entry.getValue());
+                    first = false;
+                }
+            }
+            // now copy the rest
+            builder2.putAll(builder);
+            if (!builder2.equals(builder)) {
+                throw new IllegalArgumentException();
+            }
+            builder = builder2;
+        }
+        return ImmutableMultimap.copyOf(builder);
+    }
+
+
+    /**
+     * Convenience method: best match for a single desired locale, without reporting which desired locale matched.
+     * @param ulocale the desired locale
+     * @return the result of the two-argument form called with a null output
+     */
+    public ULocale getBestMatch(ULocale ulocale) {
+        return getBestMatch(ulocale, null);
+    }
+    /**
+     * Convenience method: parses the string with {@code LocalePriorityList.add(String)} and matches the resulting list.
+     * @param languageList the desired locales in string form
+     * @return the result of the priority-list form called with a null output
+     */
+    public ULocale getBestMatch(String languageList) {
+        return getBestMatch(LocalePriorityList.add(languageList).build(), null);
+    }
+    /**
+     * Convenience method: matches the given desired locales, taken in argument order.
+     * The arguments are first collected into a LinkedHashSet.
+     * @param locales the desired locales
+     * @return the result of the set form called with a null output
+     */
+    public ULocale getBestMatch(ULocale... locales) {
+        return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
+    }
+    /**
+     * Convenience method: best match for a set of desired locales, without reporting which desired locale matched.
+     * @param desiredLanguages the desired locales
+     * @return the result of the two-argument set form called with a null output
+     */
+    public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
+        return getBestMatch(desiredLanguages, null);
+    }
+    /**
+     * Convenience method: best match for a priority list, without reporting which desired locale matched.
+     * @param desiredLanguages the desired locales
+     * @return the result of the two-argument priority-list form called with a null output
+     */
+    public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
+        return getBestMatch(desiredLanguages, null);
+    }
+    /**
+     * Convenience method: copies the priority list into an ordered set and delegates to the set form.
+     * @param desiredLanguages the desired locales
+     * @param outputBestDesired passed through unchanged to the set form; may be null
+     * @return the result of the set form
+     */
+    public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
+        return getBestMatch(asSet(desiredLanguages), outputBestDesired);
+    }
+
+    // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
+    private static Set<ULocale> asSet(LocalePriorityList languageList) {
+        Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
+        for (ULocale locale : languageList) {
+            temp.add(locale);
+        };
+        return temp;
+    }
+
+    /**
+     * Get the best match between the desired languages and supported languages
+     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
+     * A set with exactly one element is handed to the single-locale form of this method.
+     * @param outputBestDesired If non-null, receives the one of the desired languages that matched best.
+     * Set to null if the best match was not below the threshold distance.
+     * @return the chosen supported locale, or the default language when the best distance found
+     * is not below the threshold distance
+     */
+    public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
+        // fast path for singleton
+        if (desiredLanguages.size() == 1) {
+            return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
+        }
+        // TODO produce optimized version for single desired ULocale
+        Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
+        int bestDistance = Integer.MAX_VALUE;
+        ULocale bestDesiredLocale = null;
+        Collection<ULocale> bestSupportedLocales = null;
+        // penalty added to every distance; grows by demotionPerAdditionalDesiredLocale per desired locale
+        int delta = 0;
+        mainLoop:
+            for (final Entry<LSR, ULocale> desiredLsrAndLocale : desiredLSRs.entries()) {
+                // quick check for exact match
+                ULocale desiredLocale = desiredLsrAndLocale.getValue();
+                LSR desiredLSR = desiredLsrAndLocale.getKey();
+                // the shortcuts only apply while the accumulated penalty is still below the best distance so far
+                if (delta < bestDistance) {
+                    if (exactSupportedLocales.contains(desiredLocale)) {
+                        if (outputBestDesired != null) {
+                            outputBestDesired.value = desiredLocale;
+                        }
+                        return desiredLocale;
+                    }
+                    // quick check for maximized locale
+                    Collection<ULocale> found = supportedLanguages.get(desiredLSR);
+                    if (found != null) {
+                        // if we find one in the set, return first (lowest). We already know the exact one isn't there.
+                        if (outputBestDesired != null) {
+                            outputBestDesired.value = desiredLocale;
+                        }
+                        return found.iterator().next();
+                    }
+                }
+                for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
+                    int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
+                        thresholdDistance, distanceOption);
+                    if (distance < bestDistance) {
+                        bestDistance = distance;
+                        bestDesiredLocale = desiredLocale;
+                        bestSupportedLocales = supportedLsrAndLocale.getValue();
+                        if (distance == 0) {
+                            break mainLoop;
+                        }
+                    }
+                }
+                delta += demotionPerAdditionalDesiredLocale;
+            }
+        if (bestDistance >= thresholdDistance) {
+            if (outputBestDesired != null) {
+                outputBestDesired.value = null;
+            }
+            return defaultLanguage;
+        }
+        if (outputBestDesired != null) {
+            outputBestDesired.value = bestDesiredLocale;
+        }
+        // pick exact match if there is one
+        if (bestSupportedLocales.contains(bestDesiredLocale)) {
+            return bestDesiredLocale;
+        }
+        // otherwise return the first supported locale of the best group (nothing from bestDesired is merged in here)
+        return bestSupportedLocales.iterator().next();
+    }
+
+    /**
+     * Get the best match between a single desired language and the supported languages
+     * @param desiredLocale The user's desired language.
+     * @param outputBestDesired If non-null, receives the desired locale when a match was found.
+     * Set to null if the best match was not below the threshold distance.
+     * @return the chosen supported locale, or the default language when the best distance found
+     * is not below the threshold distance
+     */
+    public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
+        int bestDistance = Integer.MAX_VALUE;
+        ULocale bestDesiredLocale = null;
+        Collection<ULocale> bestSupportedLocales = null;
+
+        // quick check for exact match, with hack for und
+        final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
+
+        if (exactSupportedLocales.contains(desiredLocale)) {
+            if (outputBestDesired != null) {
+                outputBestDesired.value = desiredLocale;
+            }
+            return desiredLocale;
+        }
+        // quick check for maximized locale
+        // NOTE(review): this shortcut is skipped when distanceOption is null (the builder's initial value) - confirm intended
+        if (distanceOption == DistanceOption.NORMAL) {
+            Collection<ULocale> found = supportedLanguages.get(desiredLSR);
+            if (found != null) {
+                // if we find one in the set, return first (lowest). We already know the exact one isn't there.
+                if (outputBestDesired != null) {
+                    outputBestDesired.value = desiredLocale;
+                }
+                return found.iterator().next();
+            }
+        }
+        for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
+            int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
+                thresholdDistance, distanceOption);
+            if (distance < bestDistance) {
+                bestDistance = distance;
+                bestDesiredLocale = desiredLocale;
+                bestSupportedLocales = supportedLsrAndLocale.getValue();
+                if (distance == 0) {
+                    break;
+                }
+            }
+        }
+        if (bestDistance >= thresholdDistance) {
+            if (outputBestDesired != null) {
+                outputBestDesired.value = null;
+            }
+            return defaultLanguage;
+        }
+        if (outputBestDesired != null) {
+            outputBestDesired.value = bestDesiredLocale;
+        }
+        // pick exact match if there is one
+        if (bestSupportedLocales.contains(bestDesiredLocale)) {
+            return bestDesiredLocale;
+        }
+        // otherwise return the first supported locale of the best group (nothing from bestDesired is merged in here)
+        return bestSupportedLocales.iterator().next();
+    }
+
+    /** Combine features of the desired locale into those of the supported, and return result. */
+    public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
+        // for examples of extensions, variants, see
+        //  http://unicode.org/repos/cldr/tags/latest/common/bcp47/
+        //  http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+        if (!bestSupported.equals(bestDesired) && bestDesired != null) {
+            // add region, variants, extensions
+            ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
+
+            // copy the region from the desired, if there is one
+            String region = bestDesired.getCountry();
+            if (!region.isEmpty()) {
+                b.setRegion(region);
+            }
+
+            // copy the variants from desired, if there is one
+            // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
+            String variants = bestDesired.getVariant();
+            if (!variants.isEmpty()) {
+                b.setVariant(variants);
+            }
+
+            // copy the extensions from desired, if there are any
+            // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
+            for (char extensionKey : bestDesired.getExtensionKeys()) {
+                b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
+            }
+            bestSupported = b.build();
+        }
+        return bestSupported;
+    }
+
+    /** Returns the distance between the two languages. The values are not necessarily symmetric.
+     * Both locales are converted with LSR.fromMaximalized and compared by the locale distance's
+     * distanceRaw, using this matcher's threshold distance and distance option.
+     * @param desired A locale desired by the user
+     * @param supported A locale supported by a program.
+     * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
+     * A language is first maximized with add likely subtags, then compared.
+     */
+    public int distance(ULocale desired, ULocale supported) {
+        return localeDistance.distanceRaw(
+            LSR.fromMaximalized(desired),
+            LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
+    }
+
+    /**
+     * Convenience method: the same computation as the ULocale form, with each string first wrapped in a new ULocale.
+     * @param desiredLanguage locale ID desired by the user
+     * @param supportedLanguage locale ID supported by a program
+     * @return the distance computed by the locale distance's distanceRaw
+     */
+    public int distance(String desiredLanguage, String supportedLanguage) {
+        return localeDistance.distanceRaw(
+            LSR.fromMaximalized(new ULocale(desiredLanguage)),
+            LSR.fromMaximalized(new ULocale(supportedLanguage)),
+            thresholdDistance, distanceOption);
+    }
+
+    /** Returns the string form of the set of exactly supported locales. */
+    @Override
+    public String toString() {
+        return exactSupportedLocales.toString();
+    }
+
+    /**
+     * Return the inverse of the distance, scaled to a fraction: that is,
+     * (100-distance(desired, supported))/100.0, so a distance of 0 yields 1.0.
+     */
+    public double match(ULocale desired, ULocale supported) {
+        return (100-distance(desired, supported))/100.0;
+    }
+
+    /**
+     * Returns a fraction between 0 and 1, where 1 means that the languages are a
+     * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
+     * <br>Note that
+     * the precise values may change over time; no code should be made dependent
+     * on the values remaining constant.
+     * @param desired Desired locale
+     * @param desiredMax Maximized locale (using likely subtags); ignored by this implementation
+     * @param supported Supported locale
+     * @param supportedMax Maximized locale (using likely subtags); ignored by this implementation
+     * @return the result of the 2-parameter form for desired and supported.
+     * @deprecated Use the form with 2 parameters instead.
+     */
+    @Deprecated
+    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
+        return match(desired, supported);
+    }
+
+    /**
+     * Canonicalize a locale (language). Note that for now, it is canonicalizing
+     * according to CLDR conventions (he vs iw, etc), since that is what is needed
+     * for likelySubtags.
+     * <br>Not yet implemented: this method currently ignores its argument and always returns null.
+     * @param ulocale language/locale code
+     * @return intended to be a ULocale with remapped subtags; currently always null.
+     * @stable ICU 4.4
+     */
+    public ULocale canonicalize(ULocale ulocale) {
+        // TODO
+        return null;
+    }
+
+    /**
+     * @return the thresholdDistance. In getBestMatch, a best distance at or above this value
+     * is treated as a match failure (the default language is returned).
+     */
+    public int getThresholdDistance() {
+        return thresholdDistance;
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java

index 1a53cbbcffd8ba713753f99041e24e4fee3ca9b3..0b39b16083f6101e6cedc610da778ea2b274ac4e 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
@@ -25,19 +25,22 @@ import com.ibm.icu.impl.Relation;
  import com.ibm.icu.impl.Row;
  import com.ibm.icu.impl.Row.R3;
  import com.ibm.icu.impl.Utility;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
  
  /**
   * Provides a way to match the languages (locales) supported by a product to the
   * languages (locales) acceptable to a user, and get the best match. For
   * example:
- * 
+ *
   * <pre>
   * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
- * 
+ *
   * // afterwards:
   * matcher.getBestMatch("en-US").toLanguageTag() =&gt; "en"
   * </pre>
- * 
+ *
   * It takes into account when languages are close to one another, such as fil
   * and tl, and when language regional variants are close, like en-GB and en-AU.
   * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
@@ -46,7 +49,7 @@ import com.ibm.icu.impl.Utility;
   * product will just need one static instance, built with the languages
   * that it supports. However, it may want multiple instances with different
   * default languages based on additional information, such as the domain.
- * 
+ *
   * @author markdavis@google.com
   * @stable ICU 4.4
   */
@@ -83,7 +86,7 @@ public class LocaleMatcher {
       * threshold, that default language is chosen. Typically the default is English,
       * but it could be different based on additional information, such as the domain
       * of the page.
-     * 
+     *
       * @param languagePriorityList weighted list
       * @stable ICU 4.4
       */
@@ -94,7 +97,7 @@ public class LocaleMatcher {
      /**
       * Create a new language matcher from a String form. The highest-weighted
       * language is the default.
-     * 
+     *
       * @param languagePriorityListString String form of LanguagePriorityList
       * @stable ICU 4.4
       */
@@ -124,6 +127,7 @@ public class LocaleMatcher {
      @Deprecated
      public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
          this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
+        this.languagePriorityList = languagePriorityList;
          for (final ULocale language : languagePriorityList) {
              add(language, languagePriorityList.getWeight(language));
          }
@@ -179,7 +183,7 @@ public class LocaleMatcher {
  
      /**
       * Get the best match for a LanguagePriorityList
-     * 
+     *
       * @param languageList list to match
       * @return best matching language code
       * @stable ICU 4.4
@@ -206,7 +210,7 @@ public class LocaleMatcher {
  
      /**
       * Convenience method: Get the best match for a LanguagePriorityList
-     * 
+     *
       * @param languageList String form of language priority list
       * @return best matching language code
       * @stable ICU 4.4
@@ -217,7 +221,7 @@ public class LocaleMatcher {
  
      /**
       * Get the best match for an individual language code.
-     * 
+     *
       * @param ulocale locale/language code to match
       * @return best matching language code
       * @stable ICU 4.4
@@ -241,14 +245,14 @@ public class LocaleMatcher {
       */
      @Override
      public String toString() {
-        return "{" + defaultLanguage + ", " 
+        return "{" + defaultLanguage + ", "
              + localeToMaxLocaleAndWeight + "}";
      }
      // ================= Privates =====================
  
      /**
       * Get the best match for an individual language code.
-     * 
+     *
       * @param languageCode
       * @return best matching language code and weight (as per
       *         {@link #match(ULocale, ULocale)})
@@ -291,9 +295,9 @@ public class LocaleMatcher {
          }
          return bestTableMatch;
      }
-    
+
      /**
-     * @internal 
+     * @internal
       * @deprecated This API is ICU internal only.
       */
      @Deprecated
@@ -309,7 +313,7 @@ public class LocaleMatcher {
      }
  
      /**
-     * We preprocess the data to get just the possible matches for each desired base language. 
+     * We preprocess the data to get just the possible matches for each desired base language.
       */
      private void processMapping() {
          for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
@@ -343,7 +347,7 @@ public class LocaleMatcher {
      }
  
      Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
-    Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData 
+    Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
      = new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
  
      // =============== Special Mapping Information ==============
@@ -444,6 +448,7 @@ public class LocaleMatcher {
              return (region == null ? "*" : region);
          }
  
+        @Override
          public String toString() {
              String result = getLanguage();
              if (level != Level.language) {
@@ -487,7 +492,7 @@ public class LocaleMatcher {
  
      enum Level {
          language(0.99),
-        script(0.2), 
+        script(0.2),
          region(0.04);
  
          final double worst;
@@ -527,7 +532,7 @@ public class LocaleMatcher {
              }
          }
  
-        double getScore(ULocale dMax, String desiredRaw, String desiredMax, 
+        double getScore(ULocale dMax, String desiredRaw, String desiredMax,
              ULocale sMax, String supportedRaw, String supportedMax) {
              double distance = 0;
              if (!desiredMax.equals(supportedMax)) {
@@ -543,7 +548,7 @@ public class LocaleMatcher {
                  System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
              }
              for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
-                if (datum.get0().matches(desiredLocale) 
+                if (datum.get0().matches(desiredLocale)
                      && datum.get1().matches(supportedLocale)) {
                      if (DEBUG) {
                          System.out.println("\t\t\t\tFOUND\t" + datum);
@@ -557,6 +562,7 @@ public class LocaleMatcher {
              return level.worst;
          }
  
+        @Override
          public String toString() {
              StringBuilder result = new StringBuilder().append(level);
              for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
@@ -566,6 +572,7 @@ public class LocaleMatcher {
          }
  
  
+        @Override
          @SuppressWarnings("unchecked")
          public ScoreData cloneAsThawed() {
              try {
@@ -581,10 +588,12 @@ public class LocaleMatcher {
  
          private volatile boolean frozen = false;
  
+        @Override
          public ScoreData freeze() {
              return this;
          }
  
+        @Override
          public boolean isFrozen() {
              return frozen;
          }
@@ -638,6 +647,7 @@ public class LocaleMatcher {
           * @internal
           * @deprecated This API is ICU internal only.
           */
+        @Override
          @Deprecated
          public String toString() {
              return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
@@ -746,11 +756,12 @@ public class LocaleMatcher {
              return this;
          }
  
-        /** 
+        /**
           * {@inheritDoc}
           * @internal
           * @deprecated This API is ICU internal only.
           */
+        @Override
          @Deprecated
          public LanguageMatcherData cloneAsThawed() {
              LanguageMatcherData result;
@@ -766,11 +777,12 @@ public class LocaleMatcher {
              }
          }
  
-        /** 
+        /**
           * {@inheritDoc}
           * @internal
           * @deprecated This API is ICU internal only.
           */
+        @Override
          @Deprecated
          public LanguageMatcherData freeze() {
              languageScores.freeze();
@@ -781,11 +793,12 @@ public class LocaleMatcher {
              return this;
          }
  
-        /** 
+        /**
           * {@inheritDoc}
           * @internal
           * @deprecated This API is ICU internal only.
           */
+        @Override
          @Deprecated
          public boolean isFrozen() {
              return frozen;
@@ -793,6 +806,7 @@ public class LocaleMatcher {
      }
  
      LanguageMatcherData matcherData;
+    LocalePriorityList languagePriorityList;
  
      private static final LanguageMatcherData defaultWritten;
  
@@ -845,4 +859,84 @@ public class LocaleMatcher {
          final LocaleMatcher matcher = new LocaleMatcher("");
          return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
      }
+
+    transient XLocaleMatcher xLocaleMatcher = null;
+    transient ULocale xDefaultLanguage = null;
+    transient boolean xFavorScript = false;
+
+    /**
+     * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch).
+     * A language is first maximized with add likely subtags, then compared.
+     * The values are not necessarily symmetric.
+     * @param desired A locale desired by the user
+     * @param supported A locale supported by a program.
+     * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance).
+     * @internal
+     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     */
+    @Deprecated
+    public int distance(ULocale desired, ULocale supported) {
+        return getLocaleMatcher().distance(desired, supported);
+    }
+
+    /**
+     * Returns the cached XLocaleMatcher used by the tech-preview methods, building it on first use.
+     * The matcher is built from languagePriorityList, plus xDefaultLanguage (when non-null) and
+     * xFavorScript (when true). setDefaultLanguage and setFavorScript reset the cache to null,
+     * so the next call here rebuilds it.
+     */
+    private synchronized XLocaleMatcher getLocaleMatcher() {
+        if (xLocaleMatcher != null) {
+            return xLocaleMatcher;
+        }
+        Builder matcherBuilder = XLocaleMatcher.builder();
+        matcherBuilder.setSupportedLocales(languagePriorityList);
+        if (xDefaultLanguage != null) {
+            matcherBuilder.setDefaultLanguage(xDefaultLanguage);
+        }
+        if (xFavorScript) {
+            matcherBuilder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
+        }
+        xLocaleMatcher = matcherBuilder.build();
+        return xLocaleMatcher;
+    }
+
+    /**
+     * Get the best match between the desired languages and supported languages.
+     * This supports the new CLDR syntax to provide for better matches within
+     * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US),
+     * and also matching between regions and macroregions (such as comparing es-419 to es-AR).
+     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
+     * @param outputBestDesired The one of the desired languages that matched best.
+     * Set to null if the best match was not below the threshold distance.
+     * @return best-match supported language
+     * @internal
+     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     */
+    @Deprecated
+    public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
+        final XLocaleMatcher delegate = getLocaleMatcher();
+        return delegate.getBestMatch(desiredLanguages, outputBestDesired);
+    }
+
+    /**
+     * Set the default language; null (the initial value) means the first supported language.
+     * The cached matcher is discarded so that the new setting is used by the next call.
+     * @param defaultLanguage Language to use in case the threshold for distance is exceeded.
+     * @return this, for chaining
+     * @internal
+     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     */
+    @Deprecated
+    public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
+        xLocaleMatcher = null; // rebuilt lazily by getLocaleMatcher()
+        xDefaultLanguage = defaultLanguage;
+        return this;
+    }
+
+    /**
+     * If true, then language differences are treated as smaller than script differences.
+     * This is used in situations (such as maps) where it is better to fall back to the same script
+     * than to a similar language.
+     * The cached matcher is discarded so that the new setting is used by the next call.
+     * @param favorScript Set to true to treat script as most important.
+     * @return this, for chaining.
+     * @internal
+     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     */
+    @Deprecated
+    public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
+        xLocaleMatcher = null; // rebuilt lazily by getLocaleMatcher()
+        xFavorScript = favorScript;
+        return this;
+    }
  }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/DataDrivenTestHelper.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/DataDrivenTestHelper.java

new file mode 100644 (file)

index 0000000..308be8c
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/DataDrivenTestHelper.java
@@ -0,0 +1,187 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import com.ibm.icu.dev.test.AbstractTestLog;
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.util.CollectionUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.util.ICUUncheckedIOException;
+
+/**
+ * Base class for tests that are driven by a line-oriented data file.
+ * <p>
+ * {@link #load} reads the file and classifies each line as one of:
+ * <ul>
+ * <li>blank, or consisting only of a {@code #} comment;</li>
+ * <li>a parameter line, which starts with {@code @} and is split on {@code =};</li>
+ * <li>a data line, which is split on {@code ;} and must have between
+ * {@link #minArgumentCount} and {@link #maxArgumentCount} fields.</li>
+ * </ul>
+ * Text after the first {@code #} on a line is its comment. {@link #test} then replays the
+ * stored lines, passing data lines to {@link #handle} and parameter lines to
+ * {@link #handleParams}.
+ */
+public abstract class DataDrivenTestHelper {
+
+    /** Argument list of a debug line; {@link #test} flags the next data line as a breakpoint. */
+    public static final List<String> DEBUG_LINE = Collections.singletonList("@debug");
+    /** Splitter used for the fields of a data line. */
+    public static final Splitter SEMICOLON = Splitter.on(';').trimResults();
+    /** Splitter used for the key and value of a parameter line. */
+    public static final Splitter EQUAL_SPLIT = Splitter.on('=').trimResults();
+    /** Field separator written between the fields of a data line by {@link #appendLines}. */
+    public static final String SEPARATOR = " ; \t";
+
+    protected TestFmwk framework = null;
+    /** Fewest fields accepted on a data line. */
+    protected int minArgumentCount = 3;
+    /** Most fields accepted on a data line. */
+    protected int maxArgumentCount = 4;
+    // lines and comments are parallel lists: comments.get(i) belongs to lines.get(i).
+    private List<List<String>> lines = new ArrayList<List<String>>();
+    private List<String> comments = new ArrayList<String>();
+
+    /**
+     * Records the test framework this helper belongs to.
+     * @param testFramework the owning test
+     * @return this, for chaining
+     */
+    public DataDrivenTestHelper setFramework(TestFmwk testFramework) {
+        this.framework = testFramework;
+        return this;
+    }
+
+    /**
+     * Writes the stored lines back out in data-file syntax, one per line.
+     * @param out destination
+     * @return {@code out}
+     * @throws ICUUncheckedIOException if {@code out} throws an IOException
+     */
+    public <T extends Appendable> T appendLines(T out) {
+        try {
+            for (int i = 0; i < lines.size(); ++i) {
+                List<String> components = lines.get(i);
+                String comment = comments.get(i);
+                if (components.isEmpty()) {
+                    // blank or comment-only line
+                    if (!comment.isEmpty()) {
+                        out.append("# ").append(comment);
+                    }
+                } else {
+                    // parameter lines are joined with '=', data lines with SEPARATOR
+                    String first = components.get(0);
+                    String sep = first.startsWith("@") ? "=" : SEPARATOR;
+                    out.append(CollectionUtilities.join(components, sep));
+                    if (!comment.isEmpty()) {
+                        out.append("\t# ").append(comment);
+                    }
+                }
+                out.append('\n');
+            }
+            return out;
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Stores one parsed line together with its comment.
+     * @param arguments fields of the line; empty for a blank or comment-only line
+     * @param commentBase comment text for the line, or "" if none
+     * @return this, for chaining
+     */
+    protected DataDrivenTestHelper addLine(List<String> arguments, String commentBase) {
+        lines.add(Collections.unmodifiableList(arguments));
+        comments.add(commentBase);
+        return this;
+    }
+
+    /**
+     * Convenience for {@link #load} followed by {@link #test}.
+     * @param classFileIsRelativeTo class used to locate the file
+     * @param file name of the data file
+     * @return this, for chaining
+     */
+    public DataDrivenTestHelper run(Class<?> classFileIsRelativeTo, String file) {
+        return load(classFileIsRelativeTo, file)
+            .test();
+    }
+
+    /**
+     * @param arguments a stored line
+     * @return true unless the line is empty or is the debug line
+     */
+    public boolean isTestLine(List<String> arguments) {
+        return !arguments.isEmpty() && !arguments.equals(DEBUG_LINE);
+    }
+
+    /**
+     * Replays every stored line: comments are logged, parameter lines go to
+     * {@link #handleParams}, and data lines go to {@link #handle}. An exception thrown by
+     * {@link #handle} is reported as a test error and the run continues with the next line.
+     * @return this, for chaining
+     */
+    public DataDrivenTestHelper test() {
+        boolean breakpoint = false;
+        for (int i = 0; i < lines.size(); ++i) {
+            List<String> arguments = lines.get(i);
+            String comment = comments.get(i);
+            if (arguments.isEmpty()) {
+                if (!comment.isEmpty()) {
+                    AbstractTestLog.logln(comment);
+                }
+                continue;
+            } else if (arguments.equals(DEBUG_LINE)) {
+                // remembered until the next data line is handled
+                breakpoint = true;
+                continue;
+            } else {
+                String first = arguments.get(0);
+                if (first.startsWith("@")) {
+                    handleParams(comment, arguments);
+                    continue;
+                }
+            }
+            try {
+                handle(i, breakpoint, comment, arguments);
+            } catch (Exception e) {
+                e.printStackTrace();
+                AbstractTestLog.errln("Illegal data test file entry (" + i + "): " + arguments + " # " + comment);
+            }
+            breakpoint = false;
+        }
+        return this;
+    }
+
+    /**
+     * Reads and parses the data file, storing each line for later use by {@link #test},
+     * {@link #appendLines} and {@link #getLines}. Data lines with the wrong number of fields
+     * are reported as test errors and not stored. After loading, the stored lists are wrapped
+     * as unmodifiable.
+     * @param classFileIsRelativeTo class used to locate the file
+     * @param file name of the data file
+     * @return this, for chaining
+     * @throws ICUUncheckedIOException if reading or closing the file fails
+     */
+    public DataDrivenTestHelper load(Class<?> classFileIsRelativeTo, String file) {
+        // try-with-resources closes the reader without letting a failure in close() replace
+        // an exception that is already propagating out of the read loop.
+        try (BufferedReader in = FileUtilities.openFile(classFileIsRelativeTo, file)) {
+            while (true) {
+                String line = in.readLine();
+                if (line == null) {
+                    break;
+                }
+                line = line.trim();
+                if (line.isEmpty()) {
+                    addLine(Collections.<String>emptyList(), "");
+                    continue;
+                }
+                // comment: decorated form used in messages; commentBase: bare comment text
+                int hash = line.indexOf('#');
+                String comment = "";
+                String commentBase = "";
+                if (hash >= 0) {
+                    commentBase = line.substring(hash+1).trim();
+                    line = line.substring(0,hash).trim();
+                    comment = "# " + commentBase;
+                    if (!line.isEmpty()) {
+                        comment = "\t" + comment;
+                    }
+                }
+                if (line.isEmpty()) {
+                    addLine(Collections.<String>emptyList(), commentBase);
+                    continue;
+                }
+                if (line.startsWith("@")) {
+                    List<String> keyValue = EQUAL_SPLIT.splitToList(line);
+                    // NOTE(review): parameter lines store the decorated comment, while data lines
+                    // store commentBase; appendLines() adds its own "\t# " prefix on top of it.
+                    // Left unchanged because handleParams() receives this value - confirm intent.
+                    addLine(keyValue, comment);
+                    continue;
+                }
+                List<String> arguments = SEMICOLON.splitToList(line);
+                if (arguments.size() < minArgumentCount || arguments.size() > maxArgumentCount) {
+                    AbstractTestLog.errln("Malformed data line:" + line + comment);
+                    continue;
+                }
+                addLine(arguments, commentBase);
+            }
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+        lines = Collections.unmodifiableList(lines); // should do deep unmodifiable...
+        comments = Collections.unmodifiableList(comments);
+        return this;
+    }
+
+    /**
+     * Compares two values with {@link Objects#equals} and reports the outcome through
+     * {@link TestFmwk#handleAssert}.
+     * @param message description of what is being compared
+     * @param expected expected value, may be null
+     * @param actual actual value, may be null
+     * @return the result of {@link TestFmwk#handleAssert}
+     */
+    protected boolean assertEquals(String message, Object expected, Object actual) {
+        return TestFmwk.handleAssert(Objects.equals(expected, actual), message, stringFor(expected), stringFor(actual), null, false);
+    }
+
+    /**
+     * Formats a value for an assertion message: null as {@code null}, strings in double quotes,
+     * numbers as-is, and anything else as {@code className<value>}.
+     */
+    private static String stringFor(Object obj) {
+        return obj == null ? "null"
+            : obj instanceof String ? "\"" + obj + '"'
+                : obj instanceof Number ? String.valueOf(obj)
+                    : obj.getClass().getName() + "<" + obj + ">";
+    }
+
+    /**
+     * Checks one data line.
+     * @param lineNumber index of the line among the stored lines
+     * @param breakpoint true if a debug line preceded this data line
+     * @param commentBase comment stored for the line, or "" if none
+     * @param arguments fields of the line
+     */
+    public abstract void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments);
+
+    /**
+     * Handles a parameter line (one starting with {@code @}, other than the debug line).
+     * This implementation rejects every parameter; subclasses override it to accept their own.
+     * @param comment comment stored for the line, or "" if none
+     * @param arguments key and value of the parameter line
+     * @throws IllegalArgumentException always, in this implementation
+     */
+    public void handleParams(String comment, List<String> arguments) {
+        throw new IllegalArgumentException("Unrecognized parameter: " + arguments);
+    }
+
+    /**
+     * @return the stored lines, including empty lists for blank and comment-only lines
+     */
+    public List<List<String>> getLines() {
+        return lines;
+    }
+}
+\ No newline at end of file
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java

index f8f24908df205aad88eab04c63eb23115c4d498d..9c82ba59d598adb815dbd5c09625de05d12b29e0 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -9,6 +9,8 @@
  
  package com.ibm.icu.dev.test.util;
  
+import java.util.Arrays;
+import java.util.LinkedHashSet;
  import java.util.Set;
  import java.util.TreeSet;
  
@@ -18,11 +20,12 @@ import com.ibm.icu.dev.test.TestFmwk;
  import com.ibm.icu.util.LocaleMatcher;
  import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
  import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
  import com.ibm.icu.util.ULocale;
  
  /**
   * Test the LocaleMatcher.
- * 
+ *
   * @author markdavis
   */
  @SuppressWarnings("deprecation")
@@ -490,7 +493,7 @@ public class LocaleMatcherTest extends TestFmwk {
          LocaleMatcher matcher;
          matcher = new LocaleMatcher("mul, nl");
          assertEquals("nl", matcher.getBestMatch("af").toString()); // af => nl
-        
+
          matcher = new LocaleMatcher("mul, af");
          assertEquals("mul", matcher.getBestMatch("nl").toString()); // but nl !=> af
      }
@@ -618,7 +621,7 @@ public class LocaleMatcherTest extends TestFmwk {
          }
      }
  
-    private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher, 
+    private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
          boolean showmessage, int iterations, long comparisonTime) {
          long start = System.nanoTime();
          for (int i = iterations; i > 0; --i) {
@@ -629,11 +632,36 @@ public class LocaleMatcherTest extends TestFmwk {
              + (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
          return delta;
      }
-    
+
      @Test
      public void Test8288() {
          final LocaleMatcher matcher = newLocaleMatcher("it, en");
          assertEquals("it", matcher.getBestMatch("und").toString());
          assertEquals("en", matcher.getBestMatch("und, en").toString());
      }
+
+    /**
+     * Exercises the tech-preview API on LocaleMatcher: distance(), getBestMatch() with an
+     * Output parameter, setDefaultLanguage() and setFavorScript().
+     */
+    @Test
+    public void TestTechPreview() {
+        final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
+        ULocale und = new ULocale("und");
+        ULocale bulgarian = new ULocale("bg");
+        ULocale russian = new ULocale("ru");
+
+        // Labels are desired/supported, in the order of the arguments to distance().
+        assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
+        assertEquals("es-DE/ES", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
+
+        Output<ULocale> outputBestDesired = new Output<ULocale>();
+
+        // With no default language set, the first supported locale is expected, and no desired locale is reported.
+        ULocale best = matcher.getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
+        assertEquals(ULocale.ITALIAN, best);
+        assertEquals(null, outputBestDesired.value);
+
+        // An explicit default language is expected instead of the first supported locale.
+        matcher.setDefaultLanguage(ULocale.JAPANESE);
+        best = matcher.getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
+        assertEquals(ULocale.JAPANESE, best);
+
+        // With script favored, Bulgarian is expected to match Russian.
+        matcher.setFavorScript(true);
+        best = matcher.getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(und, bulgarian)), outputBestDesired);
+        assertEquals(russian, best);
+    }
  }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java

new file mode 100644 (file)

index 0000000..a1cb208
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
@@ -0,0 +1,206 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.junit.Test;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Test the XLocaleDistance.
+ *
+ * @author markdavis
+ */
+public class XLocaleDistanceTest extends TestFmwk {
+    private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
+
+    public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
+
+    private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
+    DataDrivenTestHelper tfh = new MyTestFileHandler()
+            .setFramework(this)
+            .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
+
+    /**
+     * One parsed data line: the two locales and the expected distance in each direction.
+     * The fourth field is optional; when absent, the reverse distance equals the forward one.
+     */
+    static class Arguments {
+        final ULocale desired;
+        final ULocale supported;
+        final int desiredToSupported;
+        final int supportedToDesired;
+
+        public Arguments(List<String> args) {
+            // use more complicated expression (ULocale.Builder) to check syntax
+            final String desiredTag = args.get(0);
+            desired = new ULocale.Builder().setLanguageTag(desiredTag).build();
+            final String supportedTag = args.get(1);
+            supported = new ULocale.Builder().setLanguageTag(supportedTag).build();
+            desiredToSupported = Integer.parseInt(args.get(2));
+            if (args.size() > 3) {
+                supportedToDesired = Integer.parseInt(args.get(3));
+            } else {
+                supportedToDesired = desiredToSupported;
+            }
+        }
+    }
+
+    /**
+     * Times the new distance computation over every test line of the data file, repeated
+     * maxIterations times, and asserts that the new total is under 20% of the "old" total.
+     * The sections between nanoTime() calls are what is being measured, so statement order matters.
+     */
+    @Test
+    public void testTiming() {
+        List<Arguments> testArgs = new ArrayList<Arguments>();
+        for (List<String> line : tfh.getLines()) {
+            if (tfh.isTestLine(line)) {
+                testArgs.add(new Arguments(line));
+            }
+        }
+        Arguments[] tests = testArgs.toArray(new Arguments[testArgs.size()]);
+
+        final LocaleMatcher oldLocaleMatcher = new LocaleMatcher("");
+
+        long likelyTime = 0;
+        long newLikelyTime = 0;
+        long newTimeMinusLikely = 0;
+        //long intTime = 0;
+        long oldTimeMinusLikely = 0;
+        final int maxIterations = 1000;
+
+        for (int iterations = maxIterations; iterations > 0; --iterations) {
+            // int count=0;
+            for (Arguments test : tests) {
+                final ULocale desired = test.desired;
+                final ULocale supported = test.supported;
+                //final int desiredToSupported = test.desiredToSupported;
+                //final int supportedToDesired = test.supportedToDesired;
+
+                // old path, part 1: ULocale.addLikelySubtags on both locales
+                long temp = System.nanoTime();
+                final ULocale desiredMax = ULocale.addLikelySubtags(desired);
+                final ULocale supportedMax = ULocale.addLikelySubtags(supported);
+                likelyTime += System.nanoTime()-temp;
+
+                // old path, part 2: the old match() calls are commented out, so this section
+                // only accumulates the cost of the two nanoTime() calls themselves.
+                temp = System.nanoTime();
+                //double distOld1 = oldLocaleMatcher.match(desired, desiredMax, supported, supportedMax);
+                //double distOld2 = oldLocaleMatcher.match(supported, supportedMax, desired, desiredMax);
+                oldTimeMinusLikely += System.nanoTime()-temp;
+
+                // new path, part 1: building the LSR for both locales
+                temp = System.nanoTime();
+                final LSR desiredLSR = LSR.fromMaximalized(desired);
+                final LSR supportedLSR = LSR.fromMaximalized(supported);
+                newLikelyTime += System.nanoTime()-temp;
+
+                // new path, part 2: raw distance in both directions (results are not checked here)
+                temp = System.nanoTime();
+                int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.NORMAL);
+                int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.NORMAL);
+                newTimeMinusLikely += System.nanoTime()-temp;
+            }
+        }
+        final long oldTime = oldTimeMinusLikely+likelyTime;
+        final long newTime = newLikelyTime+newTimeMinusLikely;
+        logln("\n");
+        logln("\tlikelyTime:\t" + likelyTime/maxIterations);
+        logln("\toldTime-likelyTime:\t" + oldTimeMinusLikely/maxIterations);
+        logln("totalOld:\t" + oldTime/maxIterations);
+        logln("\tnewLikelyTime:\t" + newLikelyTime/maxIterations);
+        logln("totalNew:\t" + newTime/maxIterations);
+        // NOTE(review): with the old match() calls disabled, oldTime is essentially likelyTime,
+        // so this compares the whole new path against addLikelySubtags alone - confirm intended.
+        assertTrue("newTime < 20% of oldTime", newTime * 5 < oldTime);
+        //logln("\tnewIntTime-newLikelyTime-extractTime:\t" + intTime/maxIterations);
+        //logln("totalInt:\t" + (intTime)/maxIterations);
+    }
+
+    /** Runs the structural checks of checkTables over the default distance table, starting at depth 1. */
+    @Test
+    @SuppressWarnings("deprecation")
+    public void testInternalTable() {
+        final DistanceTable rootTable = localeMatcher.internalGetDistanceTable();
+        checkTables(rootTable, "", 1);
+    }
+
+    /**
+     * Recursively walks a distance table and logs anything unexpected: the ANY,ANY pair should
+     * always be present, sub-tables should stop at depth 3, and only the nodes without a
+     * sub-table should be plain DistanceNode instances. Findings are logged with logln.
+     */
+    @SuppressWarnings("deprecation")
+    private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
+        Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
+
+        boolean sawAnyAny = false;
+        for (Entry<String, Set<String>> pairings : matches.entrySet()) {
+            final String firstKey = pairings.getKey();
+            final boolean firstIsAny = firstKey.equals(XLocaleDistance.ANY);
+            for (String secondKey : pairings.getValue()) {
+                if (firstIsAny && secondKey.equals(XLocaleDistance.ANY)) {
+                    sawAnyAny = true;
+                }
+                final DistanceNode node = internalGetDistanceTable.getInternalNode(firstKey, secondKey);
+                final DistanceTable childTable = node.getDistanceTable();
+                final boolean plainNode = node.getClass() == DistanceNode.class;
+
+                if (childTable != null && !childTable.isEmpty()) {
+                    // node with a sub-table: must be above depth 3 and not a plain node
+                    if (depth >= 3) {
+                        logln("depth should be ≤ 3");
+                    }
+                    if (plainNode) {
+                        logln("should NOT be plain DistanceNode");
+                    }
+                    checkTables(childTable, firstKey + "," + secondKey + ",", depth+1);
+                    continue;
+                }
+
+                // node without a sub-table: must sit at depth 3 and be a plain node
+                if (depth != 3) {
+                    logln("depth should be 3");
+                }
+                if (!plainNode) {
+                    logln("should be plain DistanceNode");
+                }
+            }
+        }
+        if (!sawAnyAny) {
+            logln("ANY-ANY not in" + matches);
+        }
+    }
+
+    /** Prints the default distance table to standard output, but only in verbose mode. */
+    @Test
+    public void testShowDistanceTable() {
+        if (!isVerbose()) {
+            return;
+        }
+        System.out.println(XLocaleDistance.getDefault().toString(false));
+    }
+
+    /** Runs every line of the loaded data file; with REFORMAT set, also prints the file reformatted. */
+    @Test
+    public void testDataDriven() throws IOException {
+        tfh.test();
+        if (!REFORMAT) {
+            return;
+        }
+        final StringBuffer reformatted = tfh.appendLines(new StringBuffer());
+        System.out.println(reformatted);
+    }
+
+    /**
+     * Data-file handler for this test. Each data line is checked in both directions against
+     * XLocaleDistance; the parameter lines @DistanceOption and @Threshold change the settings
+     * used for the data lines that follow.
+     */
+    class MyTestFileHandler extends DataDrivenTestHelper {
+        final XLocaleDistance distance = XLocaleDistance.getDefault();
+        Output<ULocale> bestDesired = new Output<ULocale>();
+        private DistanceOption distanceOption = DistanceOption.NORMAL;
+        private Integer threshold = distance.getDefaultScriptDistance();
+
+        @Override
+        public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
+            if (breakpoint) {
+                breakpoint = false; // put debugger breakpoint here to break at @debug in test file
+            }
+            final Arguments expected = new Arguments(arguments);
+            final int reverseActual = distance.distance(expected.supported, expected.desired, threshold, distanceOption);
+            final int forwardActual = distance.distance(expected.desired, expected.supported, threshold, distanceOption);
+            final String desiredTag = expected.desired.toLanguageTag();
+            final String supportedTag = expected.supported.toLanguageTag();
+            final String suffix = commentBase.isEmpty() ? "" : "\t# " + commentBase;
+
+            // The reverse direction is only checked when the forward direction passed.
+            final boolean forwardOk = assertEquals(
+                "(" + lineNumber + ") " + desiredTag + " to " + supportedTag + suffix,
+                expected.desiredToSupported, forwardActual);
+            if (!forwardOk) {
+                return;
+            }
+            assertEquals(
+                "(" + lineNumber + ") " + supportedTag + " to " + desiredTag + suffix,
+                expected.supportedToDesired, reverseActual);
+        }
+
+        @Override
+        public void handleParams(String comment, List<String> arguments) {
+            final String paramName = arguments.get(0);
+            if (paramName.equals("@DistanceOption")) {
+                distanceOption = DistanceOption.valueOf(arguments.get(1));
+                return;
+            }
+            if (paramName.equals("@Threshold")) {
+                threshold = Integer.valueOf(arguments.get(1));
+                return;
+            }
+            // anything else is rejected by the base class
+            super.handleParams(comment, arguments);
+        }
+    }
+}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java

new file mode 100644 (file)

index 0000000..8e3b083
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
@@ -0,0 +1,334 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.junit.Test;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.XCldrStub.Joiner;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XLocaleDistance;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Test the XLocaleMatcher.
+ *
+ * @author markdavis
+ */
+public class XLocaleMatcherTest extends TestFmwk {
+    private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
+
+    private static final int REGION_DISTANCE = 4;
+
+    private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
+
+    private XLocaleMatcher newXLocaleMatcher() {
+        return new XLocaleMatcher("");
+    }
+
+    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
+        return new XLocaleMatcher(build);
+    }
+
+    private XLocaleMatcher newXLocaleMatcher(String string) {
+        return new XLocaleMatcher(LocalePriorityList.add(string).build());
+    }
+
+    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
+        return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
+    }
+
+    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d, DistanceOption distanceOption) {
+        return XLocaleMatcher
+            .builder()
+            .setSupportedLocales(string)
+            .setThresholdDistance(d)
+            .setDistanceOption(distanceOption)
+            .build();
+    }
+
+    //    public void testParentLocales() {
+    //        // find all the regions that have a closer relation because of an explicit parent
+    //        Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
+    //        explicitParents.remove("root");
+    //        Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
+    //        for (String locale : explicitParents) {
+    //            while (true) {
+    //                locale = LocaleIDParser.getParent(locale);
+    //                if (locale == null || locale.equals("root")) {
+    //                    break;
+    //                }
+    //                otherParents.add(locale);
+    //            }
+    //        }
+    //        otherParents.remove("root");
+    //
+    //        for (String locale : CONFIG.getCldrFactory().getAvailable()) {
+    //            String parentId = LocaleIDParser.getParent(locale);
+    //            String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
+    //            if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
+    //                continue;
+    //            }
+    //            System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
+    //        }
+    //    }
+
+
+// TBD reenable with override data
+//    public void testOverrideData() {
+//        double threshold = 0.05;
+//        XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
+//        .addDistance("br", "fr", 10, true)
+//        .addDistance("es", "cy", 10, true);
+//        logln(XLocaleMatcherData.toString());
+//
+//        final XLocaleMatcher matcher = newXLocaleMatcher(
+//            LocalePriorityList
+//            .add(ULocale.ENGLISH)
+//            .add(ULocale.FRENCH)
+//            .add(ULocale.UK)
+//            .build(), XLocaleMatcherData, threshold);
+//        logln(matcher.toString());
+//
+//        assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
+//        assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
+//        // way
+//    }
+
+
+    private void assertEquals(Object expected, Object string) {
+        assertEquals("", expected, string);
+    }
+
+    /**
+     * If all the base languages are the same, then each sublocale matches
+     * itself most closely
+     */
+    @Test
+    public void testExactMatches() {
+        String lastBase = "";
+        TreeSet<ULocale> sorted = new TreeSet<ULocale>();
+        for (ULocale loc : ULocale.getAvailableLocales()) {
+            String language = loc.getLanguage();
+            if (!lastBase.equals(language)) {
+                check(sorted);
+                sorted.clear();
+                lastBase = language;
+            }
+            sorted.add(loc);
+        }
+        check(sorted);
+    }
+
+    private void check(Set<ULocale> sorted) {
+        if (sorted.isEmpty()) {
+            return;
+        }
+        check2(sorted);
+        ULocale first = sorted.iterator().next();
+        ULocale max = ULocale.addLikelySubtags(first);
+        sorted.add(max);
+        check2(sorted);
+    }
+
+    /**
+     * @param sorted
+     */
+    private void check2(Set<ULocale> sorted) {
+        // TODO Auto-generated method stub
+        logln("Checking: " + sorted);
+        XLocaleMatcher matcher = newXLocaleMatcher(
+            LocalePriorityList.add(
+                sorted.toArray(new ULocale[sorted.size()]))
+            .build());
+        for (ULocale loc : sorted) {
+            String stringLoc = loc.toString();
+            assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+        }
+    }
+
+    @Test
+    public void testComputeDistance_monkeyTest() {
+        String[] codes = ULocale.getISOCountries();
+        Random random = new Random();
+        XLocaleMatcher lm = newXLocaleMatcher();
+        for (int i = 0; i < 1000; ++i) {
+            String x = codes[random.nextInt(codes.length)];
+            String y = codes[random.nextInt(codes.length)];
+            double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
+            if (x.equals("ZZ") || y.equals("ZZ")) {
+                assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
+            } else if (x.equals(y)) {
+                assertEquals("dist(x,x) = 0", 0.0, d);
+            } else {
+                assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
+                assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
+            }
+        }
+    }
+
+
+    /**
+     * Compares the per-call time of {@code getBestMatch} on the new XLocaleMatcher with the
+     * older LocaleMatcher, for three supported-locale lists of increasing size, and asserts
+     * that the new matcher needs less than a quarter of the old matcher's time in each case.
+     * NOTE(review): the assertions depend on wall-clock measurements, so the outcome can
+     * presumably vary with the machine and its load.
+     */
+    @Test
+    public void testPerf() {
+        if (LANGUAGE_MATCHER_DATA == null) {
+            return; // skip except when testing data
+        }
+        final ULocale desired = new ULocale("sv");
+
+        // The same three lists are given to both the new and the old matcher.
+        final String shortList = "en, sv";
+        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
+        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, 
fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, 
so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+
+        final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
+        final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
+        final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
+
+        final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
+        final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
+        final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
+
+        // Sanity check before timing: every new matcher must find "sv" for the desired "sv".
+        //XLocaleMatcher.DEBUG = true;
+        ULocale expected = new ULocale("sv");
+        assertEquals(expected, matcherShort.getBestMatch(desired));
+        assertEquals(expected, matcherLong.getBestMatch(desired));
+        assertEquals(expected, matcherVeryLong.getBestMatch(desired));
+        //XLocaleMatcher.DEBUG = false;
+
+        long timeShortNew=0;
+        long timeMediumNew=0;
+        long timeLongNew=0;
+
+        // Two passes: the first (1000 iterations, no message) is presumably a warm-up; the
+        // second (1000000 iterations) logs its result and leaves the values that are asserted below.
+        for (int i = 0; i < 2; ++i) {
+            int iterations = i == 0 ? 1000 : 1000000;
+            boolean showMessage = i != 0;
+            timeShortNew = timeXLocaleMatcher("Duration (few  supported):\t", desired, matcherShort, showMessage, iterations);
+            timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
+            timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
+        }
+
+        long timeShortOld=0;
+        long timeMediumOld=0;
+        long timeLongOld=0;
+
+        // Same scheme for the old matcher, with 100000 iterations in the measured pass. The
+        // timing helpers return nanoseconds per iteration, so the differing counts still compare.
+        for (int i = 0; i < 2; ++i) {
+            int iterations = i == 0 ? 1000 : 100000;
+            boolean showMessage = i != 0;
+            timeShortOld = timeLocaleMatcher("Old Duration (few  supported):\t", desired, matcherShortOld, showMessage, iterations);
+            timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
+            timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
+        }
+
+        // new * 4 < old  is the integer form of  new < 25% of old
+        assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
+        assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
+        assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
+
+    }
+
+    private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
+        boolean showmessage, int iterations) {
+        long start = System.nanoTime();
+        for (int i = iterations; i > 0; --i) {
+            matcher.getBestMatch(desired);
+        }
+        long delta = System.nanoTime() - start;
+        if (showmessage) logln(title + (delta / iterations) + " nanos");
+        return (delta / iterations);
+    }
+
+    private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
+        boolean showmessage, int iterations) {
+        long start = System.nanoTime();
+        for (int i = iterations; i > 0; --i) {
+            matcher.getBestMatch(desired);
+        }
+        long delta = System.nanoTime() - start;
+        if (showmessage) logln(title + (delta / iterations) + " nanos");
+        return (delta / iterations);
+    }
+
+    @Test
+    public void testDataDriven() throws IOException {
+        DataDrivenTestHelper tfh = new MyTestFileHandler()
+            .setFramework(this)
+            .run(XLocaleMatcherTest.class, "data/localeMatcherTest.txt");
+        if (REFORMAT) {
+            System.out.println(tfh.appendLines(new StringBuilder()));
+        }
+    }
+
+    // Splits on a comma followed by optional whitespace, or on a run of whitespace, with trimResults() applied.
+    private static final Splitter COMMA_SPACE = Splitter.on(Pattern.compile(",\\s*|\\s+")).trimResults();
+    // Joins items with ", "; used to normalize the locale lists read from the data file.
+    private static final Joiner JOIN_COMMA_SPACE = Joiner.on(", ");
+    // Frozen set of the ASCII digits 0-9. NOTE(review): not referenced elsewhere in this class.
+    private static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
+
+    class MyTestFileHandler extends DataDrivenTestHelper {
+
+        Output<ULocale> bestDesired = new Output<ULocale>();
+        DistanceOption distanceOption = DistanceOption.NORMAL;
+        int threshold = -1;
+
+        @Override
+        public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
+            List<String> supported = COMMA_SPACE.splitToList(arguments.get(0));
+            final String supportedReformatted = JOIN_COMMA_SPACE.join(supported);
+            LocalePriorityList supportedList = LocalePriorityList.add(supportedReformatted).build();
+
+            Iterable<String> desired = COMMA_SPACE.split(arguments.get(1));
+            final String desiredReformatted = JOIN_COMMA_SPACE.join(desired);
+            LocalePriorityList desiredList = LocalePriorityList.add(desiredReformatted).build();
+
+            String expected = arguments.get(2);
+            String expectedLanguageTag = expected.equals("null") ? null : new ULocale(expected).toLanguageTag();
+
+            String expectedUi = arguments.size() < 4 ? null : arguments.get(3);
+            String expectedUiLanguageTag = expectedUi == null || expectedUi.equals("null") ? null
+                : new ULocale(expectedUi).toLanguageTag();
+
+            if (breakpoint) {
+                breakpoint = false; // put debugger breakpoint here to break at @debug in test file
+            }
+
+            XLocaleMatcher matcher = threshold < 0 && distanceOption == DistanceOption.NORMAL
+                ? newXLocaleMatcher(supportedList)
+                : newXLocaleMatcher(supportedList, threshold, distanceOption);
+            commentBase = "(" + lineNumber + ") " + commentBase;
+
+            ULocale bestSupported;
+            if (expectedUi != null) {
+                bestSupported = matcher.getBestMatch(desiredList, bestDesired);
+                ULocale bestUI = XLocaleMatcher.combine(bestSupported, bestDesired.value);
+                assertEquals(commentBase + " (UI)", expectedUiLanguageTag, bestUI == null ? null : bestUI.toLanguageTag());
+            } else {
+                bestSupported = matcher.getBestMatch(desiredList);
+            }
+            String bestMatchLanguageTag = bestSupported == null ? null : bestSupported.toLanguageTag();
+            assertEquals(commentBase, expectedLanguageTag, bestMatchLanguageTag);
+        }
+
+        @Override
+        public void handleParams(String comment, List<String> arguments) {
+            String switchItem = arguments.get(0);
+            if (switchItem.equals("@DistanceOption")) {
+                distanceOption = DistanceOption.valueOf(arguments.get(1));
+            } else if (switchItem.equals("@Threshold")) {
+                threshold = Integer.valueOf(arguments.get(1));
+            } else {
+                super.handleParams(comment, arguments);
+            }
+            return;
+        }
+    }
+}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeDistanceTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeDistanceTest.txt

new file mode 100644 (file)

index 0000000..ba783b5
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeDistanceTest.txt
@@ -0,0 +1,66 @@
+# © 2017 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# Data-driven test for XLocaleDistance.
+# Format
+# • supported ; desired ; dist(s,d) ; dist(d,s)
+# • argument 4 only used when different
+# • 100 = fail
+# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
+# The test code also supports reformatting this file, by setting the REFORMAT flag.
+
+en-CA ; en-CA ; 0 
+ar-MK ; en-CA ; 100
+
+iw ;   he ;    0
+zh ;   cmn ;   0
+
+# fallback languages get closer distances, between script (40) and region (4)
+ 
+@debug
+to ;   en ;    14 ;    100
+no ;   no-DE ;         4
+nn ;   no ;    10
+no-DE ;        nn ;    14
+no ;   no ;    0
+no ;   da ;    12
+da ;   zh-Hant ;       100
+zh-Hant ;      zh-Hans ;       23 ;    19
+zh-Hans ;      en ;    100
+
+en-US ;        en-AU ;         5  # across clusters
+en-VI ;        en-GU ;         4       # within cluster
+en-AU ;        en-CA ;         4       # within cluster
+
+# testScript
+en-CA ;        en-Cyrl ;       100
+en-Cyrl ;      es-MX ;         100
+
+hr ;   sr ;    100
+#hr ;  sr-Latn ;       8
+sr ;   sr-Latn ;       5
+
+# test419
+# Should be as good as any in cluster
+es-MX ;        es-AR ;         4
+@debug
+es-MX ;        es-419 ;        4
+es-MX ;        es-MX ;         0
+es-MX ;        es-ES ;         5
+es-MX ;        es-PT ;         5
+es-MX ;        es-150 ;        5
+es-419 ;       es-AR ;         4
+es-419 ;       es-419 ;        0
+es-419 ;       es-MX ;         4
+es-419 ;       es-ES ;         5
+es-419 ;       es-PT ;         5
+es-419 ;       es-150 ;        5
+es-ES ;        es-AR ;         5
+es-ES ;        es-419 ;        5
+es-ES ;        es-MX ;         5
+es-ES ;        es-ES ;         0
+es-ES ;        es-PT ;         4
+es-419 ;       es-150 ;        5
+
+# testEuEc
+xx-Xxxx-EC; xx-Xxxx-EU; 4
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt

new file mode 100644 (file)

index 0000000..0e3e3a5
--- /dev/null
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
@@ -0,0 +1,387 @@
+# © 2017 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# Data-driven test for the XLocaleMatcher.
+# Format
+# • Everything after "#" is a comment
+# • Arguments are separated by ";". They are:
+
+# supported ; desired ; expected
+
+# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
+# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
+# The test code also supports reformatting this file, by setting the REFORMAT flag.
+
+##################################################
+# testParentLocales
+
+# es-419, es-AR, and es-MX are in a cluster; es is in a different one
+
+@debug
+es-419, es-ES ;        es-AR ;         es-419
+es-ES, es-419 ;        es-AR ;         es-419
+
+es-419, es ;   es-AR ;         es-419
+es, es-419 ;   es-AR ;         es-419
+
+es-MX, es ;    es-AR ;         es-MX
+es, es-MX ;    es-AR ;         es-MX
+
+# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
+
+en-GB, en-US ;         en-AU ;         en-GB
+en-US, en-GB ;         en-AU ;         en-GB
+
+en-GB, en ;    en-AU ;         en-GB
+en, en-GB ;    en-AU ;         en-GB
+
+en-NZ, en-US ;         en-AU ;         en-NZ
+en-US, en-NZ ;         en-AU ;         en-NZ
+
+en-NZ, en ;    en-AU ;         en-NZ
+en, en-NZ ;    en-AU ;         en-NZ
+
+# pt-AO and pt-PT in one cluster; pt-BR in another
+
+pt-PT, pt-BR ;         pt-AO ;         pt-PT
+pt-BR, pt-PT ;         pt-AO ;         pt-PT
+
+pt-PT, pt ;    pt-AO ;         pt-PT
+pt, pt-PT ;    pt-AO ;         pt-PT
+
+zh-MO, zh-TW ;         zh-HK ;         zh-MO
+zh-TW, zh-MO ;         zh-HK ;         zh-MO
+
+zh-MO, zh-TW ;         zh-HK ;         zh-MO
+zh-TW, zh-MO ;         zh-HK ;         zh-MO
+
+zh-MO, zh-CN ;         zh-HK ;         zh-MO
+zh-CN, zh-MO ;         zh-HK ;         zh-MO
+
+zh-MO, zh ;    zh-HK ;         zh-MO
+zh, zh-MO ;    zh-HK ;         zh-MO
+
+##################################################
+# testChinese
+
+zh-CN, zh-TW, iw ;     zh-Hant-TW ;    zh-TW
+zh-CN, zh-TW, iw ;     zh-Hant ;       zh-TW
+zh-CN, zh-TW, iw ;     zh-TW ;         zh-TW
+zh-CN, zh-TW, iw ;     zh-Hans-CN ;    zh-CN
+zh-CN, zh-TW, iw ;     zh-CN ;         zh-CN
+zh-CN, zh-TW, iw ;     zh ;    zh-CN
+
+##################################################
+# testenGB
+
+fr, en, en-GB, es-419, es-MX, es ;     en-NZ ;         en-GB
+fr, en, en-GB, es-419, es-MX, es ;     es-ES ;         es
+fr, en, en-GB, es-419, es-MX, es ;     es-AR ;         es-419
+fr, en, en-GB, es-419, es-MX, es ;     es-MX ;         es-MX
+
+##################################################
+# testFallbacks
+
+91, en, hi ;   sa ;    hi
+
+##################################################
+# testBasics
+
+fr, en-GB, en ;        en-GB ;         en-GB
+fr, en-GB, en ;        en ;    en
+fr, en-GB, en ;        fr ;    fr
+fr, en-GB, en ;        ja ;    fr      # return first if no match
+
+##################################################
+# testFallback
+
+# check that script fallbacks are handled right
+
+zh-CN, zh-TW, iw ;     zh-Hant ;       zh-TW
+zh-CN, zh-TW, iw ;     zh ;    zh-CN
+zh-CN, zh-TW, iw ;     zh-Hans-CN ;    zh-CN
+zh-CN, zh-TW, iw ;     zh-Hant-HK ;    zh-TW
+zh-CN, zh-TW, iw ;     he-IT ;         iw
+
+##################################################
+# testSpecials
+
+# check that nearby languages are handled
+
+en, fil, ro, nn ;      tl ;    fil
+en, fil, ro, nn ;      mo ;    ro
+en, fil, ro, nn ;      nb ;    nn
+
+# make sure default works
+
+en, fil, ro, nn ;      ja ;    en
+
+##################################################
+# testRegionalSpecials
+
+# verify that en-AU is closer to en-GB than to en (which is en-US)
+
+en, en-GB, es, es-419 ;        es-MX ;         es-419
+en, en-GB, es, es-419 ;        en-AU ;         en-GB
+en, en-GB, es, es-419 ;        es-ES ;         es
+
+##################################################
+# testHK
+
+# HK and MO are closer to each other for Hant than to TW
+
+zh, zh-TW, zh-MO ;     zh-HK ;         zh-MO
+zh, zh-TW, zh-HK ;     zh-MO ;         zh-HK
+
+##################################################
+# testMatch-exact
+
+# see localeDistanceTest.txt
+
+##################################################
+# testMatch-none
+
+# see localeDistanceTest.txt
+
+##################################################
+# testMatch-matchOnMaximized
+
+zh, zh-Hant ;  und-TW ;        zh-Hant # und-TW should be closer to zh-Hant than to zh
+en-Hant-TW, und-TW ;   zh-Hant ;       und-TW  # zh-Hant should be closer to und-TW than to en-Hant-TW
+en-Hant-TW, und-TW ;   zh ;    und-TW  # zh should be closer to und-TW than to en-Hant-TW
+
+##################################################
+# testMatchGrandfatheredCode
+
+fr, i-klingon, en-Latn-US ;    en-GB-oed ;     en-Latn-US
+
+##################################################
+# testGetBestMatchForList-exactMatch
+fr, en-GB, ja, es-ES, es-MX ;  ja, de ;        ja
+
+##################################################
+# testGetBestMatchForList-simpleVariantMatch
+fr, en-GB, ja, es-ES, es-MX ;  de, en-US ;     en-GB   # Intentionally avoiding a perfect-match or two candidates for variant matches.
+
+# Fallback.
+
+fr, en-GB, ja, es-ES, es-MX ;  de, zh ;        fr
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ;       ja-Jpan-JP, en-AU ;     ja      # Match for ja-Jpan-JP (maximized already)
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
+
+en, ja ;       ja-JP, en-US ;  ja      # Match for ja-Jpan-JP (maximized already)
+
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ;       ja-Jpan-JP, en-US ;     ja      # Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-noMatchOnMaximized
+# Regression test for http://b/5714572 .
+# de maximizes to de-DE. Pick the exact match for the secondary language instead.
+en, de, fr, ja ;       de-CH, fr ;     de
+
+##################################################
+# testBestMatchForTraditionalChinese
+
+# Scenario: An application that only supports Simplified Chinese (and some other languages),
+# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
+# zh-Hans, it wouldn't make much of a difference.
+
+# The script distance (simplified vs. traditional Han) is considered small enough
+# to be an acceptable match. The regional difference is considered almost insignificant.
+
+fr, zh-Hans-CN, en-US ;        zh-TW ;         zh-Hans-CN
+fr, zh-Hans-CN, en-US ;        zh-Hant ;       zh-Hans-CN
+
+# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
+# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
+# change your call to getBestMatch to include a 2nd language preference.
+# "en" is a better match since its distance to "en-US" is closer than the distance
+# from "zh-TW" to "zh-CN" (script distance).
+
+fr, zh-Hans-CN, en-US ;        zh-TW, en ;     en-US
+fr, zh-Hans-CN, en-US ;        zh-Hant-CN, en, en ;    en-US
+fr, zh-Hans-CN, en-US ;        zh-Hans, en ;   zh-Hans-CN
+
+##################################################
+# testUndefined
+# When the undefined language doesn't match anything in the list,
+# getBestMatch returns the default, as usual.
+
+it, fr ;       und ;   it
+
+# When it *does* occur in the list, bestMatch returns it, as expected.
+it, und ;      und ;   und
+
+# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
+# tags, the undefined language would normally match English. But that would produce the
+# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
+# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
+
+# To avoid that, we change the matcher's definitions of max
+# so that max("und")="und". That produces the following, more desirable
+# results:
+
+it, en ;       und ;   it
+it, und ;      en ;    it
+
+##################################################
+# testGetBestMatch-regionDistance
+
+es-AR, es ;    es-MX ;         es-AR
+fr, en, en-GB ;        en-CA ;         en-GB
+de-AT, de-DE, de-CH ;  de ;    de-DE
+
+##################################################
+# testAsymmetry
+
+mul, nl ;      af ;    nl      # af => nl
+mul, af ;      nl ;    mul     # but nl !=> af
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized2
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
+
+fr, en-GB, ja, es-ES, es-MX ;  ja-JP, en-GB ;  ja      # Match for ja-JP, with likely region subtag
+
+# Check that if the preference is maximized already, it works as well.
+
+fr, en-GB, ja, es-ES, es-MX ;  ja-Jpan-JP, en-GB ;     ja      # Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-closeEnoughMatchOnMaximized
+
+en-GB, en, de, fr, ja ;        de-CH, fr ;     de
+en-GB, en, de, fr, ja ;        en-US, ar, nl, de, ja ;         en
+
+##################################################
+# testGetBestMatchForPortuguese
+
+# pt might be supported and not pt-PT
+
+# European user who prefers Spanish over Brazilian Portuguese as a fallback.
+
+pt-PT, pt-BR, es, es-419 ;     pt-PT, es, pt ;         pt-PT
+pt-PT, pt, es, es-419 ;        pt-PT, es, pt ;         pt-PT   # pt implicit
+
+# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
+# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
+# matchers as "pt-BR" is a much more common language.
+
+pt-PT, pt-BR, es, es-419 ;     pt, es-419, pt-PT ;     pt-BR
+pt-PT, pt-BR, es, es-419 ;     pt-PT, es, pt ;         pt-PT
+pt-PT, pt, es, es-419 ;        pt-PT, es, pt ;         pt-PT
+pt-PT, pt, es, es-419 ;        pt, es-419, pt-PT ;     pt
+
+pt-BR, es, es-419 ;    pt, es-419, pt-PT ;     pt-BR
+
+# Code that adds the user's country can get "pt-US" for a user's language.
+# That should fall back to "pt-BR".
+
+pt-PT, pt-BR, es, es-419 ;     pt-US, pt-PT ;  pt-BR
+pt-PT, pt, es, es-419 ;        pt-US, pt-PT, pt ;      pt      # pt-BR implicit
+
+##################################################
+# testVariantWithScriptMatch 1 and 2
+
+fr, en, sv ;   en-GB ;         en
+fr, en, sv ;   en-GB ;         en
+en, sv ;       en-GB, sv ;     en
+
+##################################################
+# testLongLists
+
+en, sv ;       sv ;    sv
+af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ;         sv ;    sv
+af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, 
fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, 
sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ;  sv ;    sv
+
+##################################################
+# test8288
+
+it, en ;       und ;   it
+it, en ;       und, en ;       en
+
+# examples from
+# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
+# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+##################################################
+# testUnHack
+
+en-NZ, en-IT ;         en-US ;         en-NZ
+
+##################################################
+# testEmptySupported => null
+ ;     en ;    null
+
+##################################################
+# testVariantsAndExtensions
+##################################################
+# tests the .combine() method
+
+und, fr ;      fr-BE-fonipa ;  fr ;    fr-BE-fonipa
+und, fr-CA ;   fr-BE-fonipa ;  fr-CA ;         fr-BE-fonipa
+und, fr-fonupa ;       fr-BE-fonipa ;  fr-fonupa ;     fr-BE-fonipa
+und, no ;      nn-BE-fonipa ;  no ;    no-BE-fonipa
+und, en-GB-u-sd-gbsct ;        en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ;    en-GB-u-sd-gbsct ;      en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
+
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ;    fr-PSCRACK ;    fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ;    fr ;    fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ;    de-CH ;         de-PSCRACK
+
+##################################################
+# testClusters
+# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
+
+und, es, es-MA, es-MX, es-419 ;        es-AR ;         es-419
+und, es-MA, es, es-419, es-MX ;        es-AR ;         es-419
+und, es, es-MA, es-MX, es-419 ;        es-EA ;         es
+und, es-MA, es, es-419, es-MX ;        es-EA ;         es
+
+# of course, fall back to within cluster
+
+und, es, es-MA, es-MX ;        es-AR ;         es-MX
+und, es-MA, es, es-MX ;        es-AR ;         es-MX
+und, es-MA, es-MX, es-419 ;    es-EA ;         es-MA
+und, es-MA, es-419, es-MX ;    es-EA ;         es-MA
+
+# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
+
+und, en, en-GU, en-IN, en-GB ;         en-ZA ;         en-GB
+und, en-GU, en, en-GB, en-IN ;         en-ZA ;         en-GB
+und, en, en-GU, en-IN, en-GB ;         en-VI ;         en
+und, en-GU, en, en-GB, en-IN ;         en-VI ;         en
+
+# of course, fall back to within cluster
+
+und, en, en-GU, en-IN ;        en-ZA ;         en-IN
+und, en-GU, en, en-IN ;        en-ZA ;         en-IN
+und, en-GU, en-IN, en-GB ;     en-VI ;         en-GU
+und, en-GU, en-GB, en-IN ;     en-VI ;         en-GU
+
+##################################################
+# testThreshold
+@Threshold=60
+
+50, und, fr-CA-fonupa ;        fr-BE-fonipa ;  fr-CA-fonupa ;  fr-BE-fonipa
+50, und, fr-Cyrl-CA-fonupa ;   fr-BE-fonipa ;  fr-Cyrl-CA-fonupa ;     fr-Cyrl-BE-fonipa
+
+@Threshold=-1 # restore
+
+##################################################
+# testScriptFirst
+@DistanceOption=SCRIPT_FIRST
+@debug
+
+ru, fr ; zh, pl ; fr
+ru, fr ; zh-Cyrl, pl ; ru
+#hr, en-Cyrl; sr ; en-Cyrl
+da, ru, hr; sr ; ru
+\ No newline at end of file
author	Mark Davis <mark@macchiato.com>
	Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)
committer	Mark Davis <mark@macchiato.com>
	Fri, 17 Mar 2017 12:48:31 +0000 (12:48 +0000)
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XCldrStub.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/DataDrivenTestHelper.java	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeDistanceTest.txt	[new file with mode: 0644]	patch \| blob
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt	[new file with mode: 0644]	patch \| blob