granicus.if.org Git - icu/commitdiff
ICU-20330 simplify LocaleMatcher code:
author: Markus Scherer <markus.icu@gmail.com>
Tue, 8 Jan 2019 01:41:08 +0000 (17:41 -0800)
committer: Markus Scherer <markus.icu@gmail.com>
Fri, 8 Feb 2019 18:35:48 +0000 (10:35 -0800)
- widen API from LocalePriorityList to Iterable
- merge getBestMatch(multiple locales) and getBestMatch(single locale) into one function
- process desired locales incrementally, create fewer objects
- reject poor matches early: use bestDistance-demotion for threshold
- add API for java.util.Locale, convert incrementally
- new feature: tracks indexes of supported and desired locales, which eliminates conversion of result objects in wrappers around getBestMatch(), as shown by the java.util.Locale API here
- simpler data structures, more serialization-friendly (easier to port to C++)
  - e.g., use a BytesTrie each for likelySubtags & locale distance, instead of layers of TreeMap
- un-hardcode locale matcher data; use modern resource bundle functions
- split builder code & runtime code into separate classes
- move LSR to simple top-level value class, cache regionIndex in LSR
- simpler handling of private use languages and pseudolocales
- simplify RegionMapper
- LocaleDistance builder: move the node distance into the DistanceTable, remove DistanceNode
- support distance rules with region codes, not just with variables
- enforce & use distance rule constraints:
  - no rule with *,supported or desired,*
  - no rule with language * and script/region non-*
- distance trie: collapse a (desired, supported)=(ANY, ANY) pair into a single *
- look up each desired language only once for all supported LSRs
- remove layers-of-Maps compaction (trie builder compacts)
- remove unused XML printing
- remove other unused code
- make XLocaleMatcherTest.testPerf() exercise locale distance lookup code

13 files changed:
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UResource.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java [deleted file]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java

index 653391ca3a46c6ea15b12385d8dec6e45c74fde0..6a63d0e5bfeb85358ef14dd77f1bddbbf84aba24 100644 (file)
@@ -248,7 +248,7 @@ public  class ICUResourceBundle extends UResourceBundle {
      * @internal ICU 3.0
      */
     public static final String[] getKeywordValues(String baseName, String keyword) {
-        Set<String> keywords = new HashSet<String>();
+        Set<String> keywords = new HashSet<>();
         ULocale locales[] = getAvailEntry(baseName, ICU_DATA_CLASS_LOADER).getULocaleList();
         int i;
 
@@ -364,6 +364,26 @@ public  class ICUResourceBundle extends UResourceBundle {
         return result;
     }
 
+    public UResource.Value getValueWithFallback(String path) throws MissingResourceException {
+        ICUResourceBundle rb;
+        if (path.isEmpty()) {
+            rb = this;
+        } else {
+            rb = findResourceWithFallback(path, this, null);
+            if (rb == null) {
+                throw new MissingResourceException(
+                    "Can't find resource for bundle "
+                    + this.getClass().getName() + ", key " + getType(),
+                    path, getKey());
+            }
+        }
+        ReaderValue readerValue = new ReaderValue();
+        ICUResourceBundleImpl impl = (ICUResourceBundleImpl)rb;
+        readerValue.reader = impl.wholeBundle.reader;
+        readerValue.res = impl.getResource();
+        return readerValue;
+    }
+
     public void getAllItemsWithFallbackNoFail(String path, UResource.Sink sink) {
         try {
             getAllItemsWithFallback(path, sink);
@@ -512,8 +532,8 @@ public  class ICUResourceBundle extends UResourceBundle {
      * @return the list of converted ULocales
      */
     public static final Locale[] getLocaleList(ULocale[] ulocales) {
-        ArrayList<Locale> list = new ArrayList<Locale>(ulocales.length);
-        HashSet<Locale> uniqueSet = new HashSet<Locale>();
+        ArrayList<Locale> list = new ArrayList<>(ulocales.length);
+        HashSet<Locale> uniqueSet = new HashSet<>();
         for (int i = 0; i < ulocales.length; i++) {
             Locale loc = ulocales[i].toLocale();
             if (!uniqueSet.contains(loc)) {
@@ -662,7 +682,7 @@ public  class ICUResourceBundle extends UResourceBundle {
 
     private static Set<String> createFullLocaleNameSet(String baseName, ClassLoader loader) {
         String bn = baseName.endsWith("/") ? baseName : baseName + "/";
-        Set<String> set = new HashSet<String>();
+        Set<String> set = new HashSet<>();
         String skipScan = ICUConfig.get("com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan", "false");
         if (!skipScan.equalsIgnoreCase("true")) {
             // scan available locale resources under the base url first
@@ -707,7 +727,7 @@ public  class ICUResourceBundle extends UResourceBundle {
     }
 
     private static Set<String> createLocaleNameSet(String baseName, ClassLoader loader) {
-        HashSet<String> set = new HashSet<String>();
+        HashSet<String> set = new HashSet<>();
         addLocaleIDsFromIndexBundle(baseName, loader, set);
         return Collections.unmodifiableSet(set);
     }
@@ -1408,7 +1428,7 @@ public  class ICUResourceBundle extends UResourceBundle {
         String bundleName;
         String rpath = wholeBundle.reader.getAlias(_resource);
         if (aliasesVisited == null) {
-            aliasesVisited = new HashMap<String, String>();
+            aliasesVisited = new HashMap<>();
         }
         if (aliasesVisited.get(rpath) != null) {
             throw new IllegalArgumentException(
index 5976f3518bdf7e2b2bd2ca6a20b06ad6f3014432..564d60c689c74b8c60b5f6bf050d9d5d85cd1dc2 100644 (file)
@@ -1065,6 +1065,17 @@ public final class ICUResourceBundleReader {
             }
             return false;
         }
+        @Override
+        public boolean findValue(CharSequence key, UResource.Value value) {
+            ReaderValue readerValue = (ReaderValue)value;
+            int i = findTableItem(readerValue.reader, key);
+            if (i >= 0) {
+                readerValue.res = getContainerResource(readerValue.reader, i);
+                return true;
+            } else {
+                return false;
+            }
+        }
     }
     private static final class Table1632 extends Table {
         @Override
index 04574f4cc0cc637e87000048c124c5124d7822f1..2c9ba45e6c3e3ab72708dc1f0b5c96a741f0aae2 100644 (file)
@@ -290,16 +290,22 @@ public final class UResource {
      */
     public interface Table {
         /**
-         * @return The number of items in the array resource.
+         * @return The number of items in the table resource.
          */
         public int getSize();
         /**
-         * @param i Array item index.
+         * @param i Table item index.
          * @param key Output-only, receives the key of the i'th item.
          * @param value Output-only, receives the value of the i'th item.
          * @return true if i is non-negative and less than getSize().
          */
         public boolean getKeyAndValue(int i, Key key, Value value);
+        /**
+         * @param key Key string to find in the table.
+         * @param value Output-only, receives the value of the item with that key.
+         * @return true if the table contains the key.
+         */
+        public boolean findValue(CharSequence key, Value value);
     }
 
     /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
new file mode 100644 (file)
index 0000000..dd32de0
--- /dev/null
@@ -0,0 +1,72 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Objects;
+
+final class LSR {
+    static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
+
+    final String language;
+    final String script;
+    final String region;
+    /** Index for region, negative if ill-formed. @see #indexForRegion */
+    final int regionIndex;
+
+    LSR(String language, String script, String region) {
+        this.language = language;
+        this.script = script;
+        this.region = region;
+        regionIndex = indexForRegion(region);
+    }
+
+    /**
+     * Returns a non-negative index for a well-formed region code.
+     * Do not rely on a particular region->index mapping; it may change.
+     * Returns -1 for ill-formed strings.
+     */
+    static final int indexForRegion(String region) {
+        if (region.length() == 2) {
+            int a = region.charAt(0) - 'A';
+            if (a < 0 || 25 < a) { return -1; }
+            int b = region.charAt(1) - 'A';
+            if (b < 0 || 25 < b) { return -1; }
+            return 26 * a + b + 1000;
+        } else if (region.length() == 3) {
+            int a = region.charAt(0) - '0';
+            if (a < 0 || 9 < a) { return -1; }
+            int b = region.charAt(1) - '0';
+            if (b < 0 || 9 < b) { return -1; }
+            int c = region.charAt(2) - '0';
+            if (c < 0 || 9 < c) { return -1; }
+            return (10 * a + b) * 10 + c;
+        }
+        return -1;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder result = new StringBuilder(language);
+        if (!script.isEmpty()) {
+            result.append('-').append(script);
+        }
+        if (!region.isEmpty()) {
+            result.append('-').append(region);
+        }
+        return result.toString();
+    }
+    @Override
+    public boolean equals(Object obj) {
+        LSR other;
+        return this == obj ||
+                (obj != null
+                && obj.getClass() == this.getClass()
+                && language.equals((other = (LSR) obj).language)
+                && script.equals(other.script)
+                && region.equals(other.region));
+    }
+    @Override
+    public int hashCode() {
+        return Objects.hash(language, script, region);
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java
new file mode 100644 (file)
index 0000000..b6fad04
--- /dev/null
@@ -0,0 +1,265 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.BytesTrieBuilder;
+import com.ibm.icu.util.ICUException;
+
+/**
+ * Builds data for XLikelySubtags.
+ * Reads source data from ICU resource bundles.
+ */
+class LikelySubtagsBuilder {
+    private static final boolean DEBUG_OUTPUT = false;
+
+    private static ICUResourceBundle getSupplementalDataBundle(String name) {
+        return ICUResourceBundle.getBundleInstance(
+            ICUData.ICU_BASE_NAME, name,
+            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+    }
+
+    private static final class AliasesBuilder {
+        final Map<String, String> toCanonical = new HashMap<>();
+        final Multimap<String, String> toAliases;
+
+        public Set<String> getAliases(String canonical) {
+            Set<String> aliases = toAliases.get(canonical);
+            return aliases == null ? Collections.singleton(canonical) : aliases;
+        }
+
+        public AliasesBuilder(String type) {
+            ICUResourceBundle metadata = getSupplementalDataBundle("metadata");
+            UResource.Value value = metadata.getValueWithFallback("alias/" + type);
+            UResource.Table aliases = value.getTable();
+            UResource.Key key = new UResource.Key();
+            for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
+                String aliasFrom = key.toString();
+                if (aliasFrom.contains("_")) {
+                    continue; // only simple aliasing
+                }
+                UResource.Table table = value.getTable();
+                if (table.findValue("reason", value) && value.getString().equals("overlong")) {
+                    continue;
+                }
+                if (!table.findValue("replacement", value)) {
+                    continue;
+                }
+                String aliasTo = value.getString();
+                int spacePos = aliasTo.indexOf(' ');
+                String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
+                if (aliasFirst.contains("_")) {
+                    continue; // only simple aliasing
+                }
+                toCanonical.put(aliasFrom, aliasFirst);
+            }
+            if (type.equals("language")) {
+                toCanonical.put("mo", "ro"); // special case
+            }
+            toAliases = Multimaps.invertFrom(toCanonical, HashMultimap.<String, String>create());
+
+            if (DEBUG_OUTPUT) {
+                System.out.println("*** " + type + " aliases");
+                for (Map.Entry<String, String> mapping : new TreeMap<>(toCanonical).entrySet()) {
+                    System.out.println(mapping);
+                }
+            }
+        }
+    }
+
+    private static final class TrieBuilder {
+        byte[] bytes = new byte[24];
+        BytesTrieBuilder tb = new BytesTrieBuilder();
+
+        void addMapping(String s, int value) {
+            // s contains only ASCII characters.
+            s.getBytes(0, s.length(), bytes, 0);
+            tb.add(bytes, s.length(), value);
+        }
+
+        BytesTrie build() {
+            ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
+            // Allocate an array with just the necessary capacity,
+            // so that we do not hold on to a larger array for a long time.
+            byte[] bytes = new byte[buffer.remaining()];
+            buffer.get(bytes);
+            if (DEBUG_OUTPUT) {
+                System.out.println("likely subtags trie size: " + bytes.length + " bytes");
+            }
+            return new BytesTrie(bytes, 0);
+        }
+    }
+
+    static XLikelySubtags.Data build() {
+        AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
+        AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
+
+        Map<String, Map<String, Map<String, LSR>>> langTable =
+                makeTable(languageAliasesBuilder, regionAliasesBuilder);
+
+        TrieBuilder trieBuilder = new TrieBuilder();
+        Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
+        // Bogus LSR at index 0 for some code to easily distinguish between
+        // intermediate match points and real result values.
+        LSR bogus = new LSR("", "", "");
+        lsrIndexes.put(bogus, 0);
+        // We could prefill the lsrList with common locales to give them small indexes,
+        // and see if that improves performance a little.
+        for (Map.Entry<String, Map<String, Map<String, LSR>>> ls :  langTable.entrySet()) {
+            String lang = ls.getKey();
+            if (lang.equals("und")) {
+                lang = "*";
+            }
+            // Create a match point for the language.
+            trieBuilder.addMapping(lang, 0);
+            Map<String, Map<String, LSR>> scriptTable = ls.getValue();
+            for (Map.Entry<String, Map<String, LSR>> sr :  scriptTable.entrySet()) {
+                String script = sr.getKey();
+                if (script.isEmpty()) {
+                    script = "*";
+                }
+                // Match point for lang+script.
+                trieBuilder.addMapping(lang + script, 0);
+                Map<String, LSR> regionTable = sr.getValue();
+                for (Map.Entry<String, LSR> r2lsr :  regionTable.entrySet()) {
+                    String region = r2lsr.getKey();
+                    if (region.isEmpty()) {
+                        region = "*";
+                    }
+                    // Map the whole lang+script+region to a unique, dense index of the LSR.
+                    LSR lsr = r2lsr.getValue();
+                    Integer index = lsrIndexes.get(lsr);
+                    int i;
+                    if (index != null) {
+                        i = index.intValue();
+                    } else {
+                        i = lsrIndexes.size();
+                        lsrIndexes.put(lsr, i);
+                    }
+                    trieBuilder.addMapping(lang + script + region, i);
+                }
+            }
+        }
+        BytesTrie trie = trieBuilder.build();
+        LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
+        return new XLikelySubtags.Data(
+                languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
+    }
+
+    private static Map<String, Map<String, Map<String, LSR>>> makeTable(
+            AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
+        Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
+        // set the base data
+        ICUResourceBundle likelySubtags = getSupplementalDataBundle("likelySubtags");
+        UResource.Value value = likelySubtags.getValueWithFallback("");
+        UResource.Table table = value.getTable();
+        UResource.Key key = new UResource.Key();
+        for (int i = 0; table.getKeyAndValue(i, key, value); ++i) {
+            LSR ltp = lsrFromLocaleID(key.toString());  // source
+            final String language = ltp.language;
+            final String script = ltp.script;
+            final String region = ltp.region;
+
+            ltp = lsrFromLocaleID(value.getString());  // target
+            String languageTarget = ltp.language;
+            final String scriptTarget = ltp.script;
+            final String regionTarget = ltp.region;
+
+            set(result, language, script, region, languageTarget, scriptTarget, regionTarget);
+            // now add aliases
+            Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
+            Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
+            for (String languageAlias : languageAliases) {
+                for (String regionAlias : regionAliases) {
+                    if (languageAlias.equals(language) && regionAlias.equals(region)) {
+                        continue;
+                    }
+                    set(result, languageAlias, script, regionAlias,
+                            languageTarget, scriptTarget, regionTarget);
+                }
+            }
+        }
+        // hack
+        set(result, "und", "Latn", "", "en", "Latn", "US");
+
+        // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
+        // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
+
+        // so und-Latn-GH   =>  ak-Latn-GH
+        Map<String, Map<String, LSR>> undScriptMap = result.get("und");
+        Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
+        for (Map.Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
+            final LSR lsr = regionEntry.getValue();
+            set(result, "und", lsr.script, lsr.region, lsr);
+        }
+        //
+        // check that every level has "" (or "und")
+        if (!result.containsKey("und")) {
+            throw new IllegalArgumentException("failure: base");
+        }
+        for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
+            String lang = langEntry.getKey();
+            final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
+            if (!scriptMap.containsKey("")) {
+                throw new IllegalArgumentException("failure: " + lang);
+            }
+            for (Map.Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
+                String script = scriptEntry.getKey();
+                final Map<String, LSR> regionMap = scriptEntry.getValue();
+                if (!regionMap.containsKey("")) {
+                    throw new IllegalArgumentException("failure: " + lang + "-" + script);
+                }
+            }
+        }
+        return result;
+    }
+
+    // Parses locale IDs in the likelySubtags data, not arbitrary language tags.
+    private static LSR lsrFromLocaleID(String languageIdentifier) {
+        String[] parts = languageIdentifier.split("[-_]");
+        if (parts.length < 1 || parts.length > 3) {
+            throw new ICUException("too many subtags");
+        }
+        String lang = parts[0];
+        String p2 = parts.length < 2 ? "" : parts[1];
+        String p3 = parts.length < 3 ? "" : parts[2];
+        return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
+    }
+
+    private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
+            final String language, final String script, final String region,
+            final String languageTarget, final String scriptTarget, final String regionTarget) {
+        LSR target = new LSR(languageTarget, scriptTarget, regionTarget);
+        set(langTable, language, script, region, target);
+    }
+
+    private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
+            final String language, final String script, final String region, LSR newValue) {
+        Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
+        Map<String, LSR> regionTable = getSubtable(scriptTable, script);
+        regionTable.put(region, newValue);
+    }
+
+    private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
+        Map<V, T> subTable = table.get(language);
+        if (subTable == null) {
+            table.put(language, subTable = new TreeMap<>());
+        }
+        return subTable;
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
new file mode 100644 (file)
index 0000000..44c7169
--- /dev/null
@@ -0,0 +1,343 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Off-line-built data for LocaleMatcher.
+ * Mostly but not only the data for computing the distance between a desired and a supported locale.
+ */
+public class LocaleDistance {
+    private static final int ABOVE_THRESHOLD = 100;
+
+    private static final boolean DEBUG_OUTPUT = false;
+
+    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+    // There is also a trie value for each subsequence of whole subtags.
+    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+    private final BytesTrie trie;
+
+    /**
+     * Maps each region to zero or more single-character partitions.
+     */
+    private final byte[] regionToPartitionsIndex;
+    private final String[][] partitionArrays;
+
+    /**
+     * Used to get the paradigm region for a cluster, if there is one.
+     */
+    private final Set<LSR> paradigmLSRs;
+
+    private final int defaultLanguageDistance;
+    private final int defaultScriptDistance;
+    private final int defaultRegionDistance;
+
+    // TODO: Load prebuilt data from a resource bundle
+    // to avoid the dependency on the builder code.
+    // VisibleForTesting
+    public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
+
+    LocaleDistance(BytesTrie trie,
+            byte[] regionToPartitionsIndex, String[][] partitionArrays,
+            Set<LSR> paradigmLSRs) {
+        this.trie = trie;
+        if (DEBUG_OUTPUT) {
+            System.out.println("*** locale distance");
+            testOnlyPrintDistanceTable();
+        }
+        this.regionToPartitionsIndex = regionToPartitionsIndex;
+        this.partitionArrays = partitionArrays;
+        this.paradigmLSRs = paradigmLSRs;
+
+        BytesTrie iter = new BytesTrie(trie);
+        BytesTrie.Result result = iter.next('*');
+        assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+        defaultLanguageDistance = iter.getValue();
+        result = iter.next('*');
+        assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+        defaultScriptDistance = iter.getValue();
+        result = iter.next('*');
+        assert result.hasValue();
+        defaultRegionDistance = iter.getValue();
+    }
+
+    // VisibleForTesting
+    public int testOnlyDistance(ULocale desired, ULocale supported,
+            int threshold, DistanceOption distanceOption) {
+        LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
+        LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
+        return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
+                threshold, distanceOption) & 0xff;
+    }
+
+    public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
+    // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
+    // than regions, so they might be considered the "normal" case.
+
+    /**
+     * Finds the supported LSR with the smallest distance from the desired one.
+     * Equivalent LSR subtags must be normalized into a canonical form.
+     *
+     * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+     * (negative if none has a distance below the threshold),
+     * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+     */
+    int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
+            int threshold, DistanceOption distanceOption) {
+        BytesTrie iter = new BytesTrie(trie);
+        // Look up the desired language only once for all supported LSRs.
+        // Its "distance" is either a match point value of 0, or a non-match negative value.
+        // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+        // Set wantValue=true so that iter reads & skips the match point value.
+        int desLangDistance = trieNext(iter, desired.language, true, true);
+        long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
+        // Index of the supported LSR with the lowest distance.
+        int bestIndex = -1;
+        for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
+            LSR supported = supportedLsrs[slIndex];
+            boolean star = false;
+            int distance = desLangDistance;
+            if (distance >= 0) {
+                if (slIndex != 0) {
+                    iter.resetToState64(desLangState);
+                }
+                distance = trieNext(iter, supported.language, true, true);
+            }
+            // Note: The data builder verifies that there are no rules with "any" (*) language and
+            // real (non *) script or region subtags.
+            // This means that if the lookup for either language fails we can use
+            // the default distances without further lookups.
+            if (distance < 0) {  // <*, *>
+                if (desired.language.equals(supported.language)) {
+                    distance = 0;
+                } else {
+                    distance = defaultLanguageDistance;
+                }
+                star = true;
+            }
+            assert 0 <= distance && distance <= 100;
+            boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
+            if (scriptFirst) {
+                distance >>= 2;
+            }
+            if (distance >= threshold) {
+                continue;
+            }
+
+            int scriptDistance;
+            if (star) {
+                if (desired.script.equals(supported.script)) {
+                    scriptDistance = 0;
+                } else {
+                    scriptDistance = defaultScriptDistance;
+                }
+            } else {
+                scriptDistance = getDesSuppDistance(iter, iter.getState64(),
+                        desired.script, supported.script, false);
+            }
+            if (scriptFirst) {
+                scriptDistance >>= 1;
+            }
+            distance += scriptDistance;
+            if (distance >= threshold) {
+                continue;
+            }
+
+            if (desired.region.equals(supported.region)) {
+                // regionDistance = 0
+            } else if (star) {
+                distance += defaultRegionDistance;
+            } else {
+                long startState = iter.getState64();
+
+                // From here on we know the regions are not equal.
+                // Map each region to zero or more partitions. (zero = one empty string)
+                // If either side has more than one, then we find the maximum distance.
+                // This could be optimized by adding some more structure, but probably not worth it.
+                final String[] desiredPartitions = partitionsForRegion(desired);
+                final String[] supportedPartitions = partitionsForRegion(supported);
+                int regionDistance;
+
+                if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
+                    regionDistance = getRegionPartitionsDistance(iter, startState,
+                            desiredPartitions, supportedPartitions, threshold - distance);
+                } else {
+                    regionDistance = getDesSuppDistance(iter, startState,
+                            desiredPartitions[0], supportedPartitions[0], true);
+                }
+                distance += regionDistance;
+            }
+            if (distance < threshold) {
+                if (distance == 0) {
+                    return slIndex << 8;
+                }
+                bestIndex = slIndex;
+                threshold = distance;
+            }
+        }
+        return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+    }
+
+    /**
+     * Returns the largest region distance found over all
+     * (desired partition, supported partition) pairs.
+     * Stops early as soon as one pair's distance reaches the threshold.
+     *
+     * @param iter trie iterator; it is reset to startState before every lookup except the first
+     * @param startState trie state from which each pair lookup starts
+     * @param desiredPartitions partition strings for the desired region
+     * @param supportedPartitions partition strings for the supported region
+     * @param threshold distance at or above which the search is abandoned
+     * @return the maximum pair distance, or the first pair distance that is >= threshold
+     */
+    private int getRegionPartitionsDistance(BytesTrie iter, long startState,
+            String[] desiredPartitions, String[] supportedPartitions, int threshold) {
+        // -1 means "no pair looked up yet"; it also marks the first iteration below.
+        int regionDistance = -1;
+        for (String dp : desiredPartitions) {
+            for (String sp : supportedPartitions) {
+                if (regionDistance >= 0) {  // no need to reset in first iteration
+                    iter.resetToState64(startState);
+                }
+                int d = getDesSuppDistance(iter, startState, dp, sp, true);
+                if (regionDistance < d) {
+                    // New maximum; give up right away if it is already too far.
+                    if (d >= threshold) {
+                        return d;
+                    }
+                    regionDistance = d;
+                }
+            }
+        }
+        assert regionDistance >= 0;
+        return regionDistance;
+    }
+
+    // Looks up the distance for one (desired, supported) subtag pair in the trie.
+    // First tries the explicit desired subtag followed by the explicit supported subtag.
+    // If either lookup fails, resets the iterator to startState and follows the single '*'
+    // entry instead; for a non-final subtag, equal desired/supported strings then yield 0.
+    // finalSubtag: true when no further subtag level follows this pair.
+    //
+    // Modified from
+    // DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
+    private static final int getDesSuppDistance(BytesTrie iter, long startState,
+            String desired, String supported, boolean finalSubtag) {
+        // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+        int distance = trieNext(iter, desired, false, true);
+        if (distance >= 0) {
+            // The supported subtag carries the value; more trie bytes must follow
+            // unless this is the final subtag level.
+            distance = trieNext(iter, supported, true, !finalSubtag);
+        }
+        if (distance < 0) {
+            BytesTrie.Result result = iter.resetToState64(startState).next('*');  // <*, *>
+            assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
+            if (!finalSubtag && desired.equals(supported)) {
+                distance = 0;  // same language or script
+            } else {
+                distance = iter.getValue();
+                assert distance >= 0;
+            }
+        }
+        return distance;
+    }
+
+    /**
+     * Feeds one subtag into the trie iterator.
+     * Every character but the last is passed as is; the last one is passed with 0x80 set.
+     *
+     * @param iter trie iterator, advanced by this call
+     * @param s the subtag; an empty string never matches
+     * @param wantValue if true, a match returns the trie value; if false, a match returns 0
+     * @param wantNext if true, the last byte must yield an intermediate value
+     *        (a value with more bytes following); if false, any value result is accepted
+     * @return the value or 0 on a match of the requested kind, otherwise -1
+     */
+    private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
+        final int lastIndex = s.length() - 1;
+        if (lastIndex < 0) {
+            return -1;  // no empty subtags in the distance data
+        }
+        // Leading characters: the trie must be able to continue after each of them.
+        for (int pos = 0; pos < lastIndex; ++pos) {
+            int ch = s.charAt(pos);
+            assert ch <= 0x7f;
+            if (!iter.next(ch).hasNext()) {
+                return -1;
+            }
+        }
+        // last character of this subtag
+        int lastChar = s.charAt(lastIndex);
+        assert lastChar <= 0x7f;
+        BytesTrie.Result outcome = iter.next(lastChar | 0x80);
+        boolean matched = wantNext
+                ? outcome == BytesTrie.Result.INTERMEDIATE_VALUE
+                : outcome.hasValue();
+        if (!matched) {
+            return -1;
+        }
+        return wantValue ? iter.getValue() : 0;
+    }
+
+    /** Returns the string form of the map produced by {@code testOnlyGetDistanceTable(true)}. */
+    @Override
+    public String toString() {
+        return testOnlyGetDistanceTable(true).toString();
+    }
+
+    /**
+     * Returns the partition strings for the region of the given LSR.
+     * A negative regionIndex selects the partition array at index 0.
+     */
+    private String[] partitionsForRegion(LSR lsr) {
+        int regionIndex = lsr.regionIndex;
+        if (regionIndex < 0) {
+            // ill-formed region -> one empty string
+            return partitionArrays[0];
+        }
+        return partitionArrays[regionToPartitionsIndex[regionIndex]];
+    }
+
+    /** Returns true if the given LSR is contained in the paradigmLSRs set. */
+    boolean isParadigmLSR(LSR lsr) {
+        return paradigmLSRs.contains(lsr);
+    }
+
+    // VisibleForTesting
+    /** Returns the stored defaultScriptDistance value. */
+    public int getDefaultScriptDistance() {
+        return defaultScriptDistance;
+    }
+
+    /** Returns the stored defaultRegionDistance value. */
+    int getDefaultRegionDistance() {
+        return defaultRegionDistance;
+    }
+
+    // VisibleForTesting
+    /**
+     * Decodes the distance trie into a map from a '-'-joined subtag string to the trie value,
+     * in trie iteration order.
+     * A '*' byte is expanded to the pair "*-*"; a byte with the high bit set ends a subtag.
+     *
+     * @param skipIntermediateMatchPoints if true, only entries with an even number of subtags
+     *        are returned; if false, all entries are returned, and each entry ending in "*-*"
+     *        is preceded by an extra key that ends in a single "*" with value 0
+     * @return a new LinkedHashMap with the decoded entries
+     */
+    public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
+        Map<String, Integer> map = new LinkedHashMap<>();
+        StringBuilder sb = new StringBuilder();
+        for (BytesTrie.Entry entry : trie) {
+            sb.setLength(0);
+            int numSubtags = 0;
+            int length = entry.bytesLength();
+            for (int i = 0; i < length; ++i) {
+                byte b = entry.byteAt(i);
+                if (b == '*') {
+                    // One * represents a (desired, supported) = (ANY, ANY) pair.
+                    sb.append("*-*-");
+                    numSubtags += 2;
+                } else {
+                    if (b >= 0) {
+                        sb.append((char) b);
+                    } else {  // end of subtag
+                        // A negative byte has the high bit set; strip it to recover the character.
+                        sb.append((char) (b & 0x7f)).append('-');
+                        ++numSubtags;
+                    }
+                }
+            }
+            assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
+            // An odd subtag count means the key ends after a desired subtag
+            // without its supported partner.
+            if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
+                sb.setLength(sb.length() - 1);  // drop the trailing '-'
+                String s = sb.toString();
+                if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
+                    // Re-insert single-ANY match points to show consistent structure
+                    // for the test code.
+                    map.put(s.substring(0, s.length() - 2), 0);
+                }
+                map.put(s, entry.value);
+            }
+        }
+        return map;
+    }
+
+    // VisibleForTesting
+    /** Prints each entry of {@code testOnlyGetDistanceTable(true)} on its own line to System.out. */
+    public void testOnlyPrintDistanceTable() {
+        for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
+            System.out.println(mapping);
+        }
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java
new file mode 100644 (file)
index 0000000..aa5bc53
--- /dev/null
@@ -0,0 +1,781 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Predicate;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.BytesTrieBuilder;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+public final class LocaleDistanceBuilder {
+    private static final String ANY = "�"; // matches any character. Uses value above any subtag.
+
+    private static final boolean DEBUG_OUTPUT = false;
+
+    /** Maps the rule wildcard "*" to the internal ANY key; returns any other string unchanged. */
+    private static String fixAny(String string) {
+        if ("*".equals(string)) {
+            return ANY;
+        }
+        return string;
+    }
+
+    /**
+     * Opens the named bundle from the ICU base data using the ICU data class loader
+     * and the DIRECT open type.
+     *
+     * @param name the bundle name, for example "supplementalData"
+     */
+    private static ICUResourceBundle getSupplementalDataBundle(String name) {
+        return ICUResourceBundle.getBundleInstance(
+            ICUData.ICU_BASE_NAME, name,
+            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+    }
+
+    /**
+     * Territory containment data read from the "territoryContainment" resource,
+     * resolved starting from the world region "001".
+     */
+    private static final class TerritoryContainment {
+        /** Directed, acyclic containment graph. Maps each container to its direct contents. */
+        final Multimap<String, String> graph = TreeMultimap.create();
+        /** Maps each container to all of its contents, direct and indirect. */
+        final Multimap<String, String> resolved = TreeMultimap.create();
+        /** Maps each container only to its leaf contents. */
+        final Multimap<String, String> toLeavesOnly = TreeMultimap.create();
+        /** The leaves of the graph. */
+        final Set<String> leaves;
+
+        /**
+         * Reads the containment graph, resolves it from "001" downward,
+         * and derives the leaf-only mapping.
+         */
+        TerritoryContainment(ICUResourceBundle supplementalData) {
+            UResource.Value value = supplementalData.getValueWithFallback("territoryContainment");
+            UResource.Key key = new UResource.Key();
+            addContainments(key, value);
+            resolve("001");
+
+            for (Map.Entry<String, Set<String>> entry : resolved.asMap().entrySet()) {
+                String container = entry.getKey();
+                for (String contained : entry.getValue()) {
+                    // A region that is not itself a key of "resolved" contains nothing.
+                    if (resolved.get(contained) == null) {  // a leaf node (usually a country)
+                        toLeavesOnly.put(container, contained);
+                    }
+                }
+            }
+            leaves = toLeavesOnly.get("001");
+        }
+
+        /**
+         * Adds the entries of the table in {@code value} to the graph.
+         * Keys of up to 3 characters are treated as containers;
+         * longer keys are treated as nested tables and processed recursively.
+         * The same key and value objects are reused for the nested iteration.
+         */
+        private void addContainments(UResource.Key key, UResource.Value value) {
+            UResource.Table containers = value.getTable();
+            for (int i = 0; containers.getKeyAndValue(i, key, value); ++i) {
+                if (key.length() <= 3) {
+                    String container = key.toString();
+                    String[] contents = value.getStringArrayOrStringAsArray();
+                    for (String s : contents) {
+                        graph.put(container, s);
+                    }
+                } else {
+                    addContainments(key, value);  // containedGroupings etc.
+                }
+            }
+        }
+
+        /**
+         * Fills {@code resolved} for the region and, recursively, for everything it contains.
+         *
+         * @return all direct and indirect contents of the region,
+         *         or an empty set if the graph has no entry for it
+         */
+        private Set<String> resolve(String region) {
+            Set<String> contained = graph.get(region);
+            if (contained == null) {
+                return Collections.emptySet();
+            }
+            resolved.putAll(region, contained); // do top level
+            // then recursively
+            for (String subregion : contained) {
+                resolved.putAll(region, resolve(subregion));
+            }
+            return resolved.get(region);
+        }
+    }
+
+    /** Immutable holder for one parsed language match rule. */
+    private static final class Rule {
+        /** Subtags of the desired side; the list object itself may be modified by its users. */
+        final List<String> desired;
+        /** Subtags of the supported side; the list object itself may be modified by its users. */
+        final List<String> supported;
+        /** The distance value of the rule. */
+        final int distance;
+        /** True if the rule applies only in the desired-to-supported direction. */
+        final boolean oneway;
+
+        Rule(List<String> desired, List<String> supported, int distance, boolean oneway) {
+            this.desired = desired;
+            this.supported = supported;
+            this.distance = distance;
+            this.oneway = oneway;
+        }
+    }
+
+    /**
+     * Returns the index registered for {@code source}.
+     * If there is none yet, registers the current map size as its index and returns that.
+     *
+     * @param objectToInt map from objects to their indexes; may get a new entry
+     * @param source the object to look up or register
+     * @return the existing or newly assigned index
+     */
+    private static final <T> int makeUniqueIndex(Map<T, Integer> objectToInt, T source) {
+        Integer known = objectToInt.get(source);
+        if (known != null) {
+            return known;
+        }
+        int nextIndex = objectToInt.size();
+        objectToInt.put(source, nextIndex);
+        return nextIndex;
+    }
+
+    /**
+     * Accumulates trie keys in a shared byte buffer and adds them to a BytesTrieBuilder.
+     * Each add method appends to the buffer and then adds the whole prefix [0, length)
+     * with the given value. Callers move {@code length} back to reuse a shorter prefix.
+     */
+    private static final class TrieBuilder {
+        // NOTE(review): fixed capacity with no bounds check in this class;
+        // presumably large enough for the longest key -- confirm.
+        byte[] bytes = new byte[24];
+        int length = 0;
+        BytesTrieBuilder tb = new BytesTrieBuilder();
+
+        /** Appends a single '*' byte and adds the resulting key with the given value. */
+        void addStar(int value) {
+            assert value >= 0;
+            bytes[length++] = '*';
+            tb.add(bytes, length, value);
+        }
+
+        /**
+         * Appends the characters of the subtag, with 0x80 set on the last one,
+         * and adds the resulting key with the given value.
+         */
+        void addSubtag(String s, int value) {
+            assert !s.isEmpty();
+            assert value >= 0;
+            assert !s.equals(ANY);
+            int end = s.length() - 1;
+            for (int i = 0;; ++i) {
+                char c = s.charAt(i);
+                assert c <= 0x7f;
+                if (i < end) {
+                    bytes[length++] = (byte) c;
+                } else {
+                    // Mark the last character as a terminator to avoid overlap matches.
+                    bytes[length++] = (byte) (c | 0x80);
+                    break;
+                }
+            }
+            tb.add(bytes, length, value);
+        }
+
+        /** Builds the trie with the SMALL option and wraps a right-sized copy of its bytes. */
+        BytesTrie build() {
+            ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
+            // Allocate an array with just the necessary capacity,
+            // so that we do not hold on to a larger array for a long time.
+            // Note: this local array shadows the key buffer field of the same name.
+            byte[] bytes = new byte[buffer.remaining()];
+            buffer.get(bytes);
+            if (DEBUG_OUTPUT) {
+                System.out.println("distance trie size: " + bytes.length + " bytes");
+            }
+            return new BytesTrie(bytes, 0);
+        }
+    }
+
+    private static final class DistanceTable {
+        final int nodeDistance;  // distance for the lookup so far
+        final Map<String, Map<String, DistanceTable>> subtables;
+
+        /** Creates a node with the given distance and an empty, sorted subtable map. */
+        DistanceTable(int distance) {
+            nodeDistance = distance;
+            subtables = new TreeMap<>();
+        }
+
+        /** Two tables are equal if they have the same class, nodeDistance, and subtables. */
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (obj == null || obj.getClass() != this.getClass()) {
+                return false;
+            }
+            DistanceTable that = (DistanceTable) obj;
+            return nodeDistance == that.nodeDistance && subtables.equals(that.subtables);
+        }
+        /** Combines nodeDistance and the subtables hash code with XOR. */
+        @Override
+        public int hashCode() {
+            return nodeDistance ^ subtables.hashCode();
+        }
+
+        /**
+         * Finds the node for a (desired, supported) pair, falling back to ANY entries,
+         * and returns its distance.
+         * Lookup order: the row for desired (else the ANY row), then the supported column
+         * of that row, then its ANY column, then (if the row was an exact one) the ANY row.
+         *
+         * @param distanceTable if not null, receives the node that was found
+         * @param starEquals if true, a lookup that used an ANY fallback yields 0
+         *        when desired equals supported
+         * @return 0 under the starEquals condition above, otherwise the found node's nodeDistance
+         */
+        public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
+            boolean star = false;
+            Map<String, DistanceTable> sub2 = subtables.get(desired);
+            if (sub2 == null) {
+                // NOTE(review): assumes an ANY row exists whenever the exact row is missing;
+                // otherwise sub2 is null on the next access -- confirm against the rule data.
+                sub2 = subtables.get(ANY); // <*, supported>
+                star = true;
+            }
+            DistanceTable value = sub2.get(supported);   // <*/desired, supported>
+            if (value == null) {
+                value = sub2.get(ANY);  // <*/desired, *>
+                if (value == null && !star) {
+                    sub2 = subtables.get(ANY);   // <*, supported>
+                    value = sub2.get(supported);
+                    if (value == null) {
+                        value = sub2.get(ANY);   // <*, *>
+                    }
+                }
+                star = true;
+            }
+            if (distanceTable != null) {
+                distanceTable.value = value;
+            }
+            int result = starEquals && star && desired.equals(supported) ? 0 : value.nodeDistance;
+            return result;
+        }
+
+        /**
+         * Adds a node to this table for every (desired, supported) entry of the other table,
+         * using that entry's nodeDistance.
+         * Only this one level is copied; the subtables of the other table's entries are not.
+         * Pairs that already exist in this table are left as they are.
+         */
+        void copy(DistanceTable other) {
+            for (Map.Entry<String, Map<String, DistanceTable>> e1 : other.subtables.entrySet()) {
+                for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+                    DistanceTable value = e2.getValue();
+                    addSubtable(e1.getKey(), e2.getKey(), value.nodeDistance);
+                }
+            }
+        }
+
+        /**
+         * Returns the node for the (desired, supported) pair, creating it if necessary.
+         * An existing node is returned unchanged and the distance argument is ignored.
+         *
+         * @param distance the nodeDistance for a newly created node
+         */
+        DistanceTable addSubtable(String desired, String supported, int distance) {
+            Map<String, DistanceTable> bySupported = subtables.get(desired);
+            if (bySupported == null) {
+                bySupported = new TreeMap<>();
+                subtables.put(desired, bySupported);
+            }
+            DistanceTable node = bySupported.get(supported);
+            if (node == null) {
+                node = new DistanceTable(distance);
+                bySupported.put(supported, node);
+            }
+            return node;
+        }
+
+        /**
+         * Looks up the node for an exact (desired, supported) pair, without any fallback.
+         * Returns null if there is no such node.
+         */
+        private DistanceTable getNode(String desired, String supported) {
+            Map<String, DistanceTable> bySupported = subtables.get(desired);
+            return bySupported == null ? null : bySupported.get(supported);
+        }
+
+
+        /**
+         * Ensures that a node exists for the (desired, supported) pair,
+         * then applies the action to that node.
+         * A missing node is created with the distance that
+         * getDistance(desired, supported, ..., true) reports, and the first-level entries
+         * of the node found by that lookup are copied into it.
+         */
+        void addSubtables(
+                String desired, String supported,
+                Predicate<DistanceTable> action) {
+            DistanceTable node = getNode(desired, supported);
+            if (node == null) {
+                // get the distance it would have
+                Output<DistanceTable> node2 = new Output<>();
+                int distance = getDistance(desired, supported, node2, true);
+                // now add it
+                node = addSubtable(desired, supported, distance);
+                if (node2.value != null) {
+                    DistanceTable nextTable = node2.value;
+                    node.copy(nextTable);
+                }
+            }
+            // The result of the predicate is not used.
+            action.test(node);
+        }
+
+        /**
+         * Adds a language-script rule.
+         * The script pair is added to every existing language node that the rule's
+         * language pair matches, and then to the node for the exact language pair,
+         * which is created if necessary.
+         *
+         * @param desiredLang desired language key; ANY matches every existing desired key
+         * @param supportedLang supported language key; ANY matches every existing supported key
+         * @param desiredScript desired script key
+         * @param supportedScript supported script key
+         * @param percentage the distance stored in newly created script nodes
+         */
+        void addSubtables(String desiredLang, String supportedLang,
+                String desiredScript, String supportedScript,
+                int percentage) {
+
+            // add to all the values that have the matching desiredLang and supportedLang
+            final boolean anyDesired = desiredLang.equals(ANY);
+            final boolean anySupported = supportedLang.equals(ANY);
+            for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+                if (anyDesired || desiredLang.equals(e1.getKey())) {
+                    for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+                        if (anySupported || supportedLang.equals(e2.getKey())) {
+                            // Existing script nodes are kept; only missing ones are created.
+                            e2.getValue().addSubtable(desiredScript, supportedScript, percentage);
+                        }
+                    }
+                }
+            }
+            // now add the sequence explicitly
+            DistanceTable dt = new DistanceTable(-1);
+            dt.addSubtable(desiredScript, supportedScript, percentage);
+            CopyIfEmpty r = new CopyIfEmpty(dt);
+            addSubtables(desiredLang, supportedLang, r);
+        }
+
+        /**
+         * Adds a language-script-region rule.
+         * The script and region pairs are added below every existing language node that
+         * the rule's language pair matches, and then below the node for the exact
+         * language pair, which is created if necessary.
+         *
+         * @param desiredLang desired language key; ANY matches every existing desired key
+         * @param supportedLang supported language key; ANY matches every existing supported key
+         * @param desiredScript desired script key
+         * @param supportedScript supported script key
+         * @param desiredRegion desired region key
+         * @param supportedRegion supported region key
+         * @param percentage the distance stored in newly created region nodes
+         */
+        void addSubtables(String desiredLang, String supportedLang,
+                String desiredScript, String supportedScript,
+                String desiredRegion, String supportedRegion,
+                int percentage) {
+
+            // add to all the values that have the matching desiredLang and supportedLang
+            final boolean anyDesired = desiredLang.equals(ANY);
+            final boolean anySupported = supportedLang.equals(ANY);
+            for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+                if (anyDesired || desiredLang.equals(e1.getKey())) {
+                    for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+                        if (anySupported || supportedLang.equals(e2.getKey())) {
+                            // Recurse one level down with the script-level overload.
+                            e2.getValue().addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+                        }
+                    }
+                }
+            }
+            // now add the sequence explicitly
+
+            DistanceTable dt = new DistanceTable(-1);
+            dt.addSubtable(desiredRegion, supportedRegion, percentage);
+            AddSub r = new AddSub(desiredScript, supportedScript, dt);
+            addSubtables(desiredLang,  supportedLang,  r);
+        }
+
+        /** Returns a "distance: N" header line followed by the tab-indented dump of all subtables. */
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n');
+            return toString("", sb).toString();
+        }
+
+        /**
+         * Appends a tab-indented dump of this table's subtables to the buffer, recursively.
+         * Each entry shows the desired key, the supported key, and the node distance.
+         *
+         * @param indent the indentation used for continuation lines at this level
+         * @param buffer the buffer to append to
+         * @return the same buffer
+         */
+        private StringBuilder toString(String indent, StringBuilder buffer) {
+            // The first desired key continues the current output line;
+            // later keys start with the full indent.
+            String indent2 = indent.isEmpty() ? "" : "\t";
+            for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+                final Map<String, DistanceTable> subsubtable = e1.getValue();
+                buffer.append(indent2).append(e1.getKey());
+                // Likewise, the first supported key follows its desired key on the same line.
+                String indent3 = "\t";
+                for (Map.Entry<String, DistanceTable> e2 : subsubtable.entrySet()) {
+                    DistanceTable value = e2.getValue();
+                    buffer.append(indent3).append(e2.getKey());
+                    buffer.append('\t').append(value.nodeDistance);
+                    value.toString(indent+"\t\t\t", buffer);
+                    buffer.append('\n');
+                    indent3 = indent+'\t';
+                }
+                indent2 = indent;
+            }
+            return buffer;
+        }
+
+        /**
+         * Writes this table and all of its descendants into the trie builder.
+         * An (ANY, ANY) pair becomes a single '*' with the node's distance.
+         * Any other desired subtag is added with value 0, followed by each of its
+         * supported subtags with that node's distance.
+         * builder.length is restored after each branch so that siblings share the right prefix.
+         */
+        void toTrie(TrieBuilder builder) {
+            int startLength = builder.length;
+            for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
+                String desired = desSuppNode.getKey();
+                Map<String, DistanceTable> suppNodeMap = desSuppNode.getValue();
+                // Collapse ANY-ANY into one single *.
+                if (desired.equals(ANY)) {
+                    assert suppNodeMap.size() == 1;
+                    DistanceTable node = suppNodeMap.get(ANY);
+                    builder.addStar(node.nodeDistance);
+                    node.toTrie(builder);
+                } else {
+                    builder.addSubtag(desired, 0);
+                    int desiredLength = builder.length;
+                    for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
+                        String supported = suppNode.getKey();
+                        assert !supported.equals(ANY);
+                        DistanceTable node = suppNode.getValue();
+                        builder.addSubtag(supported, node.nodeDistance);
+                        node.toTrie(builder);
+                        // Back to the end of the desired subtag for the next supported subtag.
+                        builder.length = desiredLength;
+                    }
+                }
+                // Back to the prefix this table started with.
+                builder.length = startLength;
+            }
+        }
+    }
+
+    /**
+     * Predicate that copies the first-level entries of a template table
+     * into each tested node that has no subtables yet. Always returns true.
+     */
+    private static final class CopyIfEmpty implements Predicate<DistanceTable> {
+        private final DistanceTable template;
+        CopyIfEmpty(DistanceTable template) {
+            this.template = template;
+        }
+        @Override
+        public boolean test(DistanceTable node) {
+            boolean hasEntries = !node.subtables.isEmpty();
+            if (!hasEntries) {
+                node.copy(template);
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Predicate that descends one level from the tested node:
+     * it ensures the (desiredSub, supportedSub) child exists and applies
+     * a CopyIfEmpty of the given table to that child. Always returns true.
+     */
+    private static final class AddSub implements Predicate<DistanceTable> {
+        private final String desiredKey;
+        private final String supportedKey;
+        private final CopyIfEmpty copier;
+
+        AddSub(String desiredSub, String supportedSub, DistanceTable distanceTableToCopy) {
+            desiredKey = desiredSub;
+            supportedKey = supportedSub;
+            copier = new CopyIfEmpty(distanceTableToCopy);
+        }
+        @Override
+        public boolean test(DistanceTable node) {
+            if (node == null) {
+                throw new IllegalArgumentException("bad structure");
+            }
+            node.addSubtables(desiredKey, supportedKey, copier);
+            return true;
+        }
+    }
+
+    /**
+     * Returns the partition IDs for the region variable of a rule.
+     * The wildcard "*" maps to a singleton containing "*".
+     *
+     * @throws IllegalArgumentException if the variable has no partitions
+     */
+    private static Collection<String> getIdsFromVariable(
+            Multimap<String, String> variableToPartition, String variable) {
+        if (!variable.equals("*")) {
+            Collection<String> partitions = variableToPartition.get(variable);
+            if (partitions != null && !partitions.isEmpty()) {
+                return partitions;
+            }
+            throw new IllegalArgumentException("Variable not defined: " + variable);
+        }
+        return Collections.singleton("*");
+    }
+
+    /**
+     * Builds the LocaleDistance data from the "supplementalData" bundle:
+     * reads the paradigm locales, the match variables, and the match rules,
+     * expands region variables into partitions, fills a DistanceTable,
+     * and serializes that table into a BytesTrie.
+     *
+     * @throws IllegalArgumentException if a rule pair has uneven subtag counts,
+     *         if rules are not ordered by non-decreasing subtag count,
+     *         or if a rule violates the wildcard constraints
+     */
+    static LocaleDistance build() {
+        // From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
+        //   and then paradigmLocales, matchVariable, and the last languageMatch items.
+        ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
+        String[] paradigms = supplementalData.getValueWithFallback(
+                "languageMatchingInfo/written/paradigmLocales").getStringArray();
+        Set<LSR> paradigmLSRs = new HashSet<>();  // could be TreeSet if LSR were Comparable
+        for (String paradigm : paradigms) {
+            ULocale pl = new ULocale(paradigm);
+            paradigmLSRs.add(XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl));
+        }
+
+        TerritoryContainment tc = new TerritoryContainment(supplementalData);
+
+        // Register each match variable under its name with a "$" prefix.
+        RegionMapperBuilder rmb = new RegionMapperBuilder(tc);
+        UResource.Value value = supplementalData.getValueWithFallback(
+                "languageMatchingInfo/written/matchVariable");
+        UResource.Table variables = value.getTable();
+        UResource.Key key = new UResource.Key();
+        for (int i = 0; variables.getKeyAndValue(i, key, value); ++i) {
+            String variable = "$" + key.toString();
+            String regions = value.getString();
+            rmb.add(variable, regions);
+        }
+
+        // Parse the rules.
+        // We could almost process them while reading them from the source data,
+        // but a rule may contain a region code rather than a variable.
+        // We need to create a variable for each such region code
+        // before rmb.build() and before processing the rules.
+        // Note: despite its name, this splitter splits on '_'.
+        Splitter bar = Splitter.on('_');
+
+        // Subtag count of the previous rule; rules must not get shorter.
+        int prevSize = 0;
+        value = supplementalData.getValueWithFallback("languageMatchingNew/written");
+        UResource.Array matches = value.getArray();
+        List<Rule> rules = new ArrayList<>(matches.getSize());
+        for (int i = 0; matches.getValue(i, value); ++i) {
+            // tuple: desired, supported, distance, and optionally a oneway flag ("1")
+            String[] tuple = value.getStringArray();
+            int distance = Integer.parseInt(tuple[2]);
+            boolean oneway = tuple.length >= 4 && tuple[3].equals("1");
+            List<String> desired = new ArrayList<>(bar.splitToList(tuple[0]));
+            List<String> supported = new ArrayList<>(bar.splitToList(tuple[1]));
+            int size = desired.size();
+            if (size != supported.size()) {
+                throw new IllegalArgumentException("uneven languageMatches pair");
+            }
+            if (size < prevSize) {
+                throw new IllegalArgumentException("languageMatches out of order");
+            }
+            prevSize = size;
+            // Implementation shortcuts assume:
+            // - At any level, either both or neither rule subtags are *.
+            // - If the rule language subtags are *, the other-level subtags must also be *.
+            // If there are rules that do not fit these constraints,
+            // then we need to revise the implementation.
+            int langStars = checkStars(desired.get(0), supported.get(0), false);
+            if (size >= 2) {
+                checkStars(desired.get(1), supported.get(1), langStars == 2);
+            }
+            if (size == 3) {
+                checkStars(desired.get(2), supported.get(2), langStars == 2);
+                // May replace a plain region code in the list with a new "$" variable.
+                rmb.ensureRegionIsVariable(desired);
+                rmb.ensureRegionIsVariable(supported);
+            }
+            rules.add(new Rule(desired, supported, distance, oneway));
+        }
+
+        rmb.build();
+
+        /**
+         * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX.
+         * We generate a mapping from $A1 to a set of partitions {P1, P2}
+         * When we hit a rule that contains a variable,
+         * we replace that rule by multiple rules for the partitions.
+         */
+        final Multimap<String, String> variableToPartition = rmb.variableToPartitions;
+
+        final DistanceTable defaultDistanceTable = new DistanceTable(-1);
+        for (Rule rule : rules) {
+            List<String> desired = rule.desired;
+            List<String> supported = rule.supported;
+            if (rule.desired.size() <= 2) {
+                // language-only or language-script
+                add(defaultDistanceTable, desired, supported, rule.distance);
+                // Add the reverse direction too, unless it would be the same pair.
+                if (!rule.oneway && !desired.equals(supported)) {
+                    add(defaultDistanceTable, supported, desired, rule.distance);
+                }
+            } else {
+                // language-script-region
+                Collection<String> desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2));
+                Collection<String> supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2));
+                // The rule's lists are overwritten in place with each partition ID in turn.
+                for (String desiredRegion2 : desiredRegions) {
+                    desired.set(2, desiredRegion2.toString()); // fix later
+                    for (String supportedRegion2 : supportedRegions) {
+                        supported.set(2, supportedRegion2.toString()); // fix later
+                        add(defaultDistanceTable, desired, supported, rule.distance);
+                        if (!rule.oneway) {
+                            add(defaultDistanceTable, supported, desired, rule.distance);
+                        }
+                    }
+                }
+            }
+        }
+
+        TrieBuilder trieBuilder = new TrieBuilder();
+        defaultDistanceTable.toTrie(trieBuilder);
+        BytesTrie trie = trieBuilder.build();
+        return new LocaleDistance(
+                trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs);
+    }
+
+    /**
+     * Validates the wildcard use of one rule level and counts its wildcards.
+     *
+     * @param desired the desired subtag of the rule at this level
+     * @param supported the supported subtag of the rule at this level
+     * @param allStars if true, both subtags are required to be "*"
+     * @return the number of subtags equal to "*": 0 or 2
+     * @throws IllegalArgumentException if exactly one subtag is "*",
+     *         or if allStars is true and not both are "*"
+     */
+    private static int checkStars(String desired, String supported, boolean allStars) {
+        int stars = 0;
+        if (desired.equals("*")) {
+            ++stars;
+        }
+        if (supported.equals("*")) {
+            ++stars;
+        }
+        switch (stars) {
+        case 1:
+            throw new IllegalArgumentException("either both or neither rule subtags must be *: " +
+                    desired + ", " + supported);
+        case 2:
+            return stars;
+        default:
+            if (allStars) {
+                throw new IllegalArgumentException("both language subtags are * --> " +
+                        "both rule subtags on all levels must be *: " +
+                        desired + ", " + supported);
+            }
+            return stars;
+        }
+    }
+
+    /**
+     * Adds one rule direction to the distance table.
+     * The lists hold 1 to 3 subtags (language, script, region) and must have equal sizes;
+     * "*" subtags are mapped to the internal ANY key.
+     *
+     * @param percentage the distance value of the rule
+     * @throws IllegalArgumentException if the list sizes differ or are not in 1..3
+     */
+    private static void add(DistanceTable languageDesired2Supported,
+            List<String> desired, List<String> supported, int percentage) {
+        final int size = desired.size();
+        if (size != supported.size() || size < 1 || size > 3) {
+            throw new IllegalArgumentException();
+        }
+        final String desiredLang = fixAny(desired.get(0));
+        final String supportedLang = fixAny(supported.get(0));
+        if (size == 1) {
+            languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
+            return;
+        }
+        final String desiredScript = fixAny(desired.get(1));
+        final String supportedScript = fixAny(supported.get(1));
+        if (size == 2) {
+            languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
+            return;
+        }
+        final String desiredRegion = fixAny(desired.get(2));
+        final String supportedRegion = fixAny(supported.get(2));
+        languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+    }
+
+    private static final class RegionMapperBuilder {
+        private final Set<String> variables = new HashSet<>();
+        final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
+        final private RegionSet regionSet;
+        private final TerritoryContainment tc;
+
+        // build() output
+        Multimap<String, String> variableToPartitions;
+        private byte[] regionToPartitionsIndex;
+        private String[][] partitionArrays;
+
+        /** Creates a builder whose RegionSet is based on the given territory containment data. */
+        RegionMapperBuilder(TerritoryContainment tc) {
+            regionSet = new RegionSet(tc);
+            this.tc = tc;
+        }
+
+        /** Returns true if the string is a registered variable or the wildcard "*". */
+        private boolean isKnownVariable(String variable) {
+            return variables.contains(variable) || variable.equals("*");
+        }
+
+        void add(String variable, String barString) {
+            assert !isKnownVariable(variable);
+            assert variable.startsWith("$");
+            assert !variable.startsWith("$!");
+            variables.add(variable);
+            Set<String> tempRegions = regionSet.parseSet(barString);
+
+            for (String region : tempRegions) {
+                regionToRawPartition.put(region, variable);
+            }
+
+            // now add the inverse variable
+
+            Set<String> inverse = regionSet.inverse();
+            String inverseVariable = "$!" + variable.substring(1);
+            assert !isKnownVariable(inverseVariable);
+            variables.add(inverseVariable);
+            for (String region : inverse) {
+                regionToRawPartition.put(region, inverseVariable);
+            }
+        }
+
+        void ensureRegionIsVariable(List<String> lsrList) {
+            String region = lsrList.get(2);
+            if (!isKnownVariable(region)) {
+                assert LSR.indexForRegion(region) >= 0;  // well-formed region subtag
+                String variable = "$" + region;
+                add(variable, region);
+                lsrList.set(2, variable);
+            }
+        }
+
+        void build() {
+            // Partitions as sets of variables.
+            // LinkedHashMap to store & number unique sets.
+            // Example: {"$!cnsar", "$!enUS", "$!maghreb", "$americas"}
+            Map<Collection<String>, Integer> partitionVariables = new LinkedHashMap<>();
+            // Partitions as sets of lookup ID strings.
+            // Example: {"1", "5"}
+            Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
+            // pIndex 0: default value in regionToPartitionsIndex
+            Collection<String> noPartitions = Collections.singleton("");
+            makeUniqueIndex(partitionStrings, noPartitions);
+
+            // Example: "$americas" -> {"1", "5"}
+            variableToPartitions = TreeMultimap.create();
+            // Maps the index of each region code to a pIndex into partitionStrings.
+            regionToPartitionsIndex = new byte[LSR.REGION_INDEX_LIMIT];
+            // Maps a partition string to the set of region codes in that partition.
+            // Example: "5" -> {"PR", "US", "VI"}
+            Multimap<String, String> partitionToRegions = TreeMultimap.create();
+
+            for (Map.Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
+                final String region = e.getKey();
+                final Collection<String> rawPartition = e.getValue();
+                // Single-character string.
+                // Must be an ASCII character and must not be '*'.
+                // Used to start with α.
+                char partitionChar = (char) ('0' + makeUniqueIndex(partitionVariables, rawPartition));
+                assert partitionChar <= 0x7f;
+                String partition = String.valueOf(partitionChar);
+                int pIndex = makeUniqueIndex(partitionStrings, Collections.singleton(partition));
+                // The pIndex must fit into a byte.
+                // For Java code simplicity, we want it to also be non-negative.
+                assert pIndex <= 0x7f;
+
+                regionToPartitionsIndex[LSR.indexForRegion(region)] = (byte) pIndex;
+                partitionToRegions.put(partition, region);
+
+                for (String variable : rawPartition) {
+                    variableToPartitions.put(variable, partition);
+                }
+            }
+
+            // We get a mapping of each macro to the partitions it intersects with.
+            // Example: "419" -> {"1", "5"}
+            Multimap<String,String> macroToPartitions = TreeMultimap.create();
+            for (Map.Entry<String, Set<String>> e : tc.resolved.asMap().entrySet()) {
+                String macro = e.getKey();
+                for (Map.Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
+                    String partition = e2.getKey();
+                    if (!Collections.disjoint(e.getValue(), e2.getValue())) {
+                        macroToPartitions.put(macro, partition);
+                    }
+                }
+            }
+
+            // Create a combined mapping from a region code, which can be a macro region,
+            // via the getRegionIndex() of that region code,
+            // to a set of single-character partition strings.
+            for (Map.Entry<String, Set<String>> m2p : macroToPartitions.asMap().entrySet()) {
+                String macro = m2p.getKey();
+                int regionIndex = LSR.indexForRegion(macro);
+                if (regionToPartitionsIndex[regionIndex] == 0) {
+                    Set<String> partitions = m2p.getValue();
+                    int pIndex = makeUniqueIndex(partitionStrings, partitions);
+                    regionToPartitionsIndex[regionIndex] = (byte) pIndex;
+                }
+            }
+
+            // Turn the Collection of Collections into an array of arrays.
+            Collection<Collection<String>> list = partitionStrings.keySet();
+            partitionArrays = new String[list.size()][];
+            int i = 0;
+            for (Collection<String> partitions : list) {
+                partitionArrays[i++] = partitions.toArray(new String[partitions.size()]);
+            }
+        }
+    }
+
+    /**
+     * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
+     * All macroregions are fully resolved to sets of non-macro regions.
+     * <br>Syntax is simple for now:
+     * <pre>regionSet := region ([-+] region)*</pre>
+     * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
+     * <br>Instances are stateful: parseSet() returns the internal working set (not a copy),
+     * the next parseSet() call clears it, and inverse() refers to the most recently parsed set.
+     */
+    private static final class RegionSet {
+        private enum Operation {add, remove}
+        private final TerritoryContainment tc;
+        // temporaries used in processing
+        final private Set<String> tempRegions = new TreeSet<>();
+        private Operation operation = null;
+
+        RegionSet(TerritoryContainment tc) {
+            this.tc = tc;
+        }
+
+        private Set<String> parseSet(String barString) {  // Evaluates barString left to right; the first region is always added.
+            operation = Operation.add;
+            int last = 0;  // start index of the region name currently being scanned
+            tempRegions.clear();
+            int i = 0;
+            for (; i < barString.length(); ++i) {
+                char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii
+                switch(c) {
+                case '+':
+                    add(barString, last, i);  // apply the previous operator to the region that just ended
+                    last = i+1;
+                    operation = Operation.add;
+                    break;
+                case '-':
+                    add(barString, last, i);
+                    last = i+1;
+                    operation = Operation.remove;
+                    break;
+                }
+            }
+            add(barString, last, i);  // flush the final region after the last operator
+            return tempRegions;
+        }
+
+        private Set<String> inverse() {  // Returns a new set: tc.leaves minus the most recently parsed set.
+            TreeSet<String> result = new TreeSet<>(tc.leaves);
+            result.removeAll(tempRegions);
+            return result;
+        }
+
+        private void add(String barString, int last, int i) {  // Applies the current operation to barString[last, i); empty spans are ignored.
+            if (i > last) {
+                String region = barString.substring(last,i);
+                changeSet(operation, region);
+            }
+        }
+
+        private void changeSet(Operation operation, String region) {  // Adds or removes the region, expanded to its leaves when tc has them.
+            Collection<String> contained = tc.toLeavesOnly.get(region);
+            if (contained != null && !contained.isEmpty()) {  // region has a non-empty toLeavesOnly entry: operate on those leaves
+                if (Operation.add == operation) {
+                    tempRegions.addAll(contained);
+                } else {
+                    tempRegions.removeAll(contained);
+                }
+            } else if (Operation.add == operation) {  // no expansion available: operate on the code itself
+                tempRegions.add(region);
+            } else {
+                tempRegions.remove(region);
+            }
+        }
+    }
+}
index e1631b4daf5d92e22b1468d501d924ff1cd54012..26b540fc23eed595516f8f78027bd06ffe1c16f6 100644 (file)
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.locale;
 
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
 import java.util.Set;
-import java.util.TreeMap;
 
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.BytesTrie;
 import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.ULocale.Minimize;
-import com.ibm.icu.util.UResourceBundle;
 
-public class XLikelySubtags {
-
-    private static final XLikelySubtags DEFAULT = new XLikelySubtags();
-
-    public static final XLikelySubtags getDefault() {
-        return DEFAULT;
-    }
-
-    private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
-        Map<V, T> subTable = table.get(language);
-        if (subTable == null) {
-            table.put(language, subTable = new TreeMap<>());
+public final class XLikelySubtags {
+    private static final String PSEUDO_ACCENTS_PREFIX = "'";  // -XA, -PSACCENT
+    private static final String PSEUDO_BIDI_PREFIX = "+";  // -XB, -PSBIDI
+    private static final String PSEUDO_CRACKED_PREFIX = ",";  // -XC, -PSCRACK
+
+    private static final boolean DEBUG_OUTPUT = false;
+
+    // TODO: Load prebuilt data from a resource bundle
+    // to avoid the dependency on the builder code.
+    static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
+
+    static final class Data {  // Bundle of the lookup data that the XLikelySubtags constructor copies its fields from.
+        private final Map<String, String> languageAliases;  // language alias -> canonical language
+        private final Map<String, String> regionAliases;  // region alias -> canonical region
+        private final BytesTrie trie;
+        private final LSR[] lsrs;
+
+        Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
+                BytesTrie trie, LSR[] lsrs) {  // stores the references as given; no copies are made
+            this.languageAliases = languageAliases;
+            this.regionAliases = regionAliases;
+            this.trie = trie;
+            this.lsrs = lsrs;
         }
-        return subTable;
     }
 
-    public static class Aliases {
-        final Map<String, String> toCanonical;
-        final Multimap<String, String> toAliases;
-        public String getCanonical(String alias) {
-            String canonical = toCanonical.get(alias);
-            return canonical == null ? alias : canonical;
-        }
-        public Set<String> getAliases(String canonical) {
-            Set<String> aliases = toAliases.get(canonical);
-            return aliases == null ? Collections.singleton(canonical) : aliases;
-        }
-        public Aliases(String key) {
-            UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-            UResourceBundle metadataAlias = metadata.get("alias");
-            UResourceBundle territoryAlias = metadataAlias.get(key);
-            Map<String, String> toCanonical1 = new HashMap<>();
-            for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
-                UResourceBundle res = territoryAlias.get(i);
-                String aliasFrom = res.getKey();
-                if (aliasFrom.contains("_")) {
-                    continue; // only simple aliasing
-                }
-                String aliasReason = res.get("reason").getString();
-                if (aliasReason.equals("overlong")) {
-                    continue;
-                }
-                String aliasTo = res.get("replacement").getString();
-                int spacePos = aliasTo.indexOf(' ');
-                String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
-                if (aliasFirst.contains("_")) {
-                    continue; // only simple aliasing
-                }
-                toCanonical1.put(aliasFrom, aliasFirst);
+    private final Map<String, String> languageAliases;
+    private final Map<String, String> regionAliases;
+
+    // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+    // There is also a trie value for each intermediate lang and lang+script.
+    // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+    private final BytesTrie trie;
+    private final long trieUndState;
+    private final long trieUndZzzzState;
+    private final int defaultLsrIndex;
+    private final LSR[] lsrs;
+
+    private XLikelySubtags(XLikelySubtags.Data data) {  // Takes over the data fields and caches the trie states for "*" and "**".
+        languageAliases = data.languageAliases;
+        regionAliases = data.regionAliases;
+        trie = data.trie;
+        lsrs = data.lsrs;
+
+        // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+        BytesTrie.Result result = trie.next('*');
+        assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+        int value = trie.getValue();
+        assert value == 0;
+        trieUndState = trie.getState64();  // trie position after "*"
+        result = trie.next('*');
+        assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+        value = trie.getValue();
+        assert value == 0;
+        trieUndZzzzState = trie.getState64();  // trie position after "**"
+        result = trie.next('*');
+        assert result.hasValue();
+        defaultLsrIndex = trie.getValue();  // index into lsrs stored for "***"
+        trie.reset();  // put the trie back at its initial state after the three lookups
+
+        if (DEBUG_OUTPUT) {  // compile-time switch; false in this revision
+            System.out.println("*** likely subtags");
+            for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
+                System.out.println(mapping);
             }
-            if (key.equals("language")) {
-                toCanonical1.put("mo", "ro"); // special case
-            }
-            toCanonical = Collections.unmodifiableMap(toCanonical1);
-            toAliases = Multimaps.invertFrom(toCanonical1, HashMultimap.<String,String>create());
         }
     }
 
-    public static class LSR {
-        public final String language;
-        public final String script;
-        public final String region;
-
-        public static Aliases LANGUAGE_ALIASES = new Aliases("language");
-        public static Aliases REGION_ALIASES = new Aliases("territory");
+    private static String getCanonical(Map<String, String> aliases, String alias) {  // Returns the map's replacement for alias, or alias itself if unmapped.
+        String canonical = aliases.get(alias);
+        return canonical == null ? alias : canonical;  // no entry (or a null value): keep the input unchanged
+    }
 
-        public static LSR from(String language, String script, String region) {
-            return new LSR(language, script, region);
+    LSR makeMaximizedLsrFrom(ULocale locale) {  // Converts a ULocale to an LSR: special cases first, otherwise alias canonicalization + maximize().
+        String name = locale.getName();
+        if (name.startsWith("@x=")) {
+            // Private use language tag x-subtag-subtag...
+            return new LSR(name, "", "");  // the whole name becomes the language field; no likely-subtags lookup
         }
 
-        // from http://unicode.org/reports/tr35/#Unicode_language_identifier
-        // but simplified to requiring language subtag, and nothing beyond region
-        // #1 is language
-        // #2 is script
-        // #3 is region
-        //        static final String pat =
-        //                "language_id = (unicode_language_subtag)"
-        //                        + "(?:sep(unicode_script_subtag))?"
-        //                        + "(?:sep(unicode_region_subtag))?;\n"
-        //                        + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
-        //                        + "unicode_script_subtag = alpha{4};\n"
-        //                        + "unicode_region_subtag  = alpha{2}|digit{3};\n"
-        //                        + "sep    = [-_];\n"
-        //                        + "digit  = [0-9];\n"
-        //                        + "alpha   = [A-Za-z];\n"
-        //                        ;
-        //        static {
-        //            System.out.println(pat);
-        //            System.out.println(new UnicodeRegex().compileBnf(pat));
-        //        }
-        //        static final Pattern LANGUAGE_PATTERN = Pattern.compile(
-        //                "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
-        //                //new UnicodeRegex().compileBnf(pat)
-        //                );
-        //
-        // NOTE: Should we fix this to check for format?
-        // ANSWER: Not required, since this is only called internally. Moreover, we deliberately
-        // use invalid language tags ("x1", "x2", etc.) to represent pseudo-locales. See below.
-        static LSR from(String languageIdentifier) {
-            String[] parts = languageIdentifier.split("[-_]");
-            if (parts.length < 1 || parts.length > 3) {
-                throw new ICUException("too many subtags");
+        // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
+        // They should match only themselves,
+        // not other locales with what looks like the same language and script subtags.
+        String language = locale.getLanguage();
+        String script = locale.getScript();
+        String region = locale.getCountry();
+        if (region.length() == 2 && region.charAt(0) == 'X') {  // candidate pseudo region XA/XB/XC
+            switch (region.charAt(1)) {
+            case 'A':
+                return new LSR(PSEUDO_ACCENTS_PREFIX + language,
+                        PSEUDO_ACCENTS_PREFIX + script, region);  // prefixed language and script; returned without aliases or maximize()
+            case 'B':
+                return new LSR(PSEUDO_BIDI_PREFIX + language,
+                        PSEUDO_BIDI_PREFIX + script, region);
+            case 'C':
+                return new LSR(PSEUDO_CRACKED_PREFIX + language,
+                        PSEUDO_CRACKED_PREFIX + script, region);
+            default:  // normal locale
+                break;
             }
-            String lang = parts[0].toLowerCase();
-            String p2 = parts.length < 2 ? "" : parts[1];
-            String p3 = parts.length < 3 ? "" : parts[2];
-            return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
-
-            //        Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
-            //        if (!matcher.matches()) {
-            //            return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
-            //        }
-            //        System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
-            //        throw new ICUException("invalid language id");
         }
 
-        private static final HashMap<ULocale, LSR> pseudoReplacements = new HashMap<>(11);
-
-        // Note code in XLocaledistance.java handle pseudo-regions XA, XB, and XC, making them
-        // very distant from any other locale. Similarly, it establishes that any of the
-        // invalid locales below ("x1", "x2", ..., "x7", and "x8-en") are very distant
-        // from any other locale.
-        static {
-      String[][] source = {
-        {"x-bork", "x1", "", ""},
-        {"x-elmer", "x2", "", ""},
-        {"x-hacker", "x3", "", ""},
-        {"x-piglatin", "x4", "", ""},
-        {"x-pirate", "x5", "", ""},
-        {"en-XA", "x6", "", ""},
-        {"en-PSACCENT", "x6", "", ""}, // Note: same as for ex-XA
-        {"ar-XB", "x7", "", ""},
-        {"ar-PSBIDI", "x7", "", ""}, // Note: same as for ar-XB
-        {"en-XC", "x8", "en", ""}, // Note: language is stored in LSR.script field
-        {"en-PSCRACK", "x8", "en", ""}, // Note: same as for en-XC
-      };
-            for (int i = 0; i < source.length; ++i) {
-                pseudoReplacements.put(new ULocale(source[i][0]),
-                    new LSR(source[i][1], source[i][2], source[i][3]));
+        String variant = locale.getVariant();
+        if (variant.startsWith("PS")) {  // only an exact match of the whole variant string below is treated as a pseudolocale
+            switch (variant) {
+            case "PSACCENT":
+                return new LSR(PSEUDO_ACCENTS_PREFIX + language,
+                        PSEUDO_ACCENTS_PREFIX + script, region.isEmpty() ? "XA" : region);  // an empty region gets the matching pseudo region
+            case "PSBIDI":
+                return new LSR(PSEUDO_BIDI_PREFIX + language,
+                        PSEUDO_BIDI_PREFIX + script, region.isEmpty() ? "XB" : region);
+            case "PSCRACK":
+                return new LSR(PSEUDO_CRACKED_PREFIX + language,
+                        PSEUDO_CRACKED_PREFIX + script, region.isEmpty() ? "XC" : region);
+            default:  // normal locale
+                break;
             }
-
         }
 
-        public static LSR from(ULocale locale) {
-            LSR replacement = pseudoReplacements.get(locale);
-            if (replacement != null) {
-                return replacement;
-            }
-            // Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
-            if ("PSCRACK".equals(locale.getVariant())) {
-                return new LSR(
-                    "x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
-            }
-            return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
-        }
+        language = getCanonical(languageAliases, language);
+        // script is ok
+        region = getCanonical(regionAliases, region);
+        return INSTANCE.maximize(language, script, region);  // NOTE(review): goes through INSTANCE, not this; same result only while INSTANCE is the sole instance - confirm
+    }
 
-        public static LSR fromMaximalized(ULocale locale) {
-            LSR replacement = pseudoReplacements.get(locale);
-            if (replacement != null) {
-                return replacement;
-            }
-            // Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
-            if ("PSCRACK".equals(locale.getVariant())) {
-                return new LSR(
-                    "x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
-            }
-            return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
+    /**
+     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+     */
+    private LSR maximize(String language, String script, String region) {
+        int retainOldMask = 0;
+        BytesTrie iter = new BytesTrie(trie);
+        // language lookup
+        if (language.equals("und")) {
+            language = "";
         }
-
-        public static LSR fromMaximalized(String language, String script, String region) {
-            String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
-            // script is ok
-            String canonicalRegion = REGION_ALIASES.getCanonical(region);
-
-            return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
+        long state;
+        int value = trieNext(iter, language, false);
+        if (value >= 0) {
+            if (!language.isEmpty()) {
+                retainOldMask |= 4;
+            }
+            state = iter.getState64();
+        } else {
+            retainOldMask |= 4;
+            iter.resetToState64(trieUndState);  // "und" ("*")
+            state = 0;
         }
-
-        public LSR(String language, String script, String region) {
-            this.language = language;
-            this.script = script;
-            this.region = region;
+        // script lookup
+        if (script.equals("Zzzz")) {
+            script = "";
         }
-
-        @Override
-        public String toString() {
-            StringBuilder result = new StringBuilder(language);
+        value = trieNext(iter, script, false);
+        if (value >= 0) {
             if (!script.isEmpty()) {
-                result.append('-').append(script);
-            }
-            if (!region.isEmpty()) {
-                result.append('-').append(region);
+                retainOldMask |= 2;
             }
-            return result.toString();
-        }
-        public LSR replace(String language2, String script2, String region2) {
-            if (language2 == null && script2 == null && region2 == null) return this;
-            return new LSR(
-                    language2 == null ? language: language2,
-                            script2 == null ? script : script2,
-                                    region2 == null ? region : region2);
-        }
-        @Override
-        public boolean equals(Object obj) {
-            LSR other;
-            return this == obj ||
-                    (obj != null
-                    && obj.getClass() == this.getClass()
-                    && language.equals((other = (LSR) obj).language)
-                    && script.equals(other.script)
-                    && region.equals(other.region));
-        }
-        @Override
-        public int hashCode() {
-            return Objects.hash(language, script, region);
-        }
-    }
-
-    final Map<String, Map<String, Map<String, LSR>>> langTable;
-
-    public XLikelySubtags() {
-        this(getDefaultRawData());
-    }
-
-    private static Map<String, String> getDefaultRawData() {
-        Map<String, String> rawData = new TreeMap<>();
-        UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
-        for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
-            String key = enumer.nextElement();
-            rawData.put(key, bundle.getString(key));
-        }
-        return rawData;
-    }
-
-    public XLikelySubtags(Map<String, String> rawData) {
-        this.langTable = init(rawData);
-    }
-
-    private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData) {
-        // prepare alias info. We want a mapping from the canonical form to all aliases
-
-        //Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
-        //        getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
-
-        // Don't bother with script; there are none
-
-        //Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
-        //        getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
-
-        Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
-        //        Splitter bar = Splitter.on('_');
-        //        int last = -1;
-        // set the base data
-        Map<LSR,LSR> internCache = new HashMap<>();
-        for (Entry<String, String> sourceTarget : rawData.entrySet()) {
-            LSR ltp = LSR.from(sourceTarget.getKey());
-            final String language = ltp.language;
-            final String script = ltp.script;
-            final String region = ltp.region;
-
-            ltp = LSR.from(sourceTarget.getValue());
-            String languageTarget = ltp.language;
-            final String scriptTarget = ltp.script;
-            final String regionTarget = ltp.region;
-
-            set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
-            // now add aliases
-            Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
-            //            if (languageAliases.isEmpty()) {
-            //                languageAliases = Collections.singleton(language);
-            //            }
-            Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
-            //            if (regionAliases.isEmpty()) {
-            //                regionAliases = Collections.singleton(region);
-            //            }
-            for (String languageAlias : languageAliases) {
-                for (String regionAlias : regionAliases) {
-                    if (languageAlias.equals(language) && regionAlias.equals(region)) {
-                        continue;
-                    }
-                    set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
-                }
+            state = iter.getState64();
+        } else {
+            retainOldMask |= 2;
+            if (state == 0) {
+                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", false);
+                assert value == 0;
+                state = iter.getState64();
             }
         }
-        // hack
-        set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
-
-        // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
-        // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
-
-        // so und-Latn-GH   =>  ak-Latn-GH
-        Map<String, Map<String, LSR>> undScriptMap = result.get("und");
-        Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
-        for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
-            final LSR value = regionEntry.getValue();
-            set(result, "und", value.script, value.region, value);
-        }
-        //
-        // check that every level has "" (or "und")
-        if (!result.containsKey("und")) {
-            throw new IllegalArgumentException("failure: base");
+        // region lookup
+        if (region.equals("ZZ")) {
+            region = "";
         }
-        for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
-            String lang = langEntry.getKey();
-            final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
-            if (!scriptMap.containsKey("")) {
-                throw new IllegalArgumentException("failure: " + lang);
+        value = trieNext(iter, region, true);
+        if (value >= 0) {
+            if (!region.isEmpty()) {
+                retainOldMask |= 1;
             }
-            for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
-                String script = scriptEntry.getKey();
-                final Map<String, LSR> regionMap = scriptEntry.getValue();
-                if (!regionMap.containsKey("")) {
-                    throw new IllegalArgumentException("failure: " + lang + "-" + script);
+        } else {
+            retainOldMask |= 1;
+            if (state == 0) {
+                value = defaultLsrIndex;
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", true);
+                if (value < 0) {  // TODO: should never happen?! just assert value >= 0?
+                    return null;
                 }
-                //                for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
-                //                    String region = regionEntry.getKey();
-                //                    LSR value = regionEntry.getValue();
-                //                }
             }
         }
-        return result;
-    }
+        LSR result = lsrs[value];
 
-    //    private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
-    //        for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
-    //            final String alias = e.getKey();
-    //            if (alias.contains("_")) {
-    //                continue; // only do simple aliasing
-    //            }
-    //            String canonical = getCanonical(e.getValue());
-    //            canonicalToAlias.put(canonical, alias);
-    //        }
-    //    }
-
-    //    private static String getCanonical(R2<List<String>, String> aliasAndReason) {
-    //        if (aliasAndReason == null) {
-    //            return null;
-    //        }
-    //        if (aliasAndReason.get1().equals("overlong")) {
-    //            return null;
-    //        }
-    //        List<String> value = aliasAndReason.get0();
-    //        if (value.size() != 1) {
-    //            return null;
-    //        }
-    //        final String canonical = value.iterator().next();
-    //        if (canonical.contains("_")) {
-    //            return null; // only do simple aliasing
-    //        }
-    //        return canonical;
-    //    }
-
-    private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
-            final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
-        LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
-        LSR oldValue = internCache.get(newValue);
-        if (oldValue == null) {
-            internCache.put(newValue, newValue);
-            oldValue = newValue;
+        if (language.isEmpty()) {
+            language = "und";
         }
-        set(langTable, language, script, region, oldValue);
-    }
-
-    private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
-        Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
-        Map<String, LSR> regionTable = getSubtable(scriptTable, script);
-        //        LSR oldValue = regionTable.get(region);
-        //        if (oldValue != null) {
-        //            int debug = 0;
-        //        }
-        regionTable.put(region, newValue);
-    }
-
-    /**
-     * Convenience methods
-     */
-    public LSR maximize(String source) {
-        return maximize(ULocale.forLanguageTag(source));
-    }
-
-    public LSR maximize(ULocale source) {
-        return maximize(source.getLanguage(), source.getScript(), source.getCountry());
-    }
-
-    public LSR maximize(LSR source) {
-        return maximize(source.language, source.script, source.region);
-    }
-
-    //    public static ULocale addLikelySubtags(ULocale loc) {
-    //
-    //    }
 
-    /**
-     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
-     */
-    public LSR maximize(String language, String script, String region) {
-        int retainOldMask = 0;
-        Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
-        if (scriptTable == null) { // cannot happen if language == "und"
-            retainOldMask |= 4;
-            scriptTable = langTable.get("und");
-        } else if (!language.equals("und")) {
-            retainOldMask |= 4;
+        if (retainOldMask == 0) {
+            return result;
         }
-
-        if (script.equals("Zzzz")) {
-            script = "";
+        if ((retainOldMask & 4) == 0) {
+            language = result.language;
         }
-        Map<String, LSR> regionTable = scriptTable.get(script);
-        if (regionTable == null) { // cannot happen if script == ""
-            retainOldMask |= 2;
-            regionTable = scriptTable.get("");
-        } else if (!script.isEmpty()) {
-            retainOldMask |= 2;
+        if ((retainOldMask & 2) == 0) {
+            script = result.script;
         }
-
-        if (region.equals("ZZ")) {
-            region = "";
+        if ((retainOldMask & 1) == 0) {
+            region = result.region;
         }
-        LSR result = regionTable.get(region);
-        if (result == null) { // cannot happen if region == ""
-            retainOldMask |= 1;
-            result = regionTable.get("");
-            if (result == null) {
-                return null;
+        return new LSR(language, script, region);
+    }
+
+    private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
+        BytesTrie.Result result;
+        if (s.isEmpty()) {
+            result = iter.next('*');
+        } else {
+            int end = s.length() - 1;
+            for (int i = 0;; ++i) {
+                result = iter.next(s.charAt(i));
+                if (i < end) {
+                    if (!result.hasNext()) {
+                        return -1;
+                    }
+                } else {
+                    // last character of this subtag
+                    break;
+                }
             }
-        } else if (!region.isEmpty()) {
-            retainOldMask |= 1;
         }
-
-        switch (retainOldMask) {
-        default:
-        case 0: return result;
-        case 1: return result.replace(null, null, region);
-        case 2: return result.replace(null, script, null);
-        case 3: return result.replace(null, script, region);
-        case 4: return result.replace(language, null, null);
-        case 5: return result.replace(language, null, region);
-        case 6: return result.replace(language, script, null);
-        case 7: return result.replace(language, script, region);
+        if (!finalSubtag) {
+            if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
+                return 0;  // value should be 0, don't care
+            }
+        } else {
+            if (result.hasValue()) {
+                return iter.getValue();
+            }
         }
+        return -1;
     }
 
-    @SuppressWarnings("unused")
-    private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
+    LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
+            ULocale.Minimize fieldToFavor) {
         LSR result = maximize(languageIn, scriptIn, regionIn);
 
         // We could try just a series of checks, like:
@@ -475,16 +261,20 @@ public class XLikelySubtags {
         //   (languageIn, "", "")
         //   (languageIn, "", regionIn)
 
-        Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
-
-        Map<String, LSR> regionTable0 = scriptTable.get("");
-        LSR value00 = regionTable0.get("");
+        // value00 = lookup(result.language, "", "")
+        BytesTrie iter = new BytesTrie(trie);
+        int value = trieNext(iter, result.language, false);
+        assert value >= 0;
+        value = trieNext(iter, "", false);
+        assert value >= 0;
+        value = trieNext(iter, "", true);
+        LSR value00 = lsrs[value];
         boolean favorRegionOk = false;
         if (result.script.equals(value00.script)) { //script is default
             if (result.region.equals(value00.region)) {
-                return result.replace(null, "", "");
-            } else if (fieldToFavor == Minimize.FAVOR_REGION) {
-                return result.replace(null, "", null);
+                return new LSR(result.language, "", "");
+            } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+                return new LSR(result.language, "", result.region);
             } else {
                 favorRegionOk = true;
             }
@@ -494,201 +284,40 @@ public class XLikelySubtags {
         // Maybe do later, but for now use the straightforward code.
         LSR result2 = maximize(languageIn, scriptIn, "");
         if (result2.equals(result)) {
-            return result.replace(null, null, "");
+            return new LSR(result.language, result.script, "");
         } else if (favorRegionOk) {
-            return result.replace(null, "", null);
+            return new LSR(result.language, "", result.region);
         }
         return result;
     }
 
-    private static StringBuilder show(Map<?,?> map, String indent, StringBuilder output) {
-        String first = indent.isEmpty() ? "" : "\t";
-        for (Entry<?,?> e : map.entrySet()) {
-            String key = e.getKey().toString();
-            Object value = e.getValue();
-            output.append(first + (key.isEmpty() ? "∅" : key));
-            if (value instanceof Map) {
-                show((Map<?,?>)value, indent+"\t", output);
+    private Map<String, LSR> getTable() {
+        Map<String, LSR> map = new LinkedHashMap<>();
+        Set<String> prefixes = new HashSet<>();
+        StringBuilder sb = new StringBuilder();
+        for (BytesTrie.Entry entry : trie) {
+            sb.setLength(0);
+            int length = entry.bytesLength();
+            for (int i = 0; i < length;) {
+                byte b = entry.byteAt(i++);
+                sb.append((char) b);
+                if (i < length && prefixes.contains(sb.toString())) {
+                    sb.append('-');
+                }
+            }
+            String s = sb.toString();
+            if (entry.value == 0) {
+                // intermediate match point
+                prefixes.add(s);
             } else {
-                output.append("\t" + Objects.toString(value)).append("\n");
+                map.put(s, lsrs[entry.value]);
             }
-            first = indent;
         }
-        return output;
+        return map;
     }
 
     @Override
     public String toString() {
-        return show(langTable, "", new StringBuilder()).toString();
+        return getTable().toString();
     }
-
-    //    public static void main(String[] args) {
-    //        System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
-    //
-    //        final Map<String, String> rawData = sdi.getLikelySubtags();
-    //        XLikelySubtags ls = XLikelySubtags.getDefault();
-    //        System.out.println(ls);
-    //        ls.maximize(new ULocale("iw"));
-    //        if (true) return;
-    //
-    //        LanguageTagParser ltp = new LanguageTagParser();
-    //
-    //        // get all the languages, scripts, and regions
-    //        Set<String> languages = new TreeSet<String>();
-    //        Set<String> scripts = new TreeSet<String>();
-    //        Set<String> regions = new TreeSet<String>();
-    //        Counter<String> languageCounter = new Counter<String>();
-    //        Counter<String> scriptCounter = new Counter<String>();
-    //        Counter<String> regionCounter = new Counter<String>();
-    //
-    //        for (Entry<String, String> sourceTarget : rawData.entrySet()) {
-    //            final String source = sourceTarget.getKey();
-    //            ltp.set(source);
-    //            languages.add(ltp.getLanguage());
-    //            scripts.add(ltp.getScript());
-    //            regions.add(ltp.getRegion());
-    //            final String target = sourceTarget.getValue();
-    //            ltp.set(target);
-    //            add(target, languageCounter, ltp.getLanguage(), 1);
-    //            add(target, scriptCounter, ltp.getScript(), 1);
-    //            add(target, regionCounter, ltp.getRegion(), 1);
-    //        }
-    //        ltp.set("und-Zzzz-ZZ");
-    //        languageCounter.add(ltp.getLanguage(), 1);
-    //        scriptCounter.add(ltp.getScript(), 1);
-    //        regionCounter.add(ltp.getRegion(), 1);
-    //
-    //        if (SHORT) {
-    //            removeSingletons(languages, languageCounter);
-    //            removeSingletons(scripts, scriptCounter);
-    //            removeSingletons(regions, regionCounter);
-    //        }
-    //
-    //        System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
-    //        System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
-    //        System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
-    //
-    //        int maxCount = Integer.MAX_VALUE;
-    //
-    //        int counter = maxCount;
-    //        long tempTime = System.nanoTime();
-    //        newMax:
-    //            for (String language : languages) {
-    //                for (String script : scripts) {
-    //                    for (String region : regions) {
-    //                        if (--counter < 0) break newMax;
-    //                        LSR result = ls.maximize(language, script, region);
-    //                    }
-    //                }
-    //            }
-    //        long newMaxTime = System.nanoTime() - tempTime;
-    //        System.out.println("newMaxTime: " + newMaxTime);
-    //
-    //        counter = maxCount;
-    //        tempTime = System.nanoTime();
-    //        newMin:
-    //            for (String language : languages) {
-    //                for (String script : scripts) {
-    //                    for (String region : regions) {
-    //                        if (--counter < 0) break newMin;
-    //                        LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
-    //                    }
-    //                }
-    //            }
-    //        long newMinTime = System.nanoTime() - tempTime;
-    //        System.out.println("newMinTime: " + newMinTime);
-    //
-    //        // *****
-    //
-    //        tempTime = System.nanoTime();
-    //        counter = maxCount;
-    //        oldMax:
-    //            for (String language : languages) {
-    //                for (String script : scripts) {
-    //                    for (String region : regions) {
-    //                        if (--counter < 0) break oldMax;
-    //                        ULocale tempLocale = new ULocale(language, script, region);
-    //                        ULocale max = ULocale.addLikelySubtags(tempLocale);
-    //                    }
-    //                }
-    //            }
-    //        long oldMaxTime = System.nanoTime() - tempTime;
-    //        System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
-    //
-    //        counter = maxCount;
-    //        tempTime = System.nanoTime();
-    //        oldMin:
-    //            for (String language : languages) {
-    //                for (String script : scripts) {
-    //                    for (String region : regions) {
-    //                        if (--counter < 0) break oldMin;
-    //                        ULocale tempLocale = new ULocale(language, script, region);
-    //                        ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
-    //                    }
-    //                }
-    //            }
-    //        long oldMinTime = System.nanoTime() - tempTime;
-    //        System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
-    //
-    //        counter = maxCount;
-    //        testMain:
-    //            for (String language : languages) {
-    //                System.out.println(language);
-    //                int tests = 0;
-    //                for (String script : scripts) {
-    //                    for (String region : regions) {
-    //                        ++tests;
-    //                        if (--counter < 0) break testMain;
-    //                        LSR maxNew = ls.maximize(language, script, region);
-    //                        LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
-    //                        LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
-    //
-    //                        ULocale tempLocale = new ULocale(language, script, region);
-    //                        ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
-    //                        ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
-    //                        ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
-    //
-    //                        // check values
-    //                        final String maxNewS = String.valueOf(maxNew);
-    //                        final String maxOldS = maxOld.toLanguageTag();
-    //                        boolean sameMax = maxOldS.equals(maxNewS);
-    //
-    //                        final String minNewSS = String.valueOf(minNewS);
-    //                        final String minOldSS = minOldS.toLanguageTag();
-    //                        boolean sameMinS = minNewSS.equals(minOldSS);
-    //
-    //                        final String minNewRS = String.valueOf(minNewR);
-    //                        final String minOldRS = minOldS.toLanguageTag();
-    //                        boolean sameMinR = minNewRS.equals(minOldRS);
-    //
-    //                        if (sameMax && sameMinS && sameMinR) continue;
-    //                        System.out.println(new LSR(language, script, region)
-    //                                + "\tmax: " + maxNew
-    //                                + (sameMax ? "" : "≠" + maxOldS)
-    //                                + "\tminS: " + minNewS
-    //                                + (sameMinS ? "" : "≠" + minOldS)
-    //                                + "\tminR: " + minNewR
-    //                                + (sameMinR ? "" : "≠" + minOldR)
-    //                                );
-    //                    }
-    //                }
-    //                System.out.println(language + ": " + tests);
-    //            }
-    //    }
-    //
-    //    private static void add(String target, Counter<String> languageCounter, String language, int count) {
-    //        if (language.equals("aa")) {
-    //            int debug = 0;
-    //        }
-    //        languageCounter.add(language, count);
-    //    }
-    //
-    //    private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
-    //        for (String s : languageCounter) {
-    //            final long count = languageCounter.get(s);
-    //            if (count <= 1) {
-    //                languages.remove(s);
-    //            }
-    //        }
-    //    }
 }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleDistance.java
deleted file mode 100644 (file)
index bfde807..0000000
+++ /dev/null
@@ -1,1393 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.Row;
-import com.ibm.icu.impl.Row.R4;
-import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
-import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.impl.locale.XCldrStub.Predicate;
-import com.ibm.icu.impl.locale.XCldrStub.Splitter;
-import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder;
-import com.ibm.icu.text.LocaleDisplayNames;
-import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.Output;
-import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.UResourceBundleIterator;
-
-public class XLocaleDistance {
-
-    static final boolean PRINT_OVERRIDES = false;
-
-    public static final int ABOVE_THRESHOLD = 100;
-
-    // Activates debugging output to stderr with details of GetBestMatch.
-    // Be sure to set this to false before checking this in for production!
-    private static final boolean TRACE_DISTANCE = false;
-
-    @Deprecated
-    public static final String ANY = "�"; // matches any character. Uses value above any subtag.
-
-    private static String fixAny(String string) {
-        return "*".equals(string) ? ANY : string;
-    }
-
-    static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);
-
-    private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
-        List<R4<String, String, Integer, Boolean>> distanceList = new ArrayList<>();
-
-        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
-        ICUResourceBundle languageMatchingNew = suppData.findTopLevel("languageMatchingNew");
-        ICUResourceBundle written = (ICUResourceBundle) languageMatchingNew.get("written");
-
-        for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
-            ICUResourceBundle item = (ICUResourceBundle) iter.next();
-            boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
-            distanceList.add(
-                    (R4<String, String, Integer, Boolean>)            // note: .freeze returning wrong type, so casting.
-                    Row.of(
-                            item.getString(0),
-                            item.getString(1),
-                            Integer.parseInt(item.getString(2)),
-                            oneway)
-                    .freeze());
-        }
-        return Collections.unmodifiableList(distanceList);
-    }
-
-    @SuppressWarnings("unused")
-    private static Set<String> xGetParadigmLocales() {
-        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
-        ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
-        ICUResourceBundle writtenParadigmLocales = (ICUResourceBundle) languageMatchingInfo.get("written")
-                .get("paradigmLocales");
-        //      paradigmLocales{ "en", "en-GB",... }
-        HashSet<String> paradigmLocales = new HashSet<>(Arrays.asList(writtenParadigmLocales.getStringArray()));
-        return Collections.unmodifiableSet(paradigmLocales);
-    }
-
-    @SuppressWarnings("unused")
-    private static Map<String, String> xGetMatchVariables() {
-        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
-        ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
-        ICUResourceBundle writtenMatchVariables = (ICUResourceBundle) languageMatchingInfo.get("written")
-                .get("matchVariable");
-        //        matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}
-
-        HashMap<String,String> matchVariables = new HashMap<>();
-        for (Enumeration<String> enumer = writtenMatchVariables.getKeys(); enumer.hasMoreElements(); ) {
-            String key = enumer.nextElement();
-            matchVariables.put(key, writtenMatchVariables.getString(key));
-        }
-        return Collections.unmodifiableMap(matchVariables);
-    }
-
-    private static Multimap<String, String> xGetContainment() {
-        TreeMultimap<String,String> containment = TreeMultimap.create();
-        containment
-        .putAll("001", "019", "002", "150", "142", "009")
-        .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
-        .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
-        .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
-        .putAll("142", "145", "143", "030", "034", "035")
-        .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
-        .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
-        .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
-        .putAll("150", "154", "155", "151", "039")
-        .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
-        .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
-        .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
-        .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
-        .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
-        .putAll("019", "021", "013", "029", "005", "003", "419")
-        .putAll("002", "015", "011", "017", "014", "018")
-        .putAll("021", "BM", "CA", "GL", "PM", "US")
-        .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
-        .putAll("003", "021", "013", "029")
-        .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
-        .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
-        .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
-        .putAll("419", "013", "029", "005")
-        .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
-        .putAll("053", "AU", "NF", "NZ")
-        .putAll("054", "FJ", "NC", "PG", "SB", "VU")
-        .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
-        .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
-        .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
-        .putAll("009", "053", "054", "057", "061", "QO")
-        .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
-        ;
-        //Can't use following, because data from CLDR is discarded
-        //        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
-        //        UResourceBundle territoryContainment = suppData.get("territoryContainment");
-        //        for (int i = 0 ; i < territoryContainment.getSize(); i++) {
-        //            UResourceBundle mapping = territoryContainment.get(i);
-        //            String parent = mapping.getKey();
-        //            for (int j = 0 ; j < mapping.getSize(); j++) {
-        //                String child = mapping.getString(j);
-        //                containment.put(parent,child);
-        //                System.out.println(parent + " => " + child);
-        //            }
-        //        }
-        TreeMultimap<String,String> containmentResolved = TreeMultimap.create();
-        fill("001", containment, containmentResolved);
-        return ImmutableMultimap.copyOf(containmentResolved);
-    }
-
-    private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
-        Set<String> contained = containment.get(region);
-        if (contained == null) {
-            return Collections.emptySet();
-        }
-        toAddTo.putAll(region, contained); // do top level
-        // then recursively
-        for (String subregion : contained) {
-            toAddTo.putAll(region, fill(subregion, containment, toAddTo));
-        }
-        return toAddTo.get(region);
-    }
-
-
-    static final Multimap<String,String> CONTAINER_TO_CONTAINED;
-    static final Multimap<String,String> CONTAINER_TO_CONTAINED_FINAL;
-    static {
-        //         Multimap<String, String> containerToContainedTemp = xGetContainment();
-        //         fill(Region.getInstance("001"), containerToContainedTemp);
-
-        CONTAINER_TO_CONTAINED = xGetContainment();
-        Multimap<String, String> containerToFinalContainedBuilder = TreeMultimap.create();
-        for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
-            String container = entry.getKey();
-            for (String contained : entry.getValue()) {
-                if (CONTAINER_TO_CONTAINED.get(contained) == null) {
-                    containerToFinalContainedBuilder.put(container, contained);
-                }
-            }
-        }
-        CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(containerToFinalContainedBuilder);
-    }
-
-    final static private Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));
-
-    // end of data from CLDR
-
-    private final DistanceTable languageDesired2Supported;
-    private final RegionMapper regionMapper;
-    private final int defaultLanguageDistance;
-    private final int defaultScriptDistance;
-    private final int defaultRegionDistance;
-
-    @Deprecated
-    public static abstract class DistanceTable {
-        abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);
-        abstract Set<String> getCloser(int threshold);
-        abstract String toString(boolean abbreviate);
-        public DistanceTable compact() {
-            return this;
-        }
-        //        public Integer getInternalDistance(String a, String b) {
-        //            return null;
-        //        }
-        public DistanceNode getInternalNode(String any, String any2) {
-            return null;
-        }
-        public Map<String, Set<String>> getInternalMatches() {
-            return null;
-        }
-        public boolean isEmpty() {
-            return true;
-        }
-    }
-
-    @Deprecated
-    public static class DistanceNode {
-        final int distance;
-
-        public DistanceNode(int distance) {
-            this.distance = distance;
-        }
-
-        public DistanceTable getDistanceTable() {
-            return null;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            return this == obj ||
-                    (obj != null
-                    && obj.getClass() == this.getClass()
-                    && distance == ((DistanceNode) obj).distance);
-        }
-        @Override
-        public int hashCode() {
-            return distance;
-        }
-        @Override
-        public String toString() {
-            return "\ndistance: " + distance;
-        }
-    }
-
-    private interface IdMapper<K,V> {
-        public V toId(K source);
-    }
-
-    static class IdMakerFull<T> implements IdMapper<T,Integer> {
-        private final Map<T, Integer> objectToInt = new HashMap<>();
-        private final List<T> intToObject = new ArrayList<>();
-        final String name; // for debugging
-
-        IdMakerFull(String name) {
-            this.name = name;
-        }
-
-        IdMakerFull() {
-            this("unnamed");
-        }
-
-        IdMakerFull(String name, T zeroValue) {
-            this(name);
-            add(zeroValue);
-        }
-
-        /**
-         * Return an id, making one if there wasn't one already.
-         */
-        public Integer add(T source) {
-            Integer result = objectToInt.get(source);
-            if (result == null) {
-                Integer newResult = intToObject.size();
-                objectToInt.put(source, newResult);
-                intToObject.add(source);
-                return newResult;
-            } else {
-                return result;
-            }
-        }
-
-        /**
-         * Return an id, or null if there is none.
-         */
-        @Override
-        public Integer toId(T source) {
-            return objectToInt.get(source);
-            //            return value == null ? 0 : value;
-        }
-
-        /**
-         * Return the object for the id, or null if there is none.
-         */
-        public T fromId(int id) {
-            return intToObject.get(id);
-        }
-
-        /**
-         * Return interned object
-         */
-        public T intern(T source) {
-            return fromId(add(source));
-        }
-
-        public int size() {
-            return intToObject.size();
-        }
-        /**
-         * Same as add, except if the object didn't have an id, return null;
-         */
-        public Integer getOldAndAdd(T source) {
-            Integer result = objectToInt.get(source);
-            if (result == null) {
-                Integer newResult = intToObject.size();
-                objectToInt.put(source, newResult);
-                intToObject.add(source);
-            }
-            return result;
-        }
-
-        @Override
-        public String toString() {
-            return size() + ": " + intToObject;
-        }
-        @Override
-        public boolean equals(Object obj) {
-            return this == obj ||
-                    (obj != null
-                    && obj.getClass() == this.getClass()
-                    && intToObject.equals(((IdMakerFull<?>) obj).intToObject));
-        }
-        @Override
-        public int hashCode() {
-            return intToObject.hashCode();
-        }
-    }
-
-    static class StringDistanceNode extends DistanceNode {
-        final DistanceTable distanceTable;
-
-        public StringDistanceNode(int distance, DistanceTable distanceTable) {
-            super(distance);
-            this.distanceTable = distanceTable;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            StringDistanceNode other;
-            return this == obj ||
-                    (obj != null
-                    && obj.getClass() == this.getClass()
-                    && distance == (other = (StringDistanceNode) obj).distance
-                    && Objects.equals(distanceTable, other.distanceTable)
-                    && super.equals(other));
-        }
-        @Override
-        public int hashCode() {
-            return distance ^ Objects.hashCode(distanceTable);
-        }
-
-        StringDistanceNode(int distance) {
-            this(distance, new StringDistanceTable());
-        }
-
-        public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
-            ((StringDistanceTable) distanceTable).addSubtables(desiredSub, supportedSub, r);
-        }
-        @Override
-        public String toString() {
-            return "distance: " + distance + "\n" + distanceTable;
-        }
-
-        public void copyTables(StringDistanceTable value) {
-            if (value != null) {
-                ((StringDistanceTable)distanceTable).copy(value);
-            }
-        }
-
-        @Override
-        public DistanceTable getDistanceTable() {
-            return distanceTable;
-        }
-    }
-
-    public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
-        languageDesired2Supported = datadistancetable2;
-        this.regionMapper = regionMapper;
-
-        StringDistanceNode languageNode = (StringDistanceNode) ((StringDistanceTable) languageDesired2Supported).subtables.get(ANY).get(ANY);
-        defaultLanguageDistance = languageNode.distance;
-        StringDistanceNode scriptNode = (StringDistanceNode) ((StringDistanceTable)languageNode.distanceTable).subtables.get(ANY).get(ANY);
-        defaultScriptDistance = scriptNode.distance;
-        DistanceNode regionNode = ((StringDistanceTable)scriptNode.distanceTable).subtables.get(ANY).get(ANY);
-        defaultRegionDistance = regionNode.distance;
-    }
-
-    @SuppressWarnings("rawtypes")
-    private static Map newMap() { // for debugging
-        return new TreeMap();
-    }
-
-    /**
-     * Internal class
-     */
-    @Deprecated
-    public static class StringDistanceTable extends DistanceTable {
-        final Map<String, Map<String, DistanceNode>> subtables;
-
-        StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
-            subtables = tables;
-        }
-        @SuppressWarnings("unchecked")
-        StringDistanceTable() {
-            this(newMap());
-        }
-
-        @Override
-        public boolean isEmpty() {
-            return subtables.isEmpty();
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            return this == obj ||
-                    (obj != null
-                    && obj.getClass() == this.getClass()
-                    && subtables.equals(((StringDistanceTable) obj).subtables));
-        }
-        @Override
-        public int hashCode() {
-            return subtables.hashCode();
-        }
-
-        @Override
-        public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
-            if (TRACE_DISTANCE) {
-                System.err.printf("    Entering       getDistance: desired=%s supported=%s starEquals=%s\n",
-                    desired, supported, Boolean.toString(starEquals));
-            }
-            boolean star = false;
-            Map<String, DistanceNode> sub2 = subtables.get(desired);
-            if (sub2 == null) {
-                sub2 = subtables.get(ANY); // <*, supported>
-                star = true;
-            }
-            DistanceNode value = sub2.get(supported);   // <*/desired, supported>
-            if (value == null) {
-                value = sub2.get(ANY);  // <*/desired, *>
-                if (value == null && !star) {
-                    sub2 = subtables.get(ANY);   // <*, supported>
-                    value = sub2.get(supported);
-                    if (value == null) {
-                        value = sub2.get(ANY);   // <*, *>
-                    }
-                }
-                star = true;
-            }
-            if (distanceTable != null) {
-                distanceTable.value = ((StringDistanceNode) value).distanceTable;
-            }
-            int result = starEquals && star && desired.equals(supported) ? 0 : value.distance;
-            if (TRACE_DISTANCE) {
-                System.err.printf("    Returning from getDistance: %d\n", result);
-            }
-            return result;
-        }
-
-        public void copy(StringDistanceTable other) {
-            for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
-                for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
-                    DistanceNode value = e2.getValue();
-                    @SuppressWarnings("unused")
-                    DistanceNode subNode = addSubtable(e1.getKey(), e2.getKey(), value.distance);
-                }
-            }
-        }
-
-        @SuppressWarnings("unchecked")
-        DistanceNode addSubtable(String desired, String supported, int distance) {
-            Map<String, DistanceNode> sub2 = subtables.get(desired);
-            if (sub2 == null) {
-                subtables.put(desired, sub2 = newMap());
-            }
-            DistanceNode oldNode = sub2.get(supported);
-            if (oldNode != null) {
-                return oldNode;
-            }
-
-            final StringDistanceNode newNode = new StringDistanceNode(distance);
-            sub2.put(supported, newNode);
-            return newNode;
-        }
-
-        /**
-         * Return null if value doesn't exist
-         */
-        private DistanceNode getNode(String desired, String supported) {
-            Map<String, DistanceNode> sub2 = subtables.get(desired);
-            if (sub2 == null) {
-                return null;
-            }
-            return sub2.get(supported);
-        }
-
-
-        /** add table for each subitem that matches and doesn't have a table already
-         */
-        public void addSubtables(
-                String desired, String supported,
-                Predicate<DistanceNode> action) {
-            DistanceNode node = getNode(desired, supported);
-            if (node == null) {
-                // get the distance it would have
-                Output<DistanceTable> node2 = new Output<>();
-                int distance = getDistance(desired, supported, node2, true);
-                // now add it
-                node = addSubtable(desired, supported, distance);
-                if (node2.value != null) {
-                    ((StringDistanceNode)node).copyTables((StringDistanceTable)(node2.value));
-                }
-            }
-            action.test(node);
-        }
-
-        public void addSubtables(String desiredLang, String supportedLang,
-                String desiredScript, String supportedScript,
-                int percentage) {
-
-            // add to all the values that have the matching desiredLang and supportedLang
-            @SuppressWarnings("unused")
-            boolean haveKeys = false;
-            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
-                String key1 = e1.getKey();
-                final boolean desiredIsKey = desiredLang.equals(key1);
-                if (desiredIsKey || desiredLang.equals(ANY)) {
-                    for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
-                        String key2 = e2.getKey();
-                        final boolean supportedIsKey = supportedLang.equals(key2);
-                        haveKeys |= (desiredIsKey && supportedIsKey);
-                        if (supportedIsKey || supportedLang.equals(ANY)) {
-                            DistanceNode value = e2.getValue();
-                            ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
-                        }
-                    }
-                }
-            }
-            // now add the sequence explicitly
-            StringDistanceTable dt = new StringDistanceTable();
-            dt.addSubtable(desiredScript, supportedScript, percentage);
-            CopyIfEmpty r = new CopyIfEmpty(dt);
-            addSubtables(desiredLang, supportedLang, r);
-        }
-
-        public void addSubtables(String desiredLang, String supportedLang,
-                String desiredScript, String supportedScript,
-                String desiredRegion, String supportedRegion,
-                int percentage) {
-
-            // add to all the values that have the matching desiredLang and supportedLang
-            @SuppressWarnings("unused")
-            boolean haveKeys = false;
-            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
-                String key1 = e1.getKey();
-                final boolean desiredIsKey = desiredLang.equals(key1);
-                if (desiredIsKey || desiredLang.equals(ANY)) {
-                    for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
-                        String key2 = e2.getKey();
-                        final boolean supportedIsKey = supportedLang.equals(key2);
-                        haveKeys |= (desiredIsKey && supportedIsKey);
-                        if (supportedIsKey || supportedLang.equals(ANY)) {
-                            StringDistanceNode value = (StringDistanceNode) e2.getValue();
-                            ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
-                        }
-                    }
-                }
-            }
-            // now add the sequence explicitly
-
-            StringDistanceTable dt = new StringDistanceTable();
-            dt.addSubtable(desiredRegion, supportedRegion, percentage);
-            AddSub r = new AddSub(desiredScript, supportedScript, dt);
-            addSubtables(desiredLang,  supportedLang,  r);
-        }
-
-        @Override
-        public String toString() {
-            return toString(false);
-        }
-
-        @Override
-        public String toString(boolean abbreviate) {
-            return toString(abbreviate, "", new IdMakerFull<>("interner"), new StringBuilder()).toString();
-        }
-
-        public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
-            String indent2 = indent.isEmpty() ? "" : "\t";
-            Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
-            if (id != null) {
-                buffer.append(indent2).append('#').append(id).append('\n');
-            } else for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
-                final Map<String, DistanceNode> subsubtable = e1.getValue();
-                buffer.append(indent2).append(e1.getKey());
-                String indent3 = "\t";
-                id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
-                if (id != null) {
-                    buffer.append(indent3).append('#').append(id).append('\n');
-                } else for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
-                    DistanceNode value = e2.getValue();
-                    buffer.append(indent3).append(e2.getKey());
-                    id = abbreviate ? intern.getOldAndAdd(value) : null;
-                    if (id != null) {
-                        buffer.append('\t').append('#').append(id).append('\n');
-                    } else {
-                        buffer.append('\t').append(value.distance);
-                        final DistanceTable distanceTable = value.getDistanceTable();
-                        if (distanceTable != null) {
-                            id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
-                            if (id != null) {
-                                buffer.append('\t').append('#').append(id).append('\n');
-                            } else {
-                                ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
-                                buffer.append('\n');
-                            }
-                        } else {
-                            buffer.append('\n');
-                        }
-                    }
-                    indent3 = indent+'\t';
-                }
-                indent2 = indent;
-            }
-            return buffer;
-        }
-
-        @Override
-        public StringDistanceTable compact() {
-            return new CompactAndImmutablizer().compact(this);
-        }
-
-        @Override
-        public Set<String> getCloser(int threshold) {
-            Set<String> result = new HashSet<>();
-            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
-                String desired = e1.getKey();
-                for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
-                    if (e2.getValue().distance < threshold) {
-                        result.add(desired);
-                        break;
-                    }
-                }
-            }
-            return result;
-        }
-
-        public Integer getInternalDistance(String a, String b) {
-            Map<String, DistanceNode> subsub = subtables.get(a);
-            if (subsub == null) {
-                return null;
-            }
-            DistanceNode dnode = subsub.get(b);
-            return dnode == null ? null : dnode.distance;
-        }
-
-        @Override
-        public DistanceNode getInternalNode(String a, String b) {
-            Map<String, DistanceNode> subsub = subtables.get(a);
-            if (subsub == null) {
-                return null;
-            }
-            return subsub.get(b);
-        }
-
-        @Override
-        public Map<String, Set<String>> getInternalMatches() {
-            Map<String, Set<String>> result = new LinkedHashMap<>();
-            for (Entry<String, Map<String, DistanceNode>> entry : subtables.entrySet()) {
-                result.put(entry.getKey(), new LinkedHashSet<>(entry.getValue().keySet()));
-            }
-            return result;
-        }
-    }
-
-    static class CopyIfEmpty implements Predicate<DistanceNode> {
-        private final StringDistanceTable toCopy;
-        CopyIfEmpty(StringDistanceTable resetIfNotNull) {
-            this.toCopy = resetIfNotNull;
-        }
-        @Override
-        public boolean test(DistanceNode node) {
-            final StringDistanceTable subtables = (StringDistanceTable) node.getDistanceTable();
-            if (subtables.subtables.isEmpty()) {
-                subtables.copy(toCopy);
-            }
-            return true;
-        }
-    }
-
-    static class AddSub implements Predicate<DistanceNode> {
-        private final String desiredSub;
-        private final String supportedSub;
-        private final CopyIfEmpty r;
-
-        AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
-            this.r = new CopyIfEmpty(distanceTableToCopy);
-            this.desiredSub = desiredSub;
-            this.supportedSub = supportedSub;
-        }
-        @Override
-        public boolean test(DistanceNode node) {
-            if (node == null) {
-                throw new IllegalArgumentException("bad structure");
-            } else {
-                ((StringDistanceNode)node).addSubtables(desiredSub, supportedSub, r);
-            }
-            return true;
-        }
-    }
-
-    public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) {
-        LSR supportedLSR = LSR.fromMaximalized(supported);
-        LSR desiredLSR = LSR.fromMaximalized(desired);
-        return distanceRaw(desiredLSR, supportedLSR, threshold, distanceOption);
-    }
-
-    /**
-     * Returns distance, from 0 to ABOVE_THRESHOLD.
-     * ULocales must be in canonical, addLikelySubtags format. Returns distance
-     */
-    public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) {
-        if (TRACE_DISTANCE) {
-            System.err.printf("  Entering       distanceRaw: desired=%s supported=%s "
-            + "threshold=%d preferred=%s\n",
-            desired, supported, threshold,
-            distanceOption.name());
-        }
-        int result = distanceRaw(desired.language, supported.language,
-                desired.script, supported.script,
-                desired.region, supported.region,
-                threshold, distanceOption);
-        if (TRACE_DISTANCE) {
-            System.err.printf("  Returning from distanceRaw: %d\n", result);
-        }
-        return result;
-    }
-
-    public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
-    // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
-    // than regions, so they might be considered the "normal" case.
-
-    /**
-     * Returns distance, from 0 to ABOVE_THRESHOLD.
-     * ULocales must be in canonical, addLikelySubtags format.
-     * (Exception: internal calls may pass any strings. They do this for pseudo-locales.)
-     * Returns distance.
-     */
-    public int distanceRaw(
-            String desiredLang, String supportedLang,
-            String desiredScript, String supportedScript,
-            String desiredRegion, String supportedRegion,
-            int threshold,
-            DistanceOption distanceOption) {
-
-        Output<DistanceTable> subtable = new Output<>();
-
-        int distance = languageDesired2Supported.getDistance(desiredLang, supportedLang, subtable, true);
-        boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
-        if (scriptFirst) {
-            distance >>= 2;
-        }
-        if (distance < 0) {
-            distance = 0;
-        } else if (distance >= threshold) {
-            return ABOVE_THRESHOLD;
-        }
-
-        int scriptDistance = subtable.value.getDistance(desiredScript, supportedScript, subtable, true);
-        if (scriptFirst) {
-            scriptDistance >>= 1;
-        }
-        distance += scriptDistance;
-        if (distance >= threshold) {
-            return ABOVE_THRESHOLD;
-        }
-
-        if (desiredRegion.equals(supportedRegion)) {
-            return distance;
-        }
-
-        // From here on we know the regions are not equal
-
-        final String desiredPartition = regionMapper.toId(desiredRegion);
-        final String supportedPartition = regionMapper.toId(supportedRegion);
-        int subdistance;
-
-        // check for macros. If one is found, we take the maximum distance
-        // this could be optimized by adding some more structure, but probably not worth it.
-
-        Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null;
-        Collection<String> supportedPartitions = supportedPartition.isEmpty() ? regionMapper.macroToPartitions.get(supportedRegion) : null;
-        if (desiredPartitions != null || supportedPartitions != null) {
-            subdistance = 0;
-            // make the code simple for now
-            if (desiredPartitions == null) {
-                desiredPartitions = Collections.singleton(desiredPartition);
-            }
-            if (supportedPartitions == null) {
-                supportedPartitions = Collections.singleton(supportedPartition);
-            }
-
-            for (String desiredPartition2 : desiredPartitions) {
-                for (String supportedPartition2 : supportedPartitions) {
-                    int tempSubdistance = subtable.value.getDistance(desiredPartition2, supportedPartition2, null, false);
-                    if (subdistance < tempSubdistance) {
-                        subdistance = tempSubdistance;
-                    }
-                }
-            }
-        } else {
-            subdistance = subtable.value.getDistance(desiredPartition, supportedPartition, null, false);
-        }
-        distance += subdistance;
-        return distance >= threshold ? ABOVE_THRESHOLD : distance;
-    }
-
-
-    private static final XLocaleDistance DEFAULT;
-
-    public static XLocaleDistance getDefault() {
-        return DEFAULT;
-    }
-
-    static {
-        String[][] variableOverrides = {
-                {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"},
-
-                {"$cnsar", "HK+MO"},
-
-                {"$americas", "019"},
-
-                {"$maghreb", "MA+DZ+TN+LY+MR+EH"},
-        };
-        String[] paradigmRegions = {
-                "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT"
-        };
-        String[][] regionRuleOverrides = {
-                {"ar_*_$maghreb", "ar_*_$maghreb", "96"},
-                {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"},
-                {"ar_*_*", "ar_*_*", "95"},
-
-                {"en_*_$enUS", "en_*_$enUS", "96"},
-                {"en_*_$!enUS", "en_*_$!enUS", "96"},
-                {"en_*_*", "en_*_*", "95"},
-
-                {"es_*_$americas", "es_*_$americas", "96"},
-                {"es_*_$!americas", "es_*_$!americas", "96"},
-                {"es_*_*", "es_*_*", "95"},
-
-                {"pt_*_$americas", "pt_*_$americas", "96"},
-                {"pt_*_$!americas", "pt_*_$!americas", "96"},
-                {"pt_*_*", "pt_*_*", "95"},
-
-                {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"},
-                {"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"},
-                {"zh_Hant_*", "zh_Hant_*", "95"},
-
-                {"*_*_*", "*_*_*", "96"},
-        };
-
-        Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions);
-        for (String[] variableRule : variableOverrides) {
-            rmb.add(variableRule[0], variableRule[1]);
-        }
-        if (PRINT_OVERRIDES) {
-            System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">");
-            System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ")
-            + "\"/>");
-            for (String[] variableRule : variableOverrides) {
-                System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0]
-                        + "\" value=\""
-                        + variableRule[1]
-                                + "\"/>");
-            }
-        }
-
-        final StringDistanceTable defaultDistanceTable = new StringDistanceTable();
-        final RegionMapper defaultRegionMapper = rmb.build();
-
-        Splitter bar = Splitter.on('_');
-
-        @SuppressWarnings({"unchecked", "rawtypes"})
-        List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3];
-        sorted[0] = new ArrayList<>();
-        sorted[1] = new ArrayList<>();
-        sorted[2] = new ArrayList<>();
-
-        // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region.
-        for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) {
-            String desiredRaw = info.get0();
-            String supportedRaw = info.get1();
-            List<String> desired = bar.splitToList(desiredRaw);
-            List<String> supported = bar.splitToList(supportedRaw);
-            Boolean oneway = info.get3();
-            int distance = desiredRaw.equals("*_*") ? 50 : info.get2();
-            int size = desired.size();
-
-            // for now, skip size == 3
-            if (size == 3) continue;
-
-            sorted[size-1].add(Row.of(desired, supported, distance, oneway));
-        }
-
-        for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) {
-            for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) {
-                List<String> desired = item2.get0();
-                List<String> supported = item2.get1();
-                Integer distance = item2.get2();
-                Boolean oneway = item2.get3();
-                add(defaultDistanceTable, desired, supported, distance);
-                if (oneway != Boolean.TRUE && !desired.equals(supported)) {
-                    add(defaultDistanceTable, supported, desired, distance);
-                }
-                printMatchXml(desired, supported, distance, oneway);
-            }
-        }
-
-        // add new size=3
-        for (String[] rule : regionRuleOverrides) {
-            //            if (PRINT_OVERRIDES) System.out.println("\t\t\t<languageMatch desired=\""
-            //                + rule[0]
-            //                    + "\" supported=\""
-            //                    + rule[1]
-            //                        + "\" distance=\""
-            //                        + rule[2]
-            //                            + "\"/>");
-            //            if (rule[0].equals("en_*_*") || rule[1].equals("*_*_*")) {
-            //                int debug = 0;
-            //            }
-            List<String> desiredBase = new ArrayList<>(bar.splitToList(rule[0]));
-            List<String> supportedBase = new ArrayList<>(bar.splitToList(rule[1]));
-            Integer distance = 100-Integer.parseInt(rule[2]);
-            printMatchXml(desiredBase, supportedBase, distance, false);
-
-            Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2));
-            if (desiredRegions.isEmpty()) {
-                throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2));
-            }
-            Collection<String> supportedRegions = defaultRegionMapper.getIdsFromVariable(supportedBase.get(2));
-            if (supportedRegions.isEmpty()) {
-                throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2));
-            }
-            for (String desiredRegion2 : desiredRegions) {
-                desiredBase.set(2, desiredRegion2.toString()); // fix later
-                for (String supportedRegion2 : supportedRegions) {
-                    supportedBase.set(2, supportedRegion2.toString()); // fix later
-                    add(defaultDistanceTable, desiredBase, supportedBase, distance);
-                    add(defaultDistanceTable, supportedBase, desiredBase, distance);
-                }
-            }
-        }
-
-        // Pseudo regions should match no other regions.
-        // {"*-*-XA", "*-*-*", "0"},
-        // {"*-*-XB", "*-*-*", "0"},
-        // {"*-*-XC", "*-*-*", "0"},
-        // {"x1-*-*", "*-*-*", "0"},
-        // {"x2-*-*", "*-*-*", "0"},
-        // ...
-        // {"x8-*-*", "*-*-*", "0"},
-        List<String> supported = Arrays.asList("*", "*", "*");
-        for (String x : Arrays.asList("XA", "XB", "XC")) {
-            List<String> desired = Arrays.asList("*", "*", x);
-            add(defaultDistanceTable, desired, supported, 100);
-            add(defaultDistanceTable, supported, desired, 100);
-        }
-        // See XLikelySubtags.java for the mapping of pseudo-locales to x1 ... x8.
-        for (int i = 1; i <= 8; ++i) {
-            List<String> desired = Arrays.asList("x" + String.valueOf(i), "*", "*");
-            add(defaultDistanceTable, desired, supported, 100);
-            add(defaultDistanceTable, supported, desired, 100);
-        }
-
-        if (PRINT_OVERRIDES) {
-            System.out.println("\t\t</languageMatches>");
-        }
-
-        DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper);
-
-        if (PRINT_OVERRIDES) {
-            System.out.println(defaultRegionMapper);
-            System.out.println(defaultDistanceTable);
-            throw new IllegalArgumentException();
-        }
-    }
-
-    private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) {
-        if (PRINT_OVERRIDES) {
-            String desiredStr = CollectionUtilities.join(desired, "_");
-            String supportedStr = CollectionUtilities.join(supported, "_");
-            String desiredName = fixedName(desired);
-            String supportedName = fixedName(supported);
-            System.out.println("\t\t\t<languageMatch"
-                    + " desired=\"" + desiredStr
-                    + "\"\tsupported=\"" + supportedStr
-                    + "\"\tdistance=\"" + distance
-                    + (!oneway ? "" : "\"\toneway=\"true")
-                    + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->");
-        }
-    }
-
-    private static String fixedName(List<String> match) {
-        List<String> alt = new ArrayList<>(match);
-        int size = alt.size();
-        assert size >= 1 && size <= 3;
-
-        StringBuilder result = new StringBuilder();
-
-        if (size >= 3) {
-            String region = alt.get(2);
-            if (region.equals("*") || region.startsWith("$")) {
-                result.append(region);
-            } else {
-                result.append(english.regionDisplayName(region));
-            }
-        }
-        if (size >= 2) {
-            String script = alt.get(1);
-            if (script.equals("*")) {
-                result.insert(0, script);
-            } else {
-                result.insert(0, english.scriptDisplayName(script));
-            }
-        }
-        if (size >= 1) {
-            String language = alt.get(0);
-            if (language.equals("*")) {
-                result.insert(0, language);
-            } else {
-                result.insert(0, english.languageDisplayName(language));
-            }
-        }
-        return CollectionUtilities.join(alt, "; ");
-    }
-
-    static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) {
-        int size = desired.size();
-        if (size != supported.size() || size < 1 || size > 3) {
-            throw new IllegalArgumentException();
-        }
-        final String desiredLang = fixAny(desired.get(0));
-        final String supportedLang = fixAny(supported.get(0));
-        if (size == 1) {
-            languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
-        } else {
-            final String desiredScript = fixAny(desired.get(1));
-            final String supportedScript = fixAny(supported.get(1));
-            if (size == 2) {
-                languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
-            } else {
-                final String desiredRegion = fixAny(desired.get(2));
-                final String supportedRegion = fixAny(supported.get(2));
-                languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
-            }
-        }
-    }
-
-    @Override
-    public String toString() {
-        return toString(false);
-    }
-
-    public String toString(boolean abbreviate) {
-        return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate);
-    }
-
-
-    //    public static XLocaleDistance createDefaultInt() {
-    //        IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE);
-    //        return new XLocaleDistance(d, DEFAULT_REGION_MAPPER);
-    //    }
-
-    static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) {
-        output.clear();
-        for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
-            if (input.containsAll(entry.getValue())) { // example; if all southern Europe are contained, then add S. Europe
-                output.add(entry.getKey());
-            }
-        }
-        return output;
-    }
-
-    static class RegionMapper implements IdMapper<String,String> {
-        /**
-         * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX. We generate a mapping from $A1 to a set of partitions {P1, P2}
-         * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions.
-         */
-        final Multimap<String,String> variableToPartition;
-        /**
-         * Used for executing the rules. We map a region to a partition before processing.
-         */
-        final Map<String,String> regionToPartition;
-        /**
-         * Used to support es_419 compared to es_AR, etc.
-         */
-        final Multimap<String,String> macroToPartitions;
-        /**
-         * Used to get the paradigm region for a cluster, if there is one
-         */
-        final Set<ULocale> paradigms;
-
-        private RegionMapper(
-                Multimap<String, String> variableToPartitionIn,
-                Map<String, String> regionToPartitionIn,
-                Multimap<String,String> macroToPartitionsIn,
-                Set<ULocale> paradigmsIn) {
-            variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn);
-            regionToPartition = ImmutableMap.copyOf(regionToPartitionIn);
-            macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn);
-            paradigms = ImmutableSet.copyOf(paradigmsIn);
-        }
-
-        @Override
-        public String toId(String region) {
-            String result = regionToPartition.get(region);
-            return result == null ? "" : result;
-        }
-
-        public Collection<String> getIdsFromVariable(String variable) {
-            if (variable.equals("*")) {
-                return Collections.singleton("*");
-            }
-            Collection<String> result = variableToPartition.get(variable);
-            if (result == null || result.isEmpty()) {
-                throw new IllegalArgumentException("Variable not defined: " + variable);
-            }
-            return result;
-        }
-
-        public Set<String> regions() {
-            return regionToPartition.keySet();
-        }
-
-        public Set<String> variables() {
-            return variableToPartition.keySet();
-        }
-
-        @Override
-        public String toString() {
-            TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition,
-                    TreeMultimap.<String, String>create());
-            TreeMultimap<String, String> partitionToRegions = TreeMultimap.create();
-            for (Entry<String, String> e : regionToPartition.entrySet()) {
-                partitionToRegions.put(e.getValue(), e.getKey());
-            }
-            StringBuilder buffer = new StringBuilder();
-            buffer.append("Partition ➠ Variables ➠ Regions (final)");
-            for (Entry<String, Set<String>> e : partitionToVariables.asMap().entrySet()) {
-                buffer.append('\n');
-                buffer.append(e.getKey() + "\t" + e.getValue() + "\t" + partitionToRegions.get(e.getKey()));
-            }
-            buffer.append("\nMacro ➠ Partitions");
-            for (Entry<String, Set<String>> e : macroToPartitions.asMap().entrySet()) {
-                buffer.append('\n');
-                buffer.append(e.getKey() + "\t" + e.getValue());
-            }
-
-            return buffer.toString();
-        }
-
-        static class Builder {
-            final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
-            final private RegionSet regionSet = new RegionSet();
-            final private Set<ULocale> paradigms = new LinkedHashSet<>();
-
-            void add(String variable, String barString) {
-                Set<String> tempRegions = regionSet.parseSet(barString);
-
-                for (String region : tempRegions) {
-                    regionToRawPartition.put(region, variable);
-                }
-
-                // now add the inverse variable
-
-                Set<String> inverse = regionSet.inverse();
-                String inverseVariable = "$!" + variable.substring(1);
-                for (String region : inverse) {
-                    regionToRawPartition.put(region, inverseVariable);
-                }
-            }
-
-            public Builder addParadigms(String... paradigmRegions) {
-                for (String paradigm : paradigmRegions) {
-                    paradigms.add(new ULocale(paradigm));
-                }
-                return this;
-            }
-
-            RegionMapper build() {
-                final IdMakerFull<Collection<String>> id = new IdMakerFull<>("partition");
-                Multimap<String,String> variableToPartitions = TreeMultimap.create();
-                Map<String,String> regionToPartition = new TreeMap<>();
-                Multimap<String,String> partitionToRegions = TreeMultimap.create();
-
-                for (Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
-                    final String region = e.getKey();
-                    final Collection<String> rawPartition = e.getValue();
-                    String partition = String.valueOf((char)('α' + id.add(rawPartition)));
-
-                    regionToPartition.put(region, partition);
-                    partitionToRegions.put(partition, region);
-
-                    for (String variable : rawPartition) {
-                        variableToPartitions.put(variable, partition);
-                    }
-                }
-
-                // we get a mapping of each macro to the partitions it intersects with
-                Multimap<String,String> macroToPartitions = TreeMultimap.create();
-                for (Entry<String, Set<String>> e : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
-                    String macro = e.getKey();
-                    for (Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
-                        String partition = e2.getKey();
-                        if (!Collections.disjoint(e.getValue(), e2.getValue())) {
-                            macroToPartitions.put(macro, partition);
-                        }
-                    }
-                }
-
-                return new RegionMapper(
-                        variableToPartitions,
-                        regionToPartition,
-                        macroToPartitions,
-                        paradigms);
-            }
-        }
-    }
-
-    /**
-     * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
-     * All macroregions are fully resolved to sets of non-macro regions.
-     * <br>Syntax is simple for now:
-     * <pre>regionSet := region ([-+] region)*</pre>
-     * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
-     */
-    private static class RegionSet {
-        private enum Operation {add, remove}
-        // temporaries used in processing
-        final private Set<String> tempRegions = new TreeSet<>();
-        private Operation operation = null;
-
-        private Set<String> parseSet(String barString) {
-            operation = Operation.add;
-            int last = 0;
-            tempRegions.clear();
-            int i = 0;
-            for (; i < barString.length(); ++i) {
-                char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii
-                switch(c) {
-                case '+':
-                    add(barString, last, i);
-                    last = i+1;
-                    operation = Operation.add;
-                    break;
-                case '-':
-                    add(barString, last, i);
-                    last = i+1;
-                    operation = Operation.remove;
-                    break;
-                }
-            }
-            add(barString, last, i);
-            return tempRegions;
-        }
-
-        private Set<String> inverse() {
-            TreeSet<String> result = new TreeSet<>(ALL_FINAL_REGIONS);
-            result.removeAll(tempRegions);
-            return result;
-        }
-
-        private void add(String barString, int last, int i) {
-            if (i > last) {
-                String region = barString.substring(last,i);
-                changeSet(operation, region);
-            }
-        }
-
-        private void changeSet(Operation operation, String region) {
-            Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region);
-            if (contained != null && !contained.isEmpty()) {
-                if (Operation.add == operation) {
-                    tempRegions.addAll(contained);
-                } else {
-                    tempRegions.removeAll(contained);
-                }
-            } else if (Operation.add == operation) {
-                tempRegions.add(region);
-            } else {
-                tempRegions.remove(region);
-            }
-        }
-    }
-
-    public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) {
-        return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create());
-    }
-
-    public Set<ULocale> getParadigms() {
-        return regionMapper.paradigms;
-    }
-
-    public int getDefaultLanguageDistance() {
-        return defaultLanguageDistance;
-    }
-
-    public int getDefaultScriptDistance() {
-        return defaultScriptDistance;
-    }
-
-    public int getDefaultRegionDistance() {
-        return defaultRegionDistance;
-    }
-
-    static class CompactAndImmutablizer extends IdMakerFull<Object> {
-        StringDistanceTable compact(StringDistanceTable item) {
-            if (toId(item) != null) {
-                return (StringDistanceTable) intern(item);
-            }
-            return new StringDistanceTable(compact(item.subtables, 0));
-        }
-        @SuppressWarnings({ "unchecked", "rawtypes" })
-        <K,T> Map<K,T> compact(Map<K,T> item, int level) {
-            if (toId(item) != null) {
-                return (Map<K, T>) intern(item);
-            }
-            Map<K,T> copy = new LinkedHashMap<>();
-            for (Entry<K,T> entry : item.entrySet()) {
-                T value = entry.getValue();
-                if (value instanceof Map) {
-                    copy.put(entry.getKey(), (T)compact((Map)value, level+1));
-                } else {
-                    copy.put(entry.getKey(), (T)compact((DistanceNode)value));
-                }
-            }
-            return ImmutableMap.copyOf(copy);
-        }
-        DistanceNode compact(DistanceNode item) {
-            if (toId(item) != null) {
-                return (DistanceNode) intern(item);
-            }
-            final DistanceTable distanceTable = item.getDistanceTable();
-            if (distanceTable == null || distanceTable.isEmpty()) {
-                return new DistanceNode(item.distance);
-            } else {
-                return new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable));
-            }
-        }
-    }
-
-    @Deprecated
-    public StringDistanceTable internalGetDistanceTable() {
-        return (StringDistanceTable) languageDesired2Supported;
-    }
-
-    public static void main(String[] args) {
-        //      for (Entry<String, Collection<String>> entry : containerToContained.asMap().entrySet()) {
-        //          System.out.println(entry.getKey() + "\t⥢" + entry.getValue() + "; " + containerToFinalContained.get(entry.getKey()));
-        //      }
-        //      final Multimap<String,String> regionToMacros = ImmutableMultimap.copyOf(Multimaps.invertFrom(containerToContained, TreeMultimap.create()));
-        //      for (Entry<String, Collection<String>> entry : regionToMacros.asMap().entrySet()) {
-        //          System.out.println(entry.getKey() + "\t⥤ " + entry.getValue());
-        //      }
-        if (PRINT_OVERRIDES) {
-            System.out.println(getDefault().toString(true));
-        }
-        DistanceTable table = getDefault().languageDesired2Supported;
-        DistanceTable compactedTable = table.compact();
-        if (!table.equals(compactedTable)) {
-            throw new IllegalArgumentException("Compaction isn't equal");
-        }
-    }
-}
index 8782aab0416d4b57648f2b87ee50ee619cba5576..a527c6192f0bb68806367cbe27ac13c6857159ff 100644 (file)
@@ -2,19 +2,20 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.locale;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
 import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.Set;
 
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
-import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
 import com.ibm.icu.util.LocalePriorityList;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
@@ -23,57 +24,144 @@ import com.ibm.icu.util.ULocale;
  * Immutable class that picks best match between user's desired locales and application's supported locales.
  * @author markdavis
  */
-public class XLocaleMatcher {
-    private static final LSR UND = new LSR("und","","");
+public final class XLocaleMatcher {
+    private static final LSR UND_LSR = new LSR("und","","");
     private static final ULocale UND_LOCALE = new ULocale("und");
+    private static final Iterator<ULocale> NULL_ITERATOR = null;
 
     // Activates debugging output to stderr with details of GetBestMatch.
     private static final boolean TRACE_MATCHER = false;
 
+    // List of indexes, optimized for one or two.
+    private static final class Indexes {
+        // Some indexes without further object creation and auto-boxing.
+        int first, second = -1;
+        // We could turn the List into an int array + length and manage its growth.
+        List<Integer> remaining;
+
+        Indexes(int firstIndex) {
+            first = firstIndex;
+        }
+        void add(int i) {
+            if (second < 0) {
+                second = i;
+            } else {
+                if (remaining == null) {
+                    remaining = new ArrayList<>();
+                }
+                remaining.add(i);
+            }
+        }
+        int getFirst() { return first; }
+        int get(int i) {  // returns -1 when i >= length
+            if (i == 0) {
+                return first;
+            } else if (i == 1) {
+                return second;
+            } else if (remaining != null && (i -= 2) < remaining.size()) {
+                return remaining.get(i);
+            } else {
+                return -1;
+            }
+        }
+    }
+
+    // TODO: Make public, and add public methods that return it.
+    private static final class Result {
+        private Result(ULocale desired, ULocale supported,
+                /* Locale jdesired, */ Locale jsupported,
+                int desIndex, int suppIndex) {
+            desiredLocale = desired;
+            supportedLocale = supported;
+            // desiredJavaLocale = jdesired;
+            supportedJavaLocale = jsupported;
+            desiredIndex = desIndex;
+            supportedIndex = suppIndex;
+        }
+
+        ULocale desiredLocale;
+        ULocale supportedLocale;
+        // Locale desiredJavaLocale;
+        Locale supportedJavaLocale;
+        int desiredIndex;
+        @SuppressWarnings("unused")  // unused until public, for other wrappers
+        int supportedIndex;
+    }
+
     // normally the default values, but can be set via constructor
 
-    private final XLocaleDistance localeDistance;
     private final int thresholdDistance;
     private final int demotionPerAdditionalDesiredLocale;
     private final DistanceOption distanceOption;
 
     // built based on application's supported languages in constructor
 
-    private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
-    private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
-    private final ULocale defaultLanguage;
+    private final ULocale[] supportedLocales;
+    private final Locale[] supportedJavaLocales;
+    private final Map<ULocale, Integer> supportedToIndex;
+    private final Map<LSR, Indexes> supportedLsrToIndexes;
+    // Array versions of the supportedLsrToIndexes keys and values.
+    // The distance lookup loops over the supportedLsrs and returns the index of the best match.
+    private final LSR[] supportedLsrs;
+    private final Indexes[] supportedIndexes;
+    private final ULocale defaultLocale;
+    private final Locale defaultJavaLocale;
+    private final int defaultLocaleIndex;
 
     public static class Builder {
-        private Set<ULocale> supportedLanguagesList;
+        /**
+         * Supported locales. A Set, to avoid duplicates.
+         * Maintains iteration order for consistent matching behavior (first best match wins).
+         */
+        private Set<ULocale> supportedLocales;
         private int thresholdDistance = -1;
         private int demotionPerAdditionalDesiredLocale = -1;;
-        private ULocale defaultLanguage;
-        private XLocaleDistance localeDistance;
+        private ULocale defaultLocale;
         private DistanceOption distanceOption;
         /**
-         * @param languagePriorityList the languagePriorityList to set
+         * @param locales the languagePriorityList to set
          * @return this Builder object
          */
-        public Builder setSupportedLocales(String languagePriorityList) {
-            this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
+        public Builder setSupportedLocales(String locales) {
+            return setSupportedLocales(LocalePriorityList.add(locales).build());
+        }
+        public Builder setSupportedLocales(Iterable<ULocale> locales) {
+            supportedLocales = new LinkedHashSet<>(); // maintain order
+            for (ULocale locale : locales) {
+                supportedLocales.add(locale);
+            }
             return this;
         }
-        public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
-            this.supportedLanguagesList = asSet(languagePriorityList);
+        public Builder setSupportedLocales(Collection<ULocale> locales) {
+            supportedLocales = new LinkedHashSet<>(locales); // maintain order
             return this;
         }
-        public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
-            Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
-            temp.addAll(languagePriorityList);
-            this.supportedLanguagesList = temp;
+        public Builder setSupportedJavaLocales(Collection<Locale> locales) {
+            supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
+            for (Locale locale : locales) {
+                supportedLocales.add(ULocale.forLocale(locale));
+            }
+            return this;
+        }
+        public Builder addSupportedLocale(ULocale locale) {
+            if (supportedLocales == null) {
+                supportedLocales = new LinkedHashSet<>();
+            }
+            supportedLocales.add(locale);
             return this;
         }
+        public Builder addSupportedLocale(Locale locale) {
+            return addSupportedLocale(ULocale.forLocale(locale));
+        }
 
         /**
          * @param thresholdDistance the thresholdDistance to set, with -1 = default
          * @return this Builder object
          */
         public Builder setThresholdDistance(int thresholdDistance) {
+            if (thresholdDistance > 100) {
+                thresholdDistance = 100;
+            }
             this.thresholdDistance = thresholdDistance;
             return this;
         }
@@ -86,22 +174,13 @@ public class XLocaleMatcher {
             return this;
         }
 
-        /**
-         * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
-         * @return this Builder object
-         */
-        public Builder setLocaleDistance(XLocaleDistance localeDistance) {
-            this.localeDistance = localeDistance;
-            return this;
-        }
-
         /**
          * Set the default language, with null = default = first supported language
-         * @param defaultLanguage the default language
+         * @param defaultLocale the default language
          * @return this Builder object
          */
-        public Builder setDefaultLanguage(ULocale defaultLanguage) {
-            this.defaultLanguage = defaultLanguage;
+        public Builder setDefaultLanguage(ULocale defaultLocale) {
+            this.defaultLocale = defaultLocale;
             return this;
         }
 
@@ -122,18 +201,23 @@ public class XLocaleMatcher {
 
         @Override
         public String toString() {
-          StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
-          if (!supportedLanguagesList.isEmpty()) {
-            s.append(" supported={").append(supportedLanguagesList.toString()).append("}");
-          }
-          if (defaultLanguage != null) {
-            s.append(" default=").append(defaultLanguage.toString());
-          }
-          if (thresholdDistance >= 0) {
-            s.append(String.format(" thresholdDistance=%d", thresholdDistance));
-          }
-          s.append(" preference=").append(distanceOption.name());
-          return s.append("}").toString();
+            StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
+            if (!supportedLocales.isEmpty()) {
+                s.append(" supported={").append(supportedLocales.toString()).append('}');
+            }
+            if (defaultLocale != null) {
+                s.append(" default=").append(defaultLocale.toString());
+            }
+            if (distanceOption != null) {
+                s.append(" distance=").append(distanceOption.toString());
+            }
+            if (thresholdDistance >= 0) {
+                s.append(String.format(" threshold=%d", thresholdDistance));
+            }
+            if (demotionPerAdditionalDesiredLocale >= 0) {
+                s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+            }
+            return s.append('}').toString();
         }
     }
 
@@ -159,75 +243,101 @@ public class XLocaleMatcher {
     }
 
     /**
-     * Create a locale matcher with the given parameters.
-     * @param supportedLocales
-     * @param thresholdDistance
-     * @param demotionPerAdditionalDesiredLocale
-     * @param localeDistance
-     * @param likelySubtags
+     * Creates a locale matcher with the given Builder parameters.
      */
     private XLocaleMatcher(Builder builder) {
-        localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
-            : builder.localeDistance;
-        thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
-            : builder.thresholdDistance;
-        // only do AFTER above are set
-        Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
-        final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
-        supportedLanguages = temp2.asMap();
-        exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
-        defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
-            : supportedLanguages.isEmpty() ? null
-                : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
-        demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
-            : builder.demotionPerAdditionalDesiredLocale;
+        thresholdDistance = builder.thresholdDistance < 0 ?
+                LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., java.util.Locale)
+        // we can return those by parallel index.
+        int supportedLocalesLength = builder.supportedLocales.size();
+        supportedLocales = new ULocale[supportedLocalesLength];
+        supportedJavaLocales = new Locale[supportedLocalesLength];
+        supportedToIndex = new HashMap<>(supportedLocalesLength);
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, Indexes).
+        // We use a LinkedHashMap for both,
+        // and insert the supported locales in the following order:
+        // 1. First supported locale.
+        // 2. Priority locales in builder order.
+        // 3. Remaining locales in builder order.
+        supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
+        Map<LSR, Indexes> otherLsrToIndexes = null;
+        LSR firstLSR = null;
+        int i = 0;
+        for (ULocale locale : builder.supportedLocales) {
+            supportedLocales[i] = locale;
+            supportedJavaLocales[i] = locale.toLocale();
+            // supportedToIndex.putIfAbsent(locale, i)
+            Integer oldIndex = supportedToIndex.get(locale);
+            if (oldIndex == null) {
+                supportedToIndex.put(locale, i);
+            }
+            LSR lsr = getMaximalLsrOrUnd(locale);
+            if (i == 0) {
+                firstLSR = lsr;
+                supportedLsrToIndexes.put(lsr, new Indexes(0));
+            } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+                addIndex(supportedLsrToIndexes, lsr, i);
+            } else {
+                if (otherLsrToIndexes == null) {
+                    otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
+                }
+                addIndex(otherLsrToIndexes, lsr, i);
+            }
+            ++i;
+        }
+        if (otherLsrToIndexes != null) {
+            supportedLsrToIndexes.putAll(otherLsrToIndexes);
+        }
+        int numSuppLsrs = supportedLsrToIndexes.size();
+        supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
+        supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
+        ULocale def;
+        Locale jdef = null;
+        int idef = -1;
+        if (builder.defaultLocale != null) {
+            def = builder.defaultLocale;
+        } else if (supportedLocalesLength > 0) {
+            def = supportedLocales[0]; // first language
+            jdef = supportedJavaLocales[0];
+            idef = 0;
+        } else {
+            def = null;
+        }
+        if (jdef == null && def != null) {
+            jdef = def.toLocale();
+        }
+        defaultLocale = def;
+        defaultJavaLocale = jdef;
+        defaultLocaleIndex = idef;
+        demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
+                LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
+                    builder.demotionPerAdditionalDesiredLocale;
         distanceOption = builder.distanceOption;
     }
 
-    // Result is not immutable!
-    private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
-        Set<LSR> result = new LinkedHashSet<LSR>();
-        for (ULocale item : languagePriorityList) {
-            final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
-            result.add(max);
+    private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
+        Indexes indexes = lsrToIndexes.get(lsr);
+        if (indexes == null) {
+            lsrToIndexes.put(lsr, new Indexes(i));
+        } else {
+            indexes.add(i);
         }
-        return result;
     }
 
-    private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
-        Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
-        for (ULocale item : languagePriorityList) {
-            final LSR max = item.equals(UND_LOCALE) ? UND :
-            LSR.fromMaximalized(item);
-            builder.put(max, item);
-        }
-        if (builder.size() > 1 && priorities != null) {
-            // for the supported list, we put any priorities before all others, except for the first.
-            Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
-
-            // copy the long way so the priorities are in the same order as in the original
-            boolean first = true;
-            for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
-                final LSR key = entry.getKey();
-                if (first || priorities.contains(key)) {
-                    builder2.putAll(key, entry.getValue());
-                    first = false;
-                }
-            }
-            // now copy the rest
-            builder2.putAll(builder);
-            if (!builder2.equals(builder)) {
-                throw new IllegalArgumentException();
-            }
-            builder = builder2;
+    private static final LSR getMaximalLsrOrUnd(ULocale locale) {
+        if (locale.equals(UND_LOCALE)) {
+            return UND_LSR;
+        } else {
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
         }
-        return ImmutableMultimap.copyOf(builder);
     }
 
-
     /** Convenience method */
     public ULocale getBestMatch(ULocale ulocale) {
-        return getBestMatch(ulocale, null);
+        return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
     }
     /** Convenience method */
     public ULocale getBestMatch(String languageList) {
@@ -235,126 +345,128 @@ public class XLocaleMatcher {
     }
     /** Convenience method */
     public ULocale getBestMatch(ULocale... locales) {
-        return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
-    }
-    /** Convenience method */
-    public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
-        return getBestMatch(desiredLanguages, null);
-    }
-    /** Convenience method */
-    public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
-        return getBestMatch(desiredLanguages, null);
+        return getBestMatch(Arrays.asList(locales), null);
     }
     /** Convenience method */
-    public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
-        return getBestMatch(asSet(desiredLanguages), outputBestDesired);
-    }
-
-    // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
-    private static Set<ULocale> asSet(LocalePriorityList languageList) {
-        Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
-        for (ULocale locale : languageList) {
-            temp.add(locale);
-        };
-        return temp;
+    public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
+        return getBestMatch(desiredLocales, null);
     }
 
     /**
      * Get the best match between the desired languages and supported languages
-     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
-     * @param outputBestDesired The one of the desired languages that matched best.
+     * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
+     * @param outputBestDesired If not null, its value is set to the desired locale that matched best.
      * Set to null if the best match was not below the threshold distance.
      * @return the best match.
      */
-    public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
-        // fast path for singleton
-        if (desiredLanguages.size() == 1) {
-            return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
-        }
-        // TODO produce optimized version for single desired ULocale
-        Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
-        int bestDistance = Integer.MAX_VALUE;
+    public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
+        Iterator<ULocale> desiredIter = desiredLocales.iterator();
+        if (!desiredIter.hasNext()) {
+            if (outputBestDesired != null) {
+                outputBestDesired.value = null;
+            }
+            if (TRACE_MATCHER) {
+                System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
+            }
+            return defaultLocale;
+        }
+        ULocale desiredLocale = desiredIter.next();
+        return getBestMatch(desiredLocale, desiredIter, outputBestDesired);
+    }
+
+    /**
+     * @param desiredLocale First desired locale.
+     * @param remainingIter Remaining desired locales, null or empty if none.
+     * @param outputBestDesired If not null,
+     *     will be set to the desired locale that matches the best supported one.
+     * @return the best supported locale.
+     */
+    private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
+            Output<ULocale> outputBestDesired) {
+        Result result = getBestMatch(desiredLocale, remainingIter);
+        if (outputBestDesired != null) {
+            outputBestDesired.value = result.desiredLocale;
+        }
+        return result.supportedLocale;
+    }
+
+    private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
+        int desiredIndex = 0;
+        int bestDesiredIndex = -1;
         ULocale bestDesiredLocale = null;
-        Collection<ULocale> bestSupportedLocales = null;
-        int delta = 0;
-    mainLoop:
-        for (final Entry<LSR, Set<ULocale>> desiredLsrAndLocales : desiredLSRs.asMap().entrySet()) {
-          LSR desiredLSR = desiredLsrAndLocales.getKey();
-          for (ULocale desiredLocale : desiredLsrAndLocales.getValue()) {
-            // quick check for exact match
-            if (delta < bestDistance) {
-              if (exactSupportedLocales.contains(desiredLocale)) {
-                if (outputBestDesired != null) {
-                  outputBestDesired.value = desiredLocale;
-                }
+        int bestSupportedLsrIndex = 0;
+        for (int bestDistance = thresholdDistance; bestDistance > 0;
+                bestDistance -= demotionPerAdditionalDesiredLocale) {
+            // Quick check for exact locale match.
+            Integer supportedIndex = supportedToIndex.get(desiredLocale);
+            if (supportedIndex != null) {
                 if (TRACE_MATCHER) {
-                    System.err.printf(
-                              "Returning %s, which is an exact match for a supported language\n",
-                              desiredLocale);
-                 }
-                return desiredLocale;
-              }
-              // quick check for maximized locale
-              Collection<ULocale> found = supportedLanguages.get(desiredLSR);
-              if (found != null) {
-                // if we find one in the set, return first (lowest). We already know the exact one isn't
-                // there.
-                if (outputBestDesired != null) {
-                  outputBestDesired.value = desiredLocale;
+                    System.err.printf("Returning %s: desired=supported\n", desiredLocale);
                 }
-                ULocale result = found.iterator().next();
+                int suppIndex = supportedIndex;
+                return new Result(desiredLocale, supportedLocales[suppIndex],
+                        supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
+            }
+            // Quick check for exact maximized LSR.
+            LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+            Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
+            if (indexes != null) {
+                // If this is a supported LSR, return the first locale.
+                // We already know the exact locale isn't there.
+                int suppIndex = indexes.getFirst();
+                ULocale result = supportedLocales[suppIndex];
                 if (TRACE_MATCHER) {
-                  System.err.printf("Returning %s\n", result.toString());
+                    System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
                 }
-                return result;
-              }
+                return new Result(desiredLocale, result,
+                        supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
             }
-            for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
-              int distance =
-                  delta
-                      + localeDistance.distanceRaw(
-                          desiredLSR,
-                          supportedLsrAndLocale.getKey(),
-                          thresholdDistance,
-                          distanceOption);
-              if (distance < bestDistance) {
-                bestDistance = distance;
+            int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+                    desiredLSR, supportedLsrs, bestDistance, distanceOption);
+            if (bestIndexAndDistance >= 0) {
+                bestDistance = bestIndexAndDistance & 0xff;
+                bestDesiredIndex = desiredIndex;
                 bestDesiredLocale = desiredLocale;
-                bestSupportedLocales = supportedLsrAndLocale.getValue();
-                if (distance == 0) {
-                  break mainLoop;
+                bestSupportedLsrIndex = bestIndexAndDistance >> 8;
+                if (bestDistance == 0) {
+                    break;
                 }
-              }
             }
-            delta += demotionPerAdditionalDesiredLocale;
-          }
-        }
-        if (bestDistance >= thresholdDistance) {
-            if (outputBestDesired != null) {
-                outputBestDesired.value = null;
+            if (remainingIter == null || !remainingIter.hasNext()) {
+                break;
             }
+            desiredLocale = remainingIter.next();
+            ++desiredIndex;
+        }
+        if (bestDesiredIndex < 0) {
             if (TRACE_MATCHER) {
-              System.err.printf("Returning default %s\n", defaultLanguage.toString());
+                System.err.printf("Returning default %s: no good match\n", defaultLocale);
             }
-            return defaultLanguage;
+            return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
         }
-        if (outputBestDesired != null) {
-            outputBestDesired.value = bestDesiredLocale;
-        }
-        // pick exact match if there is one
-        if (bestSupportedLocales.contains(bestDesiredLocale)) {
-            if (TRACE_MATCHER) {
-              System.err.printf(
-                  "Returning %s which matches a supported language\n", bestDesiredLocale.toString());
+        // Pick exact match if there is one.
+        // The length of the list is normally 1.
+        Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
+        int suppIndex;
+        for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
+            ULocale locale = supportedLocales[suppIndex];
+            if (bestDesiredLocale.equals(locale)) {
+                if (TRACE_MATCHER) {
+                    System.err.printf("Returning %s: desired=best matching supported language\n",
+                            bestDesiredLocale);
+                }
+                return new Result(bestDesiredLocale, locale,
+                        supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
             }
-            return bestDesiredLocale;
         }
-        // otherwise return first supported, combining variants and extensions from bestDesired
-        ULocale result = bestSupportedLocales.iterator().next();
+        // Otherwise return the first of the supported languages that share the best-matching LSR.
+        suppIndex = bestSupportedIndexes.getFirst();
+        ULocale result = supportedLocales[suppIndex];
         if (TRACE_MATCHER) {
-          System.err.printf("Returning first supported language %s\n", result.toString());
+            System.err.printf("Returning %s: first best matching supported language\n", result);
         }
-        return result;
+        return new Result(bestDesiredLocale, result,
+                supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
     }
 
     /**
@@ -365,73 +477,88 @@ public class XLocaleMatcher {
      * @return the best match.
      */
     public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
-        int bestDistance = Integer.MAX_VALUE;
-        ULocale bestDesiredLocale = null;
-        Collection<ULocale> bestSupportedLocales = null;
+        return getBestMatch(desiredLocale, null, outputBestDesired);
+    }
 
-        // quick check for exact match, with hack for und
-        final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
+    /**
+     * Converts Locales to ULocales on the fly.
+     */
+    private static final class LocalesWrapper implements Iterator<ULocale> {
+        private Iterator<Locale> locales;
+        // Cache locales to avoid conversion of the result.
+        private Locale first, second;
+        private List<Locale> remaining;
+
+        LocalesWrapper(Iterator<Locale> locales) {
+            this.locales = locales;
+        }
 
-        if (exactSupportedLocales.contains(desiredLocale)) {
-            if (outputBestDesired != null) {
-                outputBestDesired.value = desiredLocale;
-            }
-            if (TRACE_MATCHER) {
-              System.err.printf("Exact match with a supported locale.\n");
-            }
-            return desiredLocale;
-        }
-        // quick check for maximized locale
-        if (distanceOption == DistanceOption.REGION_FIRST) {
-            Collection<ULocale> found = supportedLanguages.get(desiredLSR);
-            if (found != null) {
-                // if we find one in the set, return first (lowest). We already know the exact one isn't there.
-                if (outputBestDesired != null) {
-                    outputBestDesired.value = desiredLocale;
-                }
-                ULocale result = found.iterator().next();
-                if (TRACE_MATCHER) {
-                  System.err.printf("Matches a maximized supported locale: %s\n", result);
+        @Override
+        public boolean hasNext() {
+            return locales.hasNext();
+        }
+
+        @Override
+        public ULocale next() {
+            Locale locale = locales.next();
+            if (first == null) {
+                first = locale;
+            } else if (second == null) {
+                second = locale;
+            } else {
+                if (remaining == null) {
+                    remaining = new ArrayList<>();
                 }
-                return result;
+                remaining.add(locale);
             }
+            return ULocale.forLocale(locale);
         }
-        for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
-            int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
-                thresholdDistance, distanceOption);
-            if (distance < bestDistance) {
-                bestDistance = distance;
-                bestDesiredLocale = desiredLocale;
-                bestSupportedLocales = supportedLsrAndLocale.getValue();
-                if (distance == 0) {
-                    break;
-                }
+
+        Locale getJavaLocale(int i) {
+            if (i == 0) {
+                return first;
+            } else if (i == 1) {
+                return second;
+            } else {
+                // TODO: test code coverage
+                return remaining.get(i - 2);
             }
         }
-        if (bestDistance >= thresholdDistance) {
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
+        Iterator<Locale> desiredIter = desiredLocales.iterator();
+        if (!desiredIter.hasNext()) {
             if (outputBestDesired != null) {
                 outputBestDesired.value = null;
             }
             if (TRACE_MATCHER) {
-              System.err.printf(
-                  "Returning default %s because everything exceeded the threshold of %d.\n",
-                  defaultLanguage, thresholdDistance);
+                System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
             }
-            return defaultLanguage;
+            return defaultJavaLocale;
         }
+        LocalesWrapper wrapper = new LocalesWrapper(desiredIter); // converts Locale to ULocale on the fly, caching each Locale by position
+        ULocale desiredLocale = wrapper.next();
+        Result result = getBestMatch(desiredLocale, wrapper); // pass the wrapper so the remaining desired locales are matched too
         if (outputBestDesired != null) {
-            outputBestDesired.value = bestDesiredLocale;
+            outputBestDesired.value = result.desiredIndex >= 0 ?
+                    wrapper.getJavaLocale(result.desiredIndex) : null;
         }
-        // pick exact match if there is one
-        if (bestSupportedLocales.contains(bestDesiredLocale)) {
-            return bestDesiredLocale;
-        }
-        // otherwise return first supported, combining variants and extensions from bestDesired
-        ULocale result = bestSupportedLocales.iterator().next();
-        if (TRACE_MATCHER) {
-          System.err.printf("First in the list of supported locales: %s\n", result);
+        return result.supportedJavaLocale;
+    }
+
+    public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
+        ULocale desiredULocale = ULocale.forLocale(desiredLocale);
+        Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
+        if (outputBestDesired != null) {
+            outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
         }
-        return result;
+        return result.supportedJavaLocale;
     }
 
     /** Combine features of the desired locale into those of the supported, and return result. */
@@ -474,22 +601,39 @@ public class XLocaleMatcher {
      * A language is first maximized with add likely subtags, then compared.
      */
     public int distance(ULocale desired, ULocale supported) {
-        return localeDistance.distanceRaw(
-            LSR.fromMaximalized(desired),
-            LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
+        return LocaleDistance.INSTANCE.getBestIndexAndDistance(
+            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
+            new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) }, // single-element supported list
+            thresholdDistance, distanceOption) & 0xff; // low 8 bits = distance; NOTE(review): confirm the value when nothing is below the threshold
     }
 
     /** Convenience method */
     public int distance(String desiredLanguage, String supportedLanguage) {
-        return localeDistance.distanceRaw(
-            LSR.fromMaximalized(new ULocale(desiredLanguage)),
-            LSR.fromMaximalized(new ULocale(supportedLanguage)),
-            thresholdDistance, distanceOption);
+        return LocaleDistance.INSTANCE.getBestIndexAndDistance(
+            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
+            new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) }, // single-element supported list
+            thresholdDistance, distanceOption) & 0xff; // low 8 bits = distance; NOTE(review): confirm the value when nothing is below the threshold
     }
 
     @Override
     public String toString() {
-        return exactSupportedLocales.toString();
+        StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
+        if (supportedLocales.length > 0) {
+            s.append(" supported={").append(supportedLocales[0].toString());
+            for (int i = 1; i < supportedLocales.length; ++i) {
+                s.append(", ").append(supportedLocales[1].toString());
+            }
+            s.append('}');
+        }
+        s.append(" default=").append(Objects.toString(defaultLocale));
+        if (distanceOption != null) {
+            s.append(" distance=").append(distanceOption.toString());
+        }
+        if (thresholdDistance >= 0) {
+            s.append(String.format(" threshold=%d", thresholdDistance));
+        }
+        s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+        return s.append('}').toString();
     }
 
     /** Return the inverse of the distance: that is, 1-distance(desired, supported) */
index 49c67aa5bf86b7ea30060d0753ea9ee7109d6af4..7df1a857fdbf1222bc49db82fffbee73ce05c8f4 100644 (file)
@@ -25,7 +25,7 @@ import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.Relation;
 import com.ibm.icu.impl.Row;
 import com.ibm.icu.impl.Row.R3;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
 import com.ibm.icu.impl.locale.XLocaleMatcher;
 import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
 
index 2ea96a7fb1bc1af7b729bb3b5eea049c0347b8a4..c5d57ca05345d0f9f554916a8dc62303fd8a9f76 100644 (file)
@@ -4,9 +4,8 @@ package com.ibm.icu.dev.test.util;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Set;
 
 import org.junit.Ignore;
@@ -15,17 +14,15 @@ import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
 import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
 import com.ibm.icu.util.LocaleMatcher;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
 
 /**
- * Test the XLocaleDistance.
+ * Test the LocaleDistance.
+ * TODO: Rename to LocaleDistanceTest.
  *
  * @author markdavis
  */
@@ -33,9 +30,7 @@ import com.ibm.icu.util.ULocale;
 public class XLocaleDistanceTest extends TestFmwk {
     private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
 
-    public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
-
-    private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
+    private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
     DataDrivenTestHelper tfh = new MyTestFileHandler()
             .setFramework(this)
             .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
@@ -58,7 +53,7 @@ public class XLocaleDistanceTest extends TestFmwk {
     @Ignore("Disabled because of Linux; need to investigate.")
     @Test
     public void testTiming() {
-        List<Arguments> testArgs = new ArrayList<Arguments>();
+        List<Arguments> testArgs = new ArrayList<>();
         for (List<String> line : tfh.getLines()) {
             if (tfh.isTestLine(line)) {
                 testArgs.add(new Arguments(line));
@@ -94,13 +89,13 @@ public class XLocaleDistanceTest extends TestFmwk {
                 oldTimeMinusLikely += System.nanoTime()-temp;
 
                 temp = System.nanoTime();
-                final LSR desiredLSR = LSR.fromMaximalized(desired);
-                final LSR supportedLSR = LSR.fromMaximalized(supported);
+//                final LSR desiredLSR = LSR.maximizedFrom(desired);
+//                final LSR supportedLSR = LSR.maximizedFrom(supported);
                 newLikelyTime += System.nanoTime()-temp;
 
                 temp = System.nanoTime();
-                int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.REGION_FIRST);
-                int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.REGION_FIRST);
+                int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
+                int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
                 newTimeMinusLikely += System.nanoTime()-temp;
             }
         }
@@ -118,52 +113,53 @@ public class XLocaleDistanceTest extends TestFmwk {
     }
 
     @Test
-    @SuppressWarnings("deprecation")
     public void testInternalTable() {
-        checkTables(localeMatcher.internalGetDistanceTable(), "", 1);
-    }
-
-    @SuppressWarnings("deprecation")
-    private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
-        // Check that ANY, ANY is always present, and that the table has a depth of exactly 3 everyplace.
-        Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
-
-        // must have ANY,ANY
-        boolean haveANYANY = false;
-        for (Entry<String, Set<String>> entry : matches.entrySet()) {
-            String first = entry.getKey();
-            boolean haveANYfirst = first.equals(XLocaleDistance.ANY);
-            for (String second : entry.getValue()) {
-                haveANYANY |= haveANYfirst && second.equals(XLocaleDistance.ANY);
-                DistanceNode distanceNode = internalGetDistanceTable.getInternalNode(first, second);
-                DistanceTable subDistanceTable = distanceNode.getDistanceTable();
-                if (subDistanceTable == null || subDistanceTable.isEmpty()) {
-                    if (depth != 3) {
-                        logln("depth should be 3");
-                    }
-                    if (distanceNode.getClass() != DistanceNode.class) {
-                        logln("should be plain DistanceNode");
-                    }
-                } else {
-                    if (depth >= 3) {
-                        logln("depth should be ≤ 3");
-                    }
-                    if (distanceNode.getClass() == DistanceNode.class) {
-                        logln("should NOT be plain DistanceNode");
-                    }
-                    checkTables(subDistanceTable, first + "," + second + ",", depth+1);
+        Set<String> strings = localeDistance.testOnlyGetDistanceTable(false).keySet();
+        // Check that the table has a depth of exactly 3 (desired, supported) pairs everyplace
+        // by removing every prefix of a 6-subtag string from a copy of the set of strings.
+        // Any remaining string is not a prefix of a full-depth string.
+        Set<String> remaining = new HashSet<>(strings);
+        // Check that ANY, ANY is always present.
+        assertTrue("*-*", strings.contains("*-*"));
+        for (String s : strings) {
+            int num = countSubtags(s);
+            assertTrue(s, 1 <= num && num <= 6);
+            if (num > 1) {
+                String oneShorter = removeLastSubtag(s);
+                assertTrue(oneShorter, strings.contains(oneShorter));
+            }
+            if (num == 2 || num == 4) {
+                String sPlusAnyAny = s + "-*-*";
+                assertTrue(sPlusAnyAny, strings.contains(sPlusAnyAny));
+            } else if (num == 6) {
+                for (;; --num) {
+                    remaining.remove(s);
+                    if (num == 1) { break; }
+                    s = removeLastSubtag(s);
                 }
             }
         }
-        if (!haveANYANY) {
-            logln("ANY-ANY not in" + matches);
+        assertTrue("strings that do not lead to 6-subtag matches", remaining.isEmpty());
+    }
+
+    private static final int countSubtags(String s) { // number of '-'-separated pieces in s; 0 for the empty string
+        if (s.isEmpty()) { return 0; }
+        int num = 1;
+        for (int pos = 0; (pos = s.indexOf('-', pos)) >= 0; ++pos) { // ++pos steps past the '-' just found
+            ++num;
         }
+        return num;
+    }
+
+    private static final String removeLastSubtag(String s) { // drops the last '-' and everything after it
+        int last = s.lastIndexOf('-'); // -1 when s has no '-'; substring(0, -1) would then throw, so callers must pass 2+ subtags
+        return s.substring(0, last);
     }
 
     @Test
     public void testShowDistanceTable() {
         if (isVerbose()) {
-            System.out.println(XLocaleDistance.getDefault().toString(false));
+            localeDistance.testOnlyPrintDistanceTable();
         }
     }
 
@@ -176,10 +172,9 @@ public class XLocaleDistanceTest extends TestFmwk {
     }
 
     class MyTestFileHandler extends DataDrivenTestHelper {
-        final XLocaleDistance distance = XLocaleDistance.getDefault();
-        Output<ULocale> bestDesired = new Output<ULocale>();
+        Output<ULocale> bestDesired = new Output<>();
         private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
-        private Integer threshold = distance.getDefaultScriptDistance();
+        private Integer threshold = localeDistance.getDefaultScriptDistance();
 
         @Override
         public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
@@ -187,8 +182,8 @@ public class XLocaleDistanceTest extends TestFmwk {
                 breakpoint = false; // put debugger breakpoint here to break at @debug in test file
             }
             Arguments args = new Arguments(arguments);
-            int supportedToDesiredActual = distance.distance(args.supported, args.desired, threshold, distanceOption);
-            int desiredToSupportedActual = distance.distance(args.desired, args.supported, threshold, distanceOption);
+            int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
+            int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
             String desiredTag = args.desired.toLanguageTag();
             String supportedTag = args.supported.toLanguageTag();
             final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
index 07c87b9968dff10e7ac3fb9801089ac9c3a5edc4..7a4df3b30619cfe01f7a75f65f9746294bff95b8 100644 (file)
@@ -2,7 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.dev.test.util;
 
-
 import java.io.BufferedReader;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -16,9 +15,9 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 
 import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
 import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
-import com.ibm.icu.impl.locale.XLocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
 import com.ibm.icu.impl.locale.XLocaleMatcher;
 import com.ibm.icu.util.LocaleMatcher;
 import com.ibm.icu.util.LocalePriorityList;
@@ -37,7 +36,7 @@ import junitparams.Parameters;
 public class XLocaleMatcherTest extends TestFmwk {
     private static final int REGION_DISTANCE = 4;
 
-    private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
+    private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
 
     private XLocaleMatcher newXLocaleMatcher() {
         return new XLocaleMatcher("");
@@ -176,17 +175,102 @@ public class XLocaleMatcherTest extends TestFmwk {
         }
     }
 
+    private static final class PerfCase {
+        ULocale desired;
+        ULocale expectedShort;
+        ULocale expectedLong;
+        ULocale expectedVeryLong;
+
+        PerfCase(String des, String expShort, String expLong, String expVeryLong) {
+            desired = new ULocale(des);
+            expectedShort = new ULocale(expShort);
+            expectedLong = new ULocale(expLong);
+            expectedVeryLong = new ULocale(expVeryLong);
+        }
+    }
+
+    private static final int WARM_UP_ITERATIONS = 1000;
+    private static final int BENCHMARK_ITERATIONS = 20000;
+    private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
+    private static final int AVG_PCT_LONG_NEW_OLD = 80;
 
     @Test
     public void testPerf() {
         if (LANGUAGE_MATCHER_DATA == null) {
             return; // skip except when testing data
         }
-        final ULocale desired = new ULocale("sv");
 
         final String shortList = "en, sv";
-        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
-        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, 
fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, 
so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+                "zh-CN, zh-TW, zu";
+        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
+                "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
+                "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
+                "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
+                "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
+                "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
+                "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
+                "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
+                "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
+                "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
+                "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
+                // removed en_001 to avoid exact match
+                "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
+                "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
+                "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
+                "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
+                "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
+                "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
+                "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
+                "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
+                "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
+                "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
+                "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
+                "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
+                "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
+                "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
+                "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
+                "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
+                "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
+                "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
+                "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
+                "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
+                "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
+                "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
+                "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
+                "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
+                "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
+                "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
+                "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
+                "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
+                "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
+                "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
+                "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
+                "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
+                "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
+                "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
+                "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
+                "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
+                "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
+                "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
+                "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
+                "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
+                "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
+                "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
+                "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
+                "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
+                "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
+                "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
+                "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
+                "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
+                "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
+                "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
+                "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
+                "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
 
         final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
         final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
@@ -196,62 +280,93 @@ public class XLocaleMatcherTest extends TestFmwk {
         final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
         final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
 
-        //XLocaleMatcher.DEBUG = true;
-        ULocale expected = new ULocale("sv");
-        assertEquals(expected, matcherShort.getBestMatch(desired));
-        assertEquals(expected, matcherLong.getBestMatch(desired));
-        assertEquals(expected, matcherVeryLong.getBestMatch(desired));
-        //XLocaleMatcher.DEBUG = false;
-
         long timeShortNew=0;
         long timeMediumNew=0;
         long timeLongNew=0;
 
-        for (int i = 0; i < 2; ++i) {
-            int iterations = i == 0 ? 1000 : 1000000;
-            boolean showMessage = i != 0;
-            timeShortNew = timeXLocaleMatcher("Duration (few  supported):\t", desired, matcherShort, showMessage, iterations);
-            timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
-            timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
-        }
-
         long timeShortOld=0;
         long timeMediumOld=0;
         long timeLongOld=0;
 
-        for (int i = 0; i < 2; ++i) {
-            int iterations = i == 0 ? 1000 : 100000;
-            boolean showMessage = i != 0;
-            timeShortOld = timeLocaleMatcher("Old Duration (few  supported):\t", desired, matcherShortOld, showMessage, iterations);
-            timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
-            timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
+        PerfCase[] pcs = new PerfCase[] {
+                // Exact match in all matchers.
+                new PerfCase("sv", "sv", "sv", "sv"),
+                // Common locale, exact match only in very long list.
+                new PerfCase("fr_CA", "en", "fr", "fr_CA"),
+                // Unusual locale, no exact match.
+                new PerfCase("de_CA", "en", "de", "de"),
+                // World English maps to several region partitions.
+                new PerfCase("en_001", "en", "en", "en"),
+                // Ancient language with interesting subtags.
+                new PerfCase("egy_Copt_CY", "en", "af", "af")
+        };
+
+        for (PerfCase pc : pcs) {
+            final ULocale desired = pc.desired;
+
+            assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
+            assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
+            assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+
+            timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
+            timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
+            timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
+            long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (few  supported):\t%s\t%d\tnanos\n", desired, tns);
+            timeShortNew += tns;
+            long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
+            timeMediumNew += tnl;
+            long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
+            timeLongNew += tnv;
+
+            timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
+            timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
+            timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
+            long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
+            System.out.format("Old Duration (few  supported):\t%s\t%d\tnanos  new/old=%d%%\n",
+                    desired, tos, (100 * tns) / tos);
+            timeShortOld += tos;
+            long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
+            System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos  new/old=%d%%\n",
+                    desired, tol, (100 * tnl) / tol);
+            timeMediumOld += tol;
+            long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
+            System.out.format("Old Duration (many supported):\t%s\t%d\tnanos  new/old=%d%%\n",
+                    desired, tov, (100 * tnv) / tov);
+            timeLongOld += tov;
         }
 
-        assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
-        assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
-        assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
-
+        assertTrue(
+                String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
+                        timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
+                timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
+        assertTrue(
+                String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
+                        timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
+                timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
+        assertTrue(
+                String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
+                        timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
+                timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
     }
 
-    private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
-        boolean showmessage, int iterations) {
+    private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
         long start = System.nanoTime();
         for (int i = iterations; i > 0; --i) {
             matcher.getBestMatch(desired);
         }
         long delta = System.nanoTime() - start;
-        if (showmessage) logln(title + (delta / iterations) + " nanos");
         return (delta / iterations);
     }
 
-    private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
-        boolean showmessage, int iterations) {
+    private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
         long start = System.nanoTime();
         for (int i = iterations; i > 0; --i) {
             matcher.getBestMatch(desired);
         }
         long delta = System.nanoTime() - start;
-        if (showmessage) logln(title + (delta / iterations) + " nanos");
         return (delta / iterations);
     }