--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ICUUncheckedIOException;
+
+/**
+ * Stub class to make migration easier until we get either Guava or a higher level of Java.
+ */
+public class XCldrStub {
+
+ /**
+  * Minimal stand-in for a Guava-style multimap: each key maps to a {@link Set} of values.
+  * The backing map and the class used for newly created value sets are chosen by the subclass.
+  * Value sets are created reflectively through the set class's no-argument constructor.
+  */
+ public static class Multimap<K, V> {
+     private final Map<K,Set<V>> map;
+     private final Class<Set<V>> setClass;
+
+     /**
+      * @param map backing map; used directly, not copied
+      * @param setClass class instantiated for each new value set; {@code null} means {@link HashSet}
+      */
+     @SuppressWarnings("unchecked")
+     private Multimap(Map<K,Set<V>> map, Class<?> setClass) {
+         this.map = map;
+         this.setClass = (Class<Set<V>>) (setClass != null
+                 ? setClass
+                 : HashSet.class);
+     }
+     /** Adds all the given values under key; a key with no values is not created. Returns this. */
+     public Multimap<K, V> putAll(K key, V... values) {
+         if (values.length != 0) {
+             createSetIfMissing(key).addAll(Arrays.asList(values));
+         }
+         return this;
+     }
+     /** Adds all the given values under key; an empty collection does not create the key. */
+     public void putAll(K key, Collection<V> values) {
+         if (!values.isEmpty()) {
+             createSetIfMissing(key).addAll(values);
+         }
+     }
+     /** Adds the single value under every one of the given keys. */
+     public void putAll(Collection<K> keys, V value) {
+         for (K key : keys) {
+             put(key, value);
+         }
+     }
+     /** Adds every key/value pair of source to this multimap. */
+     public void putAll(Multimap<K, V> source) {
+         for (Entry<K, Set<V>> entry : source.map.entrySet()) {
+             putAll(entry.getKey(), entry.getValue());
+         }
+     }
+     /** Adds value under key, creating the value set if needed. */
+     public void put(K key, V value) {
+         createSetIfMissing(key).add(value);
+     }
+     private Set<V> createSetIfMissing(K key) {
+         Set<V> old = map.get(key);
+         if (old == null) {
+             map.put(key, old = getInstance());
+         }
+         return old;
+     }
+     /** Creates an empty value set; reflection failures are rethrown as ICUException. */
+     private Set<V> getInstance() {
+         try {
+             return setClass.newInstance();
+         } catch (Exception e) {
+             throw new ICUException(e);
+         }
+     }
+     /** Returns the live value set for key, or {@code null} (not an empty set) when the key is absent. */
+     public Set<V> get(K key) {
+         Set<V> result = map.get(key);
+         return result; // == null ? Collections.<V>emptySet() : result;
+     }
+     public Set<K> keySet() {
+         return map.keySet();
+     }
+     /** Returns the backing map itself, not a copy. */
+     public Map<K, Set<V>> asMap() {
+         return map;
+     }
+     /** Returns the union of all value sets as a new set (or the shared immutable empty set if there are no keys). */
+     public Set<V> values() {
+         Collection<Set<V>> values = map.values();
+         if (values.size() == 0) {
+             return Collections.<V>emptySet();
+         }
+         Set<V> result = getInstance();
+         for ( Set<V> valueSet : values) {
+             result.addAll(valueSet);
+         }
+         return result;
+     }
+     /** Returns the number of keys (not the number of key/value pairs). */
+     public int size() {
+         return map.size();
+     }
+     /** Iterates over every key/value pair. */
+     public Iterable<Entry<K, V>> entries() {
+         return new MultimapIterator<K, V>(map);
+     }
+     /**
+      * Two multimaps are equal when their backing maps are equal.
+      * Returns false (instead of throwing) for null or for objects that are not multimaps.
+      */
+     @Override
+     public boolean equals(Object obj) {
+         if (this == obj) {
+             return true;
+         }
+         if (!(obj instanceof Multimap)) {
+             return false;
+         }
+         return map.equals(((Multimap<?, ?>) obj).map);
+     }
+     /** Consistent with {@link #equals(Object)}: derived from the backing map only. */
+     @Override
+     public int hashCode() {
+         return map.hashCode();
+     }
+ }
+
+ /** Static helpers for {@link Multimap}, mirroring the small part of Guava's Multimaps that is needed here. */
+ public static class Multimaps {
+     /**
+      * Adds the inverse of source to target: for every value under a source key,
+      * target receives (value, key). Returns target.
+      */
+     public static <K, V, R extends Multimap<K, V>> R invertFrom(Multimap<V, K> source, R target) {
+         for (Entry<V, Set<K>> pair : source.asMap().entrySet()) {
+             final Set<K> newKeys = pair.getValue();
+             final V newValue = pair.getKey();
+             target.putAll(newKeys, newValue);
+         }
+         return target;
+     }
+     /**
+      * Adds the inverse of a plain map to target: each (key, value) of source
+      * becomes (value, key) in target. Returns target.
+      */
+     public static <K, V, R extends Multimap<K, V>> R invertFrom(Map<V, K> source, R target) {
+         for (Entry<V, K> pair : source.entrySet()) {
+             final K newKey = pair.getValue();
+             final V newValue = pair.getKey();
+             target.put(newKey, newValue);
+         }
+         return target;
+     }
+     /**
+      * Warning, not functionally the same as Guava; only for use in invertFrom.
+      * Simply hands back the argument.
+      */
+     public static <K, V> Map<K,V> forMap(Map<K,V> map) {
+         return map;
+     }
+ }
+
+ /**
+  * Flattens a map of value sets into a sequence of (key, value) entries.
+  * The same entry object is returned from every call to next() with its fields
+  * overwritten, so callers must not hold on to a returned entry across iterations.
+  * The instance is its own Iterable and can therefore be traversed only once.
+  */
+ private static class MultimapIterator<K,V> implements Iterator<Entry<K,V>>, Iterable<Entry<K,V>> {
+     private final Iterator<Entry<K, Set<V>>> it1;
+     private Iterator<V> it2 = null;
+     // Key that belongs to it2; copied into the shared entry only in next(),
+     // so hasNext() never disturbs the entry handed out previously.
+     private K currentKey = null;
+     private final ReusableEntry<K,V> entry = new ReusableEntry<K,V>();
+
+     private MultimapIterator(Map<K,Set<V>> map) {
+         it1 = map.entrySet().iterator();
+     }
+     /**
+      * Positions it2 on a value iterator that still has elements, moving on to
+      * following keys (and skipping empty value sets) as needed.
+      * @return false when no further key/value pair exists
+      */
+     private boolean advance() {
+         while (it2 == null || !it2.hasNext()) {
+             if (!it1.hasNext()) {
+                 return false;
+             }
+             Entry<K, Set<V>> e = it1.next();
+             currentKey = e.getKey();
+             it2 = e.getValue().iterator();
+         }
+         return true;
+     }
+     @Override
+     public boolean hasNext() {
+         return advance();
+     }
+     /**
+      * Returns the next pair. Both key and value are always set: previously the value
+      * was left untouched when moving to a new key, producing one bogus entry per key.
+      */
+     @Override
+     public Entry<K, V> next() {
+         if (!advance()) {
+             throw new java.util.NoSuchElementException();
+         }
+         entry.key = currentKey;
+         entry.value = it2.next();
+         return entry;
+     }
+     /** Removal through this view is not supported. */
+     @Override
+     public void remove() {
+         throw new UnsupportedOperationException();
+     }
+     @Override
+     public Iterator<Entry<K, V>> iterator() {
+         return this;
+     }
+ }
+
+ /**
+  * Mutable key/value holder whose fields are assigned directly by its owner.
+  * It is read-only through the Entry interface: setValue always throws.
+  */
+ private static class ReusableEntry<K,V> implements Entry<K,V> {
+     K key;
+     V value;
+
+     /** Returns whatever key was last stored in the field. */
+     @Override
+     public K getKey() {
+         return this.key;
+     }
+
+     /** Returns whatever value was last stored in the field. */
+     @Override
+     public V getValue() {
+         return this.value;
+     }
+
+     /** Always throws UnsupportedOperationException. */
+     @Override
+     public V setValue(V newValue) {
+         throw new UnsupportedOperationException();
+     }
+ }
+
+ /** Multimap backed by a HashMap whose value sets are HashSets. */
+ public static class HashMultimap<K, V> extends Multimap<K, V> {
+     private HashMultimap() {
+         super(new HashMap<K, Set<V>>(), HashSet.class);
+     }
+     /** Creates a new, empty instance. */
+     public static <K, V> HashMultimap<K, V> create() {
+         final HashMultimap<K, V> fresh = new HashMultimap<K, V>();
+         return fresh;
+     }
+ }
+
+ /** Multimap backed by a TreeMap whose value sets are TreeSets. */
+ public static class TreeMultimap<K, V> extends Multimap<K, V> {
+     private TreeMultimap() {
+         super(new TreeMap<K, Set<V>>(), TreeSet.class);
+     }
+     /** Creates a new, empty instance. */
+     public static <K, V> TreeMultimap<K, V> create() {
+         final TreeMultimap<K, V> fresh = new TreeMultimap<K, V>();
+         return fresh;
+     }
+ }
+
+ /** Multimap backed by a LinkedHashMap whose value sets are LinkedHashSets. */
+ public static class LinkedHashMultimap<K, V> extends Multimap<K, V> {
+     private LinkedHashMultimap() {
+         super(new LinkedHashMap<K, Set<V>>(), LinkedHashSet.class);
+     }
+     /** Creates a new, empty instance. */
+     public static <K, V> LinkedHashMultimap<K, V> create() {
+         final LinkedHashMultimap<K, V> fresh = new LinkedHashMultimap<K, V>();
+         return fresh;
+     }
+ }
+
+
+ public static class Counter<T> implements Iterable<T>{
+ private Map<T,Long> data;
+ @Override
+ public Iterator<T> iterator() {
+ return data.keySet().iterator();
+ }
+ public long get(T s) {
+ Long result = data.get(s);
+ return result != null ? result : 0L;
+ }
+ public void add(T item, int count) {
+ Long result = data.get(item);
+ data.put(item, result == null ? count : result + count);
+ }
+ }
+
+ public static <T> String join(T[] source, String separator) {
+ StringBuilder result = new StringBuilder();
+ for (int i = 0; i < source.length; ++i) {
+ if (i != 0) result.append(separator);
+ result.append(source[i]);
+ }
+ return result.toString();
+ }
+
+ public static <T> String join(Iterable<T> source, String separator) {
+ StringBuilder result = new StringBuilder();
+ boolean first = true;
+ for (T item : source) {
+ if (!first) result.append(separator);
+ else first = false;
+ result.append(item.toString());
+ }
+ return result.toString();
+ }
+
+ /** Holder for a join helper that forwards to XCldrStub.join. */
+ public static class CollectionUtilities {
+     /** Joins the string forms of the items of source, separated by separator. */
+     public static <T, U extends Iterable<T>> String join(U source, String separator) {
+         final Iterable<T> items = source;
+         return XCldrStub.join(items, separator);
+     }
+ }
+
+ /** Joins arrays or iterables with a fixed separator; obtain an instance through {@link #on(String)}. */
+ public static class Joiner {
+     private final String separator;
+
+     private Joiner(String separator) {
+         this.separator = separator;
+     }
+
+     /** Creates a joiner that places separator between consecutive elements. */
+     public static final Joiner on(String separator) {
+         final Joiner joiner = new Joiner(separator);
+         return joiner;
+     }
+
+     /** Joins the elements of the array using this joiner's separator. */
+     public <T> String join(T[] source) {
+         return XCldrStub.join(source, this.separator);
+     }
+
+     /** Joins the items of the iterable using this joiner's separator. */
+     public <T> String join(Iterable<T> source) {
+         return XCldrStub.join(source, this.separator);
+     }
+ }
+
+ /**
+  * Splits strings around matches of a pattern, optionally trimming each piece.
+  * Splitting is done with Pattern.split(CharSequence), so trailing empty pieces are dropped.
+  */
+ public static class Splitter {
+     Pattern pattern;
+     boolean trimResults = false;
+
+     /** Splits on the literal character c (quoted, so regex metacharacters have no special meaning). */
+     public Splitter(char c) {
+         this(Pattern.compile("\\Q" + c + "\\E"));
+     }
+
+     /** Splits on matches of p. */
+     public Splitter(Pattern p) {
+         this.pattern = p;
+     }
+
+     /** Factory form of {@link #Splitter(char)}. */
+     public static Splitter on(char c) {
+         return new Splitter(c);
+     }
+
+     /** Factory form of {@link #Splitter(Pattern)}. */
+     public static Splitter on(Pattern p) {
+         return new Splitter(p);
+     }
+
+     /** Splits input and returns the pieces as a fixed-size list backed by the split array. */
+     public List<String> splitToList(String input) {
+         final String[] pieces = pattern.split(input);
+         if (!trimResults) {
+             return Arrays.asList(pieces);
+         }
+         int index = 0;
+         while (index < pieces.length) {
+             pieces[index] = pieces[index].trim();
+             ++index;
+         }
+         return Arrays.asList(pieces);
+     }
+
+     /** Turns trimming on for this same instance (it is modified, not copied) and returns it. */
+     public Splitter trimResults() {
+         this.trimResults = true;
+         return this;
+     }
+
+     /** Same as {@link #splitToList(String)}, typed as an Iterable. */
+     public Iterable<String> split(String input) {
+         final List<String> pieces = splitToList(input);
+         return pieces;
+     }
+ }
+
+ /** Produces unmodifiable snapshots of sets. */
+ public static class ImmutableSet {
+     /** Returns an unmodifiable copy of values that keeps the iteration order of the argument. */
+     public static <T> Set<T> copyOf(Set<T> values) {
+         // Copy first so later changes to the caller's set cannot show through.
+         final Set<T> snapshot = new LinkedHashSet<T>(values);
+         return Collections.unmodifiableSet(snapshot);
+     }
+ }
+ /** Produces unmodifiable snapshots of maps. */
+ public static class ImmutableMap {
+     /** Returns an unmodifiable copy of values that keeps the iteration order of the argument. */
+     public static <K,V> Map<K,V> copyOf(Map<K,V> values) {
+         // Copy first so later changes to the caller's map cannot show through.
+         final Map<K,V> snapshot = new LinkedHashMap<K,V>(values);
+         return Collections.unmodifiableMap(snapshot);
+     }
+ }
+ /** Produces unmodifiable snapshots of multimaps. */
+ public static class ImmutableMultimap {
+     /**
+      * Returns a multimap over an unmodifiable copy of the key map in which every
+      * value set is itself an unmodifiable copy (semi-deep copy, order preserved).
+      */
+     public static <K,V> Multimap<K,V> copyOf(Multimap<K,V> values) {
+         LinkedHashMap<K, Set<V>> frozen = new LinkedHashMap<K,Set<V>>();
+         for (Entry<K, Set<V>> pair : values.asMap().entrySet()) {
+             final Set<V> original = pair.getValue();
+             final Set<V> copy;
+             if (original.size() == 1) {
+                 // A one-element set is stored as an immutable singleton.
+                 copy = Collections.singleton(original.iterator().next());
+             } else {
+                 copy = Collections.unmodifiableSet(new LinkedHashSet<V>(original));
+             }
+             frozen.put(pair.getKey(), copy);
+         }
+         return new Multimap<K,V>(Collections.unmodifiableMap(frozen), null);
+     }
+ }
+
+ public static class FileUtilities {
+ public static final Charset UTF8 = Charset.forName("utf-8");
+
+ public static BufferedReader openFile(Class<?> class1, String file) {
+ return openFile(class1, file, UTF8);
+ }
+
+ public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
+ // URL path = null;
+ // String externalForm = null;
+ try {
+ final InputStream resourceAsStream = class1.getResourceAsStream(file);
+ if (charset == null) {
+ charset = UTF8;
+ }
+ InputStreamReader reader = new InputStreamReader(resourceAsStream, charset);
+ BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
+ return bufferedReader;
+ } catch (Exception e) {
+ String className = class1 == null ? null : class1.getCanonicalName();
+ String canonicalName = null;
+ try {
+ String relativeFileName = getRelativeFileName(class1, "../util/");
+ canonicalName = new File(relativeFileName).getCanonicalPath();
+ } catch (Exception e1) {
+ throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
+ + className, e);
+ }
+ throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + canonicalName + "; relative to class: "
+ + className, e);
+ }
+ }
+ public static String getRelativeFileName(Class<?> class1, String filename) {
+ URL resource = class1.getResource(filename);
+ String resourceString = resource.toString();
+ if (resourceString.startsWith("file:")) {
+ return resourceString.substring(5);
+ } else if (resourceString.startsWith("jar:file:")) {
+ return resourceString.substring(9);
+ } else {
+ throw new ICUUncheckedIOException("File not found: " + resourceString);
+ }
+ }
+ }
+
+ /** Helpers for diagnosing why a regular expression fails to match a string. */
+ static public class RegexUtilities {
+     /**
+      * Tries successively longer proper prefixes of s (lengths 1 to s.length() - 1) and stops
+      * at the first one that neither matches nor hit the end of input while matching.
+      * Returns that prefix length minus one, or s.length() - 1 if no prefix fails
+      * (0 for an empty s). The matcher is reset on every probe.
+      */
+     public static int findMismatch(Matcher m, CharSequence s) {
+         int prefixLength = 1;
+         while (prefixLength < s.length()) {
+             m.reset(s.subSequence(0, prefixLength));
+             final boolean matched = m.matches();
+             if (!matched && !m.hitEnd()) {
+                 break;
+             }
+             ++prefixLength;
+         }
+         return prefixLength - 1;
+     }
+
+     /** Returns s with the marker "☹" inserted at the position reported by findMismatch. */
+     public static String showMismatch(Matcher m, CharSequence s) {
+         final int failPoint = findMismatch(m, s);
+         final String before = String.valueOf(s.subSequence(0, failPoint));
+         final String after = String.valueOf(s.subSequence(failPoint, s.length()));
+         return before + "☹" + after;
+     }
+ }
+}
\ No newline at end of file
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.ULocale.Minimize;
+import com.ibm.icu.util.UResourceBundle;
+
+public class XLikelySubtags {
+
+ // Shared instance built with the no-argument constructor when this class is initialised.
+ private static final XLikelySubtags DEFAULT = new XLikelySubtags();
+
+ /** Returns the shared instance created during class initialisation. */
+ public static final XLikelySubtags getDefault() {
+ return DEFAULT;
+ }
+
+ /**
+  * Factory for the map objects that make up the nested lookup tables.
+  * The generic signatures are deliberately loose: make() returns whatever type the
+  * call site asks for, via unchecked conversions.
+  */
+ static abstract class Maker {
+ // Creates a new, empty container; the static type V is chosen by the caller (unchecked).
+ abstract <V> V make();
+
+ // Returns langTable.get(language); when that is null, a container from make() is
+ // stored under the key first and then returned.
+ @SuppressWarnings("unchecked")
+ public <K,V> V getSubtable(Map<K, V> langTable, final K language) {
+ V scriptTable = langTable.get(language);
+ if (scriptTable == null) {
+ langTable.put(language, scriptTable = (V) make());
+ }
+ return scriptTable;
+ }
+
+ // Maker whose make() returns a new HashMap.
+ static final Maker HASHMAP = new Maker() {
+ @Override
+ @SuppressWarnings("unchecked")
+ public Map<Object,Object> make() {
+ return new HashMap<Object,Object>();
+ }
+ };
+
+ // Maker whose make() returns a new TreeMap.
+ static final Maker TREEMAP = new Maker() {
+ @Override
+ @SuppressWarnings("unchecked")
+ public Map<Object,Object> make() {
+ return new TreeMap<Object,Object>();
+ }
+ };
+ }
+
+ /**
+  * Simple (single-subtag) alias data read from the "alias" table of the ICU "metadata"
+  * bundle, available in both directions: alias to canonical, and canonical to aliases.
+  */
+ public static class Aliases {
+     final Map<String, String> toCanonical;
+     final Multimap<String, String> toAliases;
+
+     /** Returns the canonical form recorded for alias, or alias itself when there is none. */
+     public String getCanonical(String alias) {
+         String mapped = toCanonical.get(alias);
+         if (mapped != null) {
+             return mapped;
+         }
+         return alias;
+     }
+
+     /**
+      * Returns the aliases recorded for canonical; when there are none, a singleton
+      * set holding canonical itself.
+      */
+     public Set<String> getAliases(String canonical) {
+         Set<String> known = toAliases.get(canonical);
+         if (known != null) {
+             return known;
+         }
+         return Collections.singleton(canonical);
+     }
+
+     /**
+      * Loads the alias table named by key from the metadata bundle. Entries are skipped when
+      * the alias contains "_", when the reason is "overlong", or when the first
+      * space-separated replacement contains "_".
+      */
+     public Aliases(String key) {
+         UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+         UResourceBundle aliasTable = metadata.get("alias").get(key);
+         Map<String, String> collected = new HashMap<String, String>();
+         for (int index = 0; index < aliasTable.getSize(); index++) {
+             UResourceBundle item = aliasTable.get(index);
+             String from = item.getKey();
+             if (from.contains("_")) {
+                 continue; // only simple aliasing
+             }
+             if (item.get("reason").getString().equals("overlong")) {
+                 continue;
+             }
+             String replacement = item.get("replacement").getString();
+             int spaceAt = replacement.indexOf(' ');
+             String firstReplacement = replacement;
+             if (spaceAt >= 0) {
+                 // Several replacements are listed; keep only the first.
+                 firstReplacement = replacement.substring(0, spaceAt);
+             }
+             if (!firstReplacement.contains("_")) { // only simple aliasing
+                 collected.put(from, firstReplacement);
+             }
+         }
+         if (key.equals("language")) {
+             collected.put("mo", "ro"); // special case
+         }
+         toCanonical = Collections.unmodifiableMap(collected);
+         toAliases = Multimaps.invertFrom(collected, HashMultimap.<String,String>create());
+     }
+ }
+
+ /**
+  * Immutable language / script / region triple. An absent script or region is
+  * represented by the empty string, not by null.
+  */
+ public static class LSR {
+     public final String language;
+     public final String script;
+     public final String region;
+
+     public static Aliases LANGUAGE_ALIASES = new Aliases("language");
+     public static Aliases REGION_ALIASES = new Aliases("territory");
+
+     /** Creates an LSR from the three subtags exactly as given (no case folding or validation). */
+     public static LSR from(String language, String script, String region) {
+         return new LSR(language, script, region);
+     }
+
+     // from http://unicode.org/reports/tr35/#Unicode_language_identifier
+     // but simplified to requiring language subtag, and nothing beyond region
+     // #1 is language
+     // #2 is script
+     // #3 is region
+//     static final String pat =
+//             "language_id = (unicode_language_subtag)"
+//                     + "(?:sep(unicode_script_subtag))?"
+//                     + "(?:sep(unicode_region_subtag))?;\n"
+//                     + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
+//                     + "unicode_script_subtag = alpha{4};\n"
+//                     + "unicode_region_subtag = alpha{2}|digit{3};\n"
+//                     + "sep = [-_];\n"
+//                     + "digit = [0-9];\n"
+//                     + "alpha = [A-Za-z];\n"
+//                     ;
+//     static {
+//         System.out.println(pat);
+//         System.out.println(new UnicodeRegex().compileBnf(pat));
+//     }
+//     static final Pattern LANGUAGE_PATTERN = Pattern.compile(
+//             "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
+//             //new UnicodeRegex().compileBnf(pat)
+//             );
+//
+     // TODO: fix this to check for format. Not required, since this is only called internally, but safer for the future.
+     /**
+      * Parses "lang", "lang-Script", "lang-region" or "lang-Script-region" ('-' or '_' as
+      * separator). The language is lower-cased; script and region are taken as written.
+      * A second subtag shorter than four characters is treated as the region, and in that
+      * case any third subtag is ignored.
+      *
+      * @throws ICUException if there are more than three subtags
+      */
+     static LSR from(String languageIdentifier) {
+         String[] parts = languageIdentifier.split("[-_]");
+         if (parts.length < 1 || parts.length > 3) {
+             throw new ICUException("too many subtags");
+         }
+         // Locale.ROOT keeps the result independent of the default locale
+         // (e.g. "I" must become "i" even under a Turkish default locale).
+         String lang = parts[0].toLowerCase(java.util.Locale.ROOT);
+         String p2 = parts.length < 2 ? "": parts[1];
+         String p3 = parts.length < 3 ? "": parts[2];
+         return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
+
+         // Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
+         // if (!matcher.matches()) {
+         // return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
+         // }
+         // System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
+         // throw new ICUException("invalid language id");
+     }
+
+     /** Creates an LSR from the language, script and country of locale, without maximizing. */
+     public static LSR from(ULocale locale) {
+         return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
+     }
+
+     /** Shorthand for fromMaximalized(language, script, country) of the given locale. */
+     public static LSR fromMaximalized(ULocale locale) {
+         return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
+     }
+
+     /**
+      * Replaces language and region by their canonical forms (per the alias tables) and
+      * maximizes the result with the default likely-subtags data. The script is passed through as is.
+      */
+     public static LSR fromMaximalized(String language, String script, String region) {
+         String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
+         // script is ok
+         String canonicalRegion = REGION_ALIASES.getCanonical(region); // getCanonical(REGION_ALIASES.get(region));
+
+         return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
+     }
+
+     public LSR(String language, String script, String region) {
+         this.language = language;
+         this.script = script;
+         this.region = region;
+     }
+
+     /** Returns the subtags joined with '-', leaving out an empty script or region. */
+     @Override
+     public String toString() {
+         StringBuilder result = new StringBuilder(language);
+         if (!script.isEmpty()) {
+             result.append('-').append(script);
+         }
+         if (!region.isEmpty()) {
+             result.append('-').append(region);
+         }
+         return result.toString();
+     }
+     /**
+      * Returns an LSR in which every non-null argument replaces the corresponding field;
+      * returns this when all three arguments are null.
+      */
+     public LSR replace(String language2, String script2, String region2) {
+         if (language2 == null && script2 == null && region2 == null) return this;
+         return new LSR(
+                 language2 == null ? language: language2,
+                 script2 == null ? script : script2,
+                 region2 == null ? region : region2);
+     }
+     /** Field-wise equality; false (rather than an exception) for null or for objects of another type. */
+     @Override
+     public boolean equals(Object obj) {
+         if (this == obj) {
+             return true;
+         }
+         if (!(obj instanceof LSR)) {
+             return false;
+         }
+         LSR other = (LSR) obj;
+         return language.equals(other.language)
+                 && script.equals(other.script)
+                 && region.equals(other.region);
+     }
+     @Override
+     public int hashCode() {
+         return Objects.hash(language, script, region);
+     }
+ }
+
+ // Lookup table nested as language -> script -> region -> LSR; built once by init().
+ final Map<String, Map<String, Map<String, LSR>>> langTable;
+
+ /** Builds the table from the data returned by getDefaultRawData(), passing skipNoncanonical = true. */
+ public XLikelySubtags() {
+ this(getDefaultRawData(), true);
+ }
+
+ private static Map<String, String> getDefaultRawData() {
+ Map<String, String> rawData = new TreeMap<String, String>();
+ UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
+ for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
+ String key = enumer.nextElement();
+ rawData.put(key, bundle.getString(key));
+ }
+ return rawData;
+ }
+
+ /**
+  * Builds the lookup table from rawData, whose keys and values are both parsed
+  * with LSR.from(String). Both arguments are simply forwarded to init().
+  */
+ public XLikelySubtags(Map<String, String> rawData, boolean skipNoncanonical) {
+ this.langTable = init(rawData, skipNoncanonical);
+ }
+
+ /**
+  * Builds the nested table language -> script -> region -> target LSR from rawData.
+  * For each raw entry the source and target are parsed with LSR.from(String); the mapping is
+  * stored, and stored again under alias spellings of the source language/region. Afterwards
+  * two "hack" steps add extra und entries, and the table is checked so that "und" exists and
+  * every language and every script level contains the "" key.
+  *
+  * NOTE(review): the skipNoncanonical parameter is not read anywhere in this method.
+  *
+  * @throws IllegalArgumentException if one of the final structure checks fails
+  */
+ private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData, boolean skipNoncanonical) {
+ // prepare alias info. We want a mapping from the canonical form to all aliases
+
+ //Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
+ // getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
+
+ // Don't bother with script; there are none
+
+ //Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
+ // getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
+
+ Maker maker = Maker.TREEMAP;
+ Map<String, Map<String, Map<String, LSR>>> result = maker.make();
+// Splitter bar = Splitter.on('_');
+// int last = -1;
+ // set the base data
+ // internCache lets equal target LSRs share a single instance across the table.
+ Map<LSR,LSR> internCache = new HashMap<LSR,LSR>();
+ for (Entry<String, String> sourceTarget : rawData.entrySet()) {
+ LSR ltp = LSR.from(sourceTarget.getKey());
+ final String language = ltp.language;
+ final String script = ltp.script;
+ final String region = ltp.region;
+
+ ltp = LSR.from(sourceTarget.getValue());
+ String languageTarget = ltp.language;
+ final String scriptTarget = ltp.script;
+ final String regionTarget = ltp.region;
+
+ set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
+ // now add aliases
+ // getAliases returns a singleton of the argument itself when no aliases are recorded.
+ Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
+// if (languageAliases.isEmpty()) {
+// languageAliases = Collections.singleton(language);
+// }
+ Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
+// if (regionAliases.isEmpty()) {
+// regionAliases = Collections.singleton(region);
+// }
+ for (String languageAlias : languageAliases) {
+ for (String regionAlias : regionAliases) {
+ // The un-aliased combination was already stored above.
+ if (languageAlias.equals(language) && regionAlias.equals(region)) {
+ continue;
+ }
+ set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
+ }
+ }
+ }
+ // hack
+ set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
+
+ // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
+ // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
+
+ // so und-Latn-GH => ak-Latn-GH
+ // NOTE(review): result.get("und") is dereferenced here, before the containsKey("und")
+ // check further down. The und/Latn hack above guarantees the "und" entry exists, but
+ // undScriptMap.get("") is null (NullPointerException in the loop below) when rawData
+ // contains no source with language "und" and an empty script.
+ Map<String, Map<String, LSR>> undScriptMap = result.get("und");
+ Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
+ for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
+ final LSR value = regionEntry.getValue();
+ // NOTE(review): if value.script were empty this would write into the map being
+ // iterated; presumably targets always carry a script - confirm.
+ set(result, "und", value.script, value.region, value);
+ }
+ //
+ // check that every level has "" (or "und")
+ if (!result.containsKey("und")) {
+ throw new IllegalArgumentException("failure: base");
+ }
+ for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
+ String lang = langEntry.getKey();
+ final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
+ if (!scriptMap.containsKey("")) {
+ throw new IllegalArgumentException("failure: " + lang);
+ }
+ for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
+ String script = scriptEntry.getKey();
+ final Map<String, LSR> regionMap = scriptEntry.getValue();
+ if (!regionMap.containsKey("")) {
+ throw new IllegalArgumentException("failure: " + lang + "-" + script);
+ }
+ // for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
+ // String region = regionEntry.getKey();
+ // LSR value = regionEntry.getValue();
+ // }
+ }
+ }
+ return result;
+ }
+
+// private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
+// for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
+// final String alias = e.getKey();
+// if (alias.contains("_")) {
+// continue; // only do simple aliasing
+// }
+// String canonical = getCanonical(e.getValue());
+// canonicalToAlias.put(canonical, alias);
+// }
+// }
+
+// private static String getCanonical(R2<List<String>, String> aliasAndReason) {
+// if (aliasAndReason == null) {
+// return null;
+// }
+// if (aliasAndReason.get1().equals("overlong")) {
+// return null;
+// }
+// List<String> value = aliasAndReason.get0();
+// if (value.size() != 1) {
+// return null;
+// }
+// final String canonical = value.iterator().next();
+// if (canonical.contains("_")) {
+// return null; // only do simple aliasing
+// }
+// return canonical;
+// }
+
+ private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
+ final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
+ LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
+ LSR oldValue = internCache.get(newValue);
+ if (oldValue == null) {
+ internCache.put(newValue, newValue);
+ oldValue = newValue;
+ }
+ set(langTable, language, script, region, oldValue);
+ }
+
+ /**
+  * Stores newValue at langTable[language][script][region], creating the intermediate
+  * maps on demand. A value already present for that position is overwritten.
+  */
+ private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
+     Map<String, Map<String, LSR>> scriptTable = Maker.TREEMAP.getSubtable(langTable, language);
+     Map<String, LSR> regionTable = Maker.TREEMAP.getSubtable(scriptTable, script);
+     regionTable.put(region, newValue);
+ }
+
+ /**
+  * Convenience overload: converts source with ULocale.forLanguageTag and maximizes
+  * the resulting locale.
+  * @param source language tag to convert
+  * @return the result of maximize(ULocale) for the converted tag
+  */
+ public LSR maximize(String source) {
+     final ULocale parsed = ULocale.forLanguageTag(source);
+     return maximize(parsed);
+ }
+
+ /** Convenience overload: maximizes the language, script and country of source. */
+ public LSR maximize(ULocale source) {
+     final String language = source.getLanguage();
+     final String script = source.getScript();
+     final String region = source.getCountry();
+     return maximize(language, script, region);
+ }
+
+ /** Convenience overload: maximizes the three fields of source. */
+ public LSR maximize(LSR source) {
+     final String language = source.language;
+     final String script = source.script;
+     final String region = source.region;
+     return maximize(language, script, region);
+ }
+
+ // public static ULocale addLikelySubtags(ULocale loc) {
+ //
+ // }
+
+ /**
+  * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+  * <p>Each level (language, script, region) is looked up in the table, falling back to the
+  * "und" / "" entry when the given subtag has no entry of its own. A subtag supplied by the
+  * caller is kept in the result unless it was the placeholder itself ("und", "" or "Zzzz",
+  * "" or "ZZ") and was found in the table.
+  * @return the maximized LSR, or null when neither the region nor the "" fallback has an entry
+  */
+ public LSR maximize(String language, String script, String region) {
+     // Language level: an unknown language falls back to the "und" table.
+     Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
+     final boolean keepLanguage = scriptTable == null || !language.equals("und");
+     if (scriptTable == null) { // cannot happen if language == "und"
+         scriptTable = langTable.get("und");
+     }
+
+     // Script level: "Zzzz" is treated like an absent script.
+     if (script.equals("Zzzz")) {
+         script = "";
+     }
+     Map<String, LSR> regionTable = scriptTable.get(script);
+     final boolean keepScript = regionTable == null || !script.isEmpty();
+     if (regionTable == null) { // cannot happen if script == ""
+         regionTable = scriptTable.get("");
+     }
+
+     // Region level: "ZZ" is treated like an absent region.
+     if (region.equals("ZZ")) {
+         region = "";
+     }
+     LSR result = regionTable.get(region);
+     final boolean keepRegion = result == null || !region.isEmpty();
+     if (result == null) { // cannot happen if region == ""
+         result = regionTable.get("");
+         if (result == null) {
+             return null;
+         }
+     }
+
+     // replace() returns result itself when all three arguments are null.
+     return result.replace(
+             keepLanguage ? language : null,
+             keepScript ? script : null,
+             keepRegion ? region : null);
+ }
+
+ /**
+  * Maximizes the input and then removes the subtags that maximization would add back.
+  * Script and region are both dropped when they equal the defaults recorded for the
+  * maximized language. Otherwise at most one of them is dropped, depending on fieldToFavor
+  * and on whether maximizing without the region reproduces the same result.
+  *
+  * NOTE(review): assumes maximize() does not return null and that the maximized language has
+  * a "" / "" default entry (init() checks the latter for tables it builds) - confirm for custom data.
+  */
+ private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
+     LSR result = maximize(languageIn, scriptIn, regionIn);
+
+     // We could try just a series of checks, like:
+     // LSR result2 = addLikelySubtags(languageIn, "", "");
+     // if result.equals(result2) return result2;
+     // However, we can optimize 2 of the cases:
+     // (languageIn, "", "")
+     // (languageIn, "", regionIn)
+
+     Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
+
+     Map<String, LSR> regionTable0 = scriptTable.get("");
+     LSR value00 = regionTable0.get("");
+     boolean favorRegionOk = false;
+     if (result.script.equals(value00.script)) { //script is default
+         if (result.region.equals(value00.region)) {
+             return result.replace(null, "", "");
+         } else if (fieldToFavor == Minimize.FAVOR_REGION) {
+             // Enum constant referenced through its type rather than through the parameter.
+             return result.replace(null, "", null);
+         } else {
+             favorRegionOk = true;
+         }
+     }
+
+     // The last case is not as easy to optimize.
+     // Maybe do later, but for now use the straightforward code.
+     LSR result2 = maximize(languageIn, scriptIn, "");
+     if (result2.equals(result)) {
+         return result.replace(null, null, "");
+     } else if (favorRegionOk) {
+         return result.replace(null, "", null);
+     }
+     return result;
+ }
+
+ private static <V> StringBuilder show(Map<String,V> map, String indent, StringBuilder output) {
+ String first = indent.isEmpty() ? "" : "\t";
+ for (Entry<String,V> e : map.entrySet()) {
+ String key = e.getKey();
+ V value = e.getValue();
+ output.append(first + (key.isEmpty() ? "∅" : key));
+ if (value instanceof Map) {
+ show((Map)value, indent+"\t", output);
+ } else {
+ output.append("\t" + Objects.toString(value)).append("\n");
+ }
+ first = indent;
+ }
+ return output;
+ }
+
+ /** Returns the tab-separated dump of the whole lookup table produced by show(). */
+ @Override
+ public String toString() {
+     final StringBuilder dump = new StringBuilder();
+     show(langTable, "", dump);
+     return dump.toString();
+ }
+
+ // public static void main(String[] args) {
+ // System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
+ //
+ // final Map<String, String> rawData = sdi.getLikelySubtags();
+ // XLikelySubtags ls = XLikelySubtags.getDefault();
+ // System.out.println(ls);
+ // ls.maximize(new ULocale("iw"));
+ // if (true) return;
+ //
+ // LanguageTagParser ltp = new LanguageTagParser();
+ //
+ // // get all the languages, scripts, and regions
+ // Set<String> languages = new TreeSet<String>();
+ // Set<String> scripts = new TreeSet<String>();
+ // Set<String> regions = new TreeSet<String>();
+ // Counter<String> languageCounter = new Counter<String>();
+ // Counter<String> scriptCounter = new Counter<String>();
+ // Counter<String> regionCounter = new Counter<String>();
+ //
+ // for (Entry<String, String> sourceTarget : rawData.entrySet()) {
+ // final String source = sourceTarget.getKey();
+ // ltp.set(source);
+ // languages.add(ltp.getLanguage());
+ // scripts.add(ltp.getScript());
+ // regions.add(ltp.getRegion());
+ // final String target = sourceTarget.getValue();
+ // ltp.set(target);
+ // add(target, languageCounter, ltp.getLanguage(), 1);
+ // add(target, scriptCounter, ltp.getScript(), 1);
+ // add(target, regionCounter, ltp.getRegion(), 1);
+ // }
+ // ltp.set("und-Zzzz-ZZ");
+ // languageCounter.add(ltp.getLanguage(), 1);
+ // scriptCounter.add(ltp.getScript(), 1);
+ // regionCounter.add(ltp.getRegion(), 1);
+ //
+ // if (SHORT) {
+ // removeSingletons(languages, languageCounter);
+ // removeSingletons(scripts, scriptCounter);
+ // removeSingletons(regions, regionCounter);
+ // }
+ //
+ // System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
+ // System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
+ // System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
+ //
+ // int maxCount = Integer.MAX_VALUE;
+ //
+ // int counter = maxCount;
+ // long tempTime = System.nanoTime();
+ // newMax:
+ // for (String language : languages) {
+ // for (String script : scripts) {
+ // for (String region : regions) {
+ // if (--counter < 0) break newMax;
+ // LSR result = ls.maximize(language, script, region);
+ // }
+ // }
+ // }
+ // long newMaxTime = System.nanoTime() - tempTime;
+ // System.out.println("newMaxTime: " + newMaxTime);
+ //
+ // counter = maxCount;
+ // tempTime = System.nanoTime();
+ // newMin:
+ // for (String language : languages) {
+ // for (String script : scripts) {
+ // for (String region : regions) {
+ // if (--counter < 0) break newMin;
+ // LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
+ // }
+ // }
+ // }
+ // long newMinTime = System.nanoTime() - tempTime;
+ // System.out.println("newMinTime: " + newMinTime);
+ //
+ // // *****
+ //
+ // tempTime = System.nanoTime();
+ // counter = maxCount;
+ // oldMax:
+ // for (String language : languages) {
+ // for (String script : scripts) {
+ // for (String region : regions) {
+ // if (--counter < 0) break oldMax;
+ // ULocale tempLocale = new ULocale(language, script, region);
+ // ULocale max = ULocale.addLikelySubtags(tempLocale);
+ // }
+ // }
+ // }
+ // long oldMaxTime = System.nanoTime() - tempTime;
+ // System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
+ //
+ // counter = maxCount;
+ // tempTime = System.nanoTime();
+ // oldMin:
+ // for (String language : languages) {
+ // for (String script : scripts) {
+ // for (String region : regions) {
+ // if (--counter < 0) break oldMin;
+ // ULocale tempLocale = new ULocale(language, script, region);
+ // ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
+ // }
+ // }
+ // }
+ // long oldMinTime = System.nanoTime() - tempTime;
+ // System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
+ //
+ // counter = maxCount;
+ // testMain:
+ // for (String language : languages) {
+ // System.out.println(language);
+ // int tests = 0;
+ // for (String script : scripts) {
+ // for (String region : regions) {
+ // ++tests;
+ // if (--counter < 0) break testMain;
+ // LSR maxNew = ls.maximize(language, script, region);
+ // LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
+ // LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
+ //
+ // ULocale tempLocale = new ULocale(language, script, region);
+ // ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
+ // ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
+ // ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
+ //
+ // // check values
+ // final String maxNewS = String.valueOf(maxNew);
+ // final String maxOldS = maxOld.toLanguageTag();
+ // boolean sameMax = maxOldS.equals(maxNewS);
+ //
+ // final String minNewSS = String.valueOf(minNewS);
+ // final String minOldSS = minOldS.toLanguageTag();
+ // boolean sameMinS = minNewSS.equals(minOldSS);
+ //
+ // final String minNewRS = String.valueOf(minNewR);
+ // final String minOldRS = minOldR.toLanguageTag();
+ // boolean sameMinR = minNewRS.equals(minOldRS);
+ //
+ // if (sameMax && sameMinS && sameMinR) continue;
+ // System.out.println(new LSR(language, script, region)
+ // + "\tmax: " + maxNew
+ // + (sameMax ? "" : "≠" + maxOldS)
+ // + "\tminS: " + minNewS
+ // + (sameMinS ? "" : "≠" + minOldS)
+ // + "\tminR: " + minNewR
+ // + (sameMinR ? "" : "≠" + minOldR)
+ // );
+ // }
+ // }
+ // System.out.println(language + ": " + tests);
+ // }
+ // }
+ //
+ // private static void add(String target, Counter<String> languageCounter, String language, int count) {
+ // if (language.equals("aa")) {
+ // int debug = 0;
+ // }
+ // languageCounter.add(language, count);
+ // }
+ //
+ // private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
+ // for (String s : languageCounter) {
+ // final long count = languageCounter.get(s);
+ // if (count <= 1) {
+ // languages.remove(s);
+ // }
+ // }
+ // }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.Predicate;
+
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.Row;
+import com.ibm.icu.impl.Row.R4;
+import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
+import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder;
+import com.ibm.icu.text.LocaleDisplayNames;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundleIterator;
+
+public class XLocaleDistance {
+
+ // When true, the static initializer prints the override rules to System.out as XML-like text.
+ static final boolean PRINT_OVERRIDES = false;
+
+ // Returned by the distance methods once the accumulated distance reaches the caller's threshold.
+ public static final int ABOVE_THRESHOLD = 100;
+
+ // Wildcard key used in the distance tables; fixAny() maps "*" to this value.
+ @Deprecated
+ public static final String ANY = "�"; // matches any character. Uses value above any subtag.
+
+ // Translates the "*" wildcard spelling into the ANY key; every other string is returned as is.
+ private static String fixAny(String string) {
+     if ("*".equals(string)) {
+         return ANY;
+     }
+     return string;
+ }
+
+ static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);
+
+ /**
+  * Reads the "written" rules under "languageMatchingNew" in the ICU supplemental data.
+  * Each row is (desired, supported, distance, oneway); oneway is true only when the rule
+  * has a fourth string and that string is "1".
+  *
+  * @return an unmodifiable list of frozen rows, in iteration order of the resource
+  */
+ private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
+     ICUResourceBundle supplemental = LocaleMatcher.getICUSupplementalData();
+     ICUResourceBundle matchingRoot = supplemental.findTopLevel("languageMatchingNew");
+     ICUResourceBundle writtenRules = (ICUResourceBundle) matchingRoot.get("written");
+
+     List<R4<String, String, Integer, Boolean>> rows = new ArrayList<R4<String, String, Integer, Boolean>>();
+     UResourceBundleIterator ruleIterator = writtenRules.getIterator();
+     while (ruleIterator.hasNext()) {
+         ICUResourceBundle rule = (ICUResourceBundle) ruleIterator.next();
+         boolean oneway = rule.getSize() > 3 && "1".equals(rule.getString(3));
+         String desired = rule.getString(0);
+         String supported = rule.getString(1);
+         int distance = Integer.parseInt(rule.getString(2));
+         // note: .freeze returning wrong type, so casting.
+         R4<String, String, Integer, Boolean> row =
+                 (R4<String, String, Integer, Boolean>) Row.of(desired, supported, distance, oneway).freeze();
+         rows.add(row);
+     }
+     return Collections.unmodifiableList(rows);
+ }
+
+ /**
+  * Reads the "paradigmLocales" string array under "languageMatchingInfo"/"written" in the
+  * ICU supplemental data.
+  *
+  * @return an unmodifiable set of the locale strings
+  */
+ private static Set<String> xGetParadigmLocales() {
+     ICUResourceBundle supplemental = LocaleMatcher.getICUSupplementalData();
+     ICUResourceBundle matchingInfo = supplemental.findTopLevel("languageMatchingInfo");
+     // paradigmLocales{ "en", "en-GB",... }
+     ICUResourceBundle paradigmBundle =
+             (ICUResourceBundle) matchingInfo.get("written").get("paradigmLocales");
+
+     Set<String> locales = new HashSet<String>();
+     Collections.addAll(locales, paradigmBundle.getStringArray());
+     return Collections.unmodifiableSet(locales);
+ }
+
+ /**
+  * Reads the "matchVariable" table under "languageMatchingInfo"/"written" in the ICU
+  * supplemental data, mapping each key to its string value.
+  *
+  * @return an unmodifiable map from variable name to its value string
+  */
+ private static Map<String, String> xGetMatchVariables() {
+     ICUResourceBundle supplemental = LocaleMatcher.getICUSupplementalData();
+     ICUResourceBundle matchingInfo = supplemental.findTopLevel("languageMatchingInfo");
+     // matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}
+     ICUResourceBundle variableBundle =
+             (ICUResourceBundle) matchingInfo.get("written").get("matchVariable");
+
+     Map<String, String> variables = new HashMap<String, String>();
+     Enumeration<String> names = variableBundle.getKeys();
+     while (names.hasMoreElements()) {
+         String name = names.nextElement();
+         variables.put(name, variableBundle.getString(name));
+     }
+     return Collections.unmodifiableMap(variables);
+ }
+
+ /**
+  * Builds the region containment data from a hard-coded parent-to-children table, then expands
+  * it with fill(), starting at "001", so that each container reached maps to everything nested
+  * beneath it and not only to its direct children.
+  *
+  * @return an immutable copy of the expanded container-to-contained multimap
+  */
+ private static Multimap<String, String> xGetContainment() {
+ // Direct containment only: the first argument of each putAll is the container,
+ // the remaining arguments are its immediate members.
+ TreeMultimap<String,String> containment = TreeMultimap.create();
+ containment
+ .putAll("001", "019", "002", "150", "142", "009")
+ .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
+ .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
+ .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
+ .putAll("142", "145", "143", "030", "034", "035")
+ .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
+ .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
+ .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
+ .putAll("150", "154", "155", "151", "039")
+ .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
+ .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
+ .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
+ .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
+ .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
+ .putAll("019", "021", "013", "029", "005", "003", "419")
+ .putAll("002", "015", "011", "017", "014", "018")
+ .putAll("021", "BM", "CA", "GL", "PM", "US")
+ .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
+ .putAll("003", "021", "013", "029")
+ .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
+ .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
+ .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
+ .putAll("419", "013", "029", "005")
+ .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
+ .putAll("053", "AU", "NF", "NZ")
+ .putAll("054", "FJ", "NC", "PG", "SB", "VU")
+ .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
+ .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
+ .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
+ .putAll("009", "053", "054", "057", "061", "QO")
+ .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
+ ;
+ //Can't use following, because data from CLDR is discarded
+// ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
+// UResourceBundle territoryContainment = suppData.get("territoryContainment");
+// for (int i = 0 ; i < territoryContainment.getSize(); i++) {
+// UResourceBundle mapping = territoryContainment.get(i);
+// String parent = mapping.getKey();
+// for (int j = 0 ; j < mapping.getSize(); j++) {
+// String child = mapping.getString(j);
+// containment.put(parent,child);
+// System.out.println(parent + " => " + child);
+// }
+// }
+ // Expand the direct table recursively from "001"; only containers reached from there are filled in.
+ TreeMultimap<String,String> containmentResolved = TreeMultimap.create();
+ fill("001", containment, containmentResolved);
+ return ImmutableMultimap.copyOf(containmentResolved);
+ }
+
+ /**
+  * Recursively records, under {@code region}, its direct members from {@code containment}
+  * plus everything recorded for each of those members.
+  *
+  * @param region the container to expand
+  * @param containment direct container-to-member data
+  * @param toAddTo receives the expanded entries
+  * @return the set now stored for {@code region} in {@code toAddTo}, or an empty set when
+  *         {@code containment} has no entry for {@code region}
+  */
+ private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
+     Set<String> direct = containment.get(region);
+     if (direct != null) {
+         // do top level
+         toAddTo.putAll(region, direct);
+         // then recursively
+         for (String child : direct) {
+             Set<String> nested = fill(child, containment, toAddTo);
+             toAddTo.putAll(region, nested);
+         }
+         return toAddTo.get(region);
+     }
+     return Collections.emptySet();
+ }
+
+
+ // Container region -> every region nested beneath it (as produced by xGetContainment()).
+ static final Multimap<String,String> CONTAINER_TO_CONTAINED;
+ // Container region -> only those nested regions that are not themselves containers.
+ static final Multimap<String,String> CONTAINER_TO_CONTAINED_FINAL;
+ static {
+     // Multimap<String, String> containerToContainedTemp = xGetContainment();
+     // fill(Region.getInstance("001"), containerToContainedTemp);
+
+     CONTAINER_TO_CONTAINED = xGetContainment();
+
+     Multimap<String, String> leavesByContainer = TreeMultimap.create();
+     for (Entry<String, Set<String>> containerEntry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+         for (String member : containerEntry.getValue()) {
+             if (CONTAINER_TO_CONTAINED.get(member) != null) {
+                 // member is itself a container, so it is not a final region
+                 continue;
+             }
+             leavesByContainer.put(containerEntry.getKey(), member);
+         }
+     }
+     CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(leavesByContainer);
+ }
+
+ final static private Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));
+
+ // end of data from CLDR
+
+ private final DistanceTable languageDesired2Supported;
+ private final RegionMapper regionMapper;
+ private final int defaultLanguageDistance;
+ private final int defaultScriptDistance;
+ private final int defaultRegionDistance;
+
+ /**
+  * Base type for one level of the distance lookup. The non-abstract methods here supply
+  * defaults: compact() returns this, the internal accessors return null, and isEmpty() is true.
+  */
+ @Deprecated
+ public static abstract class DistanceTable {
+ // Looks up the distance between a desired and a supported subtag.
+ // NOTE(review): in StringDistanceTable a non-null 'table' receives the matched node's subtable,
+ // and 'starEquals' makes a wildcard match between equal strings return 0 — confirm for other subclasses.
+ abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);
+ // NOTE(review): in StringDistanceTable this returns the desired keys that have at least one
+ // entry whose distance is below 'threshold'.
+ abstract Set<String> getCloser(int threshold);
+ // String form of the table; 'abbreviate' is interpreted by the subclass.
+ abstract String toString(boolean abbreviate);
+ // Default: no compaction, returns this same table.
+ public DistanceTable compact() {
+ return this;
+ }
+ // public Integer getInternalDistance(String a, String b) {
+ // return null;
+ // }
+ // Default: no node is stored for any pair.
+ public DistanceNode getInternalNode(String any, String any2) {
+ return null;
+ }
+ // Default: no match map is available.
+ public Map<String, Set<String>> getInternalMatches() {
+ return null;
+ }
+ // Default: the table is considered empty.
+ public boolean isEmpty() {
+ return true;
+ }
+ }
+
+ /**
+  * A distance value for one desired/supported pair. This base node has no subtable;
+  * equality and hashing are based on the distance alone.
+  */
+ @Deprecated
+ public static class DistanceNode {
+     final int distance;
+
+     public DistanceNode(int distance) {
+         this.distance = distance;
+     }
+
+     /** Returns the table for the next level; always null for this base node. */
+     public DistanceTable getDistanceTable() {
+         return null;
+     }
+
+     @Override
+     public boolean equals(Object obj) {
+         if (obj instanceof DistanceNode) {
+             return ((DistanceNode) obj).distance == distance;
+         }
+         return false;
+     }
+     @Override
+     public int hashCode() {
+         return distance;
+     }
+     @Override
+     public String toString() {
+         return "\ndistance: " + distance;
+     }
+ }
+
+ private interface IdMapper<K,V> {
+ public V toId(K source);
+ }
+
+ static class IdMakerFull<T> implements IdMapper<T,Integer> {
+ private final Map<T, Integer> objectToInt = new HashMap<T, Integer>();
+ private final List<T> intToObject = new ArrayList<T>();
+ final String name; // for debugging
+
+ IdMakerFull(String name) {
+ this.name = name;
+ }
+
+ IdMakerFull() {
+ this("unnamed");
+ }
+
+ IdMakerFull(String name, T zeroValue) {
+ this(name);
+ add(zeroValue);
+ }
+
+ /**
+ * Return an id, making one if there wasn't one already.
+ */
+ public Integer add(T source) {
+ Integer result = objectToInt.get(source);
+ if (result == null) {
+ Integer newResult = intToObject.size();
+ objectToInt.put(source, newResult);
+ intToObject.add(source);
+ return newResult;
+ } else {
+ return result;
+ }
+ }
+
+ /**
+ * Return an id, or null if there is none.
+ */
+ @Override
+ public Integer toId(T source) {
+ return objectToInt.get(source);
+ // return value == null ? 0 : value;
+ }
+
+ /**
+ * Return the object for the id, or null if there is none.
+ */
+ public T fromId(int id) {
+ return intToObject.get(id);
+ }
+
+ /**
+ * Return interned object
+ */
+ public T intern(T source) {
+ return fromId(add(source));
+ }
+
+ public int size() {
+ return intToObject.size();
+ }
+ /**
+ * Same as add, except if the object didn't have an id, return null;
+ */
+ public Integer getOldAndAdd(T source) {
+ Integer result = objectToInt.get(source);
+ if (result == null) {
+ Integer newResult = intToObject.size();
+ objectToInt.put(source, newResult);
+ intToObject.add(source);
+ }
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return size() + ": " + intToObject;
+ }
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof IdMakerFull)) {
+ return false;
+ }
+ IdMakerFull<T> other = (IdMakerFull) obj;
+ return intToObject.equals(other.intToObject);
+ }
+ @Override
+ public int hashCode() {
+ return intToObject.hashCode();
+ }
+ }
+
+ /**
+  * A distance node that also carries the table for the next subtag level.
+  * Equality and hashing take both the distance and that table into account.
+  */
+ static class StringDistanceNode extends DistanceNode {
+     final DistanceTable distanceTable;
+
+     public StringDistanceNode(int distance, DistanceTable distanceTable) {
+         super(distance);
+         this.distanceTable = distanceTable;
+     }
+
+     /** Creates a node whose next-level table is a new, empty StringDistanceTable. */
+     StringDistanceNode(int distance) {
+         this(distance, new StringDistanceTable());
+     }
+
+     @Override
+     public boolean equals(Object obj) {
+         if (obj instanceof StringDistanceNode) {
+             StringDistanceNode that = (StringDistanceNode) obj;
+             return distance == that.distance && Objects.equals(distanceTable, that.distanceTable);
+         }
+         return false;
+     }
+     @Override
+     public int hashCode() {
+         return distance ^ Objects.hashCode(distanceTable);
+     }
+
+     /** Forwards to the next-level table, which must be a StringDistanceTable. */
+     public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
+         StringDistanceTable nextLevel = (StringDistanceTable) distanceTable;
+         nextLevel.addSubtables(desiredSub, supportedSub, r);
+     }
+     @Override
+     public String toString() {
+         return "distance: " + distance + "\n" + distanceTable;
+     }
+
+     /** Copies the entries of {@code value} into the next-level table; a null argument is ignored. */
+     public void copyTables(StringDistanceTable value) {
+         if (value == null) {
+             return;
+         }
+         ((StringDistanceTable) distanceTable).copy(value);
+     }
+
+     @Override
+     public DistanceTable getDistanceTable() {
+         return distanceTable;
+     }
+ }
+
+ /**
+  * Stores the language-level table and the region mapper, then reads the default distance for
+  * each level from the {@code <ANY, ANY>} entry of the language, script and region tables.
+  * The table passed in must be a StringDistanceTable that contains those entries.
+  */
+ public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
+     languageDesired2Supported = datadistancetable2;
+     this.regionMapper = regionMapper;
+
+     StringDistanceTable languageTable = (StringDistanceTable) languageDesired2Supported;
+     StringDistanceNode languageNode = (StringDistanceNode) languageTable.subtables.get(ANY).get(ANY);
+     defaultLanguageDistance = languageNode.distance;
+
+     StringDistanceTable scriptTable = (StringDistanceTable) languageNode.distanceTable;
+     StringDistanceNode scriptNode = (StringDistanceNode) scriptTable.subtables.get(ANY).get(ANY);
+     defaultScriptDistance = scriptNode.distance;
+
+     StringDistanceTable regionTable = (StringDistanceTable) scriptNode.distanceTable;
+     DistanceNode regionNode = regionTable.subtables.get(ANY).get(ANY);
+     defaultRegionDistance = regionNode.distance;
+ }
+
+ private static Map newMap() { // for debugging
+ return new TreeMap();
+ }
+
+ /**
+  * Internal class: a two-level map from desired subtag to supported subtag to a
+  * {@link DistanceNode}. Each node holds the distance for that pair and, through
+  * {@link DistanceNode#getDistanceTable()}, the table for the next subtag level.
+  */
+ @Deprecated
+ public static class StringDistanceTable extends DistanceTable {
+     final Map<String, Map<String, DistanceNode>> subtables;
+
+     StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
+         subtables = tables;
+     }
+     StringDistanceTable() {
+         this(newMap());
+     }
+
+     @Override
+     public boolean isEmpty() {
+         return subtables.isEmpty();
+     }
+
+     @Override
+     public boolean equals(Object obj) {
+         if (!(obj instanceof StringDistanceTable)) {
+             return false;
+         }
+         StringDistanceTable other = (StringDistanceTable) obj;
+         return subtables.equals(other.subtables);
+     }
+     @Override
+     public int hashCode() {
+         return subtables.hashCode();
+     }
+
+     /**
+      * Looks up the node for a pair, falling back to wildcard entries: the row for
+      * {@code desired} is used if present, otherwise the ANY row; within the row,
+      * {@code supported} is used if present, otherwise ANY; and when an explicit row offers
+      * neither, the ANY row is consulted the same way.
+      *
+      * @param desired desired subtag
+      * @param supported supported subtag
+      * @param distanceTable if non-null, receives the next-level table of the node found
+      *        (the node must then be a StringDistanceNode)
+      * @param starEquals if true, a match that went through any wildcard returns 0 when
+      *        {@code desired} equals {@code supported}
+      * @return 0 under the starEquals rule above, otherwise the distance of the node found
+      */
+     @Override
+     public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
+         boolean star = false;
+         Map<String, DistanceNode> sub2 = subtables.get(desired);
+         if (sub2 == null) {
+             sub2 = subtables.get(ANY); // <*, supported>
+             star = true;
+         }
+         DistanceNode value = sub2.get(supported); // <*/desired, supported>
+         if (value == null) {
+             value = sub2.get(ANY); // <*/desired, *>
+             if (value == null && !star) {
+                 sub2 = subtables.get(ANY); // <*, supported>
+                 value = sub2.get(supported);
+                 if (value == null) {
+                     value = sub2.get(ANY); // <*, *>
+                 }
+             }
+             star = true;
+         }
+         if (distanceTable != null) {
+             distanceTable.value = ((StringDistanceNode) value).distanceTable;
+         }
+         return starEquals && star && desired.equals(supported) ? 0 : value.distance;
+     }
+
+     /**
+      * Adds an entry for every pair in {@code other} that this table does not have yet, using
+      * the other node's distance. Existing entries are left untouched, and each new node
+      * starts with its own empty next-level table (the other node's table is not copied).
+      */
+     public void copy(StringDistanceTable other) {
+         for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
+             for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                 addSubtable(e1.getKey(), e2.getKey(), e2.getValue().distance);
+             }
+         }
+     }
+
+     /**
+      * Returns the node for the pair, creating it with {@code distance} if it is missing.
+      * An existing node is returned unchanged; its distance is not updated.
+      */
+     DistanceNode addSubtable(String desired, String supported, int distance) {
+         Map<String, DistanceNode> sub2 = subtables.get(desired);
+         if (sub2 == null) {
+             subtables.put(desired, sub2 = newMap());
+         }
+         DistanceNode oldNode = sub2.get(supported);
+         if (oldNode != null) {
+             return oldNode;
+         }
+
+         final StringDistanceNode newNode = new StringDistanceNode(distance);
+         sub2.put(supported, newNode);
+         return newNode;
+     }
+
+     /**
+      * Return null if value doesn't exist
+      */
+     private DistanceNode getNode(String desired, String supported) {
+         Map<String, DistanceNode> sub2 = subtables.get(desired);
+         if (sub2 == null) {
+             return null;
+         }
+         return sub2.get(supported);
+     }
+
+
+     /** add table for each subitem that matches and doesn't have a table already
+      */
+     public void addSubtables(
+             String desired, String supported,
+             Predicate<DistanceNode> action) {
+         DistanceNode node = getNode(desired, supported);
+         if (node == null) {
+             // get the distance it would have
+             Output<DistanceTable> node2 = new Output<DistanceTable>();
+             int distance = getDistance(desired, supported, node2, true);
+             // now add it
+             node = addSubtable(desired, supported, distance);
+             if (node2.value != null) {
+                 ((StringDistanceNode)node).copyTables((StringDistanceTable)(node2.value));
+             }
+         }
+         action.test(node);
+     }
+
+     /**
+      * Adds a script-level pair beneath every existing language pair that matches (an ANY
+      * argument matches every key), then makes sure the explicit language pair exists and
+      * gives it the script pair if its table is still empty.
+      */
+     public void addSubtables(String desiredLang, String supportedLang,
+             String desiredScript, String supportedScript,
+             int percentage) {
+
+         // add to all the values that have the matching desiredLang and supportedLang
+         for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+             String key1 = e1.getKey();
+             final boolean desiredIsKey = desiredLang.equals(key1);
+             if (desiredIsKey || desiredLang.equals(ANY)) {
+                 for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                     String key2 = e2.getKey();
+                     final boolean supportedIsKey = supportedLang.equals(key2);
+                     if (supportedIsKey || supportedLang.equals(ANY)) {
+                         DistanceNode value = e2.getValue();
+                         ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
+                     }
+                 }
+             }
+         }
+         // now add the sequence explicitly
+         StringDistanceTable dt = new StringDistanceTable();
+         dt.addSubtable(desiredScript, supportedScript, percentage);
+         CopyIfEmpty r = new CopyIfEmpty(dt);
+         addSubtables(desiredLang, supportedLang, r);
+     }
+
+     /**
+      * Adds a script/region pair beneath every existing language pair that matches (an ANY
+      * argument matches every key), then makes sure the explicit language pair exists and
+      * applies the script/region addition to it through an AddSub action.
+      */
+     public void addSubtables(String desiredLang, String supportedLang,
+             String desiredScript, String supportedScript,
+             String desiredRegion, String supportedRegion,
+             int percentage) {
+
+         // add to all the values that have the matching desiredLang and supportedLang
+         for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+             String key1 = e1.getKey();
+             final boolean desiredIsKey = desiredLang.equals(key1);
+             if (desiredIsKey || desiredLang.equals(ANY)) {
+                 for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                     String key2 = e2.getKey();
+                     final boolean supportedIsKey = supportedLang.equals(key2);
+                     if (supportedIsKey || supportedLang.equals(ANY)) {
+                         StringDistanceNode value = (StringDistanceNode) e2.getValue();
+                         ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+                     }
+                 }
+             }
+         }
+         // now add the sequence explicitly
+
+         StringDistanceTable dt = new StringDistanceTable();
+         dt.addSubtable(desiredRegion, supportedRegion, percentage);
+         AddSub r = new AddSub(desiredScript, supportedScript, dt);
+         addSubtables(desiredLang, supportedLang, r);
+     }
+
+     @Override
+     public String toString() {
+         return toString(false);
+     }
+
+     @Override
+     public String toString(boolean abbreviate) {
+         return toString(abbreviate, "", new IdMakerFull<Object>("interner"), new StringBuilder()).toString();
+     }
+
+     /**
+      * Appends a tab-indented dump of this table and its nested tables to {@code buffer}.
+      * When {@code abbreviate} is true, a map, node or table that {@code intern} has already
+      * seen is written as {@code #id} instead of being expanded again.
+      *
+      * @return {@code buffer}
+      */
+     public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
+         String indent2 = indent.isEmpty() ? "" : "\t";
+         Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
+         if (id != null) {
+             buffer.append(indent2).append('#').append(id).append('\n');
+         } else {
+             for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+                 final Map<String, DistanceNode> subsubtable = e1.getValue();
+                 buffer.append(indent2).append(e1.getKey());
+                 String indent3 = "\t";
+                 id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
+                 if (id != null) {
+                     buffer.append(indent3).append('#').append(id).append('\n');
+                 } else {
+                     for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
+                         DistanceNode value = e2.getValue();
+                         buffer.append(indent3).append(e2.getKey());
+                         id = abbreviate ? intern.getOldAndAdd(value) : null;
+                         if (id != null) {
+                             buffer.append('\t').append('#').append(id).append('\n');
+                         } else {
+                             buffer.append('\t').append(value.distance);
+                             final DistanceTable distanceTable = value.getDistanceTable();
+                             if (distanceTable != null) {
+                                 id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
+                                 if (id != null) {
+                                     buffer.append('\t').append('#').append(id).append('\n');
+                                 } else {
+                                     ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
+                                 }
+                             } else {
+                                 buffer.append('\n');
+                             }
+                         }
+                         // only the first entry of a row shares the line with the row key
+                         indent3 = indent+'\t';
+                     }
+                 }
+                 // only the first row uses the initial indent
+                 indent2 = indent;
+             }
+         }
+         return buffer;
+     }
+
+     @Override
+     public StringDistanceTable compact() {
+         return new CompactAndImmutablizer().compact(this);
+     }
+
+     /**
+      * Returns the desired keys that have at least one entry whose distance is below
+      * {@code threshold}.
+      */
+     @Override
+     public Set<String> getCloser(int threshold) {
+         Set<String> result = new HashSet<String>();
+         for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
+             String desired = e1.getKey();
+             for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
+                 if (e2.getValue().distance < threshold) {
+                     result.add(desired);
+                     break;
+                 }
+             }
+         }
+         return result;
+     }
+
+     /** Returns the stored distance for exactly this pair (no wildcard fallback), or null. */
+     public Integer getInternalDistance(String a, String b) {
+         DistanceNode dnode = getNode(a, b);
+         return dnode == null ? null : dnode.distance;
+     }
+
+     /** Returns the stored node for exactly this pair (no wildcard fallback), or null. */
+     @Override
+     public DistanceNode getInternalNode(String a, String b) {
+         return getNode(a, b);
+     }
+
+     /** Returns a fresh map from each desired key to a copy of its supported keys. */
+     @Override
+     public Map<String, Set<String>> getInternalMatches() {
+         Map<String, Set<String>> result = new LinkedHashMap<String, Set<String>>();
+         for (Entry<String, Map<String, DistanceNode>> entry : subtables.entrySet()) {
+             result.put(entry.getKey(), new LinkedHashSet<String>(entry.getValue().keySet()));
+         }
+         return result;
+     }
+ }
+
+ /**
+  * Action that copies a fixed table into a node's next-level table, but only when that
+  * table has no entries yet. Always reports true.
+  */
+ static class CopyIfEmpty implements Predicate<DistanceNode> {
+     private final StringDistanceTable toCopy;
+
+     CopyIfEmpty(StringDistanceTable resetIfNotNull) {
+         toCopy = resetIfNotNull;
+     }
+
+     @Override
+     public boolean test(DistanceNode node) {
+         final StringDistanceTable target = (StringDistanceTable) node.getDistanceTable();
+         final boolean untouched = target.subtables.isEmpty();
+         if (untouched) {
+             target.copy(toCopy);
+         }
+         return true;
+     }
+ }
+
+ static class AddSub implements Predicate<DistanceNode> {
+ private final String desiredSub;
+ private final String supportedSub;
+ private final CopyIfEmpty r;
+
+ AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
+ this.r = new CopyIfEmpty(distanceTableToCopy);
+ this.desiredSub = desiredSub;
+ this.supportedSub = supportedSub;
+ }
+ @Override
+ public boolean test(DistanceNode node) {
+ if (node == null) {
+ throw new IllegalArgumentException("bad structure");
+ } else {
+ ((StringDistanceNode)node).addSubtables(desiredSub, supportedSub, r);
+ }
+ return true;
+ }
+ }
+
+ /**
+  * Converts both locales with LSR.fromMaximalized (supported first, then desired) and
+  * returns the result of distanceRaw on the two LSRs.
+  */
+ public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) {
+     final LSR supportedSubtags = LSR.fromMaximalized(supported);
+     final LSR desiredSubtags = LSR.fromMaximalized(desired);
+     return distanceRaw(desiredSubtags, supportedSubtags, threshold, distanceOption);
+ }
+
+ /**
+  * Returns distance, from 0 to ABOVE_THRESHOLD, by passing the language, script and region
+  * fields of the two LSRs to the string-based overload.
+  * ULocales must be in canonical, addLikelySubtags format.
+  *
+  * @param desired the desired language/script/region
+  * @param supported the supported language/script/region
+  * @param threshold passed through to the string-based overload
+  * @param distanceOption passed through to the string-based overload
+  * @return the value returned by the string-based overload
+  */
+ public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) {
+     final String desiredLanguage = desired.language;
+     final String supportedLanguage = supported.language;
+     final String desiredScript = desired.script;
+     final String supportedScript = supported.script;
+     final String desiredRegion = desired.region;
+     final String supportedRegion = supported.region;
+     return distanceRaw(
+             desiredLanguage, supportedLanguage,
+             desiredScript, supportedScript,
+             desiredRegion, supportedRegion,
+             threshold, distanceOption);
+ }
+
+ // Selects how distanceRaw weights the levels: with SCRIPT_FIRST the language distance is
+ // shifted right by 2 and the script distance by 1 before they are added up.
+ public enum DistanceOption {NORMAL, SCRIPT_FIRST}
+
+ /**
+  * Returns distance, from 0 to ABOVE_THRESHOLD.
+  * ULocales must be in canonical, addLikelySubtags format.
+  * <p>
+  * The language, script and region distances are looked up level by level and summed; as soon
+  * as the running sum reaches {@code threshold}, ABOVE_THRESHOLD is returned. With
+  * SCRIPT_FIRST the language distance is shifted right by 2 and the script distance by 1.
+  * Equal regions add nothing. Otherwise the regions are mapped to partitions; a region whose
+  * partition id is empty is looked up in macroToPartitions, and when either side has such a
+  * partition collection the largest distance over all partition pairs is used.
+  */
+ public int distanceRaw(
+         String desiredLang, String supportedlang,
+         String desiredScript, String supportedScript,
+         String desiredRegion, String supportedRegion,
+         int threshold,
+         DistanceOption distanceOption) {
+
+     final boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
+     // Holds the table for the next level after each lookup.
+     final Output<DistanceTable> level = new Output<DistanceTable>();
+
+     // Language level.
+     int total = languageDesired2Supported.getDistance(desiredLang, supportedlang, level, true);
+     if (scriptFirst) {
+         total >>= 2;
+     }
+     if (total < 0) {
+         total = 0;
+     } else if (total >= threshold) {
+         return ABOVE_THRESHOLD;
+     }
+
+     // Script level.
+     int scriptPart = level.value.getDistance(desiredScript, supportedScript, level, true);
+     if (scriptFirst) {
+         scriptPart >>= 1;
+     }
+     total += scriptPart;
+     if (total >= threshold) {
+         return ABOVE_THRESHOLD;
+     }
+
+     if (desiredRegion.equals(supportedRegion)) {
+         return total;
+     }
+
+     // From here on we know the regions are not equal
+
+     final String desiredPartition = regionMapper.toId(desiredRegion);
+     final String supportedPartition = regionMapper.toId(supportedRegion);
+
+     // check for macros. If one is found, we take the maximum distance
+     // this could be optimized by adding some more structure, but probably not worth it.
+
+     Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null;
+     Collection<String> supportedPartitions = supportedPartition.isEmpty() ? regionMapper.macroToPartitions.get(supportedRegion) : null;
+
+     int regionPart;
+     if (desiredPartitions == null && supportedPartitions == null) {
+         regionPart = level.value.getDistance(desiredPartition, supportedPartition, null, false);
+     } else {
+         // make the code simple for now
+         if (desiredPartitions == null) {
+             desiredPartitions = Collections.singleton(desiredPartition);
+         }
+         if (supportedPartitions == null) {
+             supportedPartitions = Collections.singleton(supportedPartition);
+         }
+
+         regionPart = 0;
+         for (String desiredCandidate : desiredPartitions) {
+             for (String supportedCandidate : supportedPartitions) {
+                 int candidateDistance = level.value.getDistance(desiredCandidate, supportedCandidate, null, false);
+                 regionPart = Math.max(regionPart, candidateDistance);
+             }
+         }
+     }
+     total += regionPart;
+     return total >= threshold ? ABOVE_THRESHOLD : total;
+ }
+
+
+ // Shared instance; declared without an initializer, so it is assigned by a static initializer.
+ private static final XLocaleDistance DEFAULT;
+
+ /** Returns the shared default instance held in DEFAULT. */
+ public static XLocaleDistance getDefault() {
+ return DEFAULT;
+ }
+
+ static {
+ String[][] variableOverrides = {
+ {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"},
+
+ {"$cnsar", "HK+MO"},
+
+ {"$americas", "019"},
+
+ {"$maghreb", "MA+DZ+TN+LY+MR+EH"},
+ };
+ String[] paradigmRegions = {
+ "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT"
+ };
+ String[][] regionRuleOverrides = {
+ {"ar_*_$maghreb", "ar_*_$maghreb", "96"},
+ {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"},
+ {"ar_*_*", "ar_*_*", "95"},
+
+ {"en_*_$enUS", "en_*_$enUS", "96"},
+ {"en_*_$!enUS", "en_*_$!enUS", "96"},
+ {"en_*_*", "en_*_*", "95"},
+
+ {"es_*_$americas", "es_*_$americas", "96"},
+ {"es_*_$!americas", "es_*_$!americas", "96"},
+ {"es_*_*", "es_*_*", "95"},
+
+ {"pt_*_$americas", "pt_*_$americas", "96"},
+ {"pt_*_$!americas", "pt_*_$!americas", "96"},
+ {"pt_*_*", "pt_*_*", "95"},
+
+ {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"},
+ {"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"},
+ {"zh_Hant_*", "zh_Hant_*", "95"},
+
+ {"*_*_*", "*_*_*", "96"},
+ };
+
+ Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions);
+ for (String[] variableRule : variableOverrides) {
+ rmb.add(variableRule[0], variableRule[1]);
+ }
+ if (PRINT_OVERRIDES) {
+ System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">");
+ System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ")
+ + "\"/>");
+ for (String[] variableRule : variableOverrides) {
+ System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0]
+ + "\" value=\""
+ + variableRule[1]
+ + "\"/>");
+ }
+ }
+
+ final StringDistanceTable defaultDistanceTable = new StringDistanceTable();
+ final RegionMapper defaultRegionMapper = rmb.build();
+
+ Splitter bar = Splitter.on('_');
+
+ List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3];
+ sorted[0] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+ sorted[1] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+ sorted[2] = new ArrayList<Row.R4<List<String>, List<String>, Integer, Boolean>>();
+
+ // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region.
+ for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) {
+ String desiredRaw = info.get0();
+ String supportedRaw = info.get1();
+ List<String> desired = bar.splitToList(desiredRaw);
+ List<String> supported = bar.splitToList(supportedRaw);
+ Boolean oneway = info.get3();
+ int distance = desiredRaw.equals("*_*") ? 50 : info.get2();
+ int size = desired.size();
+
+ // for now, skip size == 3
+ if (size == 3) continue;
+
+ sorted[size-1].add(Row.of(desired, supported, distance, oneway));
+ }
+
+ for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) {
+ int debug = 0;
+ for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) {
+ List<String> desired = item2.get0();
+ List<String> supported = item2.get1();
+ Integer distance = item2.get2();
+ Boolean oneway = item2.get3();
+ add(defaultDistanceTable, desired, supported, distance);
+ if (oneway != Boolean.TRUE && !desired.equals(supported)) {
+ add(defaultDistanceTable, supported, desired, distance);
+ }
+ printMatchXml(desired, supported, distance, oneway);
+ }
+ }
+
+ // add new size=3
+ for (String[] rule : regionRuleOverrides) {
+ // if (PRINT_OVERRIDES) System.out.println("\t\t\t<languageMatch desired=\""
+ // + rule[0]
+ // + "\" supported=\""
+ // + rule[1]
+ // + "\" distance=\""
+ // + rule[2]
+ // + "\"/>");
+ if (rule[0].equals("en_*_*") || rule[1].equals("*_*_*")) {
+ int debug = 0;
+ }
+ List<String> desiredBase = new ArrayList<String>(bar.splitToList(rule[0]));
+ List<String> supportedBase = new ArrayList<String>(bar.splitToList(rule[1]));
+ Integer distance = 100-Integer.parseInt(rule[2]);
+ printMatchXml(desiredBase, supportedBase, distance, false);
+
+ Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2));
+ if (desiredRegions.isEmpty()) {
+ throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2));
+ }
+ Collection<String> supportedRegions = defaultRegionMapper.getIdsFromVariable(supportedBase.get(2));
+ if (supportedRegions.isEmpty()) {
+ throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2));
+ }
+ for (String desiredRegion2 : desiredRegions) {
+ desiredBase.set(2, desiredRegion2.toString()); // fix later
+ for (String supportedRegion2 : supportedRegions) {
+ supportedBase.set(2, supportedRegion2.toString()); // fix later
+ add(defaultDistanceTable, desiredBase, supportedBase, distance);
+ add(defaultDistanceTable, supportedBase, desiredBase, distance);
+ }
+ }
+ }
+ if (PRINT_OVERRIDES) {
+ System.out.println("\t\t</languageMatches>");
+ }
+
+ DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper);
+
+ if (false && PRINT_OVERRIDES) {
+ System.out.println(defaultRegionMapper);
+ System.out.println(defaultDistanceTable);
+ throw new IllegalArgumentException();
+ }
+ }
+
+ /**
+  * Prints one {@code <languageMatch>} XML element for a rule to standard output.
+  * Does nothing unless {@code PRINT_OVERRIDES} is set.
+  *
+  * @param desired subtags of the desired side of the rule
+  * @param supported subtags of the supported side of the rule
+  * @param distance value written to the {@code distance} attribute
+  * @param oneway {@code Boolean.TRUE} adds {@code oneway="true"}; {@code null} and
+  *     {@code Boolean.FALSE} both omit the attribute
+  */
+ private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) {
+     if (!PRINT_OVERRIDES) {
+         return;
+     }
+     String desiredStr = CollectionUtilities.join(desired, "_");
+     String supportedStr = CollectionUtilities.join(supported, "_");
+     String desiredName = fixedName(desired);
+     String supportedName = fixedName(supported);
+     // oneway is a boxed Boolean that the rule-loading code compares against Boolean.TRUE,
+     // i.e. it may be null. Compare instead of auto-unboxing so a null flag cannot throw
+     // a NullPointerException here.
+     String onewayAttribute = Boolean.TRUE.equals(oneway) ? "\"\toneway=\"true" : "";
+     System.out.println("\t\t\t<languageMatch"
+         + " desired=\"" + desiredStr
+         + "\"\tsupported=\"" + supportedStr
+         + "\"\tdistance=\"" + distance
+         + onewayAttribute
+         + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->");
+ }
+
+ /**
+  * Builds a human-readable label for a rule side, used in the XML comment printed by
+  * {@code printMatchXml}. Each subtag is replaced by its English display name, except
+  * that {@code "*"} (and, for the region position, values starting with {@code "$"})
+  * are kept as written. The pieces are joined with {@code "; "}.
+  *
+  * <p>Previously the display names were accumulated in a scratch buffer that was never
+  * returned, so the label always consisted of the raw codes.
+  *
+  * @param match one to three subtags: language, then optionally script, then region
+  * @return the joined label; lists of any other size are joined unchanged
+  */
+ private static String fixedName(List<String> match) {
+     // Work on a copy so the caller's list is never modified.
+     List<String> alt = new ArrayList<String>(match);
+     int size = alt.size();
+     if (size < 1 || size > 3) {
+         return CollectionUtilities.join(alt, "; ");
+     }
+     String language = alt.get(0);
+     if (!language.equals("*")) {
+         alt.set(0, english.languageDisplayName(language));
+     }
+     if (size >= 2) {
+         String script = alt.get(1);
+         if (!script.equals("*")) {
+             alt.set(1, english.scriptDisplayName(script));
+         }
+     }
+     if (size == 3) {
+         String region = alt.get(2);
+         // "$name" / "$!name" are match variables, not region codes, so they have no display name.
+         if (!region.equals("*") && !region.startsWith("$")) {
+             alt.set(2, english.regionDisplayName(region));
+         }
+     }
+     return CollectionUtilities.join(alt, "; ");
+ }
+
+ /**
+  * Adds one rule to a distance table. The rule may have one level (language), two levels
+  * (language, script) or three levels (language, script, region); both sides must have
+  * the same number of levels. Every subtag is passed through {@code fixAny} first.
+  *
+  * @param languageDesired2Supported table that receives the rule
+  * @param desired subtags of the desired side
+  * @param supported subtags of the supported side
+  * @param percentage value stored for the rule
+  * @throws IllegalArgumentException if the sides differ in size or the size is not 1, 2 or 3
+  */
+ static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) {
+     final int levels = desired.size();
+     if (levels != supported.size() || levels < 1 || levels > 3) {
+         throw new IllegalArgumentException();
+     }
+
+     final String fromLang = fixAny(desired.get(0));
+     final String toLang = fixAny(supported.get(0));
+     if (levels == 1) {
+         languageDesired2Supported.addSubtable(fromLang, toLang, percentage);
+         return;
+     }
+
+     final String fromScript = fixAny(desired.get(1));
+     final String toScript = fixAny(supported.get(1));
+     if (levels == 2) {
+         languageDesired2Supported.addSubtables(fromLang, toLang, fromScript, toScript, percentage);
+         return;
+     }
+
+     final String fromRegion = fixAny(desired.get(2));
+     final String toRegion = fixAny(supported.get(2));
+     languageDesired2Supported.addSubtables(fromLang, toLang, fromScript, toScript, fromRegion, toRegion, percentage);
+ }
+
+ @Override
+ public String toString() { // unabbreviated form: same as toString(false)
+ return toString(false);
+ }
+
+ public String toString(boolean abbreviate) { // region mapper text, a newline, then the distance table rendered with the given abbreviate flag
+ return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate);
+ }
+
+
+ // public static XLocaleDistance createDefaultInt() {
+ // IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE);
+ // return new XLocaleDistance(d, DEFAULT_REGION_MAPPER);
+ // }
+
+ static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) {
+ output.clear();
+ for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+ if (input.containsAll(entry.getValue())) { // example; if all southern Europe are contained, then add S. Europe
+ output.add(entry.getKey());
+ }
+ }
+ return output;
+ }
+
+ /**
+  * Maps regions to partition ids and match variables to the partitions they cover.
+  * Instances are created through the nested {@link Builder}.
+  */
+ static class RegionMapper implements IdMapper<String,String> {
+     /**
+      * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX.
+      * We generate a mapping from $A1 to a set of partitions {P1, P2}.
+      * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions.
+      */
+     final Multimap<String,String> variableToPartition;
+     /**
+      * Used for executing the rules. We map a region to a partition before processing.
+      */
+     final Map<String,String> regionToPartition;
+     /**
+      * Used to support es_419 compared to es_AR, etc.
+      */
+     final Multimap<String,String> macroToPartitions;
+     /**
+      * Used to get the paradigm region for a cluster, if there is one
+      */
+     final Set<ULocale> paradigms;
+
+     /**
+      * Stores copies of the four tables, made with the Immutable* {@code copyOf} factories.
+      *
+      * @param variableToPartitionIn variable to partition ids
+      * @param regionToPartitionIn region to partition id
+      * @param macroToPartitionsIn macro region to the partition ids it intersects
+      * @param paradigmsIn paradigm locales
+      */
+     private RegionMapper(
+             Multimap<String, String> variableToPartitionIn,
+             Map<String, String> regionToPartitionIn,
+             Multimap<String,String> macroToPartitionsIn,
+             Set<ULocale> paradigmsIn) {
+         variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn);
+         regionToPartition = ImmutableMap.copyOf(regionToPartitionIn);
+         macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn);
+         paradigms = ImmutableSet.copyOf(paradigmsIn);
+     }
+
+     /**
+      * Returns the partition id recorded for a region, or the empty string when the
+      * region has no entry.
+      */
+     @Override
+     public String toId(String region) {
+         final String partition = regionToPartition.get(region);
+         if (partition == null) {
+             return "";
+         }
+         return partition;
+     }
+
+     /**
+      * Returns the partition ids for a match variable; {@code "*"} maps to the singleton {@code "*"}.
+      *
+      * @throws IllegalArgumentException if the variable has no partitions
+      */
+     public Collection<String> getIdsFromVariable(String variable) {
+         if (variable.equals("*")) {
+             return Collections.singleton("*");
+         }
+         final Collection<String> partitions = variableToPartition.get(variable);
+         final boolean defined = partitions != null && !partitions.isEmpty();
+         if (!defined) {
+             throw new IllegalArgumentException("Variable not defined: " + variable);
+         }
+         return partitions;
+     }
+
+     /** Returns the regions that have a partition id. */
+     public Set<String> regions() {
+         return regionToPartition.keySet();
+     }
+
+     /** Returns the variables that have partitions. */
+     public Set<String> variables() {
+         return variableToPartition.keySet();
+     }
+
+     /**
+      * Renders two tables: partition with its variables and regions, then macro with its partitions.
+      */
+     @Override
+     public String toString() {
+         TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition,
+             TreeMultimap.<String, String>create());
+         TreeMultimap<String, String> partitionToRegions = TreeMultimap.create();
+         for (Entry<String, String> regionAndPartition : regionToPartition.entrySet()) {
+             partitionToRegions.put(regionAndPartition.getValue(), regionAndPartition.getKey());
+         }
+
+         StringBuilder text = new StringBuilder("Partition ➠ Variables ➠ Regions (final)");
+         for (Entry<String, Set<String>> row : partitionToVariables.asMap().entrySet()) {
+             final String partition = row.getKey();
+             text.append('\n')
+                 .append(partition + "\t" + row.getValue() + "\t" + partitionToRegions.get(partition));
+         }
+         text.append("\nMacro ➠ Partitions");
+         for (Entry<String, Set<String>> row : macroToPartitions.asMap().entrySet()) {
+             text.append('\n')
+                 .append(row.getKey() + "\t" + row.getValue());
+         }
+         return text.toString();
+     }
+
+     /**
+      * Accumulates variable definitions and paradigm locales, then computes the partitions
+      * in {@link #build()}.
+      */
+     static class Builder {
+         final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
+         final private RegionSet regionSet = new RegionSet();
+         final private Set<ULocale> paradigms = new LinkedHashSet<ULocale>();
+
+         /**
+          * Records a variable and its inverse.
+          *
+          * @param variable variable name; the inverse is named {@code "$!"} followed by the
+          *     name without its first character
+          * @param barString region-set expression parsed by {@code RegionSet.parseSet}
+          */
+         void add(String variable, String barString) {
+             final Set<String> listed = regionSet.parseSet(barString);
+             for (String member : listed) {
+                 regionToRawPartition.put(member, variable);
+             }
+
+             // now add the inverse variable
+             final String negated = "$!" + variable.substring(1);
+             final Set<String> complement = regionSet.inverse();
+             for (String outsider : complement) {
+                 regionToRawPartition.put(outsider, negated);
+             }
+         }
+
+         /** Adds each string as a paradigm {@code ULocale}; returns this builder. */
+         public Builder addParadigms(String... paradigmRegions) {
+             for (String localeId : paradigmRegions) {
+                 paradigms.add(new ULocale(localeId));
+             }
+             return this;
+         }
+
+         /**
+          * Assigns a partition id to each distinct set of variables a region belongs to and
+          * derives the lookup tables from that assignment.
+          */
+         RegionMapper build() {
+             final IdMakerFull<Collection<String>> idMaker = new IdMakerFull<Collection<String>>("partition");
+             Multimap<String,String> variableToPartitions = TreeMultimap.create();
+             Map<String,String> regionToPartition = new TreeMap<String,String>();
+             Multimap<String,String> partitionToRegions = TreeMultimap.create();
+
+             for (Entry<String, Set<String>> raw : regionToRawPartition.asMap().entrySet()) {
+                 final String region = raw.getKey();
+                 final Collection<String> variablesOfRegion = raw.getValue();
+                 // partition ids are single characters counted up from Greek alpha
+                 final char partitionChar = (char)('α' + idMaker.add(variablesOfRegion));
+                 final String partition = String.valueOf(partitionChar);
+
+                 regionToPartition.put(region, partition);
+                 partitionToRegions.put(partition, region);
+                 for (String variable : variablesOfRegion) {
+                     variableToPartitions.put(variable, partition);
+                 }
+             }
+
+             // we get a mapping of each macro to the partitions it intersects with
+             Multimap<String,String> macroToPartitions = TreeMultimap.create();
+             for (Entry<String, Set<String>> macroEntry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
+                 final String macro = macroEntry.getKey();
+                 final Set<String> macroMembers = macroEntry.getValue();
+                 for (Entry<String, Set<String>> partitionEntry : partitionToRegions.asMap().entrySet()) {
+                     final boolean intersects = !Collections.disjoint(macroMembers, partitionEntry.getValue());
+                     if (intersects) {
+                         macroToPartitions.put(macro, partitionEntry.getKey());
+                     }
+                 }
+             }
+
+             return new RegionMapper(
+                 variableToPartitions,
+                 regionToPartition,
+                 macroToPartitions,
+                 paradigms);
+         }
+     }
+ }
+
+ /**
+  * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
+  * A region that has an entry in {@code CONTAINER_TO_CONTAINED_FINAL} is replaced by the
+  * regions listed there.
+  * <br>Syntax is simple for now:
+  * <pre>regionSet := region ([-+] region)*</pre>
+  * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
+  */
+ private static class RegionSet {
+     private enum Operation {add, remove}
+     // temporaries used in processing; reused (and returned) by every parseSet call
+     final private Set<String> tempRegions = new TreeSet<String>();
+     private Operation operation = null;
+
+     /**
+      * Parses {@code barString} left to right and returns the resulting regions.
+      * The returned set is this object's internal working set.
+      */
+     private Set<String> parseSet(String barString) {
+         operation = Operation.add;
+         tempRegions.clear();
+         final int length = barString.length();
+         int tokenStart = 0;
+         for (int pos = 0; pos < length; ++pos) {
+             final char c = barString.charAt(pos); // UTF16 is ok, since syntax is only ascii
+             if (c != '+' && c != '-') {
+                 continue;
+             }
+             // apply the pending token with the operator that preceded it, then switch operator
+             add(barString, tokenStart, pos);
+             tokenStart = pos + 1;
+             operation = (c == '+') ? Operation.add : Operation.remove;
+         }
+         add(barString, tokenStart, length);
+         return tempRegions;
+     }
+
+     /** Returns a new set holding every entry of {@code ALL_FINAL_REGIONS} not in the last parsed set. */
+     private Set<String> inverse() {
+         final TreeSet<String> complement = new TreeSet<String>(ALL_FINAL_REGIONS);
+         complement.removeAll(tempRegions);
+         return complement;
+     }
+
+     /** Applies the current operation to the token {@code barString[last, i)}; empty tokens are ignored. */
+     private void add(String barString, int last, int i) {
+         if (i <= last) {
+             return;
+         }
+         changeSet(operation, barString.substring(last, i));
+     }
+
+     /** Adds or removes a region, expanding it first when it has contained regions. */
+     private void changeSet(Operation operation, String region) {
+         final Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region);
+         final boolean expand = contained != null && !contained.isEmpty();
+         if (Operation.add == operation) {
+             if (expand) {
+                 tempRegions.addAll(contained);
+             } else {
+                 tempRegions.add(region);
+             }
+         } else {
+             if (expand) {
+                 tempRegions.removeAll(contained);
+             } else {
+                 tempRegions.remove(region);
+             }
+         }
+     }
+ }
+
+ public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) { // delegates to Multimaps.invertFrom over Multimaps.forMap(map), collecting into a new LinkedHashMultimap
+ return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create());
+ }
+
+ public Set<ULocale> getParadigms() { // exposes the region mapper's paradigms set directly; no copy is made here
+ return regionMapper.paradigms;
+ }
+
+ public int getDefaultLanguageDistance() { // plain accessor for the defaultLanguageDistance field
+ return defaultLanguageDistance;
+ }
+
+ public int getDefaultScriptDistance() { // plain accessor; XLocaleMatcher uses this value as its default threshold distance
+ return defaultScriptDistance;
+ }
+
+ public int getDefaultRegionDistance() { // plain accessor; XLocaleMatcher uses this value plus one as its default demotion per additional desired locale
+ return defaultRegionDistance;
+ }
+
+ /**
+  * Rebuilds a distance table from copied, immutable maps. At every level an item for which
+  * {@code toId} returns a non-null value is replaced by the result of {@code intern};
+  * anything else is copied recursively.
+  */
+ static class CompactAndImmutablizer extends IdMakerFull<Object> {
+     /** Returns the interned table if it has an id, otherwise a new table over compacted subtables. */
+     StringDistanceTable compact(StringDistanceTable item) {
+         final boolean known = toId(item) != null;
+         if (known) {
+             return (StringDistanceTable) intern(item);
+         }
+         return new StringDistanceTable(compact(item.subtables, 0));
+     }
+
+     /**
+      * Returns the interned map if it has an id, otherwise an immutable copy whose values have
+      * been compacted: map values recursively (one level deeper), all other values as
+      * {@code DistanceNode}.
+      */
+     <K,T> Map<K,T> compact(Map<K,T> item, int level) {
+         final boolean known = toId(item) != null;
+         if (known) {
+             return (Map<K,T>)intern(item);
+         }
+         // LinkedHashMap keeps the entries in the original iteration order
+         Map<K,T> rebuilt = new LinkedHashMap<K,T>();
+         for (Entry<K,T> each : item.entrySet()) {
+             final K key = each.getKey();
+             final T value = each.getValue();
+             final T compacted;
+             if (value instanceof Map) {
+                 compacted = (T)compact((Map)value, level+1);
+             } else {
+                 compacted = (T)compact((DistanceNode)value);
+             }
+             rebuilt.put(key, compacted);
+         }
+         return ImmutableMap.copyOf(rebuilt);
+     }
+
+     /**
+      * Returns the interned node if it has an id; otherwise a plain {@code DistanceNode} when
+      * the node has no (or an empty) distance table, else a {@code StringDistanceNode} over the
+      * compacted table.
+      */
+     DistanceNode compact(DistanceNode item) {
+         final boolean known = toId(item) != null;
+         if (known) {
+             return (DistanceNode) intern(item);
+         }
+         final DistanceTable nested = item.getDistanceTable();
+         final boolean hasChildren = nested != null && !nested.isEmpty();
+         if (hasChildren) {
+             return new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable));
+         }
+         return new DistanceNode(item.distance);
+     }
+ }
+
+ @Deprecated
+ public StringDistanceTable internalGetDistanceTable() { // internal hook: hands out the table itself, cast to StringDistanceTable
+ return (StringDistanceTable) languageDesired2Supported;
+ }
+
+ /**
+  * Self check: optionally prints the default distance data (when {@code PRINT_OVERRIDES}
+  * is set) and then verifies that compacting the default table yields an equal table.
+  *
+  * @param args unused
+  * @throws IllegalArgumentException if the compacted table is not equal to the original
+  */
+ public static void main(String[] args) {
+     // for (Entry<String, Collection<String>> entry : containerToContained.asMap().entrySet()) {
+     // System.out.println(entry.getKey() + "\t⥢" + entry.getValue() + "; " + containerToFinalContained.get(entry.getKey()));
+     // }
+     // final Multimap<String,String> regionToMacros = ImmutableMultimap.copyOf(Multimaps.invertFrom(containerToContained, TreeMultimap.create()));
+     // for (Entry<String, Collection<String>> entry : regionToMacros.asMap().entrySet()) {
+     // System.out.println(entry.getKey() + "\t⥤ " + entry.getValue());
+     // }
+     if (PRINT_OVERRIDES) {
+         System.out.println(getDefault().toString(true));
+     }
+     final DistanceTable original = getDefault().languageDesired2Supported;
+     final DistanceTable compacted = original.compact();
+     if (original.equals(compacted)) {
+         return;
+     }
+     throw new IllegalArgumentException("Compaction isn't equal");
+ }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
+import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Immutable class that picks best match between user's desired locales and application's supported locales.
+ * @author markdavis
+ */
+public class XLocaleMatcher {
+ private static final LSR UND = new LSR("und","","");
+ private static final ULocale UND_LOCALE = new ULocale("und");
+
+ // normally the default values, but can be set via constructor
+
+ private final XLocaleDistance localeDistance;
+ private final int thresholdDistance;
+ private final int demotionPerAdditionalDesiredLocale;
+ private final DistanceOption distanceOption;
+
+ // built based on application's supported languages in constructor
+
+ private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
+ private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
+ private final ULocale defaultLanguage;
+
+
+ /**
+  * Collects the optional settings for an {@link XLocaleMatcher}. Every setter returns this
+  * builder so that calls can be chained; {@link #build()} creates the matcher.
+  */
+ public static class Builder {
+     private Set<ULocale> supportedLanguagesList;
+     /** Negative (the initial value) means: use the default. */
+     private int thresholdDistance = -1;
+     /** Negative (the initial value) means: use the default. */
+     private int demotionPerAdditionalDesiredLocale = -1;
+     private ULocale defaultLanguage;
+     private XLocaleDistance localeDistance;
+     private DistanceOption distanceOption;
+
+     /**
+      * Sets the supported locales from a priority-list string.
+      * @param languagePriorityList text parsed with {@code LocalePriorityList.add(...).build()}
+      * @return this builder
+      */
+     public Builder setSupportedLocales(String languagePriorityList) {
+         final LocalePriorityList parsed = LocalePriorityList.add(languagePriorityList).build();
+         this.supportedLanguagesList = asSet(parsed);
+         return this;
+     }
+
+     /**
+      * Sets the supported locales from a priority list, keeping its iteration order.
+      * @param languagePriorityList the supported locales
+      * @return this builder
+      */
+     public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
+         final Set<ULocale> ordered = asSet(languagePriorityList);
+         this.supportedLanguagesList = ordered;
+         return this;
+     }
+
+     /**
+      * Sets the supported locales. The set is stored as given (no copy is made).
+      * @param languagePriorityList the supported locales
+      * @return this builder
+      */
+     public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
+         this.supportedLanguagesList = languagePriorityList;
+         return this;
+     }
+
+     /**
+      * @param thresholdDistance the thresholdDistance to set; a negative value selects the
+      *     default, which is the locale distance's default script distance
+      * @return this builder
+      */
+     public Builder setThresholdDistance(int thresholdDistance) {
+         this.thresholdDistance = thresholdDistance;
+         return this;
+     }
+
+     /**
+      * @param demotionPerAdditionalDesiredLocale the demotion to set; a negative value selects
+      *     the default, which is the locale distance's default region distance plus one
+      * @return this builder
+      */
+     public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
+         this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
+         return this;
+     }
+
+     /**
+      * @param localeDistance the localeDistance to set; null selects {@code XLocaleDistance.getDefault()}
+      * @return this builder
+      */
+     public Builder setLocaleDistance(XLocaleDistance localeDistance) {
+         this.localeDistance = localeDistance;
+         return this;
+     }
+
+     /**
+      * Sets the default language; null selects the first supported language.
+      * @param defaultLanguage locale returned when no match is close enough
+      * @return this builder
+      */
+     public Builder setDefaultLanguage(ULocale defaultLanguage) {
+         this.defaultLanguage = defaultLanguage;
+         return this;
+     }
+
+     /**
+      * Sets the option that is handed to the distance computation. One use is for situations
+      * (such as maps) where it is better to fall back to the same script than to a similar
+      * language, so that language differences count for less than script differences.
+      * @param distanceOption the option to use
+      * @return this builder
+      */
+     public Builder setDistanceOption(DistanceOption distanceOption) {
+         this.distanceOption = distanceOption;
+         return this;
+     }
+
+     /**
+      * Creates the matcher from the current settings.
+      * @return a new {@code XLocaleMatcher}
+      */
+     public XLocaleMatcher build() {
+         return new XLocaleMatcher(this);
+     }
+ }
+
+ /**
+ * Returns a builder used in chaining parameters for building a Locale Matcher.
+ * @return a new {@code Builder}; a fresh instance is created on every call
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /** Convenience constructor: same as building with only {@code setSupportedLocales(supportedLocales)} (the priority-list string form); every other setting keeps its default. */
+ public XLocaleMatcher(String supportedLocales) {
+ this(builder().setSupportedLocales(supportedLocales));
+ }
+ /** Convenience constructor: same as building with only {@code setSupportedLocales(supportedLocales)} (the LocalePriorityList form); every other setting keeps its default. */
+ public XLocaleMatcher(LocalePriorityList supportedLocales) {
+ this(builder().setSupportedLocales(supportedLocales));
+ }
+ /** Convenience constructor: same as building with only {@code setSupportedLocales(supportedLocales)} (the Set form); every other setting keeps its default. */
+ public XLocaleMatcher(Set<ULocale> supportedLocales) {
+ this(builder().setSupportedLocales(supportedLocales));
+ }
+
+ /**
+  * Create a locale matcher from the settings collected in a builder. Unset settings are
+  * replaced by defaults: the default locale distance, its default script distance as the
+  * threshold, its default region distance plus one as the demotion, and the first
+  * supported locale (or null when there is none) as the default language.
+  * @param builder source of the settings
+  */
+ private XLocaleMatcher(Builder builder) {
+     if (builder.localeDistance == null) {
+         localeDistance = XLocaleDistance.getDefault();
+     } else {
+         localeDistance = builder.localeDistance;
+     }
+
+     if (builder.thresholdDistance < 0) {
+         thresholdDistance = localeDistance.getDefaultScriptDistance();
+     } else {
+         thresholdDistance = builder.thresholdDistance;
+     }
+
+     // only do AFTER above are set
+     final Set<LSR> paradigmLsrs = extractLsrSet(localeDistance.getParadigms());
+     final Multimap<LSR, ULocale> supportedByLsr = extractLsrMap(builder.supportedLanguagesList, paradigmLsrs);
+     supportedLanguages = supportedByLsr.asMap();
+     exactSupportedLocales = ImmutableSet.copyOf(supportedByLsr.values());
+
+     if (builder.defaultLanguage != null) {
+         defaultLanguage = builder.defaultLanguage;
+     } else if (supportedLanguages.isEmpty()) {
+         defaultLanguage = null;
+     } else {
+         // first language
+         defaultLanguage = supportedLanguages.entrySet().iterator().next().getValue().iterator().next();
+     }
+
+     if (builder.demotionPerAdditionalDesiredLocale < 0) {
+         demotionPerAdditionalDesiredLocale = localeDistance.getDefaultRegionDistance()+1;
+     } else {
+         demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale;
+     }
+
+     distanceOption = builder.distanceOption;
+ }
+
+ /**
+  * Converts locales to their maximized LSR form, keeping first-seen order.
+  * {@code und} is mapped to the fixed {@code UND} value instead of being maximized.
+  * Result is not immutable!
+  */
+ private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
+     final Set<LSR> lsrs = new LinkedHashSet<LSR>();
+     for (ULocale locale : languagePriorityList) {
+         if (locale.equals(UND_LOCALE)) {
+             lsrs.add(UND);
+         } else {
+             lsrs.add(LSR.fromMaximalized(locale));
+         }
+     }
+     return lsrs;
+ }
+
+ /**
+  * Groups locales by their maximized LSR ({@code und} maps to the fixed {@code UND} value).
+  * When {@code priorities} is given and the map has more than one element, the keys are
+  * reordered: the first key stays first, then the keys contained in {@code priorities}
+  * (in their original relative order), then everything else.
+  *
+  * @param languagePriorityList locales in priority order
+  * @param priorities LSRs to move forward, or null for no reordering
+  * @return an immutable copy of the grouping
+  * @throws IllegalArgumentException if the reordered map is not equal to the original one
+  */
+ private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
+     Multimap<LSR, ULocale> byLsr = LinkedHashMultimap.create();
+     for (ULocale locale : languagePriorityList) {
+         final LSR key = locale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(locale);
+         byLsr.put(key, locale);
+     }
+     if (byLsr.size() <= 1 || priorities == null) {
+         return ImmutableMultimap.copyOf(byLsr);
+     }
+
+     // for the supported list, we put any priorities before all others, except for the first.
+     Multimap<LSR, ULocale> reordered = LinkedHashMultimap.create();
+
+     // copy the long way so the priorities are in the same order as in the original
+     boolean atFirstKey = true;
+     for (Entry<LSR, Set<ULocale>> group : byLsr.asMap().entrySet()) {
+         final LSR key = group.getKey();
+         final boolean moveForward = atFirstKey || priorities.contains(key);
+         atFirstKey = false;
+         if (moveForward) {
+             reordered.putAll(key, group.getValue());
+         }
+     }
+     // now copy the rest
+     reordered.putAll(byLsr);
+     if (!reordered.equals(byLsr)) {
+         throw new IllegalArgumentException();
+     }
+     return ImmutableMultimap.copyOf(reordered);
+ }
+
+
+ /** Convenience method: best match for a single desired locale, without reporting which desired locale matched. */
+ public ULocale getBestMatch(ULocale ulocale) {
+ return getBestMatch(ulocale, null);
+ }
+ /** Convenience method: parses {@code languageList} with {@code LocalePriorityList.add(...).build()} and matches it, without reporting which desired locale matched. */
+ public ULocale getBestMatch(String languageList) {
+ return getBestMatch(LocalePriorityList.add(languageList).build(), null);
+ }
+ /** Convenience method: matches the given locales in argument order (collected into a LinkedHashSet, so repeated locales count once), without reporting which desired locale matched. */
+ public ULocale getBestMatch(ULocale... locales) {
+ return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
+ }
+ /** Convenience method: best match for a set of desired locales, without reporting which desired locale matched. */
+ public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
+ return getBestMatch(desiredLanguages, null);
+ }
+ /** Convenience method: best match for a priority list of desired locales, without reporting which desired locale matched. */
+ public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
+ return getBestMatch(desiredLanguages, null);
+ }
+ /** Convenience method: converts the priority list to an ordered set with {@code asSet} and delegates to the Set-based overload. */
+ public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
+ return getBestMatch(asSet(desiredLanguages), outputBestDesired);
+ }
+
+ // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
+ private static Set<ULocale> asSet(LocalePriorityList languageList) {
+ Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
+ for (ULocale locale : languageList) {
+ temp.add(locale);
+ };
+ return temp;
+ }
+
+ /**
+ * Get the best match between the desired languages and supported languages.
+ * A single-element set is delegated to the single-locale overload. Otherwise the desired
+ * locales are visited in order, and each later one has a further
+ * {@code demotionPerAdditionalDesiredLocale} added to its distances.
+ * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
+ * @param outputBestDesired The one of the desired languages that matched best.
+ * Set to null if the best match was not below the threshold distance.
+ * May itself be null, in which case nothing is reported.
+ * @return the chosen supported locale, or the default language if the best distance found was not below the threshold distance
+ */
+ public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
+ // fast path for singleton
+ if (desiredLanguages.size() == 1) {
+ return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
+ }
+ // TODO produce optimized version for single desired ULocale
+ Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
+ int bestDistance = Integer.MAX_VALUE;
+ ULocale bestDesiredLocale = null;
+ Collection<ULocale> bestSupportedLocales = null;
+ int delta = 0; // accumulated demotion for the desired locale currently being examined
+ mainLoop:
+ for (final Entry<LSR, ULocale> desiredLsrAndLocale : desiredLSRs.entries()) {
+ // quick check for exact match
+ ULocale desiredLocale = desiredLsrAndLocale.getValue();
+ LSR desiredLSR = desiredLsrAndLocale.getKey();
+ if (delta < bestDistance) { // the shortcuts only apply while the demotion alone is still below the best distance so far
+ if (exactSupportedLocales.contains(desiredLocale)) {
+ if (outputBestDesired != null) {
+ outputBestDesired.value = desiredLocale;
+ }
+ return desiredLocale;
+ }
+ // quick check for maximized locale
+ // NOTE(review): the single-locale overload only takes this shortcut when distanceOption == DistanceOption.NORMAL; confirm the difference is intended
+ Collection<ULocale> found = supportedLanguages.get(desiredLSR);
+ if (found != null) {
+ // if we find one in the set, return first (lowest). We already know the exact one isn't there.
+ if (outputBestDesired != null) {
+ outputBestDesired.value = desiredLocale;
+ }
+ return found.iterator().next();
+ }
+ }
+ for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
+ int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
+ thresholdDistance, distanceOption);
+ if (distance < bestDistance) { // strict comparison: on a tie the earlier candidate is kept
+ bestDistance = distance;
+ bestDesiredLocale = desiredLocale;
+ bestSupportedLocales = supportedLsrAndLocale.getValue();
+ if (distance == 0) {
+ break mainLoop; // nothing can beat a zero distance
+ }
+ }
+ }
+ delta += demotionPerAdditionalDesiredLocale;
+ }
+ if (bestDistance >= thresholdDistance) { // also covers the case where no candidate was examined (bestDistance is still Integer.MAX_VALUE)
+ if (outputBestDesired != null) {
+ outputBestDesired.value = null;
+ }
+ return defaultLanguage;
+ }
+ if (outputBestDesired != null) {
+ outputBestDesired.value = bestDesiredLocale;
+ }
+ // pick exact match if there is one
+ if (bestSupportedLocales.contains(bestDesiredLocale)) {
+ return bestDesiredLocale;
+ }
+ // otherwise return first supported (NOTE(review): combine() is not called here, so no variants or extensions from bestDesired are merged in)
+ return bestSupportedLocales.iterator().next();
+ }
+
+ /**
+  * Get the best match between a single desired locale and the supported languages.
+  * @param desiredLocale The user's desired locale.
+  * @param outputBestDesired Receives {@code desiredLocale} when a match is found, or null
+  *     when the best match was not below the threshold distance. May itself be null, in
+  *     which case nothing is reported.
+  * @return the chosen supported locale, or the default language if the best distance found
+  *     was not below the threshold distance
+  */
+ public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
+     // maximize up front, with hack for und
+     final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
+
+     // quick check for exact match
+     if (exactSupportedLocales.contains(desiredLocale)) {
+         if (outputBestDesired != null) {
+             outputBestDesired.value = desiredLocale;
+         }
+         return desiredLocale;
+     }
+
+     // quick check for maximized locale
+     if (distanceOption == DistanceOption.NORMAL) {
+         final Collection<ULocale> sameLsr = supportedLanguages.get(desiredLSR);
+         if (sameLsr != null) {
+             // if we find one in the set, return first (lowest). We already know the exact one isn't there.
+             if (outputBestDesired != null) {
+                 outputBestDesired.value = desiredLocale;
+             }
+             return sameLsr.iterator().next();
+         }
+     }
+
+     // full scan: keep the first candidate with the smallest distance
+     int closest = Integer.MAX_VALUE;
+     Collection<ULocale> closestSupported = null;
+     for (final Entry<LSR, Set<ULocale>> candidate : supportedLanguages.entrySet()) {
+         final int candidateDistance = localeDistance.distanceRaw(desiredLSR, candidate.getKey(),
+             thresholdDistance, distanceOption);
+         if (candidateDistance >= closest) {
+             continue;
+         }
+         closest = candidateDistance;
+         closestSupported = candidate.getValue();
+         if (candidateDistance == 0) {
+             break;
+         }
+     }
+
+     final boolean matched = closest < thresholdDistance;
+     if (outputBestDesired != null) {
+         outputBestDesired.value = matched ? desiredLocale : null;
+     }
+     if (!matched) {
+         return defaultLanguage;
+     }
+     // pick exact match if there is one, otherwise the first supported locale of the best group
+     return closestSupported.contains(desiredLocale)
+         ? desiredLocale
+         : closestSupported.iterator().next();
+ }
+
+ /**
+  * Combine features of the desired locale into those of the supported, and return result.
+  * The region, variant and extensions of {@code bestDesired} (where present) replace those
+  * of {@code bestSupported}.
+  * @param bestSupported the supported locale to start from
+  * @param bestDesired the desired locale to take features from; may be null
+  * @return {@code bestSupported} itself when {@code bestDesired} is null or equal to it,
+  *     otherwise a newly built locale
+  */
+ public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
+     // for examples of extensions, variants, see
+     // http://unicode.org/repos/cldr/tags/latest/common/bcp47/
+     // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+     if (bestSupported.equals(bestDesired) || bestDesired == null) {
+         return bestSupported;
+     }
+
+     // add region, variants, extensions
+     final ULocale.Builder merged = new ULocale.Builder().setLocale(bestSupported);
+
+     // copy the region from the desired, if there is one
+     final String desiredRegion = bestDesired.getCountry();
+     if (!desiredRegion.isEmpty()) {
+         merged.setRegion(desiredRegion);
+     }
+
+     // copy the variants from desired, if there is one
+     // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
+     final String desiredVariants = bestDesired.getVariant();
+     if (!desiredVariants.isEmpty()) {
+         merged.setVariant(desiredVariants);
+     }
+
+     // copy the extensions from desired, if there are any
+     // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
+     for (char key : bestDesired.getExtensionKeys()) {
+         merged.setExtension(key, bestDesired.getExtension(key));
+     }
+     return merged.build();
+ }
+
+ /**
+  * Returns the distance between the two languages. The values are not necessarily symmetric.
+  * A language is first maximized with add likely subtags, then compared.
+  * @param desired A locale desired by the user
+  * @param supported A locale supported by a program.
+  * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
+  */
+ public int distance(ULocale desired, ULocale supported) {
+     final LSR desiredLsr = LSR.fromMaximalized(desired);
+     final LSR supportedLsr = LSR.fromMaximalized(supported);
+     return localeDistance.distanceRaw(desiredLsr, supportedLsr, thresholdDistance, distanceOption);
+ }
+
+ /**
+  * Convenience method: the distance between two locales given as locale ID strings.
+  * Each string is turned into a {@code ULocale} and maximized before comparing.
+  */
+ public int distance(String desiredLanguage, String supportedLanguage) {
+     final LSR desiredLsr = LSR.fromMaximalized(new ULocale(desiredLanguage));
+     final LSR supportedLsr = LSR.fromMaximalized(new ULocale(supportedLanguage));
+     return localeDistance.distanceRaw(desiredLsr, supportedLsr, thresholdDistance, distanceOption);
+ }
+
+ @Override
+ public String toString() { // string form of the set of supported locales
+ return exactSupportedLocales.toString();
+ }
+
+ /**
+  * Return the inverse of the distance as a fraction: that is,
+  * (100 - distance(desired, supported)) / 100.0
+  */
+ public double match(ULocale desired, ULocale supported) {
+     final int rawDistance = distance(desired, supported);
+     return (100 - rawDistance) / 100.0;
+ }
+
+ /**
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
+ * <br>Note that
+ * the precise values may change over time; no code should be made dependent
+ * on the values remaining constant.
+ * <br>This overload simply forwards to {@code match(desired, supported)}.
+ * @param desired Desired locale
+ * @param desiredMax Maximized locale (using likely subtags); ignored by this implementation
+ * @param supported Supported locale
+ * @param supportedMax Maximized locale (using likely subtags); ignored by this implementation
+ * @return value between 0 and 1, inclusive.
+ * @deprecated Use the form with 2 parameters instead.
+ */
+ @Deprecated
+ public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
+ return match(desired, supported);
+ }
+
+ /**
+ * Canonicalize a locale (language). Note that for now, it is canonicalizing
+ * according to CLDR conventions (he vs iw, etc), since that is what is needed
+ * for likelySubtags.
+ * <br>NOTE: not implemented yet. The current body ignores its argument and always returns null.
+ * @param ulocale language/locale code
+ * @return ULocale with remapped subtags (currently always null, see the note above).
+ * @stable ICU 4.4
+ */
+ public ULocale canonicalize(ULocale ulocale) {
+ // TODO
+ return null;
+ }
+
+ /**
+ * @return the thresholdDistance. The getBestMatch methods treat a best distance at or above this value as a match failure and return the default language.
+ */
+ public int getThresholdDistance() {
+ return thresholdDistance;
+ }
+}
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.Utility;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
/**
* Provides a way to match the languages (locales) supported by a product to the
* languages (locales) acceptable to a user, and get the best match. For
* example:
- *
+ *
* <pre>
* LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
- *
+ *
* // afterwards:
* matcher.getBestMatch("en-US").toLanguageTag() => "en"
* </pre>
- *
+ *
* It takes into account when languages are close to one another, such as fil
* and tl, and when language regional variants are close, like en-GB and en-AU.
* It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
* product will just need one static instance, built with the languages
* that it supports. However, it may want multiple instances with different
* default languages based on additional information, such as the domain.
- *
+ *
* @author markdavis@google.com
* @stable ICU 4.4
*/
* threshold, that default language is chosen. Typically the default is English,
* but it could be different based on additional information, such as the domain
* of the page.
- *
+ *
* @param languagePriorityList weighted list
* @stable ICU 4.4
*/
/**
* Create a new language matcher from a String form. The highest-weighted
* language is the default.
- *
+ *
* @param languagePriorityListString String form of LanguagePriorityList
* @stable ICU 4.4
*/
@Deprecated
public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
+ this.languagePriorityList = languagePriorityList;
for (final ULocale language : languagePriorityList) {
add(language, languagePriorityList.getWeight(language));
}
/**
* Get the best match for a LanguagePriorityList
- *
+ *
* @param languageList list to match
* @return best matching language code
* @stable ICU 4.4
/**
* Convenience method: Get the best match for a LanguagePriorityList
- *
+ *
* @param languageList String form of language priority list
* @return best matching language code
* @stable ICU 4.4
/**
* Get the best match for an individual language code.
- *
+ *
* @param ulocale locale/language code to match
* @return best matching language code
* @stable ICU 4.4
*/
@Override
public String toString() {
- return "{" + defaultLanguage + ", "
+ return "{" + defaultLanguage + ", "
+ localeToMaxLocaleAndWeight + "}";
}
// ================= Privates =====================
/**
* Get the best match for an individual language code.
- *
+ *
* @param languageCode
* @return best matching language code and weight (as per
* {@link #match(ULocale, ULocale)})
}
return bestTableMatch;
}
-
+
/**
- * @internal
+ * @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
}
/**
- * We preprocess the data to get just the possible matches for each desired base language.
+ * We preprocess the data to get just the possible matches for each desired base language.
*/
private void processMapping() {
for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
}
Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
- Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
+ Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
= new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
// =============== Special Mapping Information ==============
return (region == null ? "*" : region);
}
+ @Override
public String toString() {
String result = getLanguage();
if (level != Level.language) {
enum Level {
language(0.99),
- script(0.2),
+ script(0.2),
region(0.04);
final double worst;
}
}
- double getScore(ULocale dMax, String desiredRaw, String desiredMax,
+ double getScore(ULocale dMax, String desiredRaw, String desiredMax,
ULocale sMax, String supportedRaw, String supportedMax) {
double distance = 0;
if (!desiredMax.equals(supportedMax)) {
System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
}
for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
- if (datum.get0().matches(desiredLocale)
+ if (datum.get0().matches(desiredLocale)
&& datum.get1().matches(supportedLocale)) {
if (DEBUG) {
System.out.println("\t\t\t\tFOUND\t" + datum);
return level.worst;
}
+ @Override
public String toString() {
StringBuilder result = new StringBuilder().append(level);
for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
}
+ @Override
@SuppressWarnings("unchecked")
public ScoreData cloneAsThawed() {
try {
private volatile boolean frozen = false;
+ @Override
public ScoreData freeze() {
return this;
}
+ @Override
public boolean isFrozen() {
return frozen;
}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public String toString() {
return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
return this;
}
- /**
+ /**
* {@inheritDoc}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public LanguageMatcherData cloneAsThawed() {
LanguageMatcherData result;
}
}
- /**
+ /**
* {@inheritDoc}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public LanguageMatcherData freeze() {
languageScores.freeze();
return this;
}
- /**
+ /**
* {@inheritDoc}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public boolean isFrozen() {
return frozen;
}
LanguageMatcherData matcherData;
+ LocalePriorityList languagePriorityList;
private static final LanguageMatcherData defaultWritten;
final LocaleMatcher matcher = new LocaleMatcher("");
return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
}
+
+ // State backing the technical-preview API below.
+ // xLocaleMatcher is built on demand by getLocaleMatcher() and reset to null by
+ // setDefaultLanguage()/setFavorScript() so that it is rebuilt with the new settings.
+ transient XLocaleMatcher xLocaleMatcher = null;
+ // Default language handed to the builder; null means the builder's own default is kept.
+ transient ULocale xDefaultLanguage = null;
+ // When true, the matcher is built with DistanceOption.SCRIPT_FIRST.
+ transient boolean xFavorScript = false;
+
+ /**
+ * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch).
+ * The values are not necessarily symmetric.
+ * @param desired A locale desired by the user
+ * @param supported A locale supported by a program.
+ * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance).
+ * A language is first maximized with add likely subtags, then compared.
+ * @internal
+ * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ */
+ @Deprecated
+ public int distance(ULocale desired, ULocale supported) {
+ return getLocaleMatcher().distance(desired, supported);
+ }
+
+ private synchronized XLocaleMatcher getLocaleMatcher() {
+ if (xLocaleMatcher == null) {
+ Builder builder = XLocaleMatcher.builder();
+ builder.setSupportedLocales(languagePriorityList);
+ if (xDefaultLanguage != null) {
+ builder.setDefaultLanguage(xDefaultLanguage);
+ }
+ if (xFavorScript) {
+ builder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
+ }
+ xLocaleMatcher = builder.build();
+ }
+ return xLocaleMatcher;
+ }
+
+ /**
+ * Get the best match between the desired languages and supported languages.
+ * This supports the new CLDR syntax to provide for better matches within
+ * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US)
+ * and also matching between regions and macroregions (such as comparing es-419 to es-AR).
+ * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
+ * @param outputBestDesired The one of the desired languages that matched best.
+ * Set to null if the best match was not below the threshold distance.
+ * @return best-match supported language
+ * @internal
+ * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ */
+ @Deprecated
+ public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
+ return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired);
+ }
+
+ /**
+ * Set the default language, with null = default = first supported language.
+ * Also discards the cached technical-preview matcher so that it is rebuilt with the new setting.
+ * @param defaultLanguage Language to use in case the threshold for distance is exceeded.
+ * @return this, for chaining
+ * @internal
+ * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ */
+ @Deprecated
+ public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
+ this.xDefaultLanguage = defaultLanguage;
+ // force getLocaleMatcher() to rebuild with the new default language
+ xLocaleMatcher = null;
+ return this;
+ }
+
+ /**
+ * If true, then the language differences are smaller than script differences.
+ * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
+ * Also discards the cached technical-preview matcher so that it is rebuilt with the new setting.
+ * @param favorScript Set to true to treat script as most important.
+ * @return this, for chaining.
+ * @internal
+ * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ */
+ @Deprecated
+ public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
+ this.xFavorScript = favorScript;
+ // force getLocaleMatcher() to rebuild with the new distance option
+ xLocaleMatcher = null;
+ return this;
+ }
}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import com.ibm.icu.dev.test.AbstractTestLog;
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.util.CollectionUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.util.ICUUncheckedIOException;
+
+/**
+ * Base class for tests driven by a line-oriented data file.
+ * <p>Each line of the file is one of:
+ * <ul>
+ * <li>blank, or only a {@code # comment};</li>
+ * <li>a parameter line starting with {@code @}, split on {@code =} and passed to
+ *     {@link #handleParams(String, List)};</li>
+ * <li>the single token {@code @debug}, which sets the breakpoint flag for the next test line;</li>
+ * <li>a test line, split on {@code ;} and passed to {@link #handle(int, boolean, String, List)}.</li>
+ * </ul>
+ * Every line is stored together with the text of its trailing comment (without the
+ * leading {@code #}), so that {@link #appendLines(Appendable)} can write the file back out.
+ */
+abstract public class DataDrivenTestHelper {
+
+    /** Parsed form of a line consisting only of {@code @debug}. */
+    public static final List<String> DEBUG_LINE = Collections.singletonList("@debug");
+    /** Splits a test line into trimmed, semicolon-separated arguments. */
+    public static final Splitter SEMICOLON = Splitter.on(';').trimResults();
+    /** Splits a parameter line into trimmed key and value. */
+    public static final Splitter EQUAL_SPLIT = Splitter.on('=').trimResults();
+    /** Separator written between the arguments of a test line by {@link #appendLines(Appendable)}. */
+    public static final String SEPARATOR = " ; \t";
+
+    protected TestFmwk framework = null;
+    /** Fewest arguments a test line may have; lines outside the range are reported as malformed. */
+    protected int minArgumentCount = 3;
+    /** Most arguments a test line may have. */
+    protected int maxArgumentCount = 4;
+    // lines.get(i) and comments.get(i) describe the same input line.
+    private List<List<String>> lines = new ArrayList<List<String>>();
+    private List<String> comments = new ArrayList<String>();
+
+    /**
+     * Records the test framework this helper belongs to.
+     * @return this, for chaining
+     */
+    public DataDrivenTestHelper setFramework(TestFmwk testFramework) {
+        this.framework = testFramework;
+        return this;
+    }
+
+    /**
+     * Writes the stored lines to {@code out}, one per line, in a normalized format.
+     * Parameter lines are joined with {@code =}, test lines with {@link #SEPARATOR};
+     * a non-empty comment is written after a {@code #} marker.
+     * @param out destination
+     * @return {@code out}
+     * @throws ICUUncheckedIOException if {@code out} throws an IOException
+     */
+    public <T extends Appendable> T appendLines(T out) {
+        try {
+            for (int i = 0; i < lines.size(); ++i) {
+                List<String> components = lines.get(i);
+                String comment = comments.get(i);
+                if (components.isEmpty()) {
+                    if (!comment.isEmpty()) {
+                        out.append("# ").append(comment);
+                    }
+                } else {
+                    String first = components.iterator().next();
+                    String sep = first.startsWith("@") ? "=" : SEPARATOR;
+                    out.append(CollectionUtilities.join(components, sep));
+                    if (!comment.isEmpty()) {
+                        out.append("\t# ").append(comment);
+                    }
+                }
+                out.append('\n');
+            }
+            return out;
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Stores one parsed line together with its comment text.
+     * @param arguments parsed arguments; stored as an unmodifiable view
+     * @param commentBase comment text without the leading {@code #}, or "" if none
+     * @return this, for chaining
+     */
+    protected DataDrivenTestHelper addLine(List<String> arguments, String commentBase) {
+        lines.add(Collections.unmodifiableList(arguments));
+        comments.add(commentBase);
+        return this;
+    }
+
+    /** Loads the file and then runs every line; equivalent to {@code load(...).test()}. */
+    public DataDrivenTestHelper run(Class<?> classFileIsRelativeTo, String file) {
+        return load(classFileIsRelativeTo, file)
+                .test();
+    }
+
+    /** Returns true if the line is neither empty nor the {@code @debug} directive. */
+    public boolean isTestLine(List<String> arguments) {
+        return !arguments.isEmpty() && !arguments.equals(DEBUG_LINE);
+    }
+
+    /**
+     * Runs all stored lines in order. Comment-only lines are logged, {@code @debug} arms the
+     * breakpoint flag for the next test line, other {@code @} lines go to
+     * {@link #handleParams(String, List)}, and everything else goes to
+     * {@link #handle(int, boolean, String, List)}. An exception thrown by {@code handle}
+     * is reported as an error and the run continues with the next line.
+     * @return this, for chaining
+     */
+    public DataDrivenTestHelper test() {
+        boolean breakpoint = false;
+        for (int i = 0; i < lines.size(); ++i) {
+            List<String> arguments = lines.get(i);
+            String comment = comments.get(i);
+            if (arguments.isEmpty()) {
+                if (!comment.isEmpty()) {
+                    AbstractTestLog.logln(comment);
+                }
+                continue;
+            }
+            if (arguments.equals(DEBUG_LINE)) {
+                breakpoint = true;
+                continue;
+            }
+            if (arguments.get(0).startsWith("@")) {
+                handleParams(comment, arguments);
+                continue;
+            }
+            try {
+                handle(i, breakpoint, comment, arguments);
+            } catch (Exception e) {
+                e.printStackTrace();
+                AbstractTestLog.errln("Illegal data test file entry (" + i + "): " + arguments + " # " + comment);
+            }
+            breakpoint = false;
+        }
+        return this;
+    }
+
+    /**
+     * Reads and parses the data file, storing every line for later use by {@link #test()}
+     * and {@link #appendLines(Appendable)}. Test lines whose argument count is outside
+     * [{@code minArgumentCount}, {@code maxArgumentCount}] are reported as errors and dropped.
+     * After loading, the stored lists are made unmodifiable.
+     * @param classFileIsRelativeTo class used to locate the file
+     * @param file name of the data file
+     * @return this, for chaining
+     * @throws ICUUncheckedIOException if reading or closing the file fails
+     */
+    public DataDrivenTestHelper load(Class<?> classFileIsRelativeTo, String file) {
+        // try-with-resources closes the reader on every path and keeps a read failure as the
+        // primary exception instead of letting a failure in close() replace it.
+        try (BufferedReader in = FileUtilities.openFile(classFileIsRelativeTo, file)) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                line = line.trim();
+                if (line.isEmpty()) {
+                    addLine(Collections.<String>emptyList(), "");
+                    continue;
+                }
+                int hash = line.indexOf('#');
+                String comment = "";      // decorated form, only used in the error message below
+                String commentBase = "";  // bare comment text; this is what gets stored
+                if (hash >= 0) {
+                    commentBase = line.substring(hash+1).trim();
+                    line = line.substring(0,hash).trim();
+                    comment = "# " + commentBase;
+                    if (!line.isEmpty()) {
+                        comment = "\t" + comment;
+                    }
+                }
+                if (line.isEmpty()) {
+                    addLine(Collections.<String>emptyList(), commentBase);
+                    continue;
+                }
+                if (line.startsWith("@")) {
+                    // Store the bare comment text, as for every other kind of line:
+                    // appendLines() adds the "\t# " marker itself, so storing the decorated
+                    // form would write the marker twice.
+                    List<String> keyValue = EQUAL_SPLIT.splitToList(line);
+                    addLine(keyValue, commentBase);
+                    continue;
+                }
+                List<String> arguments = SEMICOLON.splitToList(line);
+                if (arguments.size() < minArgumentCount || arguments.size() > maxArgumentCount) {
+                    AbstractTestLog.errln("Malformed data line:" + line + comment);
+                    continue;
+                }
+                addLine(arguments, commentBase);
+            }
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+        lines = Collections.unmodifiableList(lines); // should do deep unmodifiable...
+        comments = Collections.unmodifiableList(comments);
+        return this;
+    }
+
+    /**
+     * Compares the two values with {@link Objects#equals(Object, Object)} and reports the
+     * outcome through {@code TestFmwk.handleAssert}.
+     * @return the value returned by {@code TestFmwk.handleAssert}
+     */
+    protected boolean assertEquals(String message, Object expected, Object actual) {
+        return TestFmwk.handleAssert(Objects.equals(expected, actual), message, stringFor(expected), stringFor(actual), null, false);
+    }
+
+    /** Formats a value for an assertion message: quoted strings, plain numbers, else {@code Class<value>}. */
+    private final String stringFor(Object obj) {
+        return obj == null ? "null"
+                : obj instanceof String ? "\"" + obj + '"'
+                : obj instanceof Number ? String.valueOf(obj)
+                : obj.getClass().getName() + "<" + obj + ">";
+    }
+
+    /**
+     * Handles one test line.
+     * @param lineNumber index of the line among the stored lines
+     * @param breakpoint true if the preceding line was {@code @debug}
+     * @param commentBase comment text of the line without the leading {@code #}, or ""
+     * @param arguments the semicolon-separated arguments
+     */
+    abstract public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments);
+
+    /**
+     * Handles one {@code @key=value} parameter line. The default implementation rejects
+     * every parameter; subclasses override it to accept the ones they understand.
+     * @param comment comment text of the line, or ""
+     * @param arguments key and value
+     * @throws IllegalArgumentException always, in this default implementation
+     */
+    public void handleParams(String comment, List<String> arguments) {
+        throw new IllegalArgumentException("Unrecognized parameter: " + arguments);
+    }
+
+    /** Returns the stored lines (including empty and parameter lines). */
+    public List<List<String>> getLines() {
+        return lines;
+    }
+}
\ No newline at end of file
package com.ibm.icu.dev.test.util;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
/**
* Test the LocaleMatcher.
- *
+ *
* @author markdavis
*/
@SuppressWarnings("deprecation")
LocaleMatcher matcher;
matcher = new LocaleMatcher("mul, nl");
assertEquals("nl", matcher.getBestMatch("af").toString()); // af => nl
-
+
matcher = new LocaleMatcher("mul, af");
assertEquals("mul", matcher.getBestMatch("nl").toString()); // but nl !=> af
}
}
}
- private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
+ private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
boolean showmessage, int iterations, long comparisonTime) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
+ (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
return delta;
}
-
+
@Test
public void Test8288() {
final LocaleMatcher matcher = newLocaleMatcher("it, en");
assertEquals("it", matcher.getBestMatch("und").toString());
assertEquals("en", matcher.getBestMatch("und, en").toString());
}
+
+ /**
+  * Exercises the technical-preview API of LocaleMatcher: distance(), the
+  * getBestMatch() overload taking a set of desired locales, setDefaultLanguage()
+  * and setFavorScript().
+  */
+ @Test
+ public void TestTechPreview() {
+     final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
+     ULocale und = new ULocale("und");
+     ULocale bulgarian = new ULocale("bg");
+     ULocale russian = new ULocale("ru");
+
+     assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
+     assertEquals("es-ES/DE", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
+
+     Output<ULocale> outputBestDesired = new Output<ULocale>();
+
+     // Typed sets (rather than raw LinkedHashSet) so the calls below are checked by the compiler.
+     ULocale best = matcher.getBestMatch(
+             new LinkedHashSet<ULocale>(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
+     assertEquals(ULocale.ITALIAN, best);
+     assertEquals(null, outputBestDesired.value);
+
+     matcher.setDefaultLanguage(ULocale.JAPANESE);
+     best = matcher.getBestMatch(
+             new LinkedHashSet<ULocale>(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
+     assertEquals(ULocale.JAPANESE, best);
+
+     matcher.setFavorScript(true);
+     best = matcher.getBestMatch(
+             new LinkedHashSet<ULocale>(Arrays.asList(und, bulgarian)), outputBestDesired);
+     assertEquals(russian, best);
+ }
}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.junit.Test;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
+import com.ibm.icu.impl.locale.XLocaleDistance;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Test the XLocaleDistance.
+ *
+ * @author markdavis
+ */
+public class XLocaleDistanceTest extends TestFmwk {
+ private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
+
+ // Distance value that the data file uses for "no match".
+ public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
+
+ private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
+ // The data file is loaded once per test instance; testTiming and testDataDriven both use it.
+ DataDrivenTestHelper tfh = new MyTestFileHandler()
+ .setFramework(this)
+ .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
+
+ /**
+ * Parsed form of one data line: desired; supported; desiredToSupported [; supportedToDesired].
+ * When the fourth field is missing, the reverse distance defaults to the forward distance.
+ */
+ static class Arguments {
+ final ULocale desired;
+ final ULocale supported;
+ final int desiredToSupported;
+ final int supportedToDesired;
+
+ public Arguments(List<String> args) {
+ this.desired = new ULocale.Builder().setLanguageTag(args.get(0)).build(); // use more complicated expression to check syntax
+ this.supported = new ULocale.Builder().setLanguageTag(args.get(1)).build();
+ this.desiredToSupported = Integer.parseInt(args.get(2));
+ this.supportedToDesired = args.size() > 3 ? Integer.parseInt(args.get(3)) : this.desiredToSupported;
+ }
+ }
+
+ /**
+ * Times likely-subtags maximization and the new distance computation over all data lines.
+ * NOTE(review): the calls into the old LocaleMatcher are commented out, so
+ * oldTimeMinusLikely only measures two back-to-back System.nanoTime() calls and the final
+ * assertion compares the new code against the ULocale.addLikelySubtags time alone. The
+ * assertion is also wall-clock dependent; confirm that this is intended.
+ */
+ @Test
+ public void testTiming() {
+ List<Arguments> testArgs = new ArrayList<Arguments>();
+ for (List<String> line : tfh.getLines()) {
+ if (tfh.isTestLine(line)) {
+ testArgs.add(new Arguments(line));
+ }
+ }
+ Arguments[] tests = testArgs.toArray(new Arguments[testArgs.size()]);
+
+ // Not referenced below while the old-matcher calls are commented out.
+ final LocaleMatcher oldLocaleMatcher = new LocaleMatcher("");
+
+ long likelyTime = 0;
+ long newLikelyTime = 0;
+ long newTimeMinusLikely = 0;
+ //long intTime = 0;
+ long oldTimeMinusLikely = 0;
+ final int maxIterations = 1000;
+
+ for (int iterations = maxIterations; iterations > 0; --iterations) {
+ // int count=0;
+ for (Arguments test : tests) {
+ final ULocale desired = test.desired;
+ final ULocale supported = test.supported;
+ //final int desiredToSupported = test.desiredToSupported;
+ //final int supportedToDesired = test.supportedToDesired;
+
+ // old-style maximization
+ long temp = System.nanoTime();
+ final ULocale desiredMax = ULocale.addLikelySubtags(desired);
+ final ULocale supportedMax = ULocale.addLikelySubtags(supported);
+ likelyTime += System.nanoTime()-temp;
+
+ // old-style matching (currently disabled, so this interval is empty)
+ temp = System.nanoTime();
+ //double distOld1 = oldLocaleMatcher.match(desired, desiredMax, supported, supportedMax);
+ //double distOld2 = oldLocaleMatcher.match(supported, supportedMax, desired, desiredMax);
+ oldTimeMinusLikely += System.nanoTime()-temp;
+
+ // new-style maximization
+ temp = System.nanoTime();
+ final LSR desiredLSR = LSR.fromMaximalized(desired);
+ final LSR supportedLSR = LSR.fromMaximalized(supported);
+ newLikelyTime += System.nanoTime()-temp;
+
+ // new-style distance, in both directions; the results themselves are not checked here
+ temp = System.nanoTime();
+ int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.NORMAL);
+ int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.NORMAL);
+ newTimeMinusLikely += System.nanoTime()-temp;
+ }
+ }
+ final long oldTime = oldTimeMinusLikely+likelyTime;
+ final long newTime = newLikelyTime+newTimeMinusLikely;
+ logln("\n");
+ logln("\tlikelyTime:\t" + likelyTime/maxIterations);
+ logln("\toldTime-likelyTime:\t" + oldTimeMinusLikely/maxIterations);
+ logln("totalOld:\t" + oldTime/maxIterations);
+ logln("\tnewLikelyTime:\t" + newLikelyTime/maxIterations);
+ logln("totalNew:\t" + newTime/maxIterations);
+ assertTrue("newTime < 20% of oldTime", newTime * 5 < oldTime);
+ //logln("\tnewIntTime-newLikelyTime-extractTime:\t" + intTime/maxIterations);
+ //logln("totalInt:\t" + (intTime)/maxIterations);
+ }
+
+ /** Walks the default distance table and checks its structure, starting at depth 1. */
+ @Test
+ @SuppressWarnings("deprecation")
+ public void testInternalTable() {
+ checkTables(localeMatcher.internalGetDistanceTable(), "", 1);
+ }
+
+ /**
+ * Recursively checks one level of the distance table: nodes without a sub-table are
+ * expected at depth 3 and to be plain DistanceNode instances, nodes with a sub-table are
+ * expected above depth 3 and to be of another class, and every table is expected to
+ * contain an ANY/ANY entry.
+ * NOTE(review): violations are reported with logln rather than errln; confirm whether they
+ * are meant to fail the test. The title parameter is only passed along, never printed.
+ */
+ @SuppressWarnings("deprecation")
+ private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
+ // Check that ANY, ANY is always present, and that the table has a depth of exactly 3 everyplace.
+ Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
+
+ // must have ANY,ANY
+ boolean haveANYANY = false;
+ for (Entry<String, Set<String>> entry : matches.entrySet()) {
+ String first = entry.getKey();
+ boolean haveANYfirst = first.equals(XLocaleDistance.ANY);
+ for (String second : entry.getValue()) {
+ haveANYANY |= haveANYfirst && second.equals(XLocaleDistance.ANY);
+ DistanceNode distanceNode = internalGetDistanceTable.getInternalNode(first, second);
+ DistanceTable subDistanceTable = distanceNode.getDistanceTable();
+ if (subDistanceTable == null || subDistanceTable.isEmpty()) {
+ // leaf: no further table below this node
+ if (depth != 3) {
+ logln("depth should be 3");
+ }
+ if (distanceNode.getClass() != DistanceNode.class) {
+ logln("should be plain DistanceNode");
+ }
+ } else {
+ // interior node: recurse into the sub-table
+ if (depth >= 3) {
+ logln("depth should be ≤ 3");
+ }
+ if (distanceNode.getClass() == DistanceNode.class) {
+ logln("should NOT be plain DistanceNode");
+ }
+ checkTables(subDistanceTable, first + "," + second + ",", depth+1);
+ }
+ }
+ }
+ if (!haveANYANY) {
+ logln("ANY-ANY not in" + matches);
+ }
+ }
+
+ /** Prints the default distance table when the test is run in verbose mode. */
+ @Test
+ public void testShowDistanceTable() {
+ if (isVerbose()) {
+ System.out.println(XLocaleDistance.getDefault().toString(false));
+ }
+ }
+
+ /** Runs every line of the data file; optionally prints the file in normalized form. */
+ @Test
+ public void testDataDriven() throws IOException {
+ tfh.test();
+ if (REFORMAT) {
+ System.out.println(tfh.appendLines(new StringBuffer()));
+ }
+ }
+
+ /**
+ * Data-file handler: each test line is checked in both directions against the default
+ * XLocaleDistance; @DistanceOption and @Threshold lines change the settings used for
+ * the lines that follow.
+ */
+ class MyTestFileHandler extends DataDrivenTestHelper {
+ final XLocaleDistance distance = XLocaleDistance.getDefault();
+ Output<ULocale> bestDesired = new Output<ULocale>();
+ private DistanceOption distanceOption = DistanceOption.NORMAL;
+ private Integer threshold = distance.getDefaultScriptDistance();
+
+ @Override
+ public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
+ if (breakpoint) {
+ breakpoint = false; // put debugger breakpoint here to break at @debug in test file
+ }
+ Arguments args = new Arguments(arguments);
+ int supportedToDesiredActual = distance.distance(args.supported, args.desired, threshold, distanceOption);
+ int desiredToSupportedActual = distance.distance(args.desired, args.supported, threshold, distanceOption);
+ String desiredTag = args.desired.toLanguageTag();
+ String supportedTag = args.supported.toLanguageTag();
+ final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
+ // The reverse direction is only checked when the forward direction matched.
+ if (assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment, args.desiredToSupported, desiredToSupportedActual)) {
+ assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment, args.supportedToDesired, supportedToDesiredActual);
+ }
+ }
+ @Override
+ public void handleParams(String comment, List<String> arguments) {
+ String switchArg = arguments.get(0);
+ if (switchArg.equals("@DistanceOption")) {
+ distanceOption = DistanceOption.valueOf(arguments.get(1));
+ } else if (switchArg.equals("@Threshold")) {
+ threshold = Integer.valueOf(arguments.get(1));
+ } else {
+ // unknown parameter: the base class rejects it
+ super.handleParams(comment, arguments);
+ }
+ return;
+ }
+ }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.junit.Test;
+
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.XCldrStub.Joiner;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XLocaleDistance;
+import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.LocalePriorityList;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Test the XLocaleMatcher.
+ *
+ * @author markdavis
+ */
+public class XLocaleMatcherTest extends TestFmwk {
+ private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
+
+ // Distance that testComputeDistance_monkeyTest uses as the upper bound for two locales
+ // that differ only in their region.
+ private static final int REGION_DISTANCE = 4;
+
+ // Default distance data; testPerf returns early when this is null.
+ private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
+
+ /** Creates a matcher from an empty supported-locales string. */
+ private XLocaleMatcher newXLocaleMatcher() {
+ return new XLocaleMatcher("");
+ }
+
+ private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
+ return new XLocaleMatcher(build);
+ }
+
+ private XLocaleMatcher newXLocaleMatcher(String string) {
+ return new XLocaleMatcher(LocalePriorityList.add(string).build());
+ }
+
+ /** Creates a matcher for the given supported locales with an explicit threshold distance. */
+ private XLocaleMatcher newXLocaleMatcher(LocalePriorityList supportedLocales, int thresholdDistance) {
+     return XLocaleMatcher
+             .builder()
+             .setSupportedLocales(supportedLocales)
+             .setThresholdDistance(thresholdDistance)
+             .build();
+ }
+
+ /** Creates a matcher for the given supported locales with an explicit threshold distance and distance option. */
+ private XLocaleMatcher newXLocaleMatcher(
+         LocalePriorityList supportedLocales, int thresholdDistance, DistanceOption option) {
+     return XLocaleMatcher.builder()
+             .setSupportedLocales(supportedLocales)
+             .setThresholdDistance(thresholdDistance)
+             .setDistanceOption(option)
+             .build();
+ }
+
+ // public void testParentLocales() {
+ // // find all the regions that have a closer relation because of an explicit parent
+ // Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
+ // explicitParents.remove("root");
+ // Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
+ // for (String locale : explicitParents) {
+ // while (true) {
+ // locale = LocaleIDParser.getParent(locale);
+ // if (locale == null || locale.equals("root")) {
+ // break;
+ // }
+ // otherParents.add(locale);
+ // }
+ // }
+ // otherParents.remove("root");
+ //
+ // for (String locale : CONFIG.getCldrFactory().getAvailable()) {
+ // String parentId = LocaleIDParser.getParent(locale);
+ // String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
+ // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
+ // continue;
+ // }
+ // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
+ // }
+ // }
+
+
+// TBD reenable with override data
+// public void testOverrideData() {
+// double threshold = 0.05;
+// XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
+// .addDistance("br", "fr", 10, true)
+// .addDistance("es", "cy", 10, true);
+// logln(XLocaleMatcherData.toString());
+//
+// final XLocaleMatcher matcher = newXLocaleMatcher(
+// LocalePriorityList
+// .add(ULocale.ENGLISH)
+// .add(ULocale.FRENCH)
+// .add(ULocale.UK)
+// .build(), XLocaleMatcherData, threshold);
+// logln(matcher.toString());
+//
+// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
+// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
+// // way
+// }
+
+
+ private void assertEquals(Object expected, Object string) {
+ assertEquals("", expected, string);
+ }
+
+ /**
+  * If all the base languages are the same, then each sublocale matches
+  * itself most closely. The available locales are walked in order and collected
+  * into runs that share a base language; each run is handed to check().
+  */
+ @Test
+ public void testExactMatches() {
+     final TreeSet<ULocale> sameLanguage = new TreeSet<ULocale>();
+     String currentBase = "";
+     for (ULocale locale : ULocale.getAvailableLocales()) {
+         final String base = locale.getLanguage();
+         final boolean startsNewRun = !currentBase.equals(base);
+         if (startsNewRun) {
+             // flush the run collected so far before starting the next one
+             check(sameLanguage);
+             sameLanguage.clear();
+             currentBase = base;
+         }
+         sameLanguage.add(locale);
+     }
+     // flush the final run
+     check(sameLanguage);
+ }
+
+ private void check(Set<ULocale> sorted) {
+ if (sorted.isEmpty()) {
+ return;
+ }
+ check2(sorted);
+ ULocale first = sorted.iterator().next();
+ ULocale max = ULocale.addLikelySubtags(first);
+ sorted.add(max);
+ check2(sorted);
+ }
+
+ /**
+ * @param sorted
+ */
+ private void check2(Set<ULocale> sorted) {
+ // TODO Auto-generated method stub
+ logln("Checking: " + sorted);
+ XLocaleMatcher matcher = newXLocaleMatcher(
+ LocalePriorityList.add(
+ sorted.toArray(new ULocale[sorted.size()]))
+ .build());
+ for (ULocale loc : sorted) {
+ String stringLoc = loc.toString();
+ assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+ }
+ }
+
+ /**
+  * Monkey test: picks 1000 random pairs of ISO country codes and checks the distance between
+  * two locales that differ only in region. Identical regions must give 0; different regions
+  * must give a value in (0, REGION_DISTANCE]; a pair involving "ZZ" must give exactly
+  * REGION_DISTANCE.
+  */
+ @Test
+ public void testComputeDistance_monkeyTest() {
+     String[] codes = ULocale.getISOCountries();
+     Random random = new Random();
+     XLocaleMatcher lm = newXLocaleMatcher();
+     for (int i = 0; i < 1000; ++i) {
+         String x = codes[random.nextInt(codes.length)];
+         String y = codes[random.nextInt(codes.length)];
+         // Keep the distance as an int. Widening it to double would make the
+         // assertEquals calls below box to (Integer, Double), which are never equal.
+         int d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
+         if (x.equals("ZZ") || y.equals("ZZ")) {
+             assertEquals("dist(" + x + "," + y + ") = " + REGION_DISTANCE, REGION_DISTANCE, d);
+         } else if (x.equals(y)) {
+             assertEquals("dist(x,x) = 0", 0, d);
+         } else {
+             assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
+             assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
+         }
+     }
+ }
+
+
+ @Test
+ public void testPerf() {
+ if (LANGUAGE_MATCHER_DATA == null) {
+ return; // skip except when testing data
+ }
+ final ULocale desired = new ULocale("sv");
+
+ final String shortList = "en, sv";
+ final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
+ final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, 
fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, 
so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+
+ final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
+ final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
+ final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
+
+ final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
+ final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
+ final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
+
+ //XLocaleMatcher.DEBUG = true;
+ ULocale expected = new ULocale("sv");
+ assertEquals(expected, matcherShort.getBestMatch(desired));
+ assertEquals(expected, matcherLong.getBestMatch(desired));
+ assertEquals(expected, matcherVeryLong.getBestMatch(desired));
+ //XLocaleMatcher.DEBUG = false;
+
+ long timeShortNew=0;
+ long timeMediumNew=0;
+ long timeLongNew=0;
+
+ for (int i = 0; i < 2; ++i) {
+ int iterations = i == 0 ? 1000 : 1000000;
+ boolean showMessage = i != 0;
+ timeShortNew = timeXLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations);
+ timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
+ timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
+ }
+
+ long timeShortOld=0;
+ long timeMediumOld=0;
+ long timeLongOld=0;
+
+ for (int i = 0; i < 2; ++i) {
+ int iterations = i == 0 ? 1000 : 100000;
+ boolean showMessage = i != 0;
+ timeShortOld = timeLocaleMatcher("Old Duration (few supported):\t", desired, matcherShortOld, showMessage, iterations);
+ timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
+ timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
+ }
+
+ assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
+ assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
+ assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
+
+ }
+
+ private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
+ boolean showmessage, int iterations) {
+ long start = System.nanoTime();
+ for (int i = iterations; i > 0; --i) {
+ matcher.getBestMatch(desired);
+ }
+ long delta = System.nanoTime() - start;
+ if (showmessage) logln(title + (delta / iterations) + " nanos");
+ return (delta / iterations);
+ }
+
+ private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
+ boolean showmessage, int iterations) {
+ long start = System.nanoTime();
+ for (int i = iterations; i > 0; --i) {
+ matcher.getBestMatch(desired);
+ }
+ long delta = System.nanoTime() - start;
+ if (showmessage) logln(title + (delta / iterations) + " nanos");
+ return (delta / iterations);
+ }
+
+ /**
+  * Runs every line of {@code data/localeMatcherTest.txt} through a
+  * {@link MyTestFileHandler}. When {@code REFORMAT} is set, the lines held by
+  * the helper are also printed to standard output.
+  */
+ @Test
+ public void testDataDriven() throws IOException {
+     DataDrivenTestHelper helper = new MyTestFileHandler()
+             .setFramework(this)
+             .run(XLocaleMatcherTest.class, "data/localeMatcherTest.txt");
+     if (!REFORMAT) {
+         return;
+     }
+     StringBuilder reformatted = new StringBuilder();
+     System.out.println(helper.appendLines(reformatted));
+ }
+
+ // Splits on a comma followed by optional whitespace, or on a run of whitespace; trimResults() is applied to the pieces.
+ private static final Splitter COMMA_SPACE = Splitter.on(Pattern.compile(",\\s*|\\s+")).trimResults();
+ // Joins items with ", " between them.
+ private static final Joiner JOIN_COMMA_SPACE = Joiner.on(", ");
+ // Frozen set holding the ASCII digits 0-9.
+ private static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
+
+ /**
+  * Handler for the data-driven matcher test. Each data line supplies
+  * {@code supported ; desired ; expected [; expectedUi]}; the parameter lines
+  * {@code @DistanceOption} and {@code @Threshold} change the settings used to
+  * build the matcher for the lines that follow.
+  */
+ class MyTestFileHandler extends DataDrivenTestHelper {
+
+     /** Output holder handed to {@code getBestMatch(desiredList, bestDesired)}. */
+     Output<ULocale> bestDesired = new Output<ULocale>();
+     /** Distance option for subsequent lines; set by {@code @DistanceOption}. */
+     DistanceOption distanceOption = DistanceOption.NORMAL;
+     /** Threshold for subsequent lines; set by {@code @Threshold}. */
+     int threshold = -1;
+
+     /**
+      * Checks one data line: builds a matcher from the supported list, matches
+      * the desired list, and compares the resulting language tag(s) with the
+      * expected column(s). The literal {@code null} in an expected column
+      * stands for a null result.
+      */
+     @Override
+     public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
+         // Column 0: supported locales, re-joined with ", " before parsing.
+         List<String> supportedItems = COMMA_SPACE.splitToList(arguments.get(0));
+         LocalePriorityList supportedList =
+                 LocalePriorityList.add(JOIN_COMMA_SPACE.join(supportedItems)).build();
+
+         // Column 1: desired locales, normalized the same way.
+         Iterable<String> desiredItems = COMMA_SPACE.split(arguments.get(1));
+         LocalePriorityList desiredList =
+                 LocalePriorityList.add(JOIN_COMMA_SPACE.join(desiredItems)).build();
+
+         // Column 2: expected best supported locale.
+         String expectedId = arguments.get(2);
+         String expectedLanguageTag = null;
+         if (!expectedId.equals("null")) {
+             expectedLanguageTag = new ULocale(expectedId).toLanguageTag();
+         }
+
+         // Optional column 3: expected result of combine().
+         String expectedUi = null;
+         if (arguments.size() >= 4) {
+             expectedUi = arguments.get(3);
+         }
+         String expectedUiLanguageTag = null;
+         if (expectedUi != null && !expectedUi.equals("null")) {
+             expectedUiLanguageTag = new ULocale(expectedUi).toLanguageTag();
+         }
+
+         if (breakpoint) {
+             breakpoint = false; // put debugger breakpoint here to break at @debug in test file
+         }
+
+         // Use the explicit-settings factory only when a setting differs from the defaults.
+         XLocaleMatcher matcher;
+         if (threshold >= 0 || distanceOption != DistanceOption.NORMAL) {
+             matcher = newXLocaleMatcher(supportedList, threshold, distanceOption);
+         } else {
+             matcher = newXLocaleMatcher(supportedList);
+         }
+         String label = "(" + lineNumber + ") " + commentBase;
+
+         ULocale bestSupported;
+         if (expectedUi == null) {
+             bestSupported = matcher.getBestMatch(desiredList);
+         } else {
+             bestSupported = matcher.getBestMatch(desiredList, bestDesired);
+             ULocale bestUi = XLocaleMatcher.combine(bestSupported, bestDesired.value);
+             String bestUiTag = null;
+             if (bestUi != null) {
+                 bestUiTag = bestUi.toLanguageTag();
+             }
+             assertEquals(label + " (UI)", expectedUiLanguageTag, bestUiTag);
+         }
+
+         String bestMatchLanguageTag = null;
+         if (bestSupported != null) {
+             bestMatchLanguageTag = bestSupported.toLanguageTag();
+         }
+         assertEquals(label, expectedLanguageTag, bestMatchLanguageTag);
+     }
+
+     /**
+      * Applies {@code @DistanceOption} and {@code @Threshold} parameter lines;
+      * anything else is forwarded to the superclass.
+      */
+     @Override
+     public void handleParams(String comment, List<String> arguments) {
+         String directive = arguments.get(0);
+         if (directive.equals("@DistanceOption")) {
+             distanceOption = DistanceOption.valueOf(arguments.get(1));
+             return;
+         }
+         if (directive.equals("@Threshold")) {
+             threshold = Integer.parseInt(arguments.get(1));
+             return;
+         }
+         super.handleParams(comment, arguments);
+     }
+ }
+}
--- /dev/null
+# © 2017 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# Data-driven test for XLocaleDistance.
+# Format
+# • supported ; desired ; dist(s,d) ; dist(d,x)
+# • argument 4 only used when different
+# • 100 = fail
+# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
+# The test code also supports reformatting this file, by setting the REFORMAT flag.
+
+en-CA ; en-CA ; 0
+ar-MK ; en-CA ; 100
+
+iw ; he ; 0
+zh ; cmn ; 0
+
+# fallback languages get closer distances, between script (40) and region (4)
+
+@debug
+to ; en ; 14 ; 100
+no ; no-DE ; 4
+nn ; no ; 10
+no-DE ; nn ; 14
+no ; no ; 0
+no ; da ; 12
+da ; zh-Hant ; 100
+zh-Hant ; zh-Hans ; 23 ; 19
+zh-Hans ; en ; 100
+
+en-US ; en-AU ; 5 # across clusters
+en-VI ; en-GU ; 4 # within cluster
+en-AU ; en-CA ; 4 # within cluster
+
+# testScript
+en-CA ; en-Cyrl ; 100
+en-Cyrl ; es-MX ; 100
+
+hr ; sr ; 100
+#hr ; sr-Latn ; 8
+sr ; sr-Latn ; 5
+
+# test419
+# Should be as good as any in cluster
+es-MX ; es-AR ; 4
+@debug
+es-MX ; es-419 ; 4
+es-MX ; es-MX ; 0
+es-MX ; es-ES ; 5
+es-MX ; es-PT ; 5
+es-MX ; es-150 ; 5
+es-419 ; es-AR ; 4
+es-419 ; es-419 ; 0
+es-419 ; es-MX ; 4
+es-419 ; es-ES ; 5
+es-419 ; es-PT ; 5
+es-419 ; es-150 ; 5
+es-ES ; es-AR ; 5
+es-ES ; es-419 ; 5
+es-ES ; es-MX ; 5
+es-ES ; es-ES ; 0
+es-ES ; es-PT ; 4
+es-419 ; es-150 ; 5
+
+# testEuEc
+xx-Xxxx-EC; xx-Xxxx-EU; 4
--- /dev/null
+# © 2017 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# Data-driven test for the XLocaleMatcher.
+# Format
+# • Everything after "#" is a comment
+# • Arguments are separated by ";". They are:
+
+# supported ; desired ; expected
+
+# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
+# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
+# The test code also supports reformatting this file, by setting the REFORMAT flag.
+
+##################################################
+# testParentLocales
+
+# es-419, es-AR, and es-MX are in a cluster; es is in a different one
+
+@debug
+es-419, es-ES ; es-AR ; es-419
+es-ES, es-419 ; es-AR ; es-419
+
+es-419, es ; es-AR ; es-419
+es, es-419 ; es-AR ; es-419
+
+es-MX, es ; es-AR ; es-MX
+es, es-MX ; es-AR ; es-MX
+
+# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
+
+en-GB, en-US ; en-AU ; en-GB
+en-US, en-GB ; en-AU ; en-GB
+
+en-GB, en ; en-AU ; en-GB
+en, en-GB ; en-AU ; en-GB
+
+en-NZ, en-US ; en-AU ; en-NZ
+en-US, en-NZ ; en-AU ; en-NZ
+
+en-NZ, en ; en-AU ; en-NZ
+en, en-NZ ; en-AU ; en-NZ
+
+# pt-AO and pt-PT in one cluster; pt-BR in another
+
+pt-PT, pt-BR ; pt-AO ; pt-PT
+pt-BR, pt-PT ; pt-AO ; pt-PT
+
+pt-PT, pt ; pt-AO ; pt-PT
+pt, pt-PT ; pt-AO ; pt-PT
+
+zh-MO, zh-TW ; zh-HK ; zh-MO
+zh-TW, zh-MO ; zh-HK ; zh-MO
+
+zh-MO, zh-TW ; zh-HK ; zh-MO
+zh-TW, zh-MO ; zh-HK ; zh-MO
+
+zh-MO, zh-CN ; zh-HK ; zh-MO
+zh-CN, zh-MO ; zh-HK ; zh-MO
+
+zh-MO, zh ; zh-HK ; zh-MO
+zh, zh-MO ; zh-HK ; zh-MO
+
+##################################################
+# testChinese
+
+zh-CN, zh-TW, iw ; zh-Hant-TW ; zh-TW
+zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
+zh-CN, zh-TW, iw ; zh-TW ; zh-TW
+zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
+zh-CN, zh-TW, iw ; zh-CN ; zh-CN
+zh-CN, zh-TW, iw ; zh ; zh-CN
+
+##################################################
+# testenGB
+
+fr, en, en-GB, es-419, es-MX, es ; en-NZ ; en-GB
+fr, en, en-GB, es-419, es-MX, es ; es-ES ; es
+fr, en, en-GB, es-419, es-MX, es ; es-AR ; es-419
+fr, en, en-GB, es-419, es-MX, es ; es-MX ; es-MX
+
+##################################################
+# testFallbacks
+
+91, en, hi ; sa ; hi
+
+##################################################
+# testBasics
+
+fr, en-GB, en ; en-GB ; en-GB
+fr, en-GB, en ; en ; en
+fr, en-GB, en ; fr ; fr
+fr, en-GB, en ; ja ; fr # return first if no match
+
+##################################################
+# testFallback
+
+# check that script fallbacks are handled right
+
+zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
+zh-CN, zh-TW, iw ; zh ; zh-CN
+zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
+zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
+zh-CN, zh-TW, iw ; he-IT ; iw
+
+##################################################
+# testSpecials
+
+# check that nearby languages are handled
+
+en, fil, ro, nn ; tl ; fil
+en, fil, ro, nn ; mo ; ro
+en, fil, ro, nn ; nb ; nn
+
+# make sure default works
+
+en, fil, ro, nn ; ja ; en
+
+##################################################
+# testRegionalSpecials
+
+# verify that en-AU is closer to en-GB than to en (which is en-US)
+
+en, en-GB, es, es-419 ; es-MX ; es-419
+en, en-GB, es, es-419 ; en-AU ; en-GB
+en, en-GB, es, es-419 ; es-ES ; es
+
+##################################################
+# testHK
+
+# HK and MO are closer to each other for Hant than to TW
+
+zh, zh-TW, zh-MO ; zh-HK ; zh-MO
+zh, zh-TW, zh-HK ; zh-MO ; zh-HK
+
+##################################################
+# testMatch-exact
+
+# see localeDistance.txt
+
+##################################################
+# testMatch-none
+
+# see localeDistance.txt
+
+##################################################
+# testMatch-matchOnMaximized
+
+zh, zh-Hant ; und-TW ; zh-Hant # und-TW should be closer to zh-Hant than to zh
+en-Hant-TW, und-TW ; zh-Hant ; und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
+en-Hant-TW, und-TW ; zh ; und-TW # zh should be closer to und-TW than to en-Hant-TW
+
+##################################################
+# testMatchGrandfatheredCode
+
+fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
+
+##################################################
+# testGetBestMatchForList-exactMatch
+fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
+
+##################################################
+# testGetBestMatchForList-simpleVariantMatch
+fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
+
+# Fallback.
+
+fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ; ja-Jpan-JP, en-AU ; ja # Match for ja-Jpan-JP (maximized already)
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
+
+en, ja ; ja-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
+
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ; ja-Jpan-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-noMatchOnMaximized
+# Regression test for http://b/5714572 .
+# de maximizes to de-DE. Pick the exact match for the secondary language instead.
+en, de, fr, ja ; de-CH, fr ; de
+
+##################################################
+# testBestMatchForTraditionalChinese
+
+# Scenario: An application that only supports Simplified Chinese (and some other languages),
+# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
+# zh-Hans, it wouldn't make much of a difference.
+
+# The script distance (simplified vs. traditional Han) is considered small enough
+# to be an acceptable match. The regional difference is considered almost insignificant.
+
+fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
+fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
+
+# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
+# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
+# change your call to getBestMatch to include a 2nd language preference.
+# "en" is a better match since its distance to "en-US" is closer than the distance
+# from "zh-TW" to "zh-CN" (script distance).
+
+fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
+fr, zh-Hans-CN, en-US ; zh-Hant-CN, en, en ; en-US
+fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
+
+##################################################
+# testUndefined
+# When the undefined language doesn't match anything in the list,
+# getBestMatch returns the default, as usual.
+
+it, fr ; und ; it
+
+# When it *does* occur in the list, bestMatch returns it, as expected.
+it, und ; und ; und
+
+# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
+# tags, the undefined language would normally match English. But that would produce the
+# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
+# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
+
+# To avoid that, we change the matcher's definitions of max
+# so that max("und")="und". That produces the following, more desirable
+# results:
+
+it, en ; und ; it
+it, und ; en ; it
+
+##################################################
+# testGetBestMatch-regionDistance
+
+es-AR, es ; es-MX ; es-AR
+fr, en, en-GB ; en-CA ; en-GB
+de-AT, de-DE, de-CH ; de ; de-DE
+
+##################################################
+# testAsymmetry
+
+mul, nl ; af ; nl # af => nl
+mul, af ; nl ; mul # but nl !=> af
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized2
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
+
+fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja # Match for ja-JP, with likely region subtag
+
+# Check that if the preference is maximized already, it works as well.
+
+fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja # Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-closeEnoughMatchOnMaximized
+
+en-GB, en, de, fr, ja ; de-CH, fr ; de
+en-GB, en, de, fr, ja ; en-US, ar, nl, de, ja ; en
+
+##################################################
+# testGetBestMatchForPortuguese
+
+# pt might be supported and not pt-PT
+
+# European user who prefers Spanish over Brazilian Portuguese as a fallback.
+
+pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
+pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT # pt implicit
+
+# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
+# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
+# matchers as "pt-BR" is a much more common language.
+
+pt-PT, pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
+pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
+pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT
+pt-PT, pt, es, es-419 ; pt, es-419, pt-PT ; pt
+
+pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
+
+# Code that adds the user's country can get "pt-US" for a user's language.
+# That should fall back to "pt-BR".
+
+pt-PT, pt-BR, es, es-419 ; pt-US, pt-PT ; pt-BR
+pt-PT, pt, es, es-419 ; pt-US, pt-PT, pt ; pt # pt-BR implicit
+
+##################################################
+# testVariantWithScriptMatch 1 and 2
+
+fr, en, sv ; en-GB ; en
+fr, en, sv ; en-GB ; en
+en, sv ; en-GB, sv ; en
+
+##################################################
+# testLongLists
+
+en, sv ; sv ; sv
+af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; sv ; sv
+af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, 
fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, 
sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; sv ; sv
+
+##################################################
+# test8288
+
+it, en ; und ; it
+it, en ; und, en ; en
+
+# examples from
+# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
+# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+##################################################
+# testUnHack
+
+en-NZ, en-IT ; en-US ; en-NZ
+
+##################################################
+# testEmptySupported => null
+ ; en ; null
+
+##################################################
+# testVariantsAndExtensions
+##################################################
+# tests the .combine() method
+
+und, fr ; fr-BE-fonipa ; fr ; fr-BE-fonipa
+und, fr-CA ; fr-BE-fonipa ; fr-CA ; fr-BE-fonipa
+und, fr-fonupa ; fr-BE-fonipa ; fr-fonupa ; fr-BE-fonipa
+und, no ; nn-BE-fonipa ; no ; no-BE-fonipa
+und, en-GB-u-sd-gbsct ; en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; en-GB-u-sd-gbsct ; en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
+
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr-PSCRACK ; fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr ; fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; de-CH ; de-PSCRACK
+
+##################################################
+# testClusters
+# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
+
+und, es, es-MA, es-MX, es-419 ; es-AR ; es-419
+und, es-MA, es, es-419, es-MX ; es-AR ; es-419
+und, es, es-MA, es-MX, es-419 ; es-EA ; es
+und, es-MA, es, es-419, es-MX ; es-EA ; es
+
+# of course, fall back to within cluster
+
+und, es, es-MA, es-MX ; es-AR ; es-MX
+und, es-MA, es, es-MX ; es-AR ; es-MX
+und, es-MA, es-MX, es-419 ; es-EA ; es-MA
+und, es-MA, es-419, es-MX ; es-EA ; es-MA
+
+# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
+
+und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB
+und, en-GU, en, en-GB, en-IN ; en-ZA ; en-GB
+und, en, en-GU, en-IN, en-GB ; en-VI ; en
+und, en-GU, en, en-GB, en-IN ; en-VI ; en
+
+# of course, fall back to within cluster
+
+und, en, en-GU, en-IN ; en-ZA ; en-IN
+und, en-GU, en, en-IN ; en-ZA ; en-IN
+und, en-GU, en-IN, en-GB ; en-VI ; en-GU
+und, en-GU, en-GB, en-IN ; en-VI ; en-GU
+
+##################################################
+# testThreshold
+@Threshold=60
+
+50, und, fr-CA-fonupa ; fr-BE-fonipa ; fr-CA-fonupa ; fr-BE-fonipa
+50, und, fr-Cyrl-CA-fonupa ; fr-BE-fonipa ; fr-Cyrl-CA-fonupa ; fr-Cyrl-BE-fonipa
+
+@Threshold=-1 # restore
+
+##################################################
+# testScriptFirst
+@DistanceOption=SCRIPT_FIRST
+@debug
+
+ru, fr ; zh, pl ; fr
+ru, fr ; zh-Cyrl, pl ; ru
+#hr, en-Cyrl; sr ; en-Cyrl
+da, ru, hr; sr ; ru
\ No newline at end of file