* @internal ICU 3.0
*/
public static final String[] getKeywordValues(String baseName, String keyword) {
- Set<String> keywords = new HashSet<String>();
+ Set<String> keywords = new HashSet<>();
ULocale locales[] = getAvailEntry(baseName, ICU_DATA_CLASS_LOADER).getULocaleList();
int i;
return result;
}
+    /**
+     * Returns a value object positioned on the resource at {@code path}.
+     * An empty path selects this bundle itself; any other path is looked up
+     * via {@code findResourceWithFallback}.
+     *
+     * @param path resource path, or the empty string for this bundle
+     * @return a value backed by the found bundle's reader and resource
+     * @throws MissingResourceException if the path lookup returns null
+     */
+    public UResource.Value getValueWithFallback(String path) throws MissingResourceException {
+        ICUResourceBundle found =
+                path.isEmpty() ? this : findResourceWithFallback(path, this, null);
+        if (found == null) {
+            String message = "Can't find resource for bundle "
+                    + this.getClass().getName() + ", key " + getType();
+            throw new MissingResourceException(message, path, getKey());
+        }
+        ReaderValue result = new ReaderValue();
+        ICUResourceBundleImpl foundImpl = (ICUResourceBundleImpl) found;
+        result.reader = foundImpl.wholeBundle.reader;
+        result.res = foundImpl.getResource();
+        return result;
+    }
+
public void getAllItemsWithFallbackNoFail(String path, UResource.Sink sink) {
try {
getAllItemsWithFallback(path, sink);
* @return the list of converted ULocales
*/
public static final Locale[] getLocaleList(ULocale[] ulocales) {
- ArrayList<Locale> list = new ArrayList<Locale>(ulocales.length);
- HashSet<Locale> uniqueSet = new HashSet<Locale>();
+ ArrayList<Locale> list = new ArrayList<>(ulocales.length);
+ HashSet<Locale> uniqueSet = new HashSet<>();
for (int i = 0; i < ulocales.length; i++) {
Locale loc = ulocales[i].toLocale();
if (!uniqueSet.contains(loc)) {
private static Set<String> createFullLocaleNameSet(String baseName, ClassLoader loader) {
String bn = baseName.endsWith("/") ? baseName : baseName + "/";
- Set<String> set = new HashSet<String>();
+ Set<String> set = new HashSet<>();
String skipScan = ICUConfig.get("com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan", "false");
if (!skipScan.equalsIgnoreCase("true")) {
// scan available locale resources under the base url first
}
private static Set<String> createLocaleNameSet(String baseName, ClassLoader loader) {
- HashSet<String> set = new HashSet<String>();
+ HashSet<String> set = new HashSet<>();
addLocaleIDsFromIndexBundle(baseName, loader, set);
return Collections.unmodifiableSet(set);
}
String bundleName;
String rpath = wholeBundle.reader.getAlias(_resource);
if (aliasesVisited == null) {
- aliasesVisited = new HashMap<String, String>();
+ aliasesVisited = new HashMap<>();
}
if (aliasesVisited.get(rpath) != null) {
throw new IllegalArgumentException(
}
return false;
}
+        /**
+         * Looks up {@code key} in this table and, when present, points {@code value}
+         * at the matching item. {@code value} is left untouched when the key is absent.
+         */
+        @Override
+        public boolean findValue(CharSequence key, UResource.Value value) {
+            ReaderValue target = (ReaderValue) value;
+            int index = findTableItem(target.reader, key);
+            if (index < 0) {
+                return false;
+            }
+            target.res = getContainerResource(target.reader, index);
+            return true;
+        }
}
private static final class Table1632 extends Table {
@Override
*/
public interface Table {
/**
- * @return The number of items in the array resource.
+ * @return The number of items in the table resource.
*/
public int getSize();
/**
- * @param i Array item index.
+ * @param i Table item index.
* @param key Output-only, receives the key of the i'th item.
* @param value Output-only, receives the value of the i'th item.
* @return true if i is non-negative and less than getSize().
*/
public boolean getKeyAndValue(int i, Key key, Value value);
+ /**
+ * @param key Key string to find in the table.
+ * @param value Output-only, receives the value of the item with that key.
+ * @return true if the table contains the key.
+ */
+ public boolean findValue(CharSequence key, Value value);
}
/**
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.Objects;
+
+final class LSR {
+ static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
+
+ final String language;
+ final String script;
+ final String region;
+ /** Index for region, negative if ill-formed. @see indexForRegion */
+ final int regionIndex;
+
+ LSR(String language, String script, String region) {
+ this.language = language;
+ this.script = script;
+ this.region = region;
+ regionIndex = indexForRegion(region);
+ }
+
+ /**
+ * Returns a non-negative index for a well-formed region code.
+ * Do not rely on a particular region->index mapping; it may change.
+ * Returns -1 for ill-formed strings.
+ */
+ static final int indexForRegion(String region) {
+ if (region.length() == 2) {
+ int a = region.charAt(0) - 'A';
+ if (a < 0 || 25 < a) { return -1; }
+ int b = region.charAt(1) - 'A';
+ if (b < 0 || 25 < b) { return -1; }
+ return 26 * a + b + 1000;
+ } else if (region.length() == 3) {
+ int a = region.charAt(0) - '0';
+ if (a < 0 || 9 < a) { return -1; }
+ int b = region.charAt(1) - '0';
+ if (b < 0 || 9 < b) { return -1; }
+ int c = region.charAt(2) - '0';
+ if (c < 0 || 9 < c) { return -1; }
+ return (10 * a + b) * 10 + c;
+ }
+ return -1;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder(language);
+ if (!script.isEmpty()) {
+ result.append('-').append(script);
+ }
+ if (!region.isEmpty()) {
+ result.append('-').append(region);
+ }
+ return result.toString();
+ }
+ @Override
+ public boolean equals(Object obj) {
+ LSR other;
+ return this == obj ||
+ (obj != null
+ && obj.getClass() == this.getClass()
+ && language.equals((other = (LSR) obj).language)
+ && script.equals(other.script)
+ && region.equals(other.region));
+ }
+ @Override
+ public int hashCode() {
+ return Objects.hash(language, script, region);
+ }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.BytesTrieBuilder;
+import com.ibm.icu.util.ICUException;
+
+/**
+ * Builds data for XLikelySubtags.
+ * Reads source data from ICU resource bundles.
+ */
+class LikelySubtagsBuilder {
+ private static final boolean DEBUG_OUTPUT = false;
+
+    /** Opens the bundle {@code name} under ICUData.ICU_BASE_NAME with OpenType.DIRECT. */
+    private static ICUResourceBundle getSupplementalDataBundle(String name) {
+        ClassLoader loader = ICUResourceBundle.ICU_DATA_CLASS_LOADER;
+        return ICUResourceBundle.getBundleInstance(
+                ICUData.ICU_BASE_NAME, name, loader, ICUResourceBundle.OpenType.DIRECT);
+    }
+
+ /**
+ * Reads the alias table of one type from the "metadata" bundle and keeps the
+ * simple (no "_" in source or replacement) mappings in both directions.
+ */
+ private static final class AliasesBuilder {
+ /** Maps each accepted alias to the first space-separated word of its replacement. */
+ final Map<String, String> toCanonical = new HashMap<>();
+ /** Inverse of toCanonical; assigned at the end of the constructor. */
+ final Multimap<String, String> toAliases;
+
+ /**
+ * Returns the aliases recorded for {@code canonical}, or a singleton set
+ * containing {@code canonical} itself when the inverted map has no entry for it.
+ */
+ public Set<String> getAliases(String canonical) {
+ Set<String> aliases = toAliases.get(canonical);
+ return aliases == null ? Collections.singleton(canonical) : aliases;
+ }
+
+ /**
+ * @param type Alias type; used as the last path segment of "alias/" + type.
+ */
+ public AliasesBuilder(String type) {
+ ICUResourceBundle metadata = getSupplementalDataBundle("metadata");
+ UResource.Value value = metadata.getValueWithFallback("alias/" + type);
+ UResource.Table aliases = value.getTable();
+ UResource.Key key = new UResource.Key();
+ // Note: the single value object is reused as the output of every lookup below,
+ // so each table must be fetched from it before the next lookup overwrites it.
+ for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
+ String aliasFrom = key.toString();
+ if (aliasFrom.contains("_")) {
+ continue; // only simple aliasing
+ }
+ UResource.Table table = value.getTable();
+ // Skip entries whose "reason" is "overlong".
+ if (table.findValue("reason", value) && value.getString().equals("overlong")) {
+ continue;
+ }
+ // Skip entries without a "replacement".
+ if (!table.findValue("replacement", value)) {
+ continue;
+ }
+ String aliasTo = value.getString();
+ // Only the first space-separated replacement is used.
+ int spacePos = aliasTo.indexOf(' ');
+ String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
+ if (aliasFirst.contains("_")) {
+ continue; // only simple aliasing
+ }
+ toCanonical.put(aliasFrom, aliasFirst);
+ }
+ if (type.equals("language")) {
+ toCanonical.put("mo", "ro"); // special case
+ }
+ toAliases = Multimaps.invertFrom(toCanonical, HashMultimap.<String, String>create());
+
+ if (DEBUG_OUTPUT) {
+ System.out.println("*** " + type + " aliases");
+ // Copy into a TreeMap so the mappings print in sorted key order.
+ for (Map.Entry<String, String> mapping : new TreeMap<>(toCanonical).entrySet()) {
+ System.out.println(mapping);
+ }
+ }
+ }
+ }
+
+ private static final class TrieBuilder {
+ byte[] bytes = new byte[24];
+ BytesTrieBuilder tb = new BytesTrieBuilder();
+
+ void addMapping(String s, int value) {
+ // s contains only ASCII characters.
+ s.getBytes(0, s.length(), bytes, 0);
+ tb.add(bytes, s.length(), value);
+ }
+
+ BytesTrie build() {
+ ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
+ // Allocate an array with just the necessary capacity,
+ // so that we do not hold on to a larger array for a long time.
+ byte[] bytes = new byte[buffer.remaining()];
+ buffer.get(bytes);
+ if (DEBUG_OUTPUT) {
+ System.out.println("likely subtags trie size: " + bytes.length + " bytes");
+ }
+ return new BytesTrie(bytes, 0);
+ }
+ }
+
+    /**
+     * Builds the likely-subtags data: the language and region alias maps, a trie keyed by
+     * lang, lang+script and lang+script+region, and the array of LSRs that the full keys
+     * index into.
+     */
+    static XLikelySubtags.Data build() {
+        AliasesBuilder languageAliases = new AliasesBuilder("language");
+        AliasesBuilder regionAliases = new AliasesBuilder("territory");
+
+        Map<String, Map<String, Map<String, LSR>>> table =
+                makeTable(languageAliases, regionAliases);
+
+        TrieBuilder trieBuilder = new TrieBuilder();
+        Map<LSR, Integer> indexOfLsr = new LinkedHashMap<>();
+        // Bogus LSR at index 0 for some code to easily distinguish between
+        // intermediate match points and real result values.
+        indexOfLsr.put(new LSR("", "", ""), 0);
+        // We could prefill the lsrList with common locales to give them small indexes,
+        // and see if that improves performance a little.
+        for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : table.entrySet()) {
+            // "und" and empty subtags are written as "*" in the trie keys.
+            String lang = langEntry.getKey().equals("und") ? "*" : langEntry.getKey();
+            // Create a match point for the language.
+            trieBuilder.addMapping(lang, 0);
+            for (Map.Entry<String, Map<String, LSR>> scriptEntry : langEntry.getValue().entrySet()) {
+                String script = scriptEntry.getKey().isEmpty() ? "*" : scriptEntry.getKey();
+                // Match point for lang+script.
+                String langScript = lang + script;
+                trieBuilder.addMapping(langScript, 0);
+                for (Map.Entry<String, LSR> regionEntry : scriptEntry.getValue().entrySet()) {
+                    String region = regionEntry.getKey().isEmpty() ? "*" : regionEntry.getKey();
+                    // Map the whole lang+script+region to a unique, dense index of the LSR.
+                    LSR lsr = regionEntry.getValue();
+                    Integer known = indexOfLsr.get(lsr);
+                    int lsrIndex;
+                    if (known == null) {
+                        lsrIndex = indexOfLsr.size();
+                        indexOfLsr.put(lsr, lsrIndex);
+                    } else {
+                        lsrIndex = known.intValue();
+                    }
+                    trieBuilder.addMapping(langScript + region, lsrIndex);
+                }
+            }
+        }
+        BytesTrie trie = trieBuilder.build();
+        // Insertion order of the LinkedHashMap keys matches the assigned indexes.
+        LSR[] lsrs = indexOfLsr.keySet().toArray(new LSR[indexOfLsr.size()]);
+        return new XLikelySubtags.Data(
+                languageAliases.toCanonical, regionAliases.toCanonical, trie, lsrs);
+    }
+
+    /**
+     * Builds the nested language -> script -> region -> target-LSR table from the
+     * "likelySubtags" bundle, adds entries for language and region aliases, and
+     * verifies that every level has a default ("und" / "") entry.
+     *
+     * @param languageAliasesBuilder Source of language aliases.
+     * @param regionAliasesBuilder Source of region aliases.
+     * @return the completed table
+     * @throws IllegalArgumentException if a required default entry is missing
+     */
+    private static Map<String, Map<String, Map<String, LSR>>> makeTable(
+            AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
+        Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
+        // set the base data
+        ICUResourceBundle likelySubtags = getSupplementalDataBundle("likelySubtags");
+        UResource.Value value = likelySubtags.getValueWithFallback("");
+        UResource.Table table = value.getTable();
+        UResource.Key key = new UResource.Key();
+        for (int i = 0; table.getKeyAndValue(i, key, value); ++i) {
+            LSR ltp = lsrFromLocaleID(key.toString());  // source
+            final String language = ltp.language;
+            final String script = ltp.script;
+            final String region = ltp.region;
+
+            ltp = lsrFromLocaleID(value.getString());  // target
+            String languageTarget = ltp.language;
+            final String scriptTarget = ltp.script;
+            final String regionTarget = ltp.region;
+
+            set(result, language, script, region, languageTarget, scriptTarget, regionTarget);
+            // now add aliases
+            Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
+            Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
+            for (String languageAlias : languageAliases) {
+                for (String regionAlias : regionAliases) {
+                    if (languageAlias.equals(language) && regionAlias.equals(region)) {
+                        continue;
+                    }
+                    set(result, languageAlias, script, regionAlias,
+                            languageTarget, scriptTarget, regionTarget);
+                }
+            }
+        }
+        // hack
+        set(result, "und", "Latn", "", "en", "Latn", "US");
+
+        // Validate the "und" defaults BEFORE dereferencing them below, so that missing
+        // data is reported as the intended failure rather than a NullPointerException.
+        Map<String, Map<String, LSR>> undScriptMap = result.get("und");
+        if (undScriptMap == null) {
+            throw new IllegalArgumentException("failure: base");
+        }
+        Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
+        if (undEmptyRegionMap == null) {
+            throw new IllegalArgumentException("failure: und");
+        }
+
+        // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
+        // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
+
+        // so und-Latn-GH => ak-Latn-GH
+        for (Map.Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
+            final LSR lsr = regionEntry.getValue();
+            set(result, "und", lsr.script, lsr.region, lsr);
+        }
+
+        // check that every level has "" (or "und")
+        for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
+            String lang = langEntry.getKey();
+            final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
+            if (!scriptMap.containsKey("")) {
+                throw new IllegalArgumentException("failure: " + lang);
+            }
+            for (Map.Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
+                String script = scriptEntry.getKey();
+                final Map<String, LSR> regionMap = scriptEntry.getValue();
+                if (!regionMap.containsKey("")) {
+                    throw new IllegalArgumentException("failure: " + lang + "-" + script);
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Parses locale IDs in the likelySubtags data, not arbitrary language tags.
+     * The ID is split on '-' and '_' into one to three subtags. A second subtag
+     * shorter than four characters is taken as the region (with an empty script);
+     * otherwise it is the script and the third subtag is the region.
+     *
+     * @param languageIdentifier Locale ID such as "und_GH" or "ak_Latn_GH".
+     * @return the parsed language/script/region
+     * @throws ICUException if the ID has no subtags or more than three
+     */
+    private static LSR lsrFromLocaleID(String languageIdentifier) {
+        String[] parts = languageIdentifier.split("[-_]");
+        // split() drops trailing empty strings, so an ID made only of separators
+        // (e.g. "_") yields zero parts; report that distinctly.
+        if (parts.length < 1) {
+            throw new ICUException("no subtags in locale ID \"" + languageIdentifier + "\"");
+        }
+        if (parts.length > 3) {
+            throw new ICUException("too many subtags in locale ID \"" + languageIdentifier + "\"");
+        }
+        String lang = parts[0];
+        String p2 = parts.length < 2 ? "" : parts[1];
+        String p3 = parts.length < 3 ? "" : parts[2];
+        return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
+    }
+
+    /** Convenience overload: wraps the three target subtags in a new LSR and stores it. */
+    private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
+            final String language, final String script, final String region,
+            final String languageTarget, final String scriptTarget, final String regionTarget) {
+        set(langTable, language, script, region,
+                new LSR(languageTarget, scriptTarget, regionTarget));
+    }
+
+    /**
+     * Stores {@code newValue} at langTable[language][script][region],
+     * creating the intermediate tables as needed and replacing any previous value.
+     */
+    private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
+            final String language, final String script, final String region, LSR newValue) {
+        getSubtable(getSubtable(langTable, language), script).put(region, newValue);
+    }
+
+    /**
+     * Returns the map stored under {@code language} in {@code table};
+     * when there is none, a new TreeMap is inserted and returned.
+     */
+    private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
+        Map<V, T> existing = table.get(language);
+        if (existing != null) {
+            return existing;
+        }
+        Map<V, T> created = new TreeMap<>();
+        table.put(language, created);
+        return created;
+    }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Off-line-built data for LocaleMatcher.
+ * Mostly but not only the data for mapping locales to their maximized forms.
+ */
+public class LocaleDistance {
+ private static final int ABOVE_THRESHOLD = 100;
+
+ private static final boolean DEBUG_OUTPUT = false;
+
+ // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+ // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+ // There is also a trie value for each subsequence of whole subtags.
+ // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+ private final BytesTrie trie;
+
+ /**
+ * Maps each region to zero or more single-character partitions.
+ */
+ private final byte[] regionToPartitionsIndex;
+ private final String[][] partitionArrays;
+
+ /**
+ * Used to get the paradigm region for a cluster, if there is one.
+ */
+ private final Set<LSR> paradigmLSRs;
+
+ private final int defaultLanguageDistance;
+ private final int defaultScriptDistance;
+ private final int defaultRegionDistance;
+
+ // TODO: Load prebuilt data from a resource bundle
+ // to avoid the dependency on the builder code.
+ // VisibleForTesting
+ public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
+
+ /**
+ * Stores the prebuilt data and reads the three default distances from the trie
+ * by following the byte path '*', '*', '*' from the root: the value after the
+ * first '*' is the default language distance, after the second the default
+ * script distance, and after the third the default region distance.
+ */
+ LocaleDistance(BytesTrie trie,
+ byte[] regionToPartitionsIndex, String[][] partitionArrays,
+ Set<LSR> paradigmLSRs) {
+ this.trie = trie;
+ if (DEBUG_OUTPUT) {
+ System.out.println("*** locale distance");
+ testOnlyPrintDistanceTable();
+ }
+ this.regionToPartitionsIndex = regionToPartitionsIndex;
+ this.partitionArrays = partitionArrays;
+ this.paradigmLSRs = paradigmLSRs;
+
+ // Walk a separate trie object so that the stored trie is not advanced.
+ BytesTrie iter = new BytesTrie(trie);
+ BytesTrie.Result result = iter.next('*');
+ assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+ defaultLanguageDistance = iter.getValue();
+ result = iter.next('*');
+ assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+ defaultScriptDistance = iter.getValue();
+ result = iter.next('*');
+ // The last level only needs a value; it need not have further bytes after it.
+ assert result.hasValue();
+ defaultRegionDistance = iter.getValue();
+ }
+
+    // VisibleForTesting
+    /**
+     * Maximizes both locales and returns only the distance part (bits 7..0) of
+     * {@code getBestIndexAndDistance} for the single supported locale.
+     */
+    public int testOnlyDistance(ULocale desired, ULocale supported,
+            int threshold, DistanceOption distanceOption) {
+        LSR maximizedSupported = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
+        LSR maximizedDesired = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
+        LSR[] candidates = { maximizedSupported };
+        int indexAndDistance =
+                getBestIndexAndDistance(maximizedDesired, candidates, threshold, distanceOption);
+        return indexAndDistance & 0xff;
+    }
+
+ public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
+ // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
+ // than regions, so they might be considered the "normal" case.
+
+ /**
+ * Finds the supported LSR with the smallest distance from the desired one.
+ * Equivalent LSR subtags must be normalized into a canonical form.
+ *
+ * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+ * (negative if none has a distance below the threshold),
+ * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+ *
+ * <p>The total distance is the sum of the language, script and region distances.
+ * A candidate is dropped as soon as its running total reaches the threshold,
+ * and the threshold is lowered to the best total found so far.
+ * A candidate with total distance 0 is returned immediately.
+ *
+ * @param desired The desired LSR.
+ * @param supportedLsrs The candidates, tried in array order.
+ * @param threshold Only totals strictly below this value are accepted.
+ * @param distanceOption With SCRIPT_FIRST, the language distance is shifted right
+ * by 2 and the script distance by 1 before being added.
+ */
+ int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
+ int threshold, DistanceOption distanceOption) {
+ BytesTrie iter = new BytesTrie(trie);
+ // Look up the desired language only once for all supported LSRs.
+ // Its "distance" is either a match point value of 0, or a non-match negative value.
+ // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+ // Set wantValue=true so that iter reads & skips the match point value.
+ int desLangDistance = trieNext(iter, desired.language, true, true);
+ // The state is only saved when it will be needed for a second or later candidate.
+ long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
+ // Index of the supported LSR with the lowest distance.
+ int bestIndex = -1;
+ for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
+ LSR supported = supportedLsrs[slIndex];
+ // star: no language-pair rule matched, so the default (<*, *>) distances apply.
+ boolean star = false;
+ int distance = desLangDistance;
+ if (distance >= 0) {
+ if (slIndex != 0) {
+ iter.resetToState64(desLangState);
+ }
+ distance = trieNext(iter, supported.language, true, true);
+ }
+ // Note: The data builder verifies that there are no rules with "any" (*) language and
+ // real (non *) script or region subtags.
+ // This means that if the lookup for either language fails we can use
+ // the default distances without further lookups.
+ if (distance < 0) { // <*, *>
+ if (desired.language.equals(supported.language)) {
+ distance = 0;
+ } else {
+ distance = defaultLanguageDistance;
+ }
+ star = true;
+ }
+ assert 0 <= distance && distance <= 100;
+ boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
+ if (scriptFirst) {
+ distance >>= 2;
+ }
+ if (distance >= threshold) {
+ continue;
+ }
+
+ int scriptDistance;
+ if (star) {
+ if (desired.script.equals(supported.script)) {
+ scriptDistance = 0;
+ } else {
+ scriptDistance = defaultScriptDistance;
+ }
+ } else {
+ scriptDistance = getDesSuppDistance(iter, iter.getState64(),
+ desired.script, supported.script, false);
+ }
+ if (scriptFirst) {
+ scriptDistance >>= 1;
+ }
+ distance += scriptDistance;
+ if (distance >= threshold) {
+ continue;
+ }
+
+ if (desired.region.equals(supported.region)) {
+ // regionDistance = 0
+ } else if (star) {
+ distance += defaultRegionDistance;
+ } else {
+ long startState = iter.getState64();
+
+ // From here on we know the regions are not equal.
+ // Map each region to zero or more partitions. (zero = one empty string)
+ // If either side has more than one, then we find the maximum distance.
+ // This could be optimized by adding some more structure, but probably not worth it.
+ final String[] desiredPartitions = partitionsForRegion(desired);
+ final String[] supportedPartitions = partitionsForRegion(supported);
+ int regionDistance;
+
+ if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
+ // Pass the remaining budget so the helper can stop early.
+ regionDistance = getRegionPartitionsDistance(iter, startState,
+ desiredPartitions, supportedPartitions, threshold - distance);
+ } else {
+ regionDistance = getDesSuppDistance(iter, startState,
+ desiredPartitions[0], supportedPartitions[0], true);
+ }
+ distance += regionDistance;
+ }
+ if (distance < threshold) {
+ if (distance == 0) {
+ // Perfect match: cannot be improved, distance bits are 0.
+ return slIndex << 8;
+ }
+ bestIndex = slIndex;
+ // Later candidates must be strictly better than this one.
+ threshold = distance;
+ }
+ }
+ // When a best candidate exists, threshold holds its distance.
+ return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+ }
+
+    /**
+     * Returns the maximum distance over all (desired, supported) partition pairs,
+     * or the first pair distance that is a new maximum and reaches {@code threshold}.
+     * Each lookup starts from {@code startState}.
+     */
+    private int getRegionPartitionsDistance(BytesTrie iter, long startState,
+            String[] desiredPartitions, String[] supportedPartitions, int threshold) {
+        int maxDistance = -1;
+        for (String desiredPartition : desiredPartitions) {
+            for (String supportedPartition : supportedPartitions) {
+                if (maxDistance >= 0) {  // no need to reset in first iteration
+                    iter.resetToState64(startState);
+                }
+                int pairDistance = getDesSuppDistance(
+                        iter, startState, desiredPartition, supportedPartition, true);
+                if (pairDistance <= maxDistance) {
+                    continue;
+                }
+                if (pairDistance >= threshold) {
+                    return pairDistance;
+                }
+                maxDistance = pairDistance;
+            }
+        }
+        assert maxDistance >= 0;
+        return maxDistance;
+    }
+
+ // Modified from
+ // DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
+ /**
+ * Looks up the distance for one (desired, supported) subtag pair, continuing from
+ * the current position of {@code iter}. When there is no rule for the pair, the
+ * iterator is reset to {@code startState} and moved over '*' instead; the result is
+ * then 0 for equal non-final subtags, otherwise the value stored at '*'.
+ *
+ * @param finalSubtag true for the last level (no further trie bytes are required
+ * after the supported subtag).
+ */
+ private static final int getDesSuppDistance(BytesTrie iter, long startState,
+ String desired, String supported, boolean finalSubtag) {
+ // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+ // The desired subtag is only a match point: no value wanted, more bytes must follow.
+ int distance = trieNext(iter, desired, false, true);
+ if (distance >= 0) {
+ distance = trieNext(iter, supported, true, !finalSubtag);
+ }
+ if (distance < 0) {
+ BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
+ assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
+ if (!finalSubtag && desired.equals(supported)) {
+ distance = 0; // same language or script
+ } else {
+ distance = iter.getValue();
+ assert distance >= 0;
+ }
+ }
+ return distance;
+ }
+
+    /**
+     * Advances {@code iter} over the characters of subtag {@code s}. The last character
+     * is looked up with bit 7 set.
+     *
+     * @param wantValue If true, a successful lookup returns the trie value; otherwise 0.
+     * @param wantNext If true, the final result must be INTERMEDIATE_VALUE;
+     *     otherwise it only needs to have a value.
+     * @return the value or 0 on success, -1 if {@code s} is empty or the lookup fails
+     */
+    private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
+        if (s.isEmpty()) {
+            return -1;  // no empty subtags in the distance data
+        }
+        int last = s.length() - 1;
+        for (int i = 0; i < last; ++i) {
+            int c = s.charAt(i);
+            assert c <= 0x7f;
+            if (!iter.next(c).hasNext()) {
+                return -1;
+            }
+        }
+        // last character of this subtag
+        int lastChar = s.charAt(last);
+        assert lastChar <= 0x7f;
+        BytesTrie.Result result = iter.next(lastChar | 0x80);
+
+        boolean matched = wantNext
+                ? result == BytesTrie.Result.INTERMEDIATE_VALUE
+                : result.hasValue();
+        if (!matched) {
+            return -1;
+        }
+        return wantValue ? iter.getValue() : 0;
+    }
+
+ @Override
+ public String toString() {
+ return testOnlyGetDistanceTable(true).toString();
+ }
+
+    /**
+     * Returns the partition strings for the region of {@code lsr}.
+     * An ill-formed region (negative regionIndex) uses partitionArrays[0].
+     */
+    private String[] partitionsForRegion(LSR lsr) {
+        if (lsr.regionIndex < 0) {
+            // ill-formed region -> one empty string
+            return partitionArrays[0];
+        }
+        return partitionArrays[regionToPartitionsIndex[lsr.regionIndex]];
+    }
+
+ /** Returns true if {@code lsr} is contained in the paradigm LSR set. */
+ boolean isParadigmLSR(LSR lsr) {
+ return paradigmLSRs.contains(lsr);
+ }
+
+ // VisibleForTesting
+ /** Returns the default script distance read from the trie in the constructor. */
+ public int getDefaultScriptDistance() {
+ return defaultScriptDistance;
+ }
+
+ /** Returns the default region distance read from the trie in the constructor. */
+ int getDefaultRegionDistance() {
+ return defaultRegionDistance;
+ }
+
+    // VisibleForTesting
+    /**
+     * Decodes every trie entry into a '-'-separated subtag string mapped to its value,
+     * in trie iteration order. A '*' byte is written as the pair "*-*"; a byte with
+     * bit 7 set ends a subtag.
+     *
+     * @param skipIntermediateMatchPoints If true, entries with an odd number of
+     *     subtags are omitted.
+     */
+    public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
+        Map<String, Integer> table = new LinkedHashMap<>();
+        StringBuilder sb = new StringBuilder();
+        for (BytesTrie.Entry entry : trie) {
+            sb.setLength(0);
+            int numSubtags = 0;
+            int length = entry.bytesLength();
+            for (int i = 0; i < length; ++i) {
+                byte b = entry.byteAt(i);
+                if (b == '*') {
+                    // One * represents a (desired, supported) = (ANY, ANY) pair.
+                    sb.append("*-*-");
+                    numSubtags += 2;
+                } else if (b >= 0) {
+                    sb.append((char) b);
+                } else {  // end of subtag
+                    sb.append((char) (b & 0x7f)).append('-');
+                    ++numSubtags;
+                }
+            }
+            assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
+            boolean oddSubtagCount = (numSubtags & 1) != 0;
+            if (skipIntermediateMatchPoints && oddSubtagCount) {
+                continue;
+            }
+            // Drop the trailing '-'.
+            sb.setLength(sb.length() - 1);
+            String path = sb.toString();
+            if (!skipIntermediateMatchPoints && path.endsWith("*-*")) {
+                // Re-insert single-ANY match points to show consistent structure
+                // for the test code.
+                map_putMatchPoint:
+                table.put(path.substring(0, path.length() - 2), 0);
+            }
+            table.put(path, entry.value);
+        }
+        return table;
+    }
+
+ // VisibleForTesting
+ public void testOnlyPrintDistanceTable() {
+ for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
+ System.out.println(mapping);
+ }
+ }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.locale;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.XCldrStub.Multimap;
+import com.ibm.icu.impl.locale.XCldrStub.Predicate;
+import com.ibm.icu.impl.locale.XCldrStub.Splitter;
+import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
+import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.BytesTrieBuilder;
+import com.ibm.icu.util.Output;
+import com.ibm.icu.util.ULocale;
+
+public final class LocaleDistanceBuilder {
+ private static final String ANY = "�"; // matches any character. Uses value above any subtag.
+
+ private static final boolean DEBUG_OUTPUT = false;
+
+    /** Maps the string "*" to the ANY constant; returns every other input unchanged. */
+    private static String fixAny(String string) {
+        if ("*".equals(string)) {
+            return ANY;
+        }
+        return string;
+    }
+
+    /** Opens the bundle {@code name} under ICUData.ICU_BASE_NAME with OpenType.DIRECT. */
+    private static ICUResourceBundle getSupplementalDataBundle(String name) {
+        ClassLoader loader = ICUResourceBundle.ICU_DATA_CLASS_LOADER;
+        return ICUResourceBundle.getBundleInstance(
+                ICUData.ICU_BASE_NAME, name, loader, ICUResourceBundle.OpenType.DIRECT);
+    }
+
+ /**
+ * Territory containment data read from the "territoryContainment" resource,
+ * resolved starting at region "001".
+ */
+ private static final class TerritoryContainment {
+ /** Directed, acyclic containment graph. Maps each container to its direct contents. */
+ final Multimap<String, String> graph = TreeMultimap.create();
+ /** Maps each container to all of its contents, direct and indirect. */
+ final Multimap<String, String> resolved = TreeMultimap.create();
+ /** Maps each container only to its leaf contents. */
+ final Multimap<String, String> toLeavesOnly = TreeMultimap.create();
+ /** The leaves of the graph. */
+ final Set<String> leaves;
+
+ /**
+ * Reads the containment graph, resolves it from "001", and derives the
+ * leaves-only mapping.
+ */
+ TerritoryContainment(ICUResourceBundle supplementalData) {
+ UResource.Value value = supplementalData.getValueWithFallback("territoryContainment");
+ UResource.Key key = new UResource.Key();
+ addContainments(key, value);
+ resolve("001");
+
+ for (Map.Entry<String, Set<String>> entry : resolved.asMap().entrySet()) {
+ String container = entry.getKey();
+ for (String contained : entry.getValue()) {
+ // A region that is not itself a key of resolved contains nothing.
+ if (resolved.get(contained) == null) { // a leaf node (usually a country)
+ toLeavesOnly.put(container, contained);
+ }
+ }
+ }
+ leaves = toLeavesOnly.get("001");
+ }
+
+ /**
+ * Adds the items of the table in {@code value} to the graph. Items whose key has
+ * at most 3 characters are containers with a string or string-array value;
+ * items with longer keys are treated as nested tables and processed recursively.
+ * Both {@code key} and {@code value} are overwritten while iterating.
+ */
+ private void addContainments(UResource.Key key, UResource.Value value) {
+ UResource.Table containers = value.getTable();
+ for (int i = 0; containers.getKeyAndValue(i, key, value); ++i) {
+ if (key.length() <= 3) {
+ String container = key.toString();
+ String[] contents = value.getStringArrayOrStringAsArray();
+ for (String s : contents) {
+ graph.put(container, s);
+ }
+ } else {
+ addContainments(key, value); // containedGroupings etc.
+ }
+ }
+ }
+
+ /**
+ * Fills resolved for {@code region} and, recursively, for everything it contains.
+ *
+ * @return all direct and indirect contents of {@code region},
+ * or an empty set if the graph has no entry for it.
+ */
+ private Set<String> resolve(String region) {
+ Set<String> contained = graph.get(region);
+ if (contained == null) {
+ return Collections.emptySet();
+ }
+ resolved.putAll(region, contained); // do top level
+ // then recursively
+ for (String subregion : contained) {
+ resolved.putAll(region, resolve(subregion));
+ }
+ return resolved.get(region);
+ }
+ }
+
+ /**
+ * Immutable holder for one distance rule; the constructor stores its four
+ * arguments unchanged.
+ */
+ private static final class Rule {
+ // NOTE(review): presumably the subtags of the desired and supported sides of
+ // the rule -- confirm against the code that creates Rule objects.
+ final List<String> desired;
+ final List<String> supported;
+ final int distance;
+ // NOTE(review): presumably true when the rule applies only in the
+ // desired -> supported direction -- confirm against the rule-reading code.
+ final boolean oneway;
+
+ Rule(List<String> desired, List<String> supported, int distance, boolean oneway) {
+ this.desired = desired;
+ this.supported = supported;
+ this.distance = distance;
+ this.oneway = oneway;
+ }
+ }
+
+    /**
+     * Returns the index already assigned to {@code source}; when there is none,
+     * assigns the current map size as its index, records it, and returns it.
+     */
+    private static final <T> int makeUniqueIndex(Map<T, Integer> objectToInt, T source) {
+        Integer known = objectToInt.get(source);
+        if (known != null) {
+            return known;
+        }
+        int fresh = objectToInt.size();
+        objectToInt.put(source, fresh);
+        return fresh;
+    }
+
+    /**
+     * Accumulates a byte path in {@code bytes[0..length)} and adds a trie entry
+     * for the path each time a star or subtag is appended.
+     */
+    private static final class TrieBuilder {
+        byte[] bytes = new byte[24];
+        /** Number of bytes of the current path; only ever increased inside this class. */
+        int length = 0;
+        BytesTrieBuilder tb = new BytesTrieBuilder();
+
+        /** Appends '*' to the current path and maps the path to {@code value}. */
+        void addStar(int value) {
+            assert value >= 0;
+            bytes[length] = '*';
+            ++length;
+            tb.add(bytes, length, value);
+        }
+
+        /**
+         * Appends the ASCII subtag {@code s} to the current path, with bit 7 set on
+         * its last character, and maps the path to {@code value}.
+         */
+        void addSubtag(String s, int value) {
+            assert !s.isEmpty();
+            assert value >= 0;
+            assert !s.equals(ANY);
+            int last = s.length() - 1;
+            for (int i = 0; i < last; ++i) {
+                char c = s.charAt(i);
+                assert c <= 0x7f;
+                bytes[length++] = (byte) c;
+            }
+            char terminator = s.charAt(last);
+            assert terminator <= 0x7f;
+            // Mark the last character as a terminator to avoid overlap matches.
+            bytes[length++] = (byte) (terminator | 0x80);
+            tb.add(bytes, length, value);
+        }
+
+        /** Builds the trie with Option.SMALL and wraps an exactly-sized copy of its bytes. */
+        BytesTrie build() {
+            ByteBuffer built = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
+            // Allocate an array with just the necessary capacity,
+            // so that we do not hold on to a larger array for a long time.
+            byte[] trieBytes = new byte[built.remaining()];
+            built.get(trieBytes);
+            if (DEBUG_OUTPUT) {
+                System.out.println("distance trie size: " + trieBytes.length + " bytes");
+            }
+            return new BytesTrie(trieBytes, 0);
+        }
+    }
+
+ private static final class DistanceTable {
+ final int nodeDistance; // distance for the lookup so far
+ final Map<String, Map<String, DistanceTable>> subtables;
+
+ /** Creates a node with the given distance and an empty (sorted) subtable map. */
+ DistanceTable(int distance) {
+ nodeDistance = distance;
+ subtables = new TreeMap<>();
+ }
+
+        /** Two tables are equal when their node distances and subtables are equal. */
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (obj == null || obj.getClass() != this.getClass()) {
+                return false;
+            }
+            DistanceTable that = (DistanceTable) obj;
+            return nodeDistance == that.nodeDistance && subtables.equals(that.subtables);
+        }
+ /** Combines the node distance with the subtables' hash code by XOR. */
+ @Override
+ public int hashCode() {
+ return nodeDistance ^ subtables.hashCode();
+ }
+
+ public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
+ boolean star = false;
+ Map<String, DistanceTable> sub2 = subtables.get(desired);
+ if (sub2 == null) {
+ sub2 = subtables.get(ANY); // <*, supported>
+ star = true;
+ }
+ DistanceTable value = sub2.get(supported); // <*/desired, supported>
+ if (value == null) {
+ value = sub2.get(ANY); // <*/desired, *>
+ if (value == null && !star) {
+ sub2 = subtables.get(ANY); // <*, supported>
+ value = sub2.get(supported);
+ if (value == null) {
+ value = sub2.get(ANY); // <*, *>
+ }
+ }
+ star = true;
+ }
+ if (distanceTable != null) {
+ distanceTable.value = value;
+ }
+ int result = starEquals && star && desired.equals(supported) ? 0 : value.nodeDistance;
+ return result;
+ }
+
+ void copy(DistanceTable other) {
+ for (Map.Entry<String, Map<String, DistanceTable>> e1 : other.subtables.entrySet()) {
+ for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+ DistanceTable value = e2.getValue();
+ addSubtable(e1.getKey(), e2.getKey(), value.nodeDistance);
+ }
+ }
+ }
+
+ DistanceTable addSubtable(String desired, String supported, int distance) {
+ Map<String, DistanceTable> sub2 = subtables.get(desired);
+ if (sub2 == null) {
+ subtables.put(desired, sub2 = new TreeMap<>());
+ }
+ DistanceTable oldNode = sub2.get(supported);
+ if (oldNode != null) {
+ return oldNode;
+ }
+
+ final DistanceTable newNode = new DistanceTable(distance);
+ sub2.put(supported, newNode);
+ return newNode;
+ }
+
+ /**
+ * Return null if value doesn't exist
+ */
+ private DistanceTable getNode(String desired, String supported) {
+ Map<String, DistanceTable> sub2 = subtables.get(desired);
+ if (sub2 == null) {
+ return null;
+ }
+ return sub2.get(supported);
+ }
+
+
+ /** add table for each subitem that matches and doesn't have a table already
+ */
+ void addSubtables(
+ String desired, String supported,
+ Predicate<DistanceTable> action) {
+ DistanceTable node = getNode(desired, supported);
+ if (node == null) {
+ // get the distance it would have
+ Output<DistanceTable> node2 = new Output<>();
+ int distance = getDistance(desired, supported, node2, true);
+ // now add it
+ node = addSubtable(desired, supported, distance);
+ if (node2.value != null) {
+ DistanceTable nextTable = node2.value;
+ node.copy(nextTable);
+ }
+ }
+ action.test(node);
+ }
+
+ void addSubtables(String desiredLang, String supportedLang,
+ String desiredScript, String supportedScript,
+ int percentage) {
+
+ // add to all the values that have the matching desiredLang and supportedLang
+ @SuppressWarnings("unused")
+ boolean haveKeys = false;
+ for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+ String key1 = e1.getKey();
+ final boolean desiredIsKey = desiredLang.equals(key1);
+ if (desiredIsKey || desiredLang.equals(ANY)) {
+ for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+ String key2 = e2.getKey();
+ final boolean supportedIsKey = supportedLang.equals(key2);
+ haveKeys |= (desiredIsKey && supportedIsKey);
+ if (supportedIsKey || supportedLang.equals(ANY)) {
+ DistanceTable value = e2.getValue();
+ value.addSubtable(desiredScript, supportedScript, percentage);
+ }
+ }
+ }
+ }
+ // now add the sequence explicitly
+ DistanceTable dt = new DistanceTable(-1);
+ dt.addSubtable(desiredScript, supportedScript, percentage);
+ CopyIfEmpty r = new CopyIfEmpty(dt);
+ addSubtables(desiredLang, supportedLang, r);
+ }
+
+ void addSubtables(String desiredLang, String supportedLang,
+ String desiredScript, String supportedScript,
+ String desiredRegion, String supportedRegion,
+ int percentage) {
+
+ // add to all the values that have the matching desiredLang and supportedLang
+ @SuppressWarnings("unused")
+ boolean haveKeys = false;
+ for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+ String key1 = e1.getKey();
+ final boolean desiredIsKey = desiredLang.equals(key1);
+ if (desiredIsKey || desiredLang.equals(ANY)) {
+ for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
+ String key2 = e2.getKey();
+ final boolean supportedIsKey = supportedLang.equals(key2);
+ haveKeys |= (desiredIsKey && supportedIsKey);
+ if (supportedIsKey || supportedLang.equals(ANY)) {
+ DistanceTable value = e2.getValue();
+ value.addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
+ }
+ }
+ }
+ }
+ // now add the sequence explicitly
+
+ DistanceTable dt = new DistanceTable(-1);
+ dt.addSubtable(desiredRegion, supportedRegion, percentage);
+ AddSub r = new AddSub(desiredScript, supportedScript, dt);
+ addSubtables(desiredLang, supportedLang, r);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n');
+ return toString("", sb).toString();
+ }
+
+ private StringBuilder toString(String indent, StringBuilder buffer) {
+ String indent2 = indent.isEmpty() ? "" : "\t";
+ for (Map.Entry<String, Map<String, DistanceTable>> e1 : subtables.entrySet()) {
+ final Map<String, DistanceTable> subsubtable = e1.getValue();
+ buffer.append(indent2).append(e1.getKey());
+ String indent3 = "\t";
+ for (Map.Entry<String, DistanceTable> e2 : subsubtable.entrySet()) {
+ DistanceTable value = e2.getValue();
+ buffer.append(indent3).append(e2.getKey());
+ buffer.append('\t').append(value.nodeDistance);
+ value.toString(indent+"\t\t\t", buffer);
+ buffer.append('\n');
+ indent3 = indent+'\t';
+ }
+ indent2 = indent;
+ }
+ return buffer;
+ }
+
+ void toTrie(TrieBuilder builder) {
+ int startLength = builder.length;
+ for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
+ String desired = desSuppNode.getKey();
+ Map<String, DistanceTable> suppNodeMap = desSuppNode.getValue();
+ // Collapse ANY-ANY into one single *.
+ if (desired.equals(ANY)) {
+ assert suppNodeMap.size() == 1;
+ DistanceTable node = suppNodeMap.get(ANY);
+ builder.addStar(node.nodeDistance);
+ node.toTrie(builder);
+ } else {
+ builder.addSubtag(desired, 0);
+ int desiredLength = builder.length;
+ for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
+ String supported = suppNode.getKey();
+ assert !supported.equals(ANY);
+ DistanceTable node = suppNode.getValue();
+ builder.addSubtag(supported, node.nodeDistance);
+ node.toTrie(builder);
+ builder.length = desiredLength;
+ }
+ }
+ builder.length = startLength;
+ }
+ }
+ }
+
+ /**
+  * Action that copies the entries of a template table into a node,
+  * but only when that node has no subtables yet. Always returns true.
+  */
+ private static final class CopyIfEmpty implements Predicate<DistanceTable> {
+     private final DistanceTable toCopy;
+
+     CopyIfEmpty(DistanceTable template) {
+         toCopy = template;
+     }
+
+     @Override
+     public boolean test(DistanceTable node) {
+         boolean hasNoChildren = node.subtables.isEmpty();
+         if (hasNoChildren) {
+             node.copy(toCopy);
+         }
+         return true;
+     }
+ }
+
+ /**
+  * Action that ensures a node has a child for the given desired/supported pair
+  * and fills that child from a template table if the child is still empty.
+  * Always returns true; a null node is rejected.
+  */
+ private static final class AddSub implements Predicate<DistanceTable> {
+     private final String desiredSub;
+     private final String supportedSub;
+     private final CopyIfEmpty r;
+
+     AddSub(String desiredSub, String supportedSub, DistanceTable distanceTableToCopy) {
+         this.desiredSub = desiredSub;
+         this.supportedSub = supportedSub;
+         this.r = new CopyIfEmpty(distanceTableToCopy);
+     }
+
+     @Override
+     public boolean test(DistanceTable node) {
+         if (node == null) {
+             throw new IllegalArgumentException("bad structure");
+         }
+         node.addSubtables(desiredSub, supportedSub, r);
+         return true;
+     }
+ }
+
+ /**
+  * Returns the partition IDs registered for a region variable.
+  * The wildcard "*" maps to itself.
+  *
+  * @param variableToPartition mapping from variable name to partition IDs
+  * @param variable the variable name, or "*"
+  * @return the non-empty collection of IDs for the variable
+  * @throws IllegalArgumentException if the variable has no partitions
+  */
+ private static Collection<String> getIdsFromVariable(
+         Multimap<String, String> variableToPartition, String variable) {
+     if (!variable.equals("*")) {
+         Collection<String> partitions = variableToPartition.get(variable);
+         if (partitions != null && !partitions.isEmpty()) {
+             return partitions;
+         }
+         throw new IllegalArgumentException("Variable not defined: " + variable);
+     }
+     return Collections.singleton("*");
+ }
+
+ /**
+ * Builds the LocaleDistance data from the supplementalData resource bundle:
+ * reads the paradigm locales, the region match variables and the language match rules,
+ * expands the rules into a DistanceTable tree, and serializes that tree into a BytesTrie.
+ *
+ * @return the LocaleDistance constructed from the trie, the region-to-partition data
+ *     and the paradigm LSRs
+ * @throws IllegalArgumentException if a rule has unequal desired/supported lengths,
+ *     if the rules are not ordered by non-decreasing subtag count,
+ *     or if a rule violates the "*" constraints checked by checkStars()
+ */
+ static LocaleDistance build() {
+ // From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
+ // and then paradigmLocales, matchVariable, and the last languageMatch items.
+ ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
+ String[] paradigms = supplementalData.getValueWithFallback(
+ "languageMatchingInfo/written/paradigmLocales").getStringArray();
+ Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
+ for (String paradigm : paradigms) {
+ ULocale pl = new ULocale(paradigm);
+ paradigmLSRs.add(XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl));
+ }
+
+ TerritoryContainment tc = new TerritoryContainment(supplementalData);
+
+ // Register each match variable from the data under the name "$" + key.
+ RegionMapperBuilder rmb = new RegionMapperBuilder(tc);
+ UResource.Value value = supplementalData.getValueWithFallback(
+ "languageMatchingInfo/written/matchVariable");
+ UResource.Table variables = value.getTable();
+ UResource.Key key = new UResource.Key();
+ for (int i = 0; variables.getKeyAndValue(i, key, value); ++i) {
+ String variable = "$" + key.toString();
+ String regions = value.getString();
+ rmb.add(variable, regions);
+ }
+
+ // Parse the rules.
+ // We could almost process them while reading them from the source data,
+ // but a rule may contain a region code rather than a variable.
+ // We need to create a variable for each such region code
+ // before rmb.build() and before processing the rules.
+ // Note: despite its name, this splitter splits on '_'.
+ Splitter bar = Splitter.on('_');
+
+ int prevSize = 0;
+ value = supplementalData.getValueWithFallback("languageMatchingNew/written");
+ UResource.Array matches = value.getArray();
+ List<Rule> rules = new ArrayList<>(matches.getSize());
+ for (int i = 0; matches.getValue(i, value); ++i) {
+ // tuple: [0] desired, [1] supported, [2] distance, optional [3] "1" for a one-way rule
+ String[] tuple = value.getStringArray();
+ int distance = Integer.parseInt(tuple[2]);
+ boolean oneway = tuple.length >= 4 && tuple[3].equals("1");
+ // Copy into ArrayLists because the region element may be replaced below and later on.
+ List<String> desired = new ArrayList<>(bar.splitToList(tuple[0]));
+ List<String> supported = new ArrayList<>(bar.splitToList(tuple[1]));
+ int size = desired.size();
+ if (size != supported.size()) {
+ throw new IllegalArgumentException("uneven languageMatches pair");
+ }
+ if (size < prevSize) {
+ throw new IllegalArgumentException("languageMatches out of order");
+ }
+ prevSize = size;
+ // Implementation shortcuts assume:
+ // - At any level, either both or neither rule subtags are *.
+ // - If the rule language subtags are *, the other-level subtags must also be *.
+ // If there are rules that do not fit these constraints,
+ // then we need to revise the implementation.
+ int langStars = checkStars(desired.get(0), supported.get(0), false);
+ if (size >= 2) {
+ checkStars(desired.get(1), supported.get(1), langStars == 2);
+ }
+ if (size == 3) {
+ checkStars(desired.get(2), supported.get(2), langStars == 2);
+ rmb.ensureRegionIsVariable(desired);
+ rmb.ensureRegionIsVariable(supported);
+ }
+ rules.add(new Rule(desired, supported, distance, oneway));
+ }
+
+ rmb.build();
+
+ /*
+ * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX.
+ * We generate a mapping from $A1 to a set of partitions {P1, P2}
+ * When we hit a rule that contains a variable,
+ * we replace that rule by multiple rules for the partitions.
+ */
+ final Multimap<String, String> variableToPartition = rmb.variableToPartitions;
+
+ final DistanceTable defaultDistanceTable = new DistanceTable(-1);
+ for (Rule rule : rules) {
+ List<String> desired = rule.desired;
+ List<String> supported = rule.supported;
+ if (rule.desired.size() <= 2) {
+ // language-only or language-script
+ add(defaultDistanceTable, desired, supported, rule.distance);
+ // Also add the reverse direction unless the rule is one-way or symmetric.
+ if (!rule.oneway && !desired.equals(supported)) {
+ add(defaultDistanceTable, supported, desired, rule.distance);
+ }
+ } else {
+ // language-script-region
+ // Expand the region variables into one rule per pair of partition IDs.
+ // The rule's own lists are overwritten in place for each combination.
+ Collection<String> desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2));
+ Collection<String> supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2));
+ for (String desiredRegion2 : desiredRegions) {
+ desired.set(2, desiredRegion2.toString()); // fix later
+ for (String supportedRegion2 : supportedRegions) {
+ supported.set(2, supportedRegion2.toString()); // fix later
+ add(defaultDistanceTable, desired, supported, rule.distance);
+ if (!rule.oneway) {
+ add(defaultDistanceTable, supported, desired, rule.distance);
+ }
+ }
+ }
+ }
+ }
+
+ // Serialize the table tree into a byte trie.
+ TrieBuilder trieBuilder = new TrieBuilder();
+ defaultDistanceTable.toTrie(trieBuilder);
+ BytesTrie trie = trieBuilder.build();
+ return new LocaleDistance(
+ trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs);
+ }
+
+ /**
+  * Validates the use of "*" in one level of a rule and counts the stars.
+  *
+  * @param desired the desired rule subtag
+  * @param supported the supported rule subtag
+  * @param allStars true if both subtags are required to be "*"
+  * @return the number of subtags that are "*": 0 or 2
+  * @throws IllegalArgumentException if exactly one subtag is "*",
+  *     or if allStars is true and not both subtags are "*"
+  */
+ private static int checkStars(String desired, String supported, boolean allStars) {
+     int stars = 0;
+     if (desired.equals("*")) {
+         ++stars;
+     }
+     if (supported.equals("*")) {
+         ++stars;
+     }
+     if (stars == 1) {
+         throw new IllegalArgumentException("either both or neither rule subtags must be *: " +
+                 desired + ", " + supported);
+     }
+     if (allStars && stars != 2) {
+         throw new IllegalArgumentException("both language subtags are * --> " +
+                 "both rule subtags on all levels must be *: " +
+                 desired + ", " + supported);
+     }
+     return stars;
+ }
+
+ /**
+  * Adds one rule to the root distance table, dispatching on the number of subtags
+  * (1 = language, 2 = language+script, 3 = language+script+region).
+  * Each subtag is passed through fixAny() first.
+  *
+  * @param languageDesired2Supported the root table to add to
+  * @param desired the desired subtags
+  * @param supported the supported subtags; must have the same length as desired
+  * @param percentage the value stored for the rule
+  * @throws IllegalArgumentException if the lists differ in length or do not have 1..3 elements
+  */
+ private static void add(DistanceTable languageDesired2Supported,
+         List<String> desired, List<String> supported, int percentage) {
+     final int size = desired.size();
+     if (size != supported.size() || size < 1 || size > 3) {
+         throw new IllegalArgumentException();
+     }
+     final String desiredLang = fixAny(desired.get(0));
+     final String supportedLang = fixAny(supported.get(0));
+     if (size == 1) {
+         languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
+         return;
+     }
+     final String desiredScript = fixAny(desired.get(1));
+     final String supportedScript = fixAny(supported.get(1));
+     if (size == 2) {
+         languageDesired2Supported.addSubtables(
+                 desiredLang, supportedLang, desiredScript, supportedScript, percentage);
+         return;
+     }
+     final String desiredRegion = fixAny(desired.get(2));
+     final String supportedRegion = fixAny(supported.get(2));
+     languageDesired2Supported.addSubtables(
+             desiredLang, supportedLang, desiredScript, supportedScript,
+             desiredRegion, supportedRegion, percentage);
+ }
+
+ /**
+ * Collects region variables (each a set of regions, plus its automatically added inverse)
+ * and, in build(), groups the regions into partitions so that all regions in a partition
+ * belong to exactly the same set of variables.
+ * The results are left in the "build() output" fields.
+ */
+ private static final class RegionMapperBuilder {
+ // Names of all variables added so far, including the "$!" inverse variables.
+ private final Set<String> variables = new HashSet<>();
+ // region code -> names of the variables whose region set contains it
+ final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
+ final private RegionSet regionSet;
+ private final TerritoryContainment tc;
+
+ // build() output
+ Multimap<String, String> variableToPartitions;
+ private byte[] regionToPartitionsIndex;
+ private String[][] partitionArrays;
+
+ RegionMapperBuilder(TerritoryContainment tc) {
+ regionSet = new RegionSet(tc);
+ this.tc = tc;
+ }
+
+ /** Returns true for "*" and for every variable name that has been added. */
+ private boolean isKnownVariable(String variable) {
+ return variables.contains(variable) || variable.equals("*");
+ }
+
+ /**
+ * Registers a new variable for the parsed region set,
+ * and also its inverse variable ("$!" + name without the "$")
+ * for all other leaf regions.
+ *
+ * @param variable the new variable name; must start with "$" but not with "$!"
+ * @param barString the region set expression, parsed by RegionSet.parseSet()
+ */
+ void add(String variable, String barString) {
+ assert !isKnownVariable(variable);
+ assert variable.startsWith("$");
+ assert !variable.startsWith("$!");
+ variables.add(variable);
+ // This is the RegionSet's internal set, which is overwritten by the next parseSet() call.
+ Set<String> tempRegions = regionSet.parseSet(barString);
+
+ for (String region : tempRegions) {
+ regionToRawPartition.put(region, variable);
+ }
+
+ // now add the inverse variable
+
+ // inverse() works on the result of the parseSet() call just above.
+ Set<String> inverse = regionSet.inverse();
+ String inverseVariable = "$!" + variable.substring(1);
+ assert !isKnownVariable(inverseVariable);
+ variables.add(inverseVariable);
+ for (String region : inverse) {
+ regionToRawPartition.put(region, inverseVariable);
+ }
+ }
+
+ /**
+ * If the region element (index 2) of the list is a plain region code rather than
+ * a known variable or "*", registers the variable "$" + region for it
+ * and replaces the list element with that variable name.
+ */
+ void ensureRegionIsVariable(List<String> lsrList) {
+ String region = lsrList.get(2);
+ if (!isKnownVariable(region)) {
+ assert LSR.indexForRegion(region) >= 0; // well-formed region subtag
+ String variable = "$" + region;
+ add(variable, region);
+ lsrList.set(2, variable);
+ }
+ }
+
+ /**
+ * Computes the build() output fields from the variables added so far.
+ * Partition numbering follows the insertion order of the LinkedHashMaps below,
+ * so the order of the loops matters.
+ */
+ void build() {
+ // Partitions as sets of variables.
+ // LinkedHashMap to store & number unique sets.
+ // Example: {"$!cnsar", "$!enUS", "$!maghreb", "$americas"}
+ Map<Collection<String>, Integer> partitionVariables = new LinkedHashMap<>();
+ // Partitions as sets of lookup ID strings.
+ // Example: {"1", "5"}
+ Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
+ // pIndex 0: default value in regionToPartitionsIndex
+ Collection<String> noPartitions = Collections.singleton("");
+ makeUniqueIndex(partitionStrings, noPartitions);
+
+ // Example: "$americas" -> {"1", "5"}
+ variableToPartitions = TreeMultimap.create();
+ // Maps the index of each region code to a pIndex into partitionStrings.
+ regionToPartitionsIndex = new byte[LSR.REGION_INDEX_LIMIT];
+ // Maps a partition string to the set of region codes in that partition.
+ // Example: "5" -> {"PR", "US", "VI"}
+ Multimap<String, String> partitionToRegions = TreeMultimap.create();
+
+ // Regions with the same set of variables get the same partition character.
+ for (Map.Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
+ final String region = e.getKey();
+ final Collection<String> rawPartition = e.getValue();
+ // Single-character string.
+ // Must be an ASCII character and must not be '*'.
+ // Used to start with α.
+ char partitionChar = (char) ('0' + makeUniqueIndex(partitionVariables, rawPartition));
+ assert partitionChar <= 0x7f;
+ String partition = String.valueOf(partitionChar);
+ int pIndex = makeUniqueIndex(partitionStrings, Collections.singleton(partition));
+ // The pIndex must fit into a byte.
+ // For Java code simplicity, we want it to also be non-negative.
+ assert pIndex <= 0x7f;
+
+ regionToPartitionsIndex[LSR.indexForRegion(region)] = (byte) pIndex;
+ partitionToRegions.put(partition, region);
+
+ for (String variable : rawPartition) {
+ variableToPartitions.put(variable, partition);
+ }
+ }
+
+ // We get a mapping of each macro to the partitions it intersects with.
+ // Example: "419" -> {"1", "5"}
+ Multimap<String,String> macroToPartitions = TreeMultimap.create();
+ for (Map.Entry<String, Set<String>> e : tc.resolved.asMap().entrySet()) {
+ String macro = e.getKey();
+ for (Map.Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
+ String partition = e2.getKey();
+ if (!Collections.disjoint(e.getValue(), e2.getValue())) {
+ macroToPartitions.put(macro, partition);
+ }
+ }
+ }
+
+ // Create a combined mapping from a region code, which can be a macro region,
+ // via the getRegionIndex() of that region code,
+ // to a set of single-character partition strings.
+ for (Map.Entry<String, Set<String>> m2p : macroToPartitions.asMap().entrySet()) {
+ String macro = m2p.getKey();
+ int regionIndex = LSR.indexForRegion(macro);
+ // Only fill in entries that still have the default pIndex 0.
+ if (regionToPartitionsIndex[regionIndex] == 0) {
+ Set<String> partitions = m2p.getValue();
+ int pIndex = makeUniqueIndex(partitionStrings, partitions);
+ regionToPartitionsIndex[regionIndex] = (byte) pIndex;
+ }
+ }
+
+ // Turn the Collection of Collections into an array of arrays.
+ // The keySet iterates in insertion order, so array index i corresponds to pIndex i.
+ Collection<Collection<String>> list = partitionStrings.keySet();
+ partitionArrays = new String[list.size()][];
+ int i = 0;
+ for (Collection<String> partitions : list) {
+ partitionArrays[i++] = partitions.toArray(new String[partitions.size()]);
+ }
+ }
+ }
+
+ /**
+  * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
+  * A region that the containment data maps to leaf regions is replaced by those leaves;
+  * any other region is used as is.
+  * <br>Syntax is simple for now:
+  * <pre>regionSet := region ([-+] region)*</pre>
+  * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
+  */
+ private static final class RegionSet {
+     private enum Operation {add, remove}
+     private final TerritoryContainment tc;
+     // temporaries used in processing
+     final private Set<String> tempRegions = new TreeSet<>();
+     private Operation operation = null;
+
+     RegionSet(TerritoryContainment tc) {
+         this.tc = tc;
+     }
+
+     /**
+      * Parses the expression and returns the resulting regions.
+      * The returned set is this object's internal working set;
+      * it is cleared and refilled by the next call.
+      */
+     private Set<String> parseSet(String barString) {
+         operation = Operation.add;
+         tempRegions.clear();
+         final int limit = barString.length();
+         int start = 0;
+         for (int pos = 0; pos < limit; ++pos) {
+             char c = barString.charAt(pos); // UTF16 is ok, since syntax is only ascii
+             if (c == '+' || c == '-') {
+                 // Apply the pending operation to the region that just ended,
+                 // then remember the operator for the next region.
+                 add(barString, start, pos);
+                 start = pos + 1;
+                 operation = (c == '+') ? Operation.add : Operation.remove;
+             }
+         }
+         add(barString, start, limit);
+         return tempRegions;
+     }
+
+     /** Returns a new set with all leaf regions that are not in the last parsed set. */
+     private Set<String> inverse() {
+         TreeSet<String> complement = new TreeSet<>(tc.leaves);
+         complement.removeAll(tempRegions);
+         return complement;
+     }
+
+     /** Applies the pending operation to the region in [last, i), if that range is not empty. */
+     private void add(String barString, int last, int i) {
+         if (i <= last) {
+             return;
+         }
+         changeSet(operation, barString.substring(last, i));
+     }
+
+     /** Adds or removes the region, or its leaf regions if it has any, in the working set. */
+     private void changeSet(Operation operation, String region) {
+         final boolean adding = Operation.add == operation;
+         Collection<String> leaves = tc.toLeavesOnly.get(region);
+         if (leaves == null || leaves.isEmpty()) {
+             // No leaf mapping for this region: handle the region code itself.
+             if (adding) {
+                 tempRegions.add(region);
+             } else {
+                 tempRegions.remove(region);
+             }
+         } else if (adding) {
+             tempRegions.addAll(leaves);
+         } else {
+             tempRegions.removeAll(leaves);
+         }
+     }
+ }
+}
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
import java.util.Set;
-import java.util.TreeMap;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.util.ICUException;
+import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.ULocale.Minimize;
-import com.ibm.icu.util.UResourceBundle;
-public class XLikelySubtags {
-
- private static final XLikelySubtags DEFAULT = new XLikelySubtags();
-
- public static final XLikelySubtags getDefault() {
- return DEFAULT;
- }
-
- private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
- Map<V, T> subTable = table.get(language);
- if (subTable == null) {
- table.put(language, subTable = new TreeMap<>());
+public final class XLikelySubtags {
+ private static final String PSEUDO_ACCENTS_PREFIX = "'"; // -XA, -PSACCENT
+ private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
+ private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
+
+ private static final boolean DEBUG_OUTPUT = false;
+
+ // TODO: Load prebuilt data from a resource bundle
+ // to avoid the dependency on the builder code.
+ static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
+
+ static final class Data {
+ private final Map<String, String> languageAliases;
+ private final Map<String, String> regionAliases;
+ private final BytesTrie trie;
+ private final LSR[] lsrs;
+
+ Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
+ BytesTrie trie, LSR[] lsrs) {
+ this.languageAliases = languageAliases;
+ this.regionAliases = regionAliases;
+ this.trie = trie;
+ this.lsrs = lsrs;
}
- return subTable;
}
- public static class Aliases {
- final Map<String, String> toCanonical;
- final Multimap<String, String> toAliases;
- public String getCanonical(String alias) {
- String canonical = toCanonical.get(alias);
- return canonical == null ? alias : canonical;
- }
- public Set<String> getAliases(String canonical) {
- Set<String> aliases = toAliases.get(canonical);
- return aliases == null ? Collections.singleton(canonical) : aliases;
- }
- public Aliases(String key) {
- UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
- UResourceBundle metadataAlias = metadata.get("alias");
- UResourceBundle territoryAlias = metadataAlias.get(key);
- Map<String, String> toCanonical1 = new HashMap<>();
- for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
- UResourceBundle res = territoryAlias.get(i);
- String aliasFrom = res.getKey();
- if (aliasFrom.contains("_")) {
- continue; // only simple aliasing
- }
- String aliasReason = res.get("reason").getString();
- if (aliasReason.equals("overlong")) {
- continue;
- }
- String aliasTo = res.get("replacement").getString();
- int spacePos = aliasTo.indexOf(' ');
- String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
- if (aliasFirst.contains("_")) {
- continue; // only simple aliasing
- }
- toCanonical1.put(aliasFrom, aliasFirst);
+ private final Map<String, String> languageAliases;
+ private final Map<String, String> regionAliases;
+
+ // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+ // There is also a trie value for each intermediate lang and lang+script.
+ // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+ private final BytesTrie trie;
+ private final long trieUndState;
+ private final long trieUndZzzzState;
+ private final int defaultLsrIndex;
+ private final LSR[] lsrs;
+
+ private XLikelySubtags(XLikelySubtags.Data data) {
+ languageAliases = data.languageAliases;
+ regionAliases = data.regionAliases;
+ trie = data.trie;
+ lsrs = data.lsrs;
+
+ // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+ BytesTrie.Result result = trie.next('*');
+ assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+ int value = trie.getValue();
+ assert value == 0;
+ trieUndState = trie.getState64();
+ result = trie.next('*');
+ assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
+ value = trie.getValue();
+ assert value == 0;
+ trieUndZzzzState = trie.getState64();
+ result = trie.next('*');
+ assert result.hasValue();
+ defaultLsrIndex = trie.getValue();
+ trie.reset();
+
+ if (DEBUG_OUTPUT) {
+ System.out.println("*** likely subtags");
+ for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
+ System.out.println(mapping);
}
- if (key.equals("language")) {
- toCanonical1.put("mo", "ro"); // special case
- }
- toCanonical = Collections.unmodifiableMap(toCanonical1);
- toAliases = Multimaps.invertFrom(toCanonical1, HashMultimap.<String,String>create());
}
}
- public static class LSR {
- public final String language;
- public final String script;
- public final String region;
-
- public static Aliases LANGUAGE_ALIASES = new Aliases("language");
- public static Aliases REGION_ALIASES = new Aliases("territory");
+ /**
+  * Returns the replacement that the alias map has for the given subtag,
+  * or the subtag itself if the map has no value for it.
+  */
+ private static String getCanonical(Map<String, String> aliases, String alias) {
+     String replacement = aliases.get(alias);
+     if (replacement != null) {
+         return replacement;
+     }
+     return alias;
+ }
- public static LSR from(String language, String script, String region) {
- return new LSR(language, script, region);
+ LSR makeMaximizedLsrFrom(ULocale locale) {
+ String name = locale.getName();
+ if (name.startsWith("@x=")) {
+ // Private use language tag x-subtag-subtag...
+ return new LSR(name, "", "");
}
- // from http://unicode.org/reports/tr35/#Unicode_language_identifier
- // but simplified to requiring language subtag, and nothing beyond region
- // #1 is language
- // #2 is script
- // #3 is region
- // static final String pat =
- // "language_id = (unicode_language_subtag)"
- // + "(?:sep(unicode_script_subtag))?"
- // + "(?:sep(unicode_region_subtag))?;\n"
- // + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
- // + "unicode_script_subtag = alpha{4};\n"
- // + "unicode_region_subtag = alpha{2}|digit{3};\n"
- // + "sep = [-_];\n"
- // + "digit = [0-9];\n"
- // + "alpha = [A-Za-z];\n"
- // ;
- // static {
- // System.out.println(pat);
- // System.out.println(new UnicodeRegex().compileBnf(pat));
- // }
- // static final Pattern LANGUAGE_PATTERN = Pattern.compile(
- // "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
- // //new UnicodeRegex().compileBnf(pat)
- // );
- //
- // NOTE: Should we fix this to check for format?
- // ANSWER: Not required, since this is only called internally. Moreover, we deliberately
- // use invalid language tags ("x1", "x2", etc.) to represent pseudo-locales. See below.
- static LSR from(String languageIdentifier) {
- String[] parts = languageIdentifier.split("[-_]");
- if (parts.length < 1 || parts.length > 3) {
- throw new ICUException("too many subtags");
+ // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
+ // They should match only themselves,
+ // not other locales with what looks like the same language and script subtags.
+ String language = locale.getLanguage();
+ String script = locale.getScript();
+ String region = locale.getCountry();
+ if (region.length() == 2 && region.charAt(0) == 'X') {
+ switch (region.charAt(1)) {
+ case 'A':
+ return new LSR(PSEUDO_ACCENTS_PREFIX + language,
+ PSEUDO_ACCENTS_PREFIX + script, region);
+ case 'B':
+ return new LSR(PSEUDO_BIDI_PREFIX + language,
+ PSEUDO_BIDI_PREFIX + script, region);
+ case 'C':
+ return new LSR(PSEUDO_CRACKED_PREFIX + language,
+ PSEUDO_CRACKED_PREFIX + script, region);
+ default: // normal locale
+ break;
}
- String lang = parts[0].toLowerCase();
- String p2 = parts.length < 2 ? "" : parts[1];
- String p3 = parts.length < 3 ? "" : parts[2];
- return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
-
- // Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
- // if (!matcher.matches()) {
- // return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
- // }
- // System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
- // throw new ICUException("invalid language id");
}
- private static final HashMap<ULocale, LSR> pseudoReplacements = new HashMap<>(11);
-
- // Note code in XLocaledistance.java handle pseudo-regions XA, XB, and XC, making them
- // very distant from any other locale. Similarly, it establishes that any of the
- // invalid locales below ("x1", "x2", ..., "x7", and "x8-en") are very distant
- // from any other locale.
- static {
- String[][] source = {
- {"x-bork", "x1", "", ""},
- {"x-elmer", "x2", "", ""},
- {"x-hacker", "x3", "", ""},
- {"x-piglatin", "x4", "", ""},
- {"x-pirate", "x5", "", ""},
- {"en-XA", "x6", "", ""},
- {"en-PSACCENT", "x6", "", ""}, // Note: same as for ex-XA
- {"ar-XB", "x7", "", ""},
- {"ar-PSBIDI", "x7", "", ""}, // Note: same as for ar-XB
- {"en-XC", "x8", "en", ""}, // Note: language is stored in LSR.script field
- {"en-PSCRACK", "x8", "en", ""}, // Note: same as for en-XC
- };
- for (int i = 0; i < source.length; ++i) {
- pseudoReplacements.put(new ULocale(source[i][0]),
- new LSR(source[i][1], source[i][2], source[i][3]));
+ String variant = locale.getVariant();
+ if (variant.startsWith("PS")) {
+ switch (variant) {
+ case "PSACCENT":
+ return new LSR(PSEUDO_ACCENTS_PREFIX + language,
+ PSEUDO_ACCENTS_PREFIX + script, region.isEmpty() ? "XA" : region);
+ case "PSBIDI":
+ return new LSR(PSEUDO_BIDI_PREFIX + language,
+ PSEUDO_BIDI_PREFIX + script, region.isEmpty() ? "XB" : region);
+ case "PSCRACK":
+ return new LSR(PSEUDO_CRACKED_PREFIX + language,
+ PSEUDO_CRACKED_PREFIX + script, region.isEmpty() ? "XC" : region);
+ default: // normal locale
+ break;
}
-
}
- public static LSR from(ULocale locale) {
- LSR replacement = pseudoReplacements.get(locale);
- if (replacement != null) {
- return replacement;
- }
- // Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
- if ("PSCRACK".equals(locale.getVariant())) {
- return new LSR(
- "x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
- }
- return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
- }
+ language = getCanonical(languageAliases, language);
+ // script is ok
+ region = getCanonical(regionAliases, region);
+ return INSTANCE.maximize(language, script, region);
+ }
- public static LSR fromMaximalized(ULocale locale) {
- LSR replacement = pseudoReplacements.get(locale);
- if (replacement != null) {
- return replacement;
- }
- // Map *-*-*-PSCRACK to x8-***, same as for en-PSCRACK.
- if ("PSCRACK".equals(locale.getVariant())) {
- return new LSR(
- "x8", locale.getLanguage() + locale.getScript() + locale.getCountry(), "");
- }
- return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
+ /**
+ * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+ */
+ private LSR maximize(String language, String script, String region) {
+ int retainOldMask = 0;
+ BytesTrie iter = new BytesTrie(trie);
+ // language lookup
+ if (language.equals("und")) {
+ language = "";
}
-
- public static LSR fromMaximalized(String language, String script, String region) {
- String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
- // script is ok
- String canonicalRegion = REGION_ALIASES.getCanonical(region);
-
- return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
+ long state;
+ int value = trieNext(iter, language, false);
+ if (value >= 0) {
+ if (!language.isEmpty()) {
+ retainOldMask |= 4;
+ }
+ state = iter.getState64();
+ } else {
+ retainOldMask |= 4;
+ iter.resetToState64(trieUndState); // "und" ("*")
+ state = 0;
}
-
- public LSR(String language, String script, String region) {
- this.language = language;
- this.script = script;
- this.region = region;
+ // script lookup
+ if (script.equals("Zzzz")) {
+ script = "";
}
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder(language);
+ value = trieNext(iter, script, false);
+ if (value >= 0) {
if (!script.isEmpty()) {
- result.append('-').append(script);
- }
- if (!region.isEmpty()) {
- result.append('-').append(region);
+ retainOldMask |= 2;
}
- return result.toString();
- }
- public LSR replace(String language2, String script2, String region2) {
- if (language2 == null && script2 == null && region2 == null) return this;
- return new LSR(
- language2 == null ? language: language2,
- script2 == null ? script : script2,
- region2 == null ? region : region2);
- }
- @Override
- public boolean equals(Object obj) {
- LSR other;
- return this == obj ||
- (obj != null
- && obj.getClass() == this.getClass()
- && language.equals((other = (LSR) obj).language)
- && script.equals(other.script)
- && region.equals(other.region));
- }
- @Override
- public int hashCode() {
- return Objects.hash(language, script, region);
- }
- }
-
- final Map<String, Map<String, Map<String, LSR>>> langTable;
-
- public XLikelySubtags() {
- this(getDefaultRawData());
- }
-
- private static Map<String, String> getDefaultRawData() {
- Map<String, String> rawData = new TreeMap<>();
- UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
- for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
- String key = enumer.nextElement();
- rawData.put(key, bundle.getString(key));
- }
- return rawData;
- }
-
- public XLikelySubtags(Map<String, String> rawData) {
- this.langTable = init(rawData);
- }
-
- private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData) {
- // prepare alias info. We want a mapping from the canonical form to all aliases
-
- //Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
- // getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
-
- // Don't bother with script; there are none
-
- //Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
- // getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
-
- Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
- // Splitter bar = Splitter.on('_');
- // int last = -1;
- // set the base data
- Map<LSR,LSR> internCache = new HashMap<>();
- for (Entry<String, String> sourceTarget : rawData.entrySet()) {
- LSR ltp = LSR.from(sourceTarget.getKey());
- final String language = ltp.language;
- final String script = ltp.script;
- final String region = ltp.region;
-
- ltp = LSR.from(sourceTarget.getValue());
- String languageTarget = ltp.language;
- final String scriptTarget = ltp.script;
- final String regionTarget = ltp.region;
-
- set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
- // now add aliases
- Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
- // if (languageAliases.isEmpty()) {
- // languageAliases = Collections.singleton(language);
- // }
- Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
- // if (regionAliases.isEmpty()) {
- // regionAliases = Collections.singleton(region);
- // }
- for (String languageAlias : languageAliases) {
- for (String regionAlias : regionAliases) {
- if (languageAlias.equals(language) && regionAlias.equals(region)) {
- continue;
- }
- set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
- }
+ state = iter.getState64();
+ } else {
+ retainOldMask |= 2;
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ iter.resetToState64(state);
+ value = trieNext(iter, "", false);
+ assert value == 0;
+ state = iter.getState64();
}
}
- // hack
- set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
-
- // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
- // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
-
- // so und-Latn-GH => ak-Latn-GH
- Map<String, Map<String, LSR>> undScriptMap = result.get("und");
- Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
- for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
- final LSR value = regionEntry.getValue();
- set(result, "und", value.script, value.region, value);
- }
- //
- // check that every level has "" (or "und")
- if (!result.containsKey("und")) {
- throw new IllegalArgumentException("failure: base");
+ // region lookup
+ if (region.equals("ZZ")) {
+ region = "";
}
- for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
- String lang = langEntry.getKey();
- final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
- if (!scriptMap.containsKey("")) {
- throw new IllegalArgumentException("failure: " + lang);
+ value = trieNext(iter, region, true);
+ if (value >= 0) {
+ if (!region.isEmpty()) {
+ retainOldMask |= 1;
}
- for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
- String script = scriptEntry.getKey();
- final Map<String, LSR> regionMap = scriptEntry.getValue();
- if (!regionMap.containsKey("")) {
- throw new IllegalArgumentException("failure: " + lang + "-" + script);
+ } else {
+ retainOldMask |= 1;
+ if (state == 0) {
+ value = defaultLsrIndex;
+ } else {
+ iter.resetToState64(state);
+ value = trieNext(iter, "", true);
+ if (value < 0) { // TODO: should never happen?! just assert value >= 0?
+ return null;
}
- // for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
- // String region = regionEntry.getKey();
- // LSR value = regionEntry.getValue();
- // }
}
}
- return result;
- }
+ LSR result = lsrs[value];
- // private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
- // for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
- // final String alias = e.getKey();
- // if (alias.contains("_")) {
- // continue; // only do simple aliasing
- // }
- // String canonical = getCanonical(e.getValue());
- // canonicalToAlias.put(canonical, alias);
- // }
- // }
-
- // private static String getCanonical(R2<List<String>, String> aliasAndReason) {
- // if (aliasAndReason == null) {
- // return null;
- // }
- // if (aliasAndReason.get1().equals("overlong")) {
- // return null;
- // }
- // List<String> value = aliasAndReason.get0();
- // if (value.size() != 1) {
- // return null;
- // }
- // final String canonical = value.iterator().next();
- // if (canonical.contains("_")) {
- // return null; // only do simple aliasing
- // }
- // return canonical;
- // }
-
- private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
- final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
- LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
- LSR oldValue = internCache.get(newValue);
- if (oldValue == null) {
- internCache.put(newValue, newValue);
- oldValue = newValue;
+ if (language.isEmpty()) {
+ language = "und";
}
- set(langTable, language, script, region, oldValue);
- }
-
- private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
- Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
- Map<String, LSR> regionTable = getSubtable(scriptTable, script);
- // LSR oldValue = regionTable.get(region);
- // if (oldValue != null) {
- // int debug = 0;
- // }
- regionTable.put(region, newValue);
- }
-
- /**
- * Convenience methods
- */
- public LSR maximize(String source) {
- return maximize(ULocale.forLanguageTag(source));
- }
-
- public LSR maximize(ULocale source) {
- return maximize(source.getLanguage(), source.getScript(), source.getCountry());
- }
-
- public LSR maximize(LSR source) {
- return maximize(source.language, source.script, source.region);
- }
-
- // public static ULocale addLikelySubtags(ULocale loc) {
- //
- // }
- /**
- * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
- */
- public LSR maximize(String language, String script, String region) {
- int retainOldMask = 0;
- Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
- if (scriptTable == null) { // cannot happen if language == "und"
- retainOldMask |= 4;
- scriptTable = langTable.get("und");
- } else if (!language.equals("und")) {
- retainOldMask |= 4;
+ if (retainOldMask == 0) {
+ return result;
}
-
- if (script.equals("Zzzz")) {
- script = "";
+ if ((retainOldMask & 4) == 0) {
+ language = result.language;
}
- Map<String, LSR> regionTable = scriptTable.get(script);
- if (regionTable == null) { // cannot happen if script == ""
- retainOldMask |= 2;
- regionTable = scriptTable.get("");
- } else if (!script.isEmpty()) {
- retainOldMask |= 2;
+ if ((retainOldMask & 2) == 0) {
+ script = result.script;
}
-
- if (region.equals("ZZ")) {
- region = "";
+ if ((retainOldMask & 1) == 0) {
+ region = result.region;
}
- LSR result = regionTable.get(region);
- if (result == null) { // cannot happen if region == ""
- retainOldMask |= 1;
- result = regionTable.get("");
- if (result == null) {
- return null;
+ return new LSR(language, script, region);
+ }
+
+ private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
+ BytesTrie.Result result;
+ if (s.isEmpty()) {
+ result = iter.next('*');
+ } else {
+ int end = s.length() - 1;
+ for (int i = 0;; ++i) {
+ result = iter.next(s.charAt(i));
+ if (i < end) {
+ if (!result.hasNext()) {
+ return -1;
+ }
+ } else {
+ // last character of this subtag
+ break;
+ }
}
- } else if (!region.isEmpty()) {
- retainOldMask |= 1;
}
-
- switch (retainOldMask) {
- default:
- case 0: return result;
- case 1: return result.replace(null, null, region);
- case 2: return result.replace(null, script, null);
- case 3: return result.replace(null, script, region);
- case 4: return result.replace(language, null, null);
- case 5: return result.replace(language, null, region);
- case 6: return result.replace(language, script, null);
- case 7: return result.replace(language, script, region);
+ if (!finalSubtag) {
+ if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
+ return 0; // value should be 0, don't care
+ }
+ } else {
+ if (result.hasValue()) {
+ return iter.getValue();
+ }
}
+ return -1;
}
- @SuppressWarnings("unused")
- private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
+ LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
+ ULocale.Minimize fieldToFavor) {
LSR result = maximize(languageIn, scriptIn, regionIn);
// We could try just a series of checks, like:
// (languageIn, "", "")
// (languageIn, "", regionIn)
- Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
-
- Map<String, LSR> regionTable0 = scriptTable.get("");
- LSR value00 = regionTable0.get("");
+ // value00 = lookup(result.language, "", "")
+ BytesTrie iter = new BytesTrie(trie);
+ int value = trieNext(iter, result.language, false);
+ assert value >= 0;
+ value = trieNext(iter, "", false);
+ assert value >= 0;
+ value = trieNext(iter, "", true);
+ LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
if (result.region.equals(value00.region)) {
- return result.replace(null, "", "");
- } else if (fieldToFavor == Minimize.FAVOR_REGION) {
- return result.replace(null, "", null);
+ return new LSR(result.language, "", "");
+ } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+ return new LSR(result.language, "", result.region);
} else {
favorRegionOk = true;
}
// Maybe do later, but for now use the straightforward code.
LSR result2 = maximize(languageIn, scriptIn, "");
if (result2.equals(result)) {
- return result.replace(null, null, "");
+ return new LSR(result.language, result.script, "");
} else if (favorRegionOk) {
- return result.replace(null, "", null);
+ return new LSR(result.language, "", result.region);
}
return result;
}
- private static StringBuilder show(Map<?,?> map, String indent, StringBuilder output) {
- String first = indent.isEmpty() ? "" : "\t";
- for (Entry<?,?> e : map.entrySet()) {
- String key = e.getKey().toString();
- Object value = e.getValue();
- output.append(first + (key.isEmpty() ? "∅" : key));
- if (value instanceof Map) {
- show((Map<?,?>)value, indent+"\t", output);
+ private Map<String, LSR> getTable() {
+ Map<String, LSR> map = new LinkedHashMap<>();
+ Set<String> prefixes = new HashSet<>();
+ StringBuilder sb = new StringBuilder();
+ for (BytesTrie.Entry entry : trie) {
+ sb.setLength(0);
+ int length = entry.bytesLength();
+ for (int i = 0; i < length;) {
+ byte b = entry.byteAt(i++);
+ sb.append((char) b);
+ if (i < length && prefixes.contains(sb.toString())) {
+ sb.append('-');
+ }
+ }
+ String s = sb.toString();
+ if (entry.value == 0) {
+ // intermediate match point
+ prefixes.add(s);
} else {
- output.append("\t" + Objects.toString(value)).append("\n");
+ map.put(s, lsrs[entry.value]);
}
- first = indent;
}
- return output;
+ return map;
}
@Override
public String toString() {
- return show(langTable, "", new StringBuilder()).toString();
+ return getTable().toString();
}
-
- // public static void main(String[] args) {
- // System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
- //
- // final Map<String, String> rawData = sdi.getLikelySubtags();
- // XLikelySubtags ls = XLikelySubtags.getDefault();
- // System.out.println(ls);
- // ls.maximize(new ULocale("iw"));
- // if (true) return;
- //
- // LanguageTagParser ltp = new LanguageTagParser();
- //
- // // get all the languages, scripts, and regions
- // Set<String> languages = new TreeSet<String>();
- // Set<String> scripts = new TreeSet<String>();
- // Set<String> regions = new TreeSet<String>();
- // Counter<String> languageCounter = new Counter<String>();
- // Counter<String> scriptCounter = new Counter<String>();
- // Counter<String> regionCounter = new Counter<String>();
- //
- // for (Entry<String, String> sourceTarget : rawData.entrySet()) {
- // final String source = sourceTarget.getKey();
- // ltp.set(source);
- // languages.add(ltp.getLanguage());
- // scripts.add(ltp.getScript());
- // regions.add(ltp.getRegion());
- // final String target = sourceTarget.getValue();
- // ltp.set(target);
- // add(target, languageCounter, ltp.getLanguage(), 1);
- // add(target, scriptCounter, ltp.getScript(), 1);
- // add(target, regionCounter, ltp.getRegion(), 1);
- // }
- // ltp.set("und-Zzzz-ZZ");
- // languageCounter.add(ltp.getLanguage(), 1);
- // scriptCounter.add(ltp.getScript(), 1);
- // regionCounter.add(ltp.getRegion(), 1);
- //
- // if (SHORT) {
- // removeSingletons(languages, languageCounter);
- // removeSingletons(scripts, scriptCounter);
- // removeSingletons(regions, regionCounter);
- // }
- //
- // System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
- // System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
- // System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
- //
- // int maxCount = Integer.MAX_VALUE;
- //
- // int counter = maxCount;
- // long tempTime = System.nanoTime();
- // newMax:
- // for (String language : languages) {
- // for (String script : scripts) {
- // for (String region : regions) {
- // if (--counter < 0) break newMax;
- // LSR result = ls.maximize(language, script, region);
- // }
- // }
- // }
- // long newMaxTime = System.nanoTime() - tempTime;
- // System.out.println("newMaxTime: " + newMaxTime);
- //
- // counter = maxCount;
- // tempTime = System.nanoTime();
- // newMin:
- // for (String language : languages) {
- // for (String script : scripts) {
- // for (String region : regions) {
- // if (--counter < 0) break newMin;
- // LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
- // }
- // }
- // }
- // long newMinTime = System.nanoTime() - tempTime;
- // System.out.println("newMinTime: " + newMinTime);
- //
- // // *****
- //
- // tempTime = System.nanoTime();
- // counter = maxCount;
- // oldMax:
- // for (String language : languages) {
- // for (String script : scripts) {
- // for (String region : regions) {
- // if (--counter < 0) break oldMax;
- // ULocale tempLocale = new ULocale(language, script, region);
- // ULocale max = ULocale.addLikelySubtags(tempLocale);
- // }
- // }
- // }
- // long oldMaxTime = System.nanoTime() - tempTime;
- // System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
- //
- // counter = maxCount;
- // tempTime = System.nanoTime();
- // oldMin:
- // for (String language : languages) {
- // for (String script : scripts) {
- // for (String region : regions) {
- // if (--counter < 0) break oldMin;
- // ULocale tempLocale = new ULocale(language, script, region);
- // ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
- // }
- // }
- // }
- // long oldMinTime = System.nanoTime() - tempTime;
- // System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
- //
- // counter = maxCount;
- // testMain:
- // for (String language : languages) {
- // System.out.println(language);
- // int tests = 0;
- // for (String script : scripts) {
- // for (String region : regions) {
- // ++tests;
- // if (--counter < 0) break testMain;
- // LSR maxNew = ls.maximize(language, script, region);
- // LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
- // LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
- //
- // ULocale tempLocale = new ULocale(language, script, region);
- // ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
- // ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
- // ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
- //
- // // check values
- // final String maxNewS = String.valueOf(maxNew);
- // final String maxOldS = maxOld.toLanguageTag();
- // boolean sameMax = maxOldS.equals(maxNewS);
- //
- // final String minNewSS = String.valueOf(minNewS);
- // final String minOldSS = minOldS.toLanguageTag();
- // boolean sameMinS = minNewSS.equals(minOldSS);
- //
- // final String minNewRS = String.valueOf(minNewR);
- // final String minOldRS = minOldS.toLanguageTag();
- // boolean sameMinR = minNewRS.equals(minOldRS);
- //
- // if (sameMax && sameMinS && sameMinR) continue;
- // System.out.println(new LSR(language, script, region)
- // + "\tmax: " + maxNew
- // + (sameMax ? "" : "≠" + maxOldS)
- // + "\tminS: " + minNewS
- // + (sameMinS ? "" : "≠" + minOldS)
- // + "\tminR: " + minNewR
- // + (sameMinR ? "" : "≠" + minOldR)
- // );
- // }
- // }
- // System.out.println(language + ": " + tests);
- // }
- // }
- //
- // private static void add(String target, Counter<String> languageCounter, String language, int count) {
- // if (language.equals("aa")) {
- // int debug = 0;
- // }
- // languageCounter.add(language, count);
- // }
- //
- // private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
- // for (String s : languageCounter) {
- // final long count = languageCounter.get(s);
- // if (count <= 1) {
- // languages.remove(s);
- // }
- // }
- // }
}
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.Row;
-import com.ibm.icu.impl.Row.R4;
-import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
-import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.impl.locale.XCldrStub.Predicate;
-import com.ibm.icu.impl.locale.XCldrStub.Splitter;
-import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder;
-import com.ibm.icu.text.LocaleDisplayNames;
-import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.Output;
-import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.UResourceBundleIterator;
-
-public class XLocaleDistance {
-
- static final boolean PRINT_OVERRIDES = false;
-
- public static final int ABOVE_THRESHOLD = 100;
-
- // Activates debugging output to stderr with details of GetBestMatch.
- // Be sure to set this to false before checking this in for production!
- private static final boolean TRACE_DISTANCE = false;
-
- @Deprecated
- public static final String ANY = "�"; // matches any character. Uses value above any subtag.
-
- private static String fixAny(String string) {
- return "*".equals(string) ? ANY : string;
- }
-
- static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);
-
- private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
- List<R4<String, String, Integer, Boolean>> distanceList = new ArrayList<>();
-
- ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
- ICUResourceBundle languageMatchingNew = suppData.findTopLevel("languageMatchingNew");
- ICUResourceBundle written = (ICUResourceBundle) languageMatchingNew.get("written");
-
- for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
- ICUResourceBundle item = (ICUResourceBundle) iter.next();
- boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
- distanceList.add(
- (R4<String, String, Integer, Boolean>) // note: .freeze returning wrong type, so casting.
- Row.of(
- item.getString(0),
- item.getString(1),
- Integer.parseInt(item.getString(2)),
- oneway)
- .freeze());
- }
- return Collections.unmodifiableList(distanceList);
- }
-
- @SuppressWarnings("unused")
- private static Set<String> xGetParadigmLocales() {
- ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
- ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
- ICUResourceBundle writtenParadigmLocales = (ICUResourceBundle) languageMatchingInfo.get("written")
- .get("paradigmLocales");
- // paradigmLocales{ "en", "en-GB",... }
- HashSet<String> paradigmLocales = new HashSet<>(Arrays.asList(writtenParadigmLocales.getStringArray()));
- return Collections.unmodifiableSet(paradigmLocales);
- }
-
- @SuppressWarnings("unused")
- private static Map<String, String> xGetMatchVariables() {
- ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
- ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
- ICUResourceBundle writtenMatchVariables = (ICUResourceBundle) languageMatchingInfo.get("written")
- .get("matchVariable");
- // matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}
-
- HashMap<String,String> matchVariables = new HashMap<>();
- for (Enumeration<String> enumer = writtenMatchVariables.getKeys(); enumer.hasMoreElements(); ) {
- String key = enumer.nextElement();
- matchVariables.put(key, writtenMatchVariables.getString(key));
- }
- return Collections.unmodifiableMap(matchVariables);
- }
-
- private static Multimap<String, String> xGetContainment() {
- TreeMultimap<String,String> containment = TreeMultimap.create();
- containment
- .putAll("001", "019", "002", "150", "142", "009")
- .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
- .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
- .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
- .putAll("142", "145", "143", "030", "034", "035")
- .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
- .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
- .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
- .putAll("150", "154", "155", "151", "039")
- .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
- .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
- .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
- .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
- .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
- .putAll("019", "021", "013", "029", "005", "003", "419")
- .putAll("002", "015", "011", "017", "014", "018")
- .putAll("021", "BM", "CA", "GL", "PM", "US")
- .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
- .putAll("003", "021", "013", "029")
- .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
- .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
- .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
- .putAll("419", "013", "029", "005")
- .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
- .putAll("053", "AU", "NF", "NZ")
- .putAll("054", "FJ", "NC", "PG", "SB", "VU")
- .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
- .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
- .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
- .putAll("009", "053", "054", "057", "061", "QO")
- .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
- ;
- //Can't use following, because data from CLDR is discarded
- // ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
- // UResourceBundle territoryContainment = suppData.get("territoryContainment");
- // for (int i = 0 ; i < territoryContainment.getSize(); i++) {
- // UResourceBundle mapping = territoryContainment.get(i);
- // String parent = mapping.getKey();
- // for (int j = 0 ; j < mapping.getSize(); j++) {
- // String child = mapping.getString(j);
- // containment.put(parent,child);
- // System.out.println(parent + " => " + child);
- // }
- // }
- TreeMultimap<String,String> containmentResolved = TreeMultimap.create();
- fill("001", containment, containmentResolved);
- return ImmutableMultimap.copyOf(containmentResolved);
- }
-
- private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
- Set<String> contained = containment.get(region);
- if (contained == null) {
- return Collections.emptySet();
- }
- toAddTo.putAll(region, contained); // do top level
- // then recursively
- for (String subregion : contained) {
- toAddTo.putAll(region, fill(subregion, containment, toAddTo));
- }
- return toAddTo.get(region);
- }
-
-
- static final Multimap<String,String> CONTAINER_TO_CONTAINED;
- static final Multimap<String,String> CONTAINER_TO_CONTAINED_FINAL;
- static {
- // Multimap<String, String> containerToContainedTemp = xGetContainment();
- // fill(Region.getInstance("001"), containerToContainedTemp);
-
- CONTAINER_TO_CONTAINED = xGetContainment();
- Multimap<String, String> containerToFinalContainedBuilder = TreeMultimap.create();
- for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
- String container = entry.getKey();
- for (String contained : entry.getValue()) {
- if (CONTAINER_TO_CONTAINED.get(contained) == null) {
- containerToFinalContainedBuilder.put(container, contained);
- }
- }
- }
- CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(containerToFinalContainedBuilder);
- }
-
- final static private Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));
-
- // end of data from CLDR
-
- private final DistanceTable languageDesired2Supported;
- private final RegionMapper regionMapper;
- private final int defaultLanguageDistance;
- private final int defaultScriptDistance;
- private final int defaultRegionDistance;
-
- @Deprecated
- public static abstract class DistanceTable {
- abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);
- abstract Set<String> getCloser(int threshold);
- abstract String toString(boolean abbreviate);
- public DistanceTable compact() {
- return this;
- }
- // public Integer getInternalDistance(String a, String b) {
- // return null;
- // }
- public DistanceNode getInternalNode(String any, String any2) {
- return null;
- }
- public Map<String, Set<String>> getInternalMatches() {
- return null;
- }
- public boolean isEmpty() {
- return true;
- }
- }
-
- @Deprecated
- public static class DistanceNode {
- final int distance;
-
- public DistanceNode(int distance) {
- this.distance = distance;
- }
-
- public DistanceTable getDistanceTable() {
- return null;
- }
-
- @Override
- public boolean equals(Object obj) {
- return this == obj ||
- (obj != null
- && obj.getClass() == this.getClass()
- && distance == ((DistanceNode) obj).distance);
- }
- @Override
- public int hashCode() {
- return distance;
- }
- @Override
- public String toString() {
- return "\ndistance: " + distance;
- }
- }
-
- private interface IdMapper<K,V> {
- public V toId(K source);
- }
-
- /**
-  * Assigns consecutive integer ids (0, 1, 2, ...) to objects in the order they are first added,
-  * and supports lookup in both directions. Objects are compared with equals/hashCode because
-  * they are used as HashMap keys.
-  */
- static class IdMakerFull<T> implements IdMapper<T,Integer> {
-     private final Map<T, Integer> objectToInt = new HashMap<>();
-     private final List<T> intToObject = new ArrayList<>();
-     final String name; // for debugging
-
-     IdMakerFull(String name) {
-         this.name = name;
-     }
-
-     IdMakerFull() {
-         this("unnamed");
-     }
-
-     /** Creates an id maker in which {@code zeroValue} is registered first and so receives id 0. */
-     IdMakerFull(String name, T zeroValue) {
-         this(name);
-         add(zeroValue);
-     }
-
-     /**
-      * Return an id, making one if there wasn't one already.
-      */
-     public Integer add(T source) {
-         Integer existing = objectToInt.get(source);
-         return existing != null ? existing : register(source);
-     }
-
-     /**
-      * Return an id, or null if there is none.
-      */
-     @Override
-     public Integer toId(T source) {
-         return objectToInt.get(source);
-     }
-
-     /**
-      * Return the object for the id, or null if there is none.
-      */
-     public T fromId(int id) {
-         // Honor the documented contract: an id that was never assigned yields null
-         // instead of an IndexOutOfBoundsException from the backing list.
-         return (id >= 0 && id < intToObject.size()) ? intToObject.get(id) : null;
-     }
-
-     /**
-      * Return interned object: the first object added that is equal to {@code source}
-      * (which is {@code source} itself if no equal object was added before).
-      */
-     public T intern(T source) {
-         return fromId(add(source));
-     }
-
-     /** Returns the number of ids assigned so far. */
-     public int size() {
-         return intToObject.size();
-     }
-
-     /**
-      * Same as add, except if the object didn't have an id, return null;
-      */
-     public Integer getOldAndAdd(T source) {
-         Integer existing = objectToInt.get(source);
-         if (existing == null) {
-             register(source);
-         }
-         return existing;
-     }
-
-     /** Assigns the next free id to {@code source} and records it in both directions. */
-     private Integer register(T source) {
-         Integer newId = intToObject.size();
-         objectToInt.put(source, newId);
-         intToObject.add(source);
-         return newId;
-     }
-
-     @Override
-     public String toString() {
-         return size() + ": " + intToObject;
-     }
-
-     /** Two id makers of the same class are equal when they hold equal objects in the same id order. */
-     @Override
-     public boolean equals(Object obj) {
-         return this == obj ||
-             (obj != null
-             && obj.getClass() == this.getClass()
-             && intToObject.equals(((IdMakerFull<?>) obj).intToObject));
-     }
-
-     @Override
-     public int hashCode() {
-         return intToObject.hashCode();
-     }
- }
-
- /** A DistanceNode that additionally carries a nested DistanceTable. */
- static class StringDistanceNode extends DistanceNode {
-     final DistanceTable distanceTable;
-
-     /** Creates a node with the given distance and a new, empty StringDistanceTable. */
-     StringDistanceNode(int distance) {
-         this(distance, new StringDistanceTable());
-     }
-
-     public StringDistanceNode(int distance, DistanceTable distanceTable) {
-         super(distance);
-         this.distanceTable = distanceTable;
-     }
-
-     @Override
-     public DistanceTable getDistanceTable() {
-         return distanceTable;
-     }
-
-     /** Equal when the class and distance match (checked by the superclass) and the tables are equal. */
-     @Override
-     public boolean equals(Object obj) {
-         if (this == obj) {
-             return true;
-         }
-         if (!super.equals(obj)) {
-             // null, a different class, or a different distance
-             return false;
-         }
-         StringDistanceNode that = (StringDistanceNode) obj;
-         return Objects.equals(distanceTable, that.distanceTable);
-     }
-
-     @Override
-     public int hashCode() {
-         return Objects.hashCode(distanceTable) ^ distance;
-     }
-
-     @Override
-     public String toString() {
-         return "distance: " + distance + "\n" + distanceTable;
-     }
-
-     /** Forwards to the nested table, which is cast to StringDistanceTable. */
-     public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
-         StringDistanceTable nested = (StringDistanceTable) distanceTable;
-         nested.addSubtables(desiredSub, supportedSub, r);
-     }
-
-     /** Copies the entries of {@code value} into the nested table; does nothing for null. */
-     public void copyTables(StringDistanceTable value) {
-         if (value == null) {
-             return;
-         }
-         ((StringDistanceTable) distanceTable).copy(value);
-     }
- }
-
- /**
-  * Stores the distance table and region mapper, then reads the default language, script and
-  * region distances from the (ANY, ANY) entry at each successive nesting level of the table.
-  */
- public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
-     languageDesired2Supported = datadistancetable2;
-     this.regionMapper = regionMapper;
-
-     StringDistanceTable languageTable = (StringDistanceTable) languageDesired2Supported;
-     StringDistanceNode languageNode = (StringDistanceNode) languageTable.subtables.get(ANY).get(ANY);
-     defaultLanguageDistance = languageNode.distance;
-
-     StringDistanceTable scriptTable = (StringDistanceTable) languageNode.distanceTable;
-     StringDistanceNode scriptNode = (StringDistanceNode) scriptTable.subtables.get(ANY).get(ANY);
-     defaultScriptDistance = scriptNode.distance;
-
-     StringDistanceTable regionTable = (StringDistanceTable) scriptNode.distanceTable;
-     DistanceNode regionNode = regionTable.subtables.get(ANY).get(ANY);
-     defaultRegionDistance = regionNode.distance;
- }
-
- /** Creates the map used for new tables; a sorted TreeMap is used for debugging. */
- @SuppressWarnings("rawtypes")
- private static Map newMap() {
-     Map fresh = new TreeMap();
-     return fresh;
- }
-
- /**
- * Internal class
- */
- @Deprecated
- public static class StringDistanceTable extends DistanceTable {
- final Map<String, Map<String, DistanceNode>> subtables;
-
- StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
- subtables = tables;
- }
- @SuppressWarnings("unchecked")
- StringDistanceTable() {
- this(newMap());
- }
-
- @Override
- public boolean isEmpty() {
- return subtables.isEmpty();
- }
-
- @Override
- public boolean equals(Object obj) {
- return this == obj ||
- (obj != null
- && obj.getClass() == this.getClass()
- && subtables.equals(((StringDistanceTable) obj).subtables));
- }
- @Override
- public int hashCode() {
- return subtables.hashCode();
- }
-
- @Override
- public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
- if (TRACE_DISTANCE) {
- System.err.printf(" Entering getDistance: desired=%s supported=%s starEquals=%s\n",
- desired, supported, Boolean.toString(starEquals));
- }
- boolean star = false;
- Map<String, DistanceNode> sub2 = subtables.get(desired);
- if (sub2 == null) {
- sub2 = subtables.get(ANY); // <*, supported>
- star = true;
- }
- DistanceNode value = sub2.get(supported); // <*/desired, supported>
- if (value == null) {
- value = sub2.get(ANY); // <*/desired, *>
- if (value == null && !star) {
- sub2 = subtables.get(ANY); // <*, supported>
- value = sub2.get(supported);
- if (value == null) {
- value = sub2.get(ANY); // <*, *>
- }
- }
- star = true;
- }
- if (distanceTable != null) {
- distanceTable.value = ((StringDistanceNode) value).distanceTable;
- }
- int result = starEquals && star && desired.equals(supported) ? 0 : value.distance;
- if (TRACE_DISTANCE) {
- System.err.printf(" Returning from getDistance: %d\n", result);
- }
- return result;
- }
-
- public void copy(StringDistanceTable other) {
- for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
- for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
- DistanceNode value = e2.getValue();
- @SuppressWarnings("unused")
- DistanceNode subNode = addSubtable(e1.getKey(), e2.getKey(), value.distance);
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- DistanceNode addSubtable(String desired, String supported, int distance) {
- Map<String, DistanceNode> sub2 = subtables.get(desired);
- if (sub2 == null) {
- subtables.put(desired, sub2 = newMap());
- }
- DistanceNode oldNode = sub2.get(supported);
- if (oldNode != null) {
- return oldNode;
- }
-
- final StringDistanceNode newNode = new StringDistanceNode(distance);
- sub2.put(supported, newNode);
- return newNode;
- }
-
- /**
- * Return null if value doesn't exist
- */
- private DistanceNode getNode(String desired, String supported) {
- Map<String, DistanceNode> sub2 = subtables.get(desired);
- if (sub2 == null) {
- return null;
- }
- return sub2.get(supported);
- }
-
-
- /** add table for each subitem that matches and doesn't have a table already
- */
- public void addSubtables(
- String desired, String supported,
- Predicate<DistanceNode> action) {
- DistanceNode node = getNode(desired, supported);
- if (node == null) {
- // get the distance it would have
- Output<DistanceTable> node2 = new Output<>();
- int distance = getDistance(desired, supported, node2, true);
- // now add it
- node = addSubtable(desired, supported, distance);
- if (node2.value != null) {
- ((StringDistanceNode)node).copyTables((StringDistanceTable)(node2.value));
- }
- }
- action.test(node);
- }
-
- public void addSubtables(String desiredLang, String supportedLang,
- String desiredScript, String supportedScript,
- int percentage) {
-
- // add to all the values that have the matching desiredLang and supportedLang
- @SuppressWarnings("unused")
- boolean haveKeys = false;
- for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
- String key1 = e1.getKey();
- final boolean desiredIsKey = desiredLang.equals(key1);
- if (desiredIsKey || desiredLang.equals(ANY)) {
- for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
- String key2 = e2.getKey();
- final boolean supportedIsKey = supportedLang.equals(key2);
- haveKeys |= (desiredIsKey && supportedIsKey);
- if (supportedIsKey || supportedLang.equals(ANY)) {
- DistanceNode value = e2.getValue();
- ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
- }
- }
- }
- }
- // now add the sequence explicitly
- StringDistanceTable dt = new StringDistanceTable();
- dt.addSubtable(desiredScript, supportedScript, percentage);
- CopyIfEmpty r = new CopyIfEmpty(dt);
- addSubtables(desiredLang, supportedLang, r);
- }
-
- public void addSubtables(String desiredLang, String supportedLang,
- String desiredScript, String supportedScript,
- String desiredRegion, String supportedRegion,
- int percentage) {
-
- // add to all the values that have the matching desiredLang and supportedLang
- @SuppressWarnings("unused")
- boolean haveKeys = false;
- for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
- String key1 = e1.getKey();
- final boolean desiredIsKey = desiredLang.equals(key1);
- if (desiredIsKey || desiredLang.equals(ANY)) {
- for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
- String key2 = e2.getKey();
- final boolean supportedIsKey = supportedLang.equals(key2);
- haveKeys |= (desiredIsKey && supportedIsKey);
- if (supportedIsKey || supportedLang.equals(ANY)) {
- StringDistanceNode value = (StringDistanceNode) e2.getValue();
- ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
- }
- }
- }
- }
- // now add the sequence explicitly
-
- StringDistanceTable dt = new StringDistanceTable();
- dt.addSubtable(desiredRegion, supportedRegion, percentage);
- AddSub r = new AddSub(desiredScript, supportedScript, dt);
- addSubtables(desiredLang, supportedLang, r);
- }
-
- @Override
- public String toString() {
- return toString(false);
- }
-
- @Override
- public String toString(boolean abbreviate) {
- return toString(abbreviate, "", new IdMakerFull<>("interner"), new StringBuilder()).toString();
- }
-
- public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
- String indent2 = indent.isEmpty() ? "" : "\t";
- Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
- if (id != null) {
- buffer.append(indent2).append('#').append(id).append('\n');
- } else for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
- final Map<String, DistanceNode> subsubtable = e1.getValue();
- buffer.append(indent2).append(e1.getKey());
- String indent3 = "\t";
- id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
- if (id != null) {
- buffer.append(indent3).append('#').append(id).append('\n');
- } else for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
- DistanceNode value = e2.getValue();
- buffer.append(indent3).append(e2.getKey());
- id = abbreviate ? intern.getOldAndAdd(value) : null;
- if (id != null) {
- buffer.append('\t').append('#').append(id).append('\n');
- } else {
- buffer.append('\t').append(value.distance);
- final DistanceTable distanceTable = value.getDistanceTable();
- if (distanceTable != null) {
- id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
- if (id != null) {
- buffer.append('\t').append('#').append(id).append('\n');
- } else {
- ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
- buffer.append('\n');
- }
- } else {
- buffer.append('\n');
- }
- }
- indent3 = indent+'\t';
- }
- indent2 = indent;
- }
- return buffer;
- }
-
- @Override
- public StringDistanceTable compact() {
- return new CompactAndImmutablizer().compact(this);
- }
-
- @Override
- public Set<String> getCloser(int threshold) {
- Set<String> result = new HashSet<>();
- for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
- String desired = e1.getKey();
- for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
- if (e2.getValue().distance < threshold) {
- result.add(desired);
- break;
- }
- }
- }
- return result;
- }
-
- public Integer getInternalDistance(String a, String b) {
- Map<String, DistanceNode> subsub = subtables.get(a);
- if (subsub == null) {
- return null;
- }
- DistanceNode dnode = subsub.get(b);
- return dnode == null ? null : dnode.distance;
- }
-
- @Override
- public DistanceNode getInternalNode(String a, String b) {
- Map<String, DistanceNode> subsub = subtables.get(a);
- if (subsub == null) {
- return null;
- }
- return subsub.get(b);
- }
-
- @Override
- public Map<String, Set<String>> getInternalMatches() {
- Map<String, Set<String>> result = new LinkedHashMap<>();
- for (Entry<String, Map<String, DistanceNode>> entry : subtables.entrySet()) {
- result.put(entry.getKey(), new LinkedHashSet<>(entry.getValue().keySet()));
- }
- return result;
- }
- }
-
- /** Action that copies a fixed table into a node's nested table, but only if that table is empty. */
- static class CopyIfEmpty implements Predicate<DistanceNode> {
-     private final StringDistanceTable toCopy;
-
-     CopyIfEmpty(StringDistanceTable resetIfNotNull) {
-         toCopy = resetIfNotNull;
-     }
-
-     /** Always returns true. */
-     @Override
-     public boolean test(DistanceNode node) {
-         final StringDistanceTable target = (StringDistanceTable) node.getDistanceTable();
-         final boolean nothingThereYet = target.subtables.isEmpty();
-         if (nothingThereYet) {
-             target.copy(toCopy);
-         }
-         return true;
-     }
- }
-
- static class AddSub implements Predicate<DistanceNode> {
- private final String desiredSub;
- private final String supportedSub;
- private final CopyIfEmpty r;
-
- AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
- this.r = new CopyIfEmpty(distanceTableToCopy);
- this.desiredSub = desiredSub;
- this.supportedSub = supportedSub;
- }
- @Override
- public boolean test(DistanceNode node) {
- if (node == null) {
- throw new IllegalArgumentException("bad structure");
- } else {
- ((StringDistanceNode)node).addSubtables(desiredSub, supportedSub, r);
- }
- return true;
- }
- }
-
- /**
-  * Converts both locales with LSR.fromMaximalized (supported first, then desired) and
-  * delegates to distanceRaw.
-  */
- public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) {
-     final LSR supportedParts = LSR.fromMaximalized(supported);
-     final LSR desiredParts = LSR.fromMaximalized(desired);
-     final int result = distanceRaw(desiredParts, supportedParts, threshold, distanceOption);
-     return result;
- }
-
- /**
-  * Returns distance, from 0 to ABOVE_THRESHOLD.
-  * ULocales must be in canonical, addLikelySubtags format.
-  * Unpacks the language, script and region of both arguments and delegates to the
-  * String-based overload; writes trace lines to stderr when TRACE_DISTANCE is set.
-  */
- public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) {
-     if (TRACE_DISTANCE) {
-         System.err.printf(" Entering distanceRaw: desired=%s supported=%s "
-             + "threshold=%d preferred=%s\n",
-             desired, supported, threshold,
-             distanceOption.name());
-     }
-     final int result = distanceRaw(
-         desired.language, supported.language,
-         desired.script, supported.script,
-         desired.region, supported.region,
-         threshold, distanceOption);
-     if (TRACE_DISTANCE) {
-         System.err.printf(" Returning from distanceRaw: %d\n", result);
-     }
-     return result;
- }
-
- public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
- // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
- // than regions, so they might be considered the "normal" case.
-
- /**
-  * Returns distance, from 0 to ABOVE_THRESHOLD.
-  * ULocales must be in canonical, addLikelySubtags format.
-  * (Exception: internal calls may pass any strings. They do this for pseudo-locales.)
-  *
-  * <p>The language, script and region distances are looked up level by level and summed;
-  * ABOVE_THRESHOLD is returned as soon as the running sum reaches {@code threshold}.
-  * With SCRIPT_FIRST the language distance is shifted right by 2 and the script distance by 1.
-  */
- public int distanceRaw(
-         String desiredLang, String supportedLang,
-         String desiredScript, String supportedScript,
-         String desiredRegion, String supportedRegion,
-         int threshold,
-         DistanceOption distanceOption) {
-
-     final Output<DistanceTable> level = new Output<>();
-
-     // Language level; the lookup also leaves the script-level table in level.value.
-     int total = languageDesired2Supported.getDistance(desiredLang, supportedLang, level, true);
-     final boolean scriptFirst = (DistanceOption.SCRIPT_FIRST == distanceOption);
-     if (scriptFirst) {
-         total >>= 2;
-     }
-     if (total < 0) {
-         total = 0;
-     } else if (total >= threshold) {
-         return ABOVE_THRESHOLD;
-     }
-
-     // Script level; the lookup replaces level.value with the region-level table.
-     int scriptPart = level.value.getDistance(desiredScript, supportedScript, level, true);
-     if (scriptFirst) {
-         scriptPart >>= 1;
-     }
-     total += scriptPart;
-     if (total >= threshold) {
-         return ABOVE_THRESHOLD;
-     }
-
-     if (desiredRegion.equals(supportedRegion)) {
-         return total;
-     }
-
-     // From here on we know the regions are not equal
-
-     final String desiredPartition = regionMapper.toId(desiredRegion);
-     final String supportedPartition = regionMapper.toId(supportedRegion);
-
-     // check for macros. If one is found, we take the maximum distance
-     // this could be optimized by adding some more structure, but probably not worth it.
-
-     Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null;
-     Collection<String> supportedPartitions = supportedPartition.isEmpty() ? regionMapper.macroToPartitions.get(supportedRegion) : null;
-
-     final int regionPart;
-     if (desiredPartitions == null && supportedPartitions == null) {
-         regionPart = level.value.getDistance(desiredPartition, supportedPartition, null, false);
-     } else {
-         // make the code simple for now
-         if (desiredPartitions == null) {
-             desiredPartitions = Collections.singleton(desiredPartition);
-         }
-         if (supportedPartitions == null) {
-             supportedPartitions = Collections.singleton(supportedPartition);
-         }
-         int worst = 0;
-         for (String fromPartition : desiredPartitions) {
-             for (String toPartition : supportedPartitions) {
-                 worst = Math.max(worst, level.value.getDistance(fromPartition, toPartition, null, false));
-             }
-         }
-         regionPart = worst;
-     }
-     total += regionPart;
-     return total >= threshold ? ABOVE_THRESHOLD : total;
- }
-
-
- private static final XLocaleDistance DEFAULT; // assigned once, at the end of the static initializer below
-
- public static XLocaleDistance getDefault() { // returns the shared DEFAULT instance
- return DEFAULT;
- }
-
- // Builds DEFAULT: the default region mapper plus the default distance table, filled from
- // xGetLanguageMatcherData() (language and language-script rules), the hard-coded region rules
- // below, and the pseudo-locale entries; the table is compacted before use.
- static {
-     // Named region sets; RegionMapper.Builder.add also registers the inverse "$!name" for each.
-     String[][] variableOverrides = {
-         {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"},
-
-         {"$cnsar", "HK+MO"},
-
-         {"$americas", "019"},
-
-         {"$maghreb", "MA+DZ+TN+LY+MR+EH"},
-     };
-     String[] paradigmRegions = {
-         "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT"
-     };
-     // {desired, supported, match value}; the distance stored is 100 minus the match value.
-     String[][] regionRuleOverrides = {
-         {"ar_*_$maghreb", "ar_*_$maghreb", "96"},
-         {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"},
-         {"ar_*_*", "ar_*_*", "95"},
-
-         {"en_*_$enUS", "en_*_$enUS", "96"},
-         {"en_*_$!enUS", "en_*_$!enUS", "96"},
-         {"en_*_*", "en_*_*", "95"},
-
-         {"es_*_$americas", "es_*_$americas", "96"},
-         {"es_*_$!americas", "es_*_$!americas", "96"},
-         {"es_*_*", "es_*_*", "95"},
-
-         {"pt_*_$americas", "pt_*_$americas", "96"},
-         {"pt_*_$!americas", "pt_*_$!americas", "96"},
-         {"pt_*_*", "pt_*_*", "95"},
-
-         {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"},
-         {"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"},
-         {"zh_Hant_*", "zh_Hant_*", "95"},
-
-         {"*_*_*", "*_*_*", "96"},
-     };
-
-     Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions);
-     for (String[] variableRule : variableOverrides) {
-         rmb.add(variableRule[0], variableRule[1]);
-     }
-     if (PRINT_OVERRIDES) {
-         System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">");
-         System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ")
-             + "\"/>");
-         for (String[] variableRule : variableOverrides) {
-             System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0]
-                 + "\" value=\""
-                 + variableRule[1]
-                 + "\"/>");
-         }
-     }
-
-     final StringDistanceTable defaultDistanceTable = new StringDistanceTable();
-     final RegionMapper defaultRegionMapper = rmb.build();
-
-     Splitter bar = Splitter.on('_');
-
-     // sorted[k] holds the rules whose desired side has k+1 fields.
-     @SuppressWarnings({"unchecked", "rawtypes"})
-     List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3];
-     sorted[0] = new ArrayList<>();
-     sorted[1] = new ArrayList<>();
-     sorted[2] = new ArrayList<>();
-
-     // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region.
-     for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) {
-         String desiredRaw = info.get0();
-         String supportedRaw = info.get1();
-         List<String> desired = bar.splitToList(desiredRaw);
-         List<String> supported = bar.splitToList(supportedRaw);
-         Boolean oneway = info.get3();
-         // the "*_*" rule gets a fixed distance of 50 instead of the value in the data
-         int distance = desiredRaw.equals("*_*") ? 50 : info.get2();
-         int size = desired.size();
-
-         // for now, skip size == 3; region rules come from regionRuleOverrides below
-         if (size == 3) continue;
-
-         sorted[size-1].add(Row.of(desired, supported, distance, oneway));
-     }
-
-     for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) {
-         for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) {
-             List<String> desired = item2.get0();
-             List<String> supported = item2.get1();
-             Integer distance = item2.get2();
-             Boolean oneway = item2.get3();
-             add(defaultDistanceTable, desired, supported, distance);
-             // Compare by value: identity comparison against Boolean.TRUE is only correct for
-             // the canonical instance. A null oneway still means "not one-way".
-             if (!Boolean.TRUE.equals(oneway) && !desired.equals(supported)) {
-                 add(defaultDistanceTable, supported, desired, distance);
-             }
-             printMatchXml(desired, supported, distance, oneway);
-         }
-     }
-
-     // add new size=3
-     for (String[] rule : regionRuleOverrides) {
-         // mutable copies, because the region field is replaced by each partition id below
-         List<String> desiredBase = new ArrayList<>(bar.splitToList(rule[0]));
-         List<String> supportedBase = new ArrayList<>(bar.splitToList(rule[1]));
-         Integer distance = 100-Integer.parseInt(rule[2]);
-         printMatchXml(desiredBase, supportedBase, distance, false);
-
-         Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2));
-         if (desiredRegions.isEmpty()) {
-             throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2));
-         }
-         Collection<String> supportedRegions = defaultRegionMapper.getIdsFromVariable(supportedBase.get(2));
-         if (supportedRegions.isEmpty()) {
-             throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2));
-         }
-         for (String desiredRegion2 : desiredRegions) {
-             desiredBase.set(2, desiredRegion2);
-             for (String supportedRegion2 : supportedRegions) {
-                 supportedBase.set(2, supportedRegion2);
-                 add(defaultDistanceTable, desiredBase, supportedBase, distance);
-                 add(defaultDistanceTable, supportedBase, desiredBase, distance);
-             }
-         }
-     }
-
-     // Pseudo regions should match no other regions.
-     // {"*-*-XA", "*-*-*", "0"},
-     // {"*-*-XB", "*-*-*", "0"},
-     // {"*-*-XC", "*-*-*", "0"},
-     // {"x1-*-*", "*-*-*", "0"},
-     // {"x2-*-*", "*-*-*", "0"},
-     // ...
-     // {"x8-*-*", "*-*-*", "0"},
-     List<String> supported = Arrays.asList("*", "*", "*");
-     for (String x : Arrays.asList("XA", "XB", "XC")) {
-         List<String> desired = Arrays.asList("*", "*", x);
-         add(defaultDistanceTable, desired, supported, 100);
-         add(defaultDistanceTable, supported, desired, 100);
-     }
-     // See XLikelySubtags.java for the mapping of pseudo-locales to x1 ... x8.
-     for (int i = 1; i <= 8; ++i) {
-         List<String> desired = Arrays.asList("x" + String.valueOf(i), "*", "*");
-         add(defaultDistanceTable, desired, supported, 100);
-         add(defaultDistanceTable, supported, desired, 100);
-     }
-
-     if (PRINT_OVERRIDES) {
-         System.out.println("\t\t</languageMatches>");
-     }
-
-     DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper);
-
-     if (PRINT_OVERRIDES) {
-         System.out.println(defaultRegionMapper);
-         System.out.println(defaultDistanceTable);
-         // when printing overrides, class initialization is deliberately aborted after the dump
-         throw new IllegalArgumentException();
-     }
- }
-
- /**
-  * When PRINT_OVERRIDES is set, prints one languageMatch XML element for the rule to stdout,
-  * followed by an XML comment built from fixedName(); otherwise does nothing.
-  *
-  * @param oneway may be null; only a true value adds the oneway="true" attribute
-  */
- private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) {
-     if (PRINT_OVERRIDES) {
-         String desiredStr = CollectionUtilities.join(desired, "_");
-         String supportedStr = CollectionUtilities.join(supported, "_");
-         String desiredName = fixedName(desired);
-         String supportedName = fixedName(supported);
-         // Null-safe: unboxing a null Boolean with "!oneway" would throw NullPointerException.
-         String onewayAttribute = Boolean.TRUE.equals(oneway) ? "\"\toneway=\"true" : "";
-         System.out.println("\t\t\t<languageMatch"
-             + " desired=\"" + desiredStr
-             + "\"\tsupported=\"" + supportedStr
-             + "\"\tdistance=\"" + distance
-             + onewayAttribute
-             + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->");
-     }
- }
-
- private static String fixedName(List<String> match) {
- List<String> alt = new ArrayList<>(match);
- int size = alt.size();
- assert size >= 1 && size <= 3;
-
- StringBuilder result = new StringBuilder();
-
- if (size >= 3) {
- String region = alt.get(2);
- if (region.equals("*") || region.startsWith("$")) {
- result.append(region);
- } else {
- result.append(english.regionDisplayName(region));
- }
- }
- if (size >= 2) {
- String script = alt.get(1);
- if (script.equals("*")) {
- result.insert(0, script);
- } else {
- result.insert(0, english.scriptDisplayName(script));
- }
- }
- if (size >= 1) {
- String language = alt.get(0);
- if (language.equals("*")) {
- result.insert(0, language);
- } else {
- result.insert(0, english.languageDisplayName(language));
- }
- }
- return CollectionUtilities.join(alt, "; ");
- }
-
- /**
-  * Adds one rule to the table. {@code desired} and {@code supported} must have the same number
-  * of fields, between 1 and 3 (language, then script, then region); each field is passed
-  * through fixAny before use.
-  *
-  * @throws IllegalArgumentException if the sizes differ or are outside 1..3
-  */
- static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) {
-     final int size = desired.size();
-     final boolean sizeOk = size == supported.size() && size >= 1 && size <= 3;
-     if (!sizeOk) {
-         throw new IllegalArgumentException();
-     }
-     final String desiredLang = fixAny(desired.get(0));
-     final String supportedLang = fixAny(supported.get(0));
-     if (size == 1) {
-         languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
-         return;
-     }
-     final String desiredScript = fixAny(desired.get(1));
-     final String supportedScript = fixAny(supported.get(1));
-     if (size == 2) {
-         languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
-         return;
-     }
-     final String desiredRegion = fixAny(desired.get(2));
-     final String supportedRegion = fixAny(supported.get(2));
-     languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
- }
-
- @Override
- public String toString() { // same as toString(false), i.e. the unabbreviated form
- return toString(false);
- }
-
- public String toString(boolean abbreviate) { // region mapper text, a newline, then the distance table's toString(abbreviate)
- return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate);
- }
-
-
- // public static XLocaleDistance createDefaultInt() {
- // IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE);
- // return new XLocaleDistance(d, DEFAULT_REGION_MAPPER);
- // }
-
- static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) {
- output.clear();
- for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
- if (input.containsAll(entry.getValue())) { // example; if all southern Europe are contained, then add S. Europe
- output.add(entry.getKey());
- }
- }
- return output;
- }
-
- static class RegionMapper implements IdMapper<String,String> {
- /**
- * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX. We generate a mapping from $A1 to a set of partitions {P1, P2}
- * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions.
- */
- final Multimap<String,String> variableToPartition;
- /**
- * Used for executing the rules. We map a region to a partition before processing.
- */
- final Map<String,String> regionToPartition;
- /**
- * Used to support es_419 compared to es_AR, etc.
- */
- final Multimap<String,String> macroToPartitions;
- /**
- * Used to get the paradigm region for a cluster, if there is one
- */
- final Set<ULocale> paradigms;
-
- private RegionMapper(
- Multimap<String, String> variableToPartitionIn,
- Map<String, String> regionToPartitionIn,
- Multimap<String,String> macroToPartitionsIn,
- Set<ULocale> paradigmsIn) {
- variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn);
- regionToPartition = ImmutableMap.copyOf(regionToPartitionIn);
- macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn);
- paradigms = ImmutableSet.copyOf(paradigmsIn);
- }
-
- @Override
- public String toId(String region) {
- String result = regionToPartition.get(region);
- return result == null ? "" : result;
- }
-
- public Collection<String> getIdsFromVariable(String variable) {
- if (variable.equals("*")) {
- return Collections.singleton("*");
- }
- Collection<String> result = variableToPartition.get(variable);
- if (result == null || result.isEmpty()) {
- throw new IllegalArgumentException("Variable not defined: " + variable);
- }
- return result;
- }
-
- public Set<String> regions() {
- return regionToPartition.keySet();
- }
-
- public Set<String> variables() {
- return variableToPartition.keySet();
- }
-
- @Override
- public String toString() {
- TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition,
- TreeMultimap.<String, String>create());
- TreeMultimap<String, String> partitionToRegions = TreeMultimap.create();
- for (Entry<String, String> e : regionToPartition.entrySet()) {
- partitionToRegions.put(e.getValue(), e.getKey());
- }
- StringBuilder buffer = new StringBuilder();
- buffer.append("Partition ➠ Variables ➠ Regions (final)");
- for (Entry<String, Set<String>> e : partitionToVariables.asMap().entrySet()) {
- buffer.append('\n');
- buffer.append(e.getKey() + "\t" + e.getValue() + "\t" + partitionToRegions.get(e.getKey()));
- }
- buffer.append("\nMacro ➠ Partitions");
- for (Entry<String, Set<String>> e : macroToPartitions.asMap().entrySet()) {
- buffer.append('\n');
- buffer.append(e.getKey() + "\t" + e.getValue());
- }
-
- return buffer.toString();
- }
-
- static class Builder {
- final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
- final private RegionSet regionSet = new RegionSet();
- final private Set<ULocale> paradigms = new LinkedHashSet<>();
-
- void add(String variable, String barString) {
- Set<String> tempRegions = regionSet.parseSet(barString);
-
- for (String region : tempRegions) {
- regionToRawPartition.put(region, variable);
- }
-
- // now add the inverse variable
-
- Set<String> inverse = regionSet.inverse();
- String inverseVariable = "$!" + variable.substring(1);
- for (String region : inverse) {
- regionToRawPartition.put(region, inverseVariable);
- }
- }
-
- public Builder addParadigms(String... paradigmRegions) {
- for (String paradigm : paradigmRegions) {
- paradigms.add(new ULocale(paradigm));
- }
- return this;
- }
-
- RegionMapper build() {
- final IdMakerFull<Collection<String>> id = new IdMakerFull<>("partition");
- Multimap<String,String> variableToPartitions = TreeMultimap.create();
- Map<String,String> regionToPartition = new TreeMap<>();
- Multimap<String,String> partitionToRegions = TreeMultimap.create();
-
- for (Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
- final String region = e.getKey();
- final Collection<String> rawPartition = e.getValue();
- String partition = String.valueOf((char)('α' + id.add(rawPartition)));
-
- regionToPartition.put(region, partition);
- partitionToRegions.put(partition, region);
-
- for (String variable : rawPartition) {
- variableToPartitions.put(variable, partition);
- }
- }
-
- // we get a mapping of each macro to the partitions it intersects with
- Multimap<String,String> macroToPartitions = TreeMultimap.create();
- for (Entry<String, Set<String>> e : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
- String macro = e.getKey();
- for (Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
- String partition = e2.getKey();
- if (!Collections.disjoint(e.getValue(), e2.getValue())) {
- macroToPartitions.put(macro, partition);
- }
- }
- }
-
- return new RegionMapper(
- variableToPartitions,
- regionToPartition,
- macroToPartitions,
- paradigms);
- }
- }
- }
-
- /**
-  * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
-  * A region that has a non-empty entry in CONTAINER_TO_CONTAINED_FINAL is replaced by that entry's regions.
-  * <br>Syntax is simple for now:
-  * <pre>regionSet := region ([-+] region)*</pre>
-  * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
-  */
- private static class RegionSet {
-     private enum Operation {add, remove}
-     // temporaries used in processing
-     final private Set<String> tempRegions = new TreeSet<>();
-     private Operation operation = null;
-
-     /**
-      * Parses {@code barString} and returns the resulting set. The returned set is this
-      * object's internal working set, which the next parseSet call clears and refills.
-      */
-     private Set<String> parseSet(String barString) {
-         operation = Operation.add;
-         tempRegions.clear();
-         final int length = barString.length();
-         int tokenStart = 0;
-         for (int pos = 0; pos < length; ++pos) {
-             final char c = barString.charAt(pos); // UTF16 is ok, since syntax is only ascii
-             if (c != '+' && c != '-') {
-                 continue;
-             }
-             // apply the pending operation to the token that just ended, then switch operation
-             add(barString, tokenStart, pos);
-             tokenStart = pos + 1;
-             operation = (c == '+') ? Operation.add : Operation.remove;
-         }
-         add(barString, tokenStart, length);
-         return tempRegions;
-     }
-
-     /** Returns a new set holding ALL_FINAL_REGIONS minus the regions of the last parseSet call. */
-     private Set<String> inverse() {
-         final TreeSet<String> complement = new TreeSet<>(ALL_FINAL_REGIONS);
-         complement.removeAll(tempRegions);
-         return complement;
-     }
-
-     /** Applies the current operation to barString[last, i); an empty range is ignored. */
-     private void add(String barString, int last, int i) {
-         if (i <= last) {
-             return;
-         }
-         changeSet(operation, barString.substring(last, i));
-     }
-
-     /** Adds or removes the region, or all the regions it contains when it is a container. */
-     private void changeSet(Operation operation, String region) {
-         final Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region);
-         final boolean isContainer = contained != null && !contained.isEmpty();
-         final boolean adding = (Operation.add == operation);
-         if (isContainer) {
-             if (adding) {
-                 tempRegions.addAll(contained);
-             } else {
-                 tempRegions.removeAll(contained);
-             }
-         } else {
-             if (adding) {
-                 tempRegions.add(region);
-             } else {
-                 tempRegions.remove(region);
-             }
-         }
-     }
- }
-
- public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) { // inverts map (value -> keys) into a newly created LinkedHashMultimap
- return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create());
- }
-
- public Set<ULocale> getParadigms() { // returns the region mapper's paradigm locale set itself (not a copy)
- return regionMapper.paradigms;
- }
-
- public int getDefaultLanguageDistance() { // value the constructor read from the language-level (ANY, ANY) node
- return defaultLanguageDistance;
- }
-
- public int getDefaultScriptDistance() { // value the constructor read from the script-level (ANY, ANY) node
- return defaultScriptDistance;
- }
-
- public int getDefaultRegionDistance() { // value the constructor read from the region-level (ANY, ANY) node
- return defaultRegionDistance;
- }
-
- /**
-  * Rebuilds a StringDistanceTable tree out of immutable maps, sharing equal sub-structures.
-  *
-  * <p>Every compacted table, map and node is interned in the inherited id maker, so a later
-  * structure that is equal to an earlier one is replaced by the earlier instance. Previously
-  * nothing was ever added to the id maker, so the {@code toId(...) != null} checks could never
-  * succeed and no sharing took place.
-  */
- static class CompactAndImmutablizer extends IdMakerFull<Object> {
-     /** Returns an immutable, de-duplicated equivalent of {@code item}. */
-     StringDistanceTable compact(StringDistanceTable item) {
-         if (toId(item) != null) {
-             return (StringDistanceTable) intern(item);
-         }
-         StringDistanceTable compacted = new StringDistanceTable(compact(item.subtables, 0));
-         return (StringDistanceTable) intern(compacted);
-     }
-
-     /**
-      * Copies the map into an immutable map, compacting each value recursively: values that
-      * are maps are compacted as maps, all other values are treated as DistanceNodes.
-      */
-     @SuppressWarnings({ "unchecked", "rawtypes" })
-     <K,T> Map<K,T> compact(Map<K,T> item, int level) {
-         if (toId(item) != null) {
-             return (Map<K, T>) intern(item);
-         }
-         Map<K,T> copy = new LinkedHashMap<>();
-         for (Entry<K,T> entry : item.entrySet()) {
-             T value = entry.getValue();
-             if (value instanceof Map) {
-                 copy.put(entry.getKey(), (T)compact((Map)value, level+1));
-             } else {
-                 copy.put(entry.getKey(), (T)compact((DistanceNode)value));
-             }
-         }
-         return (Map<K, T>) intern(ImmutableMap.copyOf(copy));
-     }
-
-     /**
-      * Returns a plain DistanceNode when the node has no table or an empty one; otherwise a
-      * StringDistanceNode wrapping the compacted table.
-      */
-     DistanceNode compact(DistanceNode item) {
-         if (toId(item) != null) {
-             return (DistanceNode) intern(item);
-         }
-         final DistanceTable distanceTable = item.getDistanceTable();
-         final DistanceNode compacted;
-         if (distanceTable == null || distanceTable.isEmpty()) {
-             compacted = new DistanceNode(item.distance);
-         } else {
-             compacted = new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable));
-         }
-         return (DistanceNode) intern(compacted);
-     }
- }
-
- @Deprecated
- public StringDistanceTable internalGetDistanceTable() { // exposes the language-level table; the cast assumes it is a StringDistanceTable
- return (StringDistanceTable) languageDesired2Supported;
- }
-
- /**
-  * Self-check: optionally prints the abbreviated default data (when PRINT_OVERRIDES is set),
-  * then verifies that compacting the default distance table yields an equal table.
-  *
-  * @throws IllegalArgumentException if the compacted table is not equal to the original
-  */
- public static void main(String[] args) {
-     if (PRINT_OVERRIDES) {
-         System.out.println(getDefault().toString(true));
-     }
-     final DistanceTable original = getDefault().languageDesired2Supported;
-     final DistanceTable recompacted = original.compact();
-     if (original.equals(recompacted)) {
-         return;
-     }
-     throw new IllegalArgumentException("Compaction isn't equal");
- }
-}
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Objects;
import java.util.Set;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
-import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
* Immutable class that picks best match between user's desired locales and application's supported locales.
* @author markdavis
*/
-public class XLocaleMatcher {
- private static final LSR UND = new LSR("und","","");
+public final class XLocaleMatcher {
+ private static final LSR UND_LSR = new LSR("und","","");
private static final ULocale UND_LOCALE = new ULocale("und");
+ private static final Iterator<ULocale> NULL_ITERATOR = null;
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
+ // List of indexes, optimized for one or two.
+ private static final class Indexes {
+ // The first two entries live in plain int fields: no list allocation, no auto-boxing.
+ // second stays at -1 until a second index has been added.
+ int first, second = -1;
+ // Third and later entries; allocated lazily on first overflow.
+ // We could turn the List into an int array + length and manage its growth.
+ List<Integer> remaining;
+
+ Indexes(int firstIndex) {
+ first = firstIndex;
+ }
+ // Appends an index: fills the second slot first, then spills into the overflow list.
+ void add(int i) {
+ if (second < 0) {
+ second = i;
+ return;
+ }
+ if (remaining == null) {
+ remaining = new ArrayList<>();
+ }
+ remaining.add(i);
+ }
+ int getFirst() { return first; }
+ // Returns the i-th stored index, or -1 when i >= length.
+ int get(int i) {
+ switch (i) {
+ case 0:
+ return first;
+ case 1:
+ return second;
+ default:
+ int overflowPos = i - 2;
+ if (remaining == null || overflowPos >= remaining.size()) {
+ return -1;
+ }
+ return remaining.get(overflowPos);
+ }
+ }
+ }
+
+ // TODO: Make public, and add public methods that return it.
+ private static final class Result {
+ // The desired locale that produced the match; null when the default locale is returned.
+ ULocale desiredLocale;
+ // The chosen supported (or default) locale, in both ULocale and java.util.Locale form.
+ ULocale supportedLocale;
+ // Locale desiredJavaLocale;
+ Locale supportedJavaLocale;
+ // Position of desiredLocale in the desired-locale sequence; -1 when there was no good match.
+ int desiredIndex;
+ @SuppressWarnings("unused") // unused until public, for other wrappers
+ int supportedIndex;
+
+ private Result(ULocale desired, ULocale supported,
+ /* Locale jdesired, */ Locale jsupported,
+ int desIndex, int suppIndex) {
+ this.desiredLocale = desired;
+ this.supportedLocale = supported;
+ // this.desiredJavaLocale = jdesired;
+ this.supportedJavaLocale = jsupported;
+ this.desiredIndex = desIndex;
+ this.supportedIndex = suppIndex;
+ }
+ }
+
// normally the default values, but can be set via constructor
- private final XLocaleDistance localeDistance;
private final int thresholdDistance;
private final int demotionPerAdditionalDesiredLocale;
private final DistanceOption distanceOption;
// built based on application's supported languages in constructor
- private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
- private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
- private final ULocale defaultLanguage;
+ private final ULocale[] supportedLocales;
+ private final Locale[] supportedJavaLocales;
+ private final Map<ULocale, Integer> supportedToIndex;
+ private final Map<LSR, Indexes> supportedLsrToIndexes;
+ // Array versions of the supportedLsrToIndexes keys and values.
+ // The distance lookup loops over the supportedLsrs and returns the index of the best match.
+ private final LSR[] supportedLsrs;
+ private final Indexes[] supportedIndexes;
+ private final ULocale defaultLocale;
+ private final Locale defaultJavaLocale;
+ private final int defaultLocaleIndex;
public static class Builder {
- private Set<ULocale> supportedLanguagesList;
+ /**
+ * Supported locales. A Set, to avoid duplicates.
+ * Maintains iteration order for consistent matching behavior (first best match wins).
+ */
+ private Set<ULocale> supportedLocales;
private int thresholdDistance = -1;
private int demotionPerAdditionalDesiredLocale = -1;;
- private ULocale defaultLanguage;
- private XLocaleDistance localeDistance;
+ private ULocale defaultLocale;
private DistanceOption distanceOption;
/**
- * @param languagePriorityList the languagePriorityList to set
+ * @param locales the supported locales, as a language priority list string parsed by LocalePriorityList
* @return this Builder object
*/
- public Builder setSupportedLocales(String languagePriorityList) {
- this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
+ public Builder setSupportedLocales(String locales) {
+ return setSupportedLocales(LocalePriorityList.add(locales).build());
+ }
+ public Builder setSupportedLocales(Iterable<ULocale> locales) {
+ supportedLocales = new LinkedHashSet<>(); // maintain order
+ for (ULocale locale : locales) {
+ supportedLocales.add(locale);
+ }
return this;
}
- public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
- this.supportedLanguagesList = asSet(languagePriorityList);
+ public Builder setSupportedLocales(Collection<ULocale> locales) {
+ supportedLocales = new LinkedHashSet<>(locales); // maintain order
return this;
}
- public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
- Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
- temp.addAll(languagePriorityList);
- this.supportedLanguagesList = temp;
+ public Builder setSupportedJavaLocales(Collection<Locale> locales) {
+ supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
+ for (Locale locale : locales) {
+ supportedLocales.add(ULocale.forLocale(locale));
+ }
+ return this;
+ }
+ public Builder addSupportedLocale(ULocale locale) {
+ if (supportedLocales == null) {
+ supportedLocales = new LinkedHashSet<>();
+ }
+ supportedLocales.add(locale);
return this;
}
+ public Builder addSupportedLocale(Locale locale) {
+ return addSupportedLocale(ULocale.forLocale(locale));
+ }
/**
* @param thresholdDistance the thresholdDistance to set, with -1 = default
* @return this Builder object
*/
public Builder setThresholdDistance(int thresholdDistance) {
+ if (thresholdDistance > 100) {
+ thresholdDistance = 100;
+ }
this.thresholdDistance = thresholdDistance;
return this;
}
return this;
}
- /**
- * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
- * @return this Builder object
- */
- public Builder setLocaleDistance(XLocaleDistance localeDistance) {
- this.localeDistance = localeDistance;
- return this;
- }
-
/**
* Set the default language, with null = default = first supported language
- * @param defaultLanguage the default language
+ * @param defaultLocale the default language
* @return this Builder object
*/
- public Builder setDefaultLanguage(ULocale defaultLanguage) {
- this.defaultLanguage = defaultLanguage;
+ public Builder setDefaultLanguage(ULocale defaultLocale) {
+ this.defaultLocale = defaultLocale;
return this;
}
+ /**
+ * Returns a debug description of this Builder that lists only the settings
+ * which have been explicitly configured. Safe to call on a fresh Builder.
+ */
@Override
public String toString() {
- StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
- if (!supportedLanguagesList.isEmpty()) {
- s.append(" supported={").append(supportedLanguagesList.toString()).append("}");
- }
- if (defaultLanguage != null) {
- s.append(" default=").append(defaultLanguage.toString());
- }
- if (thresholdDistance >= 0) {
- s.append(String.format(" thresholdDistance=%d", thresholdDistance));
- }
- s.append(" preference=").append(distanceOption.name());
- return s.append("}").toString();
+ StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
+ // supportedLocales stays null until a setSupported*/addSupported* call, so guard against it.
+ if (supportedLocales != null && !supportedLocales.isEmpty()) {
+ s.append(" supported={").append(supportedLocales.toString()).append('}');
+ }
+ if (defaultLocale != null) {
+ s.append(" default=").append(defaultLocale.toString());
+ }
+ if (distanceOption != null) {
+ s.append(" distance=").append(distanceOption.toString());
+ }
+ // Negative values mean "use the default", so they are omitted.
+ if (thresholdDistance >= 0) {
+ s.append(String.format(" threshold=%d", thresholdDistance));
+ }
+ if (demotionPerAdditionalDesiredLocale >= 0) {
+ s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+ }
+ return s.append('}').toString();
}
}
}
/**
- * Create a locale matcher with the given parameters.
- * @param supportedLocales
- * @param thresholdDistance
- * @param demotionPerAdditionalDesiredLocale
- * @param localeDistance
- * @param likelySubtags
+ * Creates a locale matcher with the given Builder parameters.
*/
private XLocaleMatcher(Builder builder) {
- localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
- : builder.localeDistance;
- thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
- : builder.thresholdDistance;
- // only do AFTER above are set
- Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
- final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
- supportedLanguages = temp2.asMap();
- exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
- defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
- : supportedLanguages.isEmpty() ? null
- : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
- demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
- : builder.demotionPerAdditionalDesiredLocale;
+ // A negative builder threshold means "use the default".
+ thresholdDistance = builder.thresholdDistance < 0 ?
+ LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
+ // The builder's set is null when no supported locales were ever configured;
+ // treat that the same as an empty set instead of throwing a NullPointerException.
+ Set<ULocale> builderSupported = builder.supportedLocales;
+ // Store the supported locales in input order,
+ // so that when different types are used (e.g., java.util.Locale)
+ // we can return those by parallel index.
+ int supportedLocalesLength = builderSupported == null ? 0 : builderSupported.size();
+ supportedLocales = new ULocale[supportedLocalesLength];
+ supportedJavaLocales = new Locale[supportedLocalesLength];
+ supportedToIndex = new HashMap<>(supportedLocalesLength);
+ // We need an unordered map from LSR to first supported locale with that LSR,
+ // and an ordered list of (LSR, Indexes).
+ // We use a LinkedHashMap for both,
+ // and insert the supported locales in the following order:
+ // 1. First supported locale.
+ // 2. Priority locales in builder order.
+ // 3. Remaining locales in builder order.
+ supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
+ Map<LSR, Indexes> otherLsrToIndexes = null;
+ LSR firstLSR = null;
+ int i = 0;
+ if (builderSupported != null) {
+ for (ULocale locale : builderSupported) {
+ supportedLocales[i] = locale;
+ supportedJavaLocales[i] = locale.toLocale();
+ // supportedToIndex.putIfAbsent(locale, i)
+ Integer oldIndex = supportedToIndex.get(locale);
+ if (oldIndex == null) {
+ supportedToIndex.put(locale, i);
+ }
+ LSR lsr = getMaximalLsrOrUnd(locale);
+ if (i == 0) {
+ firstLSR = lsr;
+ supportedLsrToIndexes.put(lsr, new Indexes(0));
+ } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+ addIndex(supportedLsrToIndexes, lsr, i);
+ } else {
+ // Non-priority LSRs are collected separately and appended after the priority ones.
+ if (otherLsrToIndexes == null) {
+ otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
+ }
+ addIndex(otherLsrToIndexes, lsr, i);
+ }
+ ++i;
+ }
+ }
+ if (otherLsrToIndexes != null) {
+ supportedLsrToIndexes.putAll(otherLsrToIndexes);
+ }
+ int numSuppLsrs = supportedLsrToIndexes.size();
+ supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
+ supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
+ // Default locale: the builder's explicit default, else the first supported locale, else null.
+ // The default index stays -1 unless the default was taken from the supported list.
+ ULocale def;
+ Locale jdef = null;
+ int idef = -1;
+ if (builder.defaultLocale != null) {
+ def = builder.defaultLocale;
+ } else if (supportedLocalesLength > 0) {
+ def = supportedLocales[0]; // first language
+ jdef = supportedJavaLocales[0];
+ idef = 0;
+ } else {
+ def = null;
+ }
+ if (jdef == null && def != null) {
+ jdef = def.toLocale();
+ }
+ defaultLocale = def;
+ defaultJavaLocale = jdef;
+ defaultLocaleIndex = idef;
+ demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
+ LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
+ builder.demotionPerAdditionalDesiredLocale;
distanceOption = builder.distanceOption;
}
- // Result is not immutable!
- private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
- Set<LSR> result = new LinkedHashSet<LSR>();
- for (ULocale item : languagePriorityList) {
- final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
- result.add(max);
+ private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
+ Indexes indexes = lsrToIndexes.get(lsr);
+ if (indexes == null) {
+ lsrToIndexes.put(lsr, new Indexes(i));
+ } else {
+ indexes.add(i);
}
- return result;
}
- private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
- Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
- for (ULocale item : languagePriorityList) {
- final LSR max = item.equals(UND_LOCALE) ? UND :
- LSR.fromMaximalized(item);
- builder.put(max, item);
- }
- if (builder.size() > 1 && priorities != null) {
- // for the supported list, we put any priorities before all others, except for the first.
- Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
-
- // copy the long way so the priorities are in the same order as in the original
- boolean first = true;
- for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
- final LSR key = entry.getKey();
- if (first || priorities.contains(key)) {
- builder2.putAll(key, entry.getValue());
- first = false;
- }
- }
- // now copy the rest
- builder2.putAll(builder);
- if (!builder2.equals(builder)) {
- throw new IllegalArgumentException();
- }
- builder = builder2;
+ private static final LSR getMaximalLsrOrUnd(ULocale locale) {
+ if (locale.equals(UND_LOCALE)) {
+ return UND_LSR;
+ } else {
+ return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
- return ImmutableMultimap.copyOf(builder);
}
-
/** Convenience method */
public ULocale getBestMatch(ULocale ulocale) {
- return getBestMatch(ulocale, null);
+ return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
}
/** Convenience method */
public ULocale getBestMatch(String languageList) {
}
/** Convenience method */
public ULocale getBestMatch(ULocale... locales) {
- return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
- }
- /** Convenience method */
- public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
- return getBestMatch(desiredLanguages, null);
- }
- /** Convenience method */
- public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
- return getBestMatch(desiredLanguages, null);
+ return getBestMatch(Arrays.asList(locales), null);
}
/** Convenience method */
- public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
- return getBestMatch(asSet(desiredLanguages), outputBestDesired);
- }
-
- // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
- private static Set<ULocale> asSet(LocalePriorityList languageList) {
- Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
- for (ULocale locale : languageList) {
- temp.add(locale);
- };
- return temp;
+ public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
+ return getBestMatch(desiredLocales, null);
}
/**
* Get the best match between the desired languages and supported languages
- * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
- * @param outputBestDesired The one of the desired languages that matched best.
+ * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
+ * @param outputBestDesired The one of the desired languages that matched best (can be null).
* Set to null if the best match was not below the threshold distance.
* @return the best match.
*/
- public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
- // fast path for singleton
- if (desiredLanguages.size() == 1) {
- return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
- }
- // TODO produce optimized version for single desired ULocale
- Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
- int bestDistance = Integer.MAX_VALUE;
+ public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
+ Iterator<ULocale> remaining = desiredLocales.iterator();
+ if (remaining.hasNext()) {
+ // Peel off the first desired locale; the iterator then supplies the rest.
+ ULocale firstDesired = remaining.next();
+ return getBestMatch(firstDesired, remaining, outputBestDesired);
+ }
+ // Nothing desired: report no matching desired locale and fall back to the default.
+ if (outputBestDesired != null) {
+ outputBestDesired.value = null;
+ }
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
+ }
+ return defaultLocale;
+ }
+
+ /**
+ * @param desiredLocale First desired locale.
+ * @param remainingIter Remaining desired locales, null or empty if none.
+ * @param outputBestDesired If not null,
+ * will be set to the desired locale that matches the best supported one.
+ * @return the best supported locale.
+ */
+ private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
+ Output<ULocale> outputBestDesired) {
+ Result result = getBestMatch(desiredLocale, remainingIter);
+ if (outputBestDesired != null) {
+ outputBestDesired.value = result.desiredLocale;
+ }
+ return result.supportedLocale;
+ }
+
+ private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
+ int desiredIndex = 0;
+ int bestDesiredIndex = -1;
ULocale bestDesiredLocale = null;
- Collection<ULocale> bestSupportedLocales = null;
- int delta = 0;
- mainLoop:
- for (final Entry<LSR, Set<ULocale>> desiredLsrAndLocales : desiredLSRs.asMap().entrySet()) {
- LSR desiredLSR = desiredLsrAndLocales.getKey();
- for (ULocale desiredLocale : desiredLsrAndLocales.getValue()) {
- // quick check for exact match
- if (delta < bestDistance) {
- if (exactSupportedLocales.contains(desiredLocale)) {
- if (outputBestDesired != null) {
- outputBestDesired.value = desiredLocale;
- }
+ int bestSupportedLsrIndex = 0;
+ for (int bestDistance = thresholdDistance; bestDistance > 0;
+ bestDistance -= demotionPerAdditionalDesiredLocale) {
+ // Quick check for exact locale match.
+ Integer supportedIndex = supportedToIndex.get(desiredLocale);
+ if (supportedIndex != null) {
if (TRACE_MATCHER) {
- System.err.printf(
- "Returning %s, which is an exact match for a supported language\n",
- desiredLocale);
- }
- return desiredLocale;
- }
- // quick check for maximized locale
- Collection<ULocale> found = supportedLanguages.get(desiredLSR);
- if (found != null) {
- // if we find one in the set, return first (lowest). We already know the exact one isn't
- // there.
- if (outputBestDesired != null) {
- outputBestDesired.value = desiredLocale;
+ System.err.printf("Returning %s: desired=supported\n", desiredLocale);
}
- ULocale result = found.iterator().next();
+ int suppIndex = supportedIndex;
+ return new Result(desiredLocale, supportedLocales[suppIndex],
+ supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
+ }
+ // Quick check for exact maximized LSR.
+ LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+ Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
+ if (indexes != null) {
+ // If this is a supported LSR, return the first locale.
+ // We already know the exact locale isn't there.
+ int suppIndex = indexes.getFirst();
+ ULocale result = supportedLocales[suppIndex];
if (TRACE_MATCHER) {
- System.err.printf("Returning %s\n", result.toString());
+ System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
}
- return result;
- }
+ return new Result(desiredLocale, result,
+ supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
}
- for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
- int distance =
- delta
- + localeDistance.distanceRaw(
- desiredLSR,
- supportedLsrAndLocale.getKey(),
- thresholdDistance,
- distanceOption);
- if (distance < bestDistance) {
- bestDistance = distance;
+ int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ desiredLSR, supportedLsrs, bestDistance, distanceOption);
+ if (bestIndexAndDistance >= 0) {
+ bestDistance = bestIndexAndDistance & 0xff;
+ bestDesiredIndex = desiredIndex;
bestDesiredLocale = desiredLocale;
- bestSupportedLocales = supportedLsrAndLocale.getValue();
- if (distance == 0) {
- break mainLoop;
+ bestSupportedLsrIndex = bestIndexAndDistance >> 8;
+ if (bestDistance == 0) {
+ break;
}
- }
}
- delta += demotionPerAdditionalDesiredLocale;
- }
- }
- if (bestDistance >= thresholdDistance) {
- if (outputBestDesired != null) {
- outputBestDesired.value = null;
+ if (remainingIter == null || !remainingIter.hasNext()) {
+ break;
}
+ desiredLocale = remainingIter.next();
+ ++desiredIndex;
+ }
+ if (bestDesiredIndex < 0) {
if (TRACE_MATCHER) {
- System.err.printf("Returning default %s\n", defaultLanguage.toString());
+ System.err.printf("Returning default %s: no good match\n", defaultLocale);
}
- return defaultLanguage;
+ return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
}
- if (outputBestDesired != null) {
- outputBestDesired.value = bestDesiredLocale;
- }
- // pick exact match if there is one
- if (bestSupportedLocales.contains(bestDesiredLocale)) {
- if (TRACE_MATCHER) {
- System.err.printf(
- "Returning %s which matches a supported language\n", bestDesiredLocale.toString());
+ // Pick exact match if there is one.
+ // The length of the list is normally 1.
+ Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
+ int suppIndex;
+ for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
+ ULocale locale = supportedLocales[suppIndex];
+ if (bestDesiredLocale.equals(locale)) {
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning %s: desired=best matching supported language\n",
+ bestDesiredLocale);
+ }
+ return new Result(bestDesiredLocale, locale,
+ supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
}
- return bestDesiredLocale;
}
- // otherwise return first supported, combining variants and extensions from bestDesired
- ULocale result = bestSupportedLocales.iterator().next();
+ // Otherwise return the first of the supported languages that share the best-matching LSR.
+ suppIndex = bestSupportedIndexes.getFirst();
+ ULocale result = supportedLocales[suppIndex];
if (TRACE_MATCHER) {
- System.err.printf("Returning first supported language %s\n", result.toString());
+ System.err.printf("Returning %s: first best matching supported language\n", result);
}
- return result;
+ return new Result(bestDesiredLocale, result,
+ supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
}
/**
* @return the best match.
*/
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
- int bestDistance = Integer.MAX_VALUE;
- ULocale bestDesiredLocale = null;
- Collection<ULocale> bestSupportedLocales = null;
+ return getBestMatch(desiredLocale, null, outputBestDesired);
+ }
- // quick check for exact match, with hack for und
- final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
+ /**
+ * Converts Locales to ULocales on the fly.
+ */
+ private static final class LocalesWrapper implements Iterator<ULocale> {
+ private Iterator<Locale> locales;
+ // Cache locales to avoid conversion of the result.
+ private Locale first, second;
+ private List<Locale> remaining;
+
+ LocalesWrapper(Iterator<Locale> locales) {
+ this.locales = locales;
+ }
- if (exactSupportedLocales.contains(desiredLocale)) {
- if (outputBestDesired != null) {
- outputBestDesired.value = desiredLocale;
- }
- if (TRACE_MATCHER) {
- System.err.printf("Exact match with a supported locale.\n");
- }
- return desiredLocale;
- }
- // quick check for maximized locale
- if (distanceOption == DistanceOption.REGION_FIRST) {
- Collection<ULocale> found = supportedLanguages.get(desiredLSR);
- if (found != null) {
- // if we find one in the set, return first (lowest). We already know the exact one isn't there.
- if (outputBestDesired != null) {
- outputBestDesired.value = desiredLocale;
- }
- ULocale result = found.iterator().next();
- if (TRACE_MATCHER) {
- System.err.printf("Matches a maximized supported locale: %s\n", result);
+ @Override
+ public boolean hasNext() {
+ return locales.hasNext();
+ }
+
+ @Override
+ public ULocale next() {
+ Locale locale = locales.next();
+ if (first == null) {
+ first = locale;
+ } else if (second == null) {
+ second = locale;
+ } else {
+ if (remaining == null) {
+ remaining = new ArrayList<>();
}
- return result;
+ remaining.add(locale);
}
+ return ULocale.forLocale(locale);
}
- for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
- int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
- thresholdDistance, distanceOption);
- if (distance < bestDistance) {
- bestDistance = distance;
- bestDesiredLocale = desiredLocale;
- bestSupportedLocales = supportedLsrAndLocale.getValue();
- if (distance == 0) {
- break;
- }
+
+ Locale getJavaLocale(int i) {
+ if (i == 0) {
+ return first;
+ } else if (i == 1) {
+ return second;
+ } else {
+ // TODO: test code coverage
+ return remaining.get(i - 2);
}
}
- if (bestDistance >= thresholdDistance) {
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+ * Finds the best supported java.util.Locale for the given desired locales.
+ * @param desiredLocales the desired locales, in order of preference, with best first.
+ * @param outputBestDesired If not null, will be set to the desired locale that matched best,
+ * or to null if the default locale is returned.
+ * @return the best supported locale, or the default locale if there is no good match.
+ */
+ public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
+ Iterator<Locale> desiredIter = desiredLocales.iterator();
+ if (!desiredIter.hasNext()) {
if (outputBestDesired != null) {
outputBestDesired.value = null;
}
if (TRACE_MATCHER) {
- System.err.printf(
- "Returning default %s because everything exceeded the threshold of %d.\n",
- defaultLanguage, thresholdDistance);
+ System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
}
- return defaultLanguage;
+ return defaultJavaLocale;
}
+ // The wrapper converts each Locale to a ULocale on demand and remembers the originals,
+ // so that result.desiredIndex can be mapped back to the caller's Locale object.
+ LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
+ ULocale desiredLocale = wrapper.next();
+ // Pass the wrapper as the iterator of remaining desired locales;
+ // passing no iterator here would match only the first desired locale.
+ Result result = getBestMatch(desiredLocale, wrapper);
if (outputBestDesired != null) {
- outputBestDesired.value = bestDesiredLocale;
+ outputBestDesired.value = result.desiredIndex >= 0 ?
+ wrapper.getJavaLocale(result.desiredIndex) : null;
}
- // pick exact match if there is one
- if (bestSupportedLocales.contains(bestDesiredLocale)) {
- return bestDesiredLocale;
- }
- // otherwise return first supported, combining variants and extensions from bestDesired
- ULocale result = bestSupportedLocales.iterator().next();
- if (TRACE_MATCHER) {
- System.err.printf("First in the list of supported locales: %s\n", result);
+ return result.supportedJavaLocale;
+ }
+
+ public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
+ ULocale desiredULocale = ULocale.forLocale(desiredLocale);
+ Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
+ if (outputBestDesired != null) {
+ outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
}
- return result;
+ return result.supportedJavaLocale;
}
/** Combine features of the desired locale into those of the supported, and return result. */
* A language is first maximized with add likely subtags, then compared.
*/
public int distance(ULocale desired, ULocale supported) {
- return localeDistance.distanceRaw(
- LSR.fromMaximalized(desired),
- LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
+ return LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
+ new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
+ thresholdDistance, distanceOption) & 0xff;
}
/** Convenience method */
public int distance(String desiredLanguage, String supportedLanguage) {
- return localeDistance.distanceRaw(
- LSR.fromMaximalized(new ULocale(desiredLanguage)),
- LSR.fromMaximalized(new ULocale(supportedLanguage)),
- thresholdDistance, distanceOption);
+ return LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
+ new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) },
+ thresholdDistance, distanceOption) & 0xff;
}
+ /**
+ * Returns a debug description of this matcher: the supported locales in input order,
+ * the default locale, and the distance settings.
+ */
@Override
public String toString() {
- return exactSupportedLocales.toString();
+ StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
+ if (supportedLocales.length > 0) {
+ s.append(" supported={").append(supportedLocales[0].toString());
+ for (int i = 1; i < supportedLocales.length; ++i) {
+ // Index with the loop variable; a constant index would repeat the second locale.
+ s.append(", ").append(supportedLocales[i].toString());
+ }
+ s.append('}');
+ }
+ // defaultLocale may be null when there are no supported locales and no explicit default.
+ s.append(" default=").append(Objects.toString(defaultLocale));
+ if (distanceOption != null) {
+ s.append(" distance=").append(distanceOption.toString());
+ }
+ if (thresholdDistance >= 0) {
+ s.append(String.format(" threshold=%d", thresholdDistance));
+ }
+ s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
+ return s.append('}').toString();
}
/** Return the inverse of the distance: that is, 1-distance(desired, supported) */
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
import java.util.Set;
import org.junit.Ignore;
import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
-import com.ibm.icu.impl.locale.XLocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
/**
- * Test the XLocaleDistance.
+ * Test the LocaleDistance.
+ * TODO: Rename to LocaleDistanceTest.
*
* @author markdavis
*/
public class XLocaleDistanceTest extends TestFmwk {
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
- public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
-
- private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
+ private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
DataDrivenTestHelper tfh = new MyTestFileHandler()
.setFramework(this)
.load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
@Ignore("Disabled because of Linux; need to investigate.")
@Test
public void testTiming() {
- List<Arguments> testArgs = new ArrayList<Arguments>();
+ List<Arguments> testArgs = new ArrayList<>();
for (List<String> line : tfh.getLines()) {
if (tfh.isTestLine(line)) {
testArgs.add(new Arguments(line));
oldTimeMinusLikely += System.nanoTime()-temp;
temp = System.nanoTime();
- final LSR desiredLSR = LSR.fromMaximalized(desired);
- final LSR supportedLSR = LSR.fromMaximalized(supported);
+// final LSR desiredLSR = LSR.maximizedFrom(desired);
+// final LSR supportedLSR = LSR.maximizedFrom(supported);
newLikelyTime += System.nanoTime()-temp;
temp = System.nanoTime();
- int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.REGION_FIRST);
- int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.REGION_FIRST);
+ int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
+ int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
newTimeMinusLikely += System.nanoTime()-temp;
}
}
}
+ /**
+  * Checks the structure of the key set returned by testOnlyGetDistanceTable(false):
+  * the key "*-*" is present, every key has between 1 and 6 '-'-separated subtags,
+  * every key with more than one subtag also has its one-subtag-shorter prefix as a key,
+  * every 2- or 4-subtag key has a matching key extended by "-*-*",
+  * and every key is a 6-subtag key or a prefix of one.
+  */
@Test
- @SuppressWarnings("deprecation")
public void testInternalTable() {
- checkTables(localeMatcher.internalGetDistanceTable(), "", 1);
- }
-
- @SuppressWarnings("deprecation")
- private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
- // Check that ANY, ANY is always present, and that the table has a depth of exactly 3 everyplace.
- Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
-
- // must have ANY,ANY
- boolean haveANYANY = false;
- for (Entry<String, Set<String>> entry : matches.entrySet()) {
- String first = entry.getKey();
- boolean haveANYfirst = first.equals(XLocaleDistance.ANY);
- for (String second : entry.getValue()) {
- haveANYANY |= haveANYfirst && second.equals(XLocaleDistance.ANY);
- DistanceNode distanceNode = internalGetDistanceTable.getInternalNode(first, second);
- DistanceTable subDistanceTable = distanceNode.getDistanceTable();
- if (subDistanceTable == null || subDistanceTable.isEmpty()) {
- if (depth != 3) {
- logln("depth should be 3");
- }
- if (distanceNode.getClass() != DistanceNode.class) {
- logln("should be plain DistanceNode");
- }
- } else {
- if (depth >= 3) {
- logln("depth should be ≤ 3");
- }
- if (distanceNode.getClass() == DistanceNode.class) {
- logln("should NOT be plain DistanceNode");
- }
- checkTables(subDistanceTable, first + "," + second + ",", depth+1);
+ Set<String> strings = localeDistance.testOnlyGetDistanceTable(false).keySet();
+ // Check that the table has a depth of exactly 3 (desired, supported) pairs everyplace
+ // by removing every prefix of a 6-subtag string from a copy of the set of strings.
+ // Any remaining string is not a prefix of a full-depth string.
+ Set<String> remaining = new HashSet<>(strings);
+ // Check that ANY, ANY is always present.
+ assertTrue("*-*", strings.contains("*-*"));
+ for (String s : strings) {
+ int num = countSubtags(s);
+ assertTrue(s, 1 <= num && num <= 6);
+ if (num > 1) {
+ String oneShorter = removeLastSubtag(s);
+ assertTrue(oneShorter, strings.contains(oneShorter));
+ }
+ if (num == 2 || num == 4) {
+ String sPlusAnyAny = s + "-*-*";
+ assertTrue(sPlusAnyAny, strings.contains(sPlusAnyAny));
+ } else if (num == 6) {
+ // Remove this 6-subtag string and then each of its shorter prefixes from
+ // "remaining"; the loop stops after the 1-subtag prefix has been removed.
+ for (;; --num) {
+ remaining.remove(s);
+ if (num == 1) { break; }
+ s = removeLastSubtag(s);
}
}
}
- if (!haveANYANY) {
- logln("ANY-ANY not in" + matches);
+ assertTrue("strings that do not lead to 6-subtag matches", remaining.isEmpty());
+ }
+
+ /**
+  * Returns the number of '-'-separated subtags in s.
+  * The empty string has 0 subtags; otherwise the result is one more than
+  * the number of '-' characters in s.
+  */
+ private static final int countSubtags(String s) {
+     if (s.isEmpty()) {
+         return 0;
+     }
+     // Start at one subtag and add one for each separator found.
+     int subtags = 1;
+     for (int i = 0; i < s.length(); ++i) {
+         if (s.charAt(i) == '-') {
+             ++subtags;
+         }
}
+     return subtags;
+ }
+
+ /**
+  * Returns s without its last '-'-separated subtag and the separator before it.
+  * When s contains no '-' (it has at most one subtag), the empty string is returned
+  * instead of letting substring(0, -1) throw StringIndexOutOfBoundsException.
+  */
+ private static final String removeLastSubtag(String s) {
+     int last = s.lastIndexOf('-');
+     // lastIndexOf yields -1 when there is no separator to cut at.
+     return last < 0 ? "" : s.substring(0, last);
}
+ /**
+  * Calls localeDistance.testOnlyPrintDistanceTable(), but only when isVerbose() returns true;
+  * makes no assertions.
+  */
@Test
public void testShowDistanceTable() {
if (isVerbose()) {
- System.out.println(XLocaleDistance.getDefault().toString(false));
+ localeDistance.testOnlyPrintDistanceTable();
}
}
}
class MyTestFileHandler extends DataDrivenTestHelper {
- final XLocaleDistance distance = XLocaleDistance.getDefault();
- Output<ULocale> bestDesired = new Output<ULocale>();
+ Output<ULocale> bestDesired = new Output<>();
private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
- private Integer threshold = distance.getDefaultScriptDistance();
+ private Integer threshold = localeDistance.getDefaultScriptDistance();
@Override
public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
}
Arguments args = new Arguments(arguments);
- int supportedToDesiredActual = distance.distance(args.supported, args.desired, threshold, distanceOption);
- int desiredToSupportedActual = distance.distance(args.desired, args.supported, threshold, distanceOption);
+ int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
+ int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
String desiredTag = args.desired.toLanguageTag();
String supportedTag = args.supported.toLanguageTag();
final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.util;
-
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import org.junit.runner.RunWith;
import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
-import com.ibm.icu.impl.locale.XLocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocalePriorityList;
public class XLocaleMatcherTest extends TestFmwk {
private static final int REGION_DISTANCE = 4;
- private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
+ private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
private XLocaleMatcher newXLocaleMatcher() {
return new XLocaleMatcher("");
}
}
+ /**
+  * Immutable holder for one testPerf case: a desired locale plus the best match
+  * that testPerf asserts for the short, long and very long matchers.
+  */
+ private static final class PerfCase {
+     // Assigned once in the constructor; final so a case cannot be altered between runs.
+     final ULocale desired;
+     final ULocale expectedShort;
+     final ULocale expectedLong;
+     final ULocale expectedVeryLong;
+
+     /**
+      * Builds each ULocale from its locale ID string.
+      *
+      * @param des the desired locale ID
+      * @param expShort the expected result ID for the short matcher
+      * @param expLong the expected result ID for the long matcher
+      * @param expVeryLong the expected result ID for the very long matcher
+      */
+     PerfCase(String des, String expShort, String expLong, String expVeryLong) {
+         desired = new ULocale(des);
+         expectedShort = new ULocale(expShort);
+         expectedLong = new ULocale(expLong);
+         expectedVeryLong = new ULocale(expVeryLong);
+     }
+ }
+
+ private static final int WARM_UP_ITERATIONS = 1000;
+ private static final int BENCHMARK_ITERATIONS = 20000;
+ private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
+ private static final int AVG_PCT_LONG_NEW_OLD = 80;
@Test
public void testPerf() {
if (LANGUAGE_MATCHER_DATA == null) {
return; // skip except when testing data
}
- final ULocale desired = new ULocale("sv");
final String shortList = "en, sv";
- final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
- final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, 
fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, 
so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+ final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+ "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+ "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+ "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+ "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+ "zh-CN, zh-TW, zu";
+ final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
+ "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
+ "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
+ "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
+ "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
+ "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
+ "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
+ "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
+ "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
+ "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
+ "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
+ // removed en_001 to avoid exact match
+ "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
+ "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
+ "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
+ "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
+ "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
+ "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
+ "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
+ "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
+ "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
+ "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
+ "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
+ "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
+ "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
+ "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
+ "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
+ "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
+ "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
+ "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
+ "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
+ "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
+ "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
+ "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
+ "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
+ "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
+ "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
+ "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
+ "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
+ "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
+ "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
+ "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
+ "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
+ "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
+ "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
+ "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
+ "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
+ "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
+ "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
+ "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
+ "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
+ "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
+ "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
+ "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
+ "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
+ "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
+ "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
+ "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
+ "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
+ "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
+ "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
+ "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
+ "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
+ "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
- //XLocaleMatcher.DEBUG = true;
- ULocale expected = new ULocale("sv");
- assertEquals(expected, matcherShort.getBestMatch(desired));
- assertEquals(expected, matcherLong.getBestMatch(desired));
- assertEquals(expected, matcherVeryLong.getBestMatch(desired));
- //XLocaleMatcher.DEBUG = false;
-
long timeShortNew=0;
long timeMediumNew=0;
long timeLongNew=0;
- for (int i = 0; i < 2; ++i) {
- int iterations = i == 0 ? 1000 : 1000000;
- boolean showMessage = i != 0;
- timeShortNew = timeXLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations);
- timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
- timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
- }
-
long timeShortOld=0;
long timeMediumOld=0;
long timeLongOld=0;
- for (int i = 0; i < 2; ++i) {
- int iterations = i == 0 ? 1000 : 100000;
- boolean showMessage = i != 0;
- timeShortOld = timeLocaleMatcher("Old Duration (few supported):\t", desired, matcherShortOld, showMessage, iterations);
- timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
- timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
+ PerfCase[] pcs = new PerfCase[] {
+ // Exact match in all matchers.
+ new PerfCase("sv", "sv", "sv", "sv"),
+ // Common locale, exact match only in very long list.
+ new PerfCase("fr_CA", "en", "fr", "fr_CA"),
+ // Unusual locale, no exact match.
+ new PerfCase("de_CA", "en", "de", "de"),
+ // World English maps to several region partitions.
+ new PerfCase("en_001", "en", "en", "en"),
+ // Ancient language with interesting subtags.
+ new PerfCase("egy_Copt_CY", "en", "af", "af")
+ };
+
+ for (PerfCase pc : pcs) {
+ final ULocale desired = pc.desired;
+
+ assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
+ assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
+ assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+
+ timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
+ timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
+ timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
+ long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
+ timeShortNew += tns;
+ long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
+ timeMediumNew += tnl;
+ long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
+ timeLongNew += tnv;
+
+ timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
+ timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
+ timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
+ long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
+ System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n",
+ desired, tos, (100 * tns) / tos);
+ timeShortOld += tos;
+ long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
+ System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n",
+ desired, tol, (100 * tnl) / tol);
+ timeMediumOld += tol;
+ long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
+ System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n",
+ desired, tov, (100 * tnv) / tov);
+ timeLongOld += tov;
}
- assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
- assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
- assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
-
+ assertTrue(
+ String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
+ timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
+ timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
+ assertTrue(
+ String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
+ timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
+ timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
+ assertTrue(
+ String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
+ timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
+ timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
}
- private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
- boolean showmessage, int iterations) {
+ /**
+  * Calls matcher.getBestMatch(desired) once per iteration and returns the elapsed
+  * System.nanoTime() difference divided by iterations (integer division).
+  * iterations should be positive: a value of 0 makes the final division throw
+  * ArithmeticException.
+  */
+ private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
}
long delta = System.nanoTime() - start;
- if (showmessage) logln(title + (delta / iterations) + " nanos");
return (delta / iterations);
}
- private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
- boolean showmessage, int iterations) {
+ /**
+  * Calls matcher.getBestMatch(desired) once per iteration and returns the elapsed
+  * System.nanoTime() difference divided by iterations (integer division).
+  * iterations should be positive: a value of 0 makes the final division throw
+  * ArithmeticException.
+  */
+ private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
}
long delta = System.nanoTime() - start;
- if (showmessage) logln(title + (delta / iterations) + " nanos");
return (delta / iterations);
}