From 5182ad7d98e324e7cbeb04183d5b0ca972bd2f12 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 9 Feb 2019 14:20:56 -0800 Subject: [PATCH] ICU-20467 get XLocaleMatcher ready for drop-in Get XLocaleMatcher ready for replacing the LocaleMatcher code. More simplifications beyond ICU-20330 PR #409, smaller data, some more optimizations. New API ready to be moved over. - less work for region partitions distance lookup: - encode each array of single-character partition strings as one string - look up each desired partition only once, not for each (desired, supported) pair - look up the * fallback region distance only for the first mismatch, not for each non-matching pair - skip region distance lookup if minRegionDistance>=remainingThreshold - locale distance table: remove subtables that contain only *-* with default script/region distance - mark intermediate subtag matches via last-character bit 7, not also with a match value - likely subtags data: prune trailing *-only levels, and skip *-only script levels; likely subtags perf test - likely subtags: skip_script=1; LSR.indexForRegion(ill-formed)=0 not negative - likely subtags small optimization: array lookup for first letter of language subtag - defaultDemotionPerDesiredLocale=distance(en, en-GB) - favor=script: still reject a script mismatch - if an explicit default locale is given, prefer that (by LSR), not the first supported locale - XLocaleMatcher.Builder: copy supported locales into a List not a Set to preserve input indexes; duplicates are harmless - match by LSR only, not exact locale match; results consistent with no fastpath, simpler, sometimes a little slower - internal getBestMatch() returns just the suppIndex - store the best desired locale & index in an LSR iterator - make an LSR from Locale without ULocale detour - adjust the XLocaleMatcher API as proposed; remove unused internal methods; clean up LocalePriorityList docs --- .../core/src/com/ibm/icu/impl/locale/LSR.java | 24 +- 
.../icu/impl/locale/LikelySubtagsBuilder.java | 137 ++- .../ibm/icu/impl/locale/LocaleDistance.java | 320 +++-- .../impl/locale/LocaleDistanceBuilder.java | 93 +- .../ibm/icu/impl/locale/XLikelySubtags.java | 201 +-- .../ibm/icu/impl/locale/XLocaleMatcher.java | 1083 ++++++++++------- .../src/com/ibm/icu/util/LocaleMatcher.java | 31 +- .../com/ibm/icu/util/LocalePriorityList.java | 181 +-- .../icu/dev/test/util/LocaleMatcherTest.java | 7 +- .../dev/test/util/XLocaleDistanceTest.java | 69 +- .../icu/dev/test/util/XLocaleMatcherTest.java | 137 ++- .../dev/test/util/data/localeMatcherTest.txt | 268 ++-- 12 files changed, 1489 insertions(+), 1062 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java index dd32de09074..317f5444ebc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java @@ -5,7 +5,9 @@ package com.ibm.icu.impl.locale; import java.util.Objects; final class LSR { - static final int REGION_INDEX_LIMIT = 1000 + 26 * 26; + static final int REGION_INDEX_LIMIT = 1001 + 26 * 26; + + static final boolean DEBUG_OUTPUT = false; final String language; final String script; @@ -21,27 +23,27 @@ final class LSR { } /** - * Returns a non-negative index for a well-formed region code. + * Returns a positive index (>0) for a well-formed region code. * Do not rely on a particular region->index mapping; it may change. - * Returns -1 for ill-formed strings. + * Returns 0 for ill-formed strings. 
*/ static final int indexForRegion(String region) { if (region.length() == 2) { int a = region.charAt(0) - 'A'; - if (a < 0 || 25 < a) { return -1; } + if (a < 0 || 25 < a) { return 0; } int b = region.charAt(1) - 'A'; - if (b < 0 || 25 < b) { return -1; } - return 26 * a + b + 1000; + if (b < 0 || 25 < b) { return 0; } + return 26 * a + b + 1001; } else if (region.length() == 3) { int a = region.charAt(0) - '0'; - if (a < 0 || 9 < a) { return -1; } + if (a < 0 || 9 < a) { return 0; } int b = region.charAt(1) - '0'; - if (b < 0 || 9 < b) { return -1; } + if (b < 0 || 9 < b) { return 0; } int c = region.charAt(2) - '0'; - if (c < 0 || 9 < c) { return -1; } - return (10 * a + b) * 10 + c; + if (c < 0 || 9 < c) { return 0; } + return (10 * a + b) * 10 + c + 1; } - return -1; + return 0; } @Override diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java index b6fad04d4d0..a6bdbf695be 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java @@ -26,7 +26,7 @@ import com.ibm.icu.util.ICUException; * Reads source data from ICU resource bundles. */ class LikelySubtagsBuilder { - private static final boolean DEBUG_OUTPUT = false; + private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; private static ICUResourceBundle getSupplementalDataBundle(String name) { return ICUResourceBundle.getBundleInstance( @@ -84,12 +84,33 @@ class LikelySubtagsBuilder { private static final class TrieBuilder { byte[] bytes = new byte[24]; + int length = 0; BytesTrieBuilder tb = new BytesTrieBuilder(); - void addMapping(String s, int value) { - // s contains only ASCII characters. 
- s.getBytes(0, s.length(), bytes, 0); - tb.add(bytes, s.length(), value); + void addValue(int value) { + assert value >= 0; + tb.add(bytes, length, value); + } + + void addStar() { + bytes[length++] = '*'; + } + + void addSubtag(String s) { + assert !s.isEmpty(); + assert !s.equals("*"); + int end = s.length() - 1; + for (int i = 0;; ++i) { + char c = s.charAt(i); + assert c <= 0x7f; + if (i < end) { + bytes[length++] = (byte) c; + } else { + // Mark the last character as a terminator to avoid overlap matches. + bytes[length++] = (byte) (c | 0x80); + break; + } + } } BytesTrie build() { @@ -114,44 +135,70 @@ class LikelySubtagsBuilder { TrieBuilder trieBuilder = new TrieBuilder(); Map lsrIndexes = new LinkedHashMap<>(); - // Bogus LSR at index 0 for some code to easily distinguish between - // intermediate match points and real result values. - LSR bogus = new LSR("", "", ""); - lsrIndexes.put(bogus, 0); + // Reserve index 0 as "no value": + // The runtime lookup returns 0 for an intermediate match with no value. + lsrIndexes.put(new LSR("", "", ""), 0); // arbitrary LSR + // Reserve index 1 for SKIP_SCRIPT: + // The runtime lookup returns 1 for an intermediate match with a value. + lsrIndexes.put(new LSR("skip", "script", ""), 1); // looks good when printing the data // We could prefill the lsrList with common locales to give them small indexes, // and see if that improves performance a little. for (Map.Entry>> ls : langTable.entrySet()) { + trieBuilder.length = 0; String lang = ls.getKey(); if (lang.equals("und")) { - lang = "*"; + trieBuilder.addStar(); + } else { + trieBuilder.addSubtag(lang); } - // Create a match point for the language. - trieBuilder.addMapping(lang, 0); Map> scriptTable = ls.getValue(); + boolean skipScript = false; + if (scriptTable.size() == 1) { + Map regionTable = scriptTable.get(""); + if (regionTable.size() == 1) { + // Prune the script and region levels from language with + // only * for scripts and regions. 
+ int i = uniqueIdForLsr(lsrIndexes, regionTable.get("")); + trieBuilder.addValue(i); + continue; + } else { + // Prune the script level from language with only * for scripts + // but with real regions. + // Set an intermediate value as a signal to the lookup code. + trieBuilder.addValue(XLikelySubtags.SKIP_SCRIPT); + skipScript = true; + } + } + int scriptStartLength = trieBuilder.length; for (Map.Entry> sr : scriptTable.entrySet()) { - String script = sr.getKey(); - if (script.isEmpty()) { - script = "*"; + trieBuilder.length = scriptStartLength; + if (!skipScript) { + String script = sr.getKey(); + if (script.isEmpty()) { + trieBuilder.addStar(); + } else { + trieBuilder.addSubtag(script); + } } - // Match point for lang+script. - trieBuilder.addMapping(lang + script, 0); Map regionTable = sr.getValue(); + if (regionTable.size() == 1) { + // Prune the region level from language+script with only * for regions. + int i = uniqueIdForLsr(lsrIndexes, regionTable.get("")); + trieBuilder.addValue(i); + continue; + } + int regionStartLength = trieBuilder.length; for (Map.Entry r2lsr : regionTable.entrySet()) { + trieBuilder.length = regionStartLength; String region = r2lsr.getKey(); - if (region.isEmpty()) { - region = "*"; - } // Map the whole lang+script+region to a unique, dense index of the LSR. 
- LSR lsr = r2lsr.getValue(); - Integer index = lsrIndexes.get(lsr); - int i; - if (index != null) { - i = index.intValue(); + if (region.isEmpty()) { + trieBuilder.addStar(); } else { - i = lsrIndexes.size(); - lsrIndexes.put(lsr, i); + trieBuilder.addSubtag(region); } - trieBuilder.addMapping(lang + script + region, i); + int i = uniqueIdForLsr(lsrIndexes, r2lsr.getValue()); + trieBuilder.addValue(i); } } } @@ -161,6 +208,17 @@ class LikelySubtagsBuilder { languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs); } + private static int uniqueIdForLsr(Map lsrIndexes, LSR lsr) { + Integer index = lsrIndexes.get(lsr); + if (index != null) { + return index.intValue(); + } else { + int i = lsrIndexes.size(); + lsrIndexes.put(lsr, i); + return i; + } + } + private static Map>> makeTable( AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) { Map>> result = new TreeMap<>(); @@ -176,11 +234,8 @@ class LikelySubtagsBuilder { final String region = ltp.region; ltp = lsrFromLocaleID(value.getString()); // target - String languageTarget = ltp.language; - final String scriptTarget = ltp.script; - final String regionTarget = ltp.region; + set(result, language, script, region, ltp); - set(result, language, script, region, languageTarget, scriptTarget, regionTarget); // now add aliases Collection languageAliases = languageAliasesBuilder.getAliases(language); Collection regionAliases = regionAliasesBuilder.getAliases(region); @@ -189,13 +244,12 @@ class LikelySubtagsBuilder { if (languageAlias.equals(language) && regionAlias.equals(region)) { continue; } - set(result, languageAlias, script, regionAlias, - languageTarget, scriptTarget, regionTarget); + set(result, languageAlias, script, regionAlias, ltp); } } } // hack - set(result, "und", "Latn", "", "en", "Latn", "US"); + set(result, "und", "Latn", "", new LSR("en", "Latn", "US")); // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table // @@ -241,13 +295,6 
@@ class LikelySubtagsBuilder { return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3); } - private static void set(Map>> langTable, - final String language, final String script, final String region, - final String languageTarget, final String scriptTarget, final String regionTarget) { - LSR target = new LSR(languageTarget, scriptTarget, regionTarget); - set(langTable, language, script, region, target); - } - private static void set(Map>> langTable, final String language, final String script, final String region, LSR newValue) { Map> scriptTable = getSubtable(langTable, language); @@ -255,10 +302,10 @@ class LikelySubtagsBuilder { regionTable.put(region, newValue); } - private static Map getSubtable(Map> table, final K language) { - Map subTable = table.get(language); + private static Map getSubtable(Map> table, final K subtag) { + Map subTable = table.get(subtag); if (subTable == null) { - table.put(language, subTable = new TreeMap<>()); + table.put(subtag, subTable = new TreeMap<>()); } return subTable; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java index 44c71694633..56735a8b5cd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java @@ -2,10 +2,11 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.locale; -import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; +import java.util.TreeMap; +import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.ULocale; @@ -14,9 +15,21 @@ import com.ibm.icu.util.ULocale; * Mostly but not only the data for mapping locales to their maximized forms. */ public class LocaleDistance { + /** Distance value bit flag, set by the builder. 
*/ + static final int DISTANCE_SKIP_SCRIPT = 0x80; + /** Distance value bit flag, set by trieNext(). */ + private static final int DISTANCE_IS_FINAL = 0x100; + private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = + DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT; + // Indexes into array of distances. + static final int IX_DEF_LANG_DISTANCE = 0; + static final int IX_DEF_SCRIPT_DISTANCE = 1; + static final int IX_DEF_REGION_DISTANCE = 2; + static final int IX_MIN_REGION_DISTANCE = 3; + static final int IX_LIMIT = 4; private static final int ABOVE_THRESHOLD = 100; - private static final boolean DEBUG_OUTPUT = false; + private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; // The trie maps each dlang+slang+dscript+sscript+dregion+sregion // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. @@ -28,7 +41,7 @@ public class LocaleDistance { * Maps each region to zero or more single-character partitions. */ private final byte[] regionToPartitionsIndex; - private final String[][] partitionArrays; + private final String[] partitionArrays; /** * Used to get the paradigm region for a cluster, if there is one. @@ -38,6 +51,8 @@ public class LocaleDistance { private final int defaultLanguageDistance; private final int defaultScriptDistance; private final int defaultRegionDistance; + private final int minRegionDistance; + private final int defaultDemotionPerDesiredLocale; // TODO: Load prebuilt data from a resource bundle // to avoid the dependency on the builder code. 
@@ -45,42 +60,40 @@ public class LocaleDistance { public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build(); LocaleDistance(BytesTrie trie, - byte[] regionToPartitionsIndex, String[][] partitionArrays, - Set paradigmLSRs) { + byte[] regionToPartitionsIndex, String[] partitionArrays, + Set paradigmLSRs, int[] distances) { this.trie = trie; - if (DEBUG_OUTPUT) { - System.out.println("*** locale distance"); - testOnlyPrintDistanceTable(); - } this.regionToPartitionsIndex = regionToPartitionsIndex; this.partitionArrays = partitionArrays; this.paradigmLSRs = paradigmLSRs; + defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE]; + defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE]; + defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE]; + this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE]; - BytesTrie iter = new BytesTrie(trie); - BytesTrie.Result result = iter.next('*'); - assert result == BytesTrie.Result.INTERMEDIATE_VALUE; - defaultLanguageDistance = iter.getValue(); - result = iter.next('*'); - assert result == BytesTrie.Result.INTERMEDIATE_VALUE; - defaultScriptDistance = iter.getValue(); - result = iter.next('*'); - assert result.hasValue(); - defaultRegionDistance = iter.getValue(); + LSR en = new LSR("en", "Latn", "US"); + LSR enGB = new LSR("en", "Latn", "GB"); + defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB }, + 50, FavorSubtag.LANGUAGE) & 0xff; + + if (DEBUG_OUTPUT) { + System.out.println("*** locale distance"); + System.out.println("defaultLanguageDistance=" + defaultLanguageDistance); + System.out.println("defaultScriptDistance=" + defaultScriptDistance); + System.out.println("defaultRegionDistance=" + defaultRegionDistance); + testOnlyPrintDistanceTable(); + } } // VisibleForTesting public int testOnlyDistance(ULocale desired, ULocale supported, - int threshold, DistanceOption distanceOption) { + int threshold, FavorSubtag favorSubtag) { LSR supportedLSR = 
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported); LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired); return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, - threshold, distanceOption) & 0xff; + threshold, favorSubtag) & 0xff; } - public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST} - // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight - // than regions, so they might be considered the "normal" case. - /** * Finds the supported LSR with the smallest distance from the desired one. * Equivalent LSR subtags must be normalized into a canonical form. @@ -90,13 +103,12 @@ public class LocaleDistance { * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. */ int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs, - int threshold, DistanceOption distanceOption) { + int threshold, FavorSubtag favorSubtag) { BytesTrie iter = new BytesTrie(trie); // Look up the desired language only once for all supported LSRs. // Its "distance" is either a match point value of 0, or a non-match negative value. // Note: The data builder verifies that there are no <*, supported> or rules. - // Set wantValue=true so that iter reads & skips the match point value. - int desLangDistance = trieNext(iter, desired.language, true, true); + int desLangDistance = trieNext(iter, desired.language, false); long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0; // Index of the supported LSR with the lowest distance. 
int bestIndex = -1; @@ -105,26 +117,31 @@ public class LocaleDistance { boolean star = false; int distance = desLangDistance; if (distance >= 0) { + assert (distance & DISTANCE_IS_FINAL) == 0; if (slIndex != 0) { iter.resetToState64(desLangState); } - distance = trieNext(iter, supported.language, true, true); + distance = trieNext(iter, supported.language, true); } // Note: The data builder verifies that there are no rules with "any" (*) language and // real (non *) script or region subtags. // This means that if the lookup for either language fails we can use // the default distances without further lookups. - if (distance < 0) { // <*, *> + int flags; + if (distance >= 0) { + flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + } else { // <*, *> if (desired.language.equals(supported.language)) { distance = 0; } else { distance = defaultLanguageDistance; } + flags = 0; star = true; } assert 0 <= distance && distance <= 100; - boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST; - if (scriptFirst) { + if (favorSubtag == FavorSubtag.SCRIPT) { distance >>= 2; } if (distance >= threshold) { @@ -132,18 +149,17 @@ public class LocaleDistance { } int scriptDistance; - if (star) { + if (star || flags != 0) { if (desired.script.equals(supported.script)) { scriptDistance = 0; } else { scriptDistance = defaultScriptDistance; } } else { - scriptDistance = getDesSuppDistance(iter, iter.getState64(), - desired.script, supported.script, false); - } - if (scriptFirst) { - scriptDistance >>= 1; + scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(), + desired.script, supported.script); + flags = scriptDistance & DISTANCE_IS_FINAL; + scriptDistance &= ~DISTANCE_IS_FINAL; } distance += scriptDistance; if (distance >= threshold) { @@ -152,27 +168,24 @@ public class LocaleDistance { if (desired.region.equals(supported.region)) { // regionDistance = 0 - } else if (star) { + } else if (star || (flags & 
DISTANCE_IS_FINAL) != 0) { distance += defaultRegionDistance; } else { - long startState = iter.getState64(); + int remainingThreshold = threshold - distance; + if (minRegionDistance >= remainingThreshold) { + continue; + } // From here on we know the regions are not equal. - // Map each region to zero or more partitions. (zero = one empty string) + // Map each region to zero or more partitions. (zero = one non-matching string) + // (Each array of single-character partition strings is encoded as one string.) // If either side has more than one, then we find the maximum distance. // This could be optimized by adding some more structure, but probably not worth it. - final String[] desiredPartitions = partitionsForRegion(desired); - final String[] supportedPartitions = partitionsForRegion(supported); - int regionDistance; - - if (desiredPartitions.length > 1 || supportedPartitions.length > 1) { - regionDistance = getRegionPartitionsDistance(iter, startState, - desiredPartitions, supportedPartitions, threshold - distance); - } else { - regionDistance = getDesSuppDistance(iter, startState, - desiredPartitions[0], supportedPartitions[0], true); - } - distance += regionDistance; + distance += getRegionPartitionsDistance( + iter, iter.getState64(), + partitionsForRegion(desired), + partitionsForRegion(supported), + remainingThreshold); } if (distance < threshold) { if (distance == 0) { @@ -185,101 +198,140 @@ public class LocaleDistance { return bestIndex >= 0 ? 
(bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD; } - private int getRegionPartitionsDistance(BytesTrie iter, long startState, - String[] desiredPartitions, String[] supportedPartitions, int threshold) { - int regionDistance = -1; - for (String dp : desiredPartitions) { - for (String sp : supportedPartitions) { - if (regionDistance >= 0) { // no need to reset in first iteration - iter.resetToState64(startState); + private static final int getDesSuppScriptDistance(BytesTrie iter, long startState, + String desired, String supported) { + // Note: The data builder verifies that there are no <*, supported> or rules. + int distance = trieNext(iter, desired, false); + if (distance >= 0) { + distance = trieNext(iter, supported, true); + } + if (distance < 0) { + BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *> + assert result.hasValue(); + if (desired.equals(supported)) { + distance = 0; // same script + } else { + distance = iter.getValue(); + assert distance >= 0; + } + if (result == BytesTrie.Result.FINAL_VALUE) { + distance |= DISTANCE_IS_FINAL; + } + } + return distance; + } + + private static final int getRegionPartitionsDistance(BytesTrie iter, long startState, + String desiredPartitions, String supportedPartitions, int threshold) { + int desLength = desiredPartitions.length(); + int suppLength = supportedPartitions.length(); + if (desLength == 1 && suppLength == 1) { + BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80); + if (result.hasNext()) { + result = iter.next(supportedPartitions.charAt(0) | 0x80); + if (result.hasValue()) { + return iter.getValue(); } - int d = getDesSuppDistance(iter, startState, dp, sp, true); - if (regionDistance < d) { + } + return getFallbackRegionDistance(iter, startState); + } + + int regionDistance = 0; + // Fall back to * only once, not for each pair of partition strings. 
+ boolean star = false; + for (int di = 0;;) { + // Look up each desired-partition string only once, + // not for each (desired, supported) pair. + BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80); + if (result.hasNext()) { + long desState = suppLength > 1 ? iter.getState64() : 0; + for (int si = 0;;) { + result = iter.next(supportedPartitions.charAt(si++) | 0x80); + int d; + if (result.hasValue()) { + d = iter.getValue(); + } else if (star) { + d = 0; + } else { + d = getFallbackRegionDistance(iter, startState); + star = true; + } if (d >= threshold) { return d; + } else if (regionDistance < d) { + regionDistance = d; } + if (si < suppLength) { + iter.resetToState64(desState); + } else { + break; + } + } + } else if (!star) { + int d = getFallbackRegionDistance(iter, startState); + if (d >= threshold) { + return d; + } else if (regionDistance < d) { regionDistance = d; } + star = true; + } + if (di < desLength) { + iter.resetToState64(startState); + } else { + break; } } - assert regionDistance >= 0; return regionDistance; } - // Modified from - // DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals). - private static final int getDesSuppDistance(BytesTrie iter, long startState, - String desired, String supported, boolean finalSubtag) { - // Note: The data builder verifies that there are no <*, supported> or rules. - int distance = trieNext(iter, desired, false, true); - if (distance >= 0) { - distance = trieNext(iter, supported, true, !finalSubtag); - } - if (distance < 0) { - BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *> - assert finalSubtag ? 
result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE; - if (!finalSubtag && desired.equals(supported)) { - distance = 0; // same language or script - } else { - distance = iter.getValue(); - assert distance >= 0; - } - } + private static final int getFallbackRegionDistance(BytesTrie iter, long startState) { + BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *> + assert result.hasValue(); + int distance = iter.getValue(); + assert distance >= 0; return distance; } - private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) { + private static final int trieNext(BytesTrie iter, String s, boolean wantValue) { if (s.isEmpty()) { return -1; // no empty subtags in the distance data } - BytesTrie.Result result; - int end = s.length() - 1; - for (int i = 0;; ++i) { + for (int i = 0, end = s.length() - 1;; ++i) { int c = s.charAt(i); - assert c <= 0x7f; if (i < end) { - result = iter.next(c); - if (!result.hasNext()) { + if (!iter.next(c).hasNext()) { return -1; } } else { // last character of this subtag - result = iter.next(c | 0x80); - break; - } - } - if (wantValue) { - if (wantNext) { - if (result == BytesTrie.Result.INTERMEDIATE_VALUE) { - return iter.getValue(); - } - } else { - if (result.hasValue()) { - return iter.getValue(); - } - } - } else { - if (wantNext) { - if (result == BytesTrie.Result.INTERMEDIATE_VALUE) { - return 0; - } - } else { - if (result.hasValue()) { - return 0; + BytesTrie.Result result = iter.next(c | 0x80); + if (wantValue) { + if (result.hasValue()) { + int value = iter.getValue(); + if (result == BytesTrie.Result.FINAL_VALUE) { + value |= DISTANCE_IS_FINAL; + } + return value; + } + } else { + if (result.hasNext()) { + return 0; + } } + return -1; } } - return -1; } @Override public String toString() { - return testOnlyGetDistanceTable(true).toString(); + return testOnlyGetDistanceTable().toString(); } - private String[] partitionsForRegion(LSR lsr) { - // 
ill-formed region -> one empty string - int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0; + private String partitionsForRegion(LSR lsr) { + // ill-formed region -> one non-matching string + int pIndex = regionToPartitionsIndex[lsr.regionIndex]; return partitionArrays[pIndex]; } @@ -296,48 +348,50 @@ public class LocaleDistance { return defaultRegionDistance; } + int getDefaultDemotionPerDesiredLocale() { + return defaultDemotionPerDesiredLocale; + } + + // TODO: When we build data offline, + // write test code to compare the loaded table with the builder output. + // Fail if different, with instructions for how to update the data file. // VisibleForTesting - public Map testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) { - Map map = new LinkedHashMap<>(); + public Map testOnlyGetDistanceTable() { + Map map = new TreeMap<>(); StringBuilder sb = new StringBuilder(); for (BytesTrie.Entry entry : trie) { sb.setLength(0); - int numSubtags = 0; int length = entry.bytesLength(); for (int i = 0; i < length; ++i) { byte b = entry.byteAt(i); if (b == '*') { // One * represents a (desired, supported) = (ANY, ANY) pair. sb.append("*-*-"); - numSubtags += 2; } else { if (b >= 0) { sb.append((char) b); } else { // end of subtag sb.append((char) (b & 0x7f)).append('-'); - ++numSubtags; } } } assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-'; - if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) { - sb.setLength(sb.length() - 1); - String s = sb.toString(); - if (!skipIntermediateMatchPoints && s.endsWith("*-*")) { - // Re-insert single-ANY match points to show consistent structure - // for the test code. 
- map.put(s.substring(0, s.length() - 2), 0); - } - map.put(s, entry.value); - } + sb.setLength(sb.length() - 1); + map.put(sb.toString(), entry.value); } return map; } // VisibleForTesting public void testOnlyPrintDistanceTable() { - for (Map.Entry mapping : testOnlyGetDistanceTable(true).entrySet()) { - System.out.println(mapping); + for (Map.Entry mapping : testOnlyGetDistanceTable().entrySet()) { + String suffix = ""; + int value = mapping.getValue(); + if ((value & DISTANCE_SKIP_SCRIPT) != 0) { + value &= ~DISTANCE_SKIP_SCRIPT; + suffix = " skip script"; + } + System.out.println(mapping.getKey() + '=' + value + suffix); } } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java index aa5bc5316dc..83cbe4a3e74 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java @@ -29,7 +29,7 @@ import com.ibm.icu.util.ULocale; public final class LocaleDistanceBuilder { private static final String ANY = "�"; // matches any character. Uses value above any subtag. - private static final boolean DEBUG_OUTPUT = false; + private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; private static String fixAny(String string) { return "*".equals(string) ? 
ANY : string; @@ -135,7 +135,6 @@ public final class LocaleDistanceBuilder { void addSubtag(String s, int value) { assert !s.isEmpty(); - assert value >= 0; assert !s.equals(ANY); int end = s.length() - 1; for (int i = 0;; ++i) { @@ -149,7 +148,9 @@ public final class LocaleDistanceBuilder { break; } } - tb.add(bytes, length, value); + if (value >= 0) { + tb.add(bytes, length, value); + } } BytesTrie build() { @@ -166,7 +167,7 @@ public final class LocaleDistanceBuilder { } private static final class DistanceTable { - final int nodeDistance; // distance for the lookup so far + int nodeDistance; // distance for the lookup so far final Map> subtables; DistanceTable(int distance) { @@ -188,7 +189,8 @@ public final class LocaleDistanceBuilder { return nodeDistance ^ subtables.hashCode(); } - public int getDistance(String desired, String supported, Output distanceTable, boolean starEquals) { + private int getDistance(String desired, String supported, + Output distanceTable, boolean starEquals) { boolean star = false; Map sub2 = subtables.get(desired); if (sub2 == null) { @@ -214,6 +216,10 @@ public final class LocaleDistanceBuilder { return result; } + private DistanceTable getAnyAnyNode() { + return subtables.get(ANY).get(ANY); + } + void copy(DistanceTable other) { for (Map.Entry> e1 : other.subtables.entrySet()) { for (Map.Entry e2 : e1.getValue().entrySet()) { @@ -330,6 +336,34 @@ public final class LocaleDistanceBuilder { addSubtables(desiredLang, supportedLang, r); } + void prune(int level, int[] distances) { + for (Map suppNodeMap : subtables.values()) { + for (DistanceTable node : suppNodeMap.values()) { + node.prune(level + 1, distances); + } + } + if (subtables.size() == 1) { + DistanceTable next = getAnyAnyNode(); + if (level == 1) { + // Remove script table -*-*-50 where there are no other script rules + // and no following region rules. + // If there are region rules, then mark this table for skipping. 
+ if (next.nodeDistance == distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE]) { + if (next.subtables.isEmpty()) { + subtables.clear(); + } else { + nodeDistance |= LocaleDistance.DISTANCE_SKIP_SCRIPT; + } + } + } else if (level == 2) { + // Remove region table -*-*-4 where there are no other region rules. + if (next.nodeDistance == distances[LocaleDistance.IX_DEF_REGION_DISTANCE]) { + subtables.clear(); + } + } + } + } + @Override public String toString() { StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n'); @@ -356,6 +390,10 @@ public final class LocaleDistanceBuilder { } void toTrie(TrieBuilder builder) { + if (nodeDistance >= 0 && (nodeDistance & LocaleDistance.DISTANCE_SKIP_SCRIPT) != 0) { + getAnyAnyNode().toTrie(builder); + return; + } int startLength = builder.length; for (Map.Entry> desSuppNode : subtables.entrySet()) { String desired = desSuppNode.getKey(); @@ -367,7 +405,7 @@ public final class LocaleDistanceBuilder { builder.addStar(node.nodeDistance); node.toTrie(builder); } else { - builder.addSubtag(desired, 0); + builder.addSubtag(desired, -1); int desiredLength = builder.length; for (Map.Entry suppNode : suppNodeMap.entrySet()) { String supported = suppNode.getKey(); @@ -508,6 +546,7 @@ public final class LocaleDistanceBuilder { final Multimap variableToPartition = rmb.variableToPartitions; final DistanceTable defaultDistanceTable = new DistanceTable(-1); + int minRegionDistance = 100; for (Rule rule : rules) { List desired = rule.desired; List supported = rule.supported; @@ -519,6 +558,9 @@ public final class LocaleDistanceBuilder { } } else { // language-script-region + if (rule.distance < minRegionDistance) { + minRegionDistance = rule.distance; + } Collection desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2)); Collection supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2)); for (String desiredRegion2 : desiredRegions) { @@ -534,11 +576,25 @@ public final class 
LocaleDistanceBuilder { } } + int[] distances = new int[LocaleDistance.IX_LIMIT]; + DistanceTable node = defaultDistanceTable.getAnyAnyNode(); + distances[LocaleDistance.IX_DEF_LANG_DISTANCE] = node.nodeDistance; + node = node.getAnyAnyNode(); + distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE] = node.nodeDistance; + node = node.getAnyAnyNode(); + distances[LocaleDistance.IX_DEF_REGION_DISTANCE] = node.nodeDistance; + distances[LocaleDistance.IX_MIN_REGION_DISTANCE] = minRegionDistance; + + defaultDistanceTable.prune(0, distances); + assert defaultDistanceTable.getAnyAnyNode().subtables.isEmpty(); + defaultDistanceTable.subtables.remove(ANY); + TrieBuilder trieBuilder = new TrieBuilder(); defaultDistanceTable.toTrie(trieBuilder); BytesTrie trie = trieBuilder.build(); return new LocaleDistance( - trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs); + trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, + paradigmLSRs, distances); } private static int checkStars(String desired, String supported, boolean allStars) { @@ -587,7 +643,7 @@ public final class LocaleDistanceBuilder { // build() output Multimap variableToPartitions; private byte[] regionToPartitionsIndex; - private String[][] partitionArrays; + private String[] partitionArrays; RegionMapperBuilder(TerritoryContainment tc) { regionSet = new RegionSet(tc); @@ -623,7 +679,7 @@ public final class LocaleDistanceBuilder { void ensureRegionIsVariable(List lsrList) { String region = lsrList.get(2); if (!isKnownVariable(region)) { - assert LSR.indexForRegion(region) >= 0; // well-formed region subtag + assert LSR.indexForRegion(region) > 0; // well-formed region subtag String variable = "$" + region; add(variable, region); lsrList.set(2, variable); @@ -639,7 +695,7 @@ public final class LocaleDistanceBuilder { // Example: {"1", "5"} Map, Integer> partitionStrings = new LinkedHashMap<>(); // pIndex 0: default value in regionToPartitionsIndex - Collection noPartitions = Collections.singleton(""); + 
Collection noPartitions = Collections.singleton("."); makeUniqueIndex(partitionStrings, noPartitions); // Example: "$americas" -> {"1", "5"} @@ -697,13 +753,24 @@ public final class LocaleDistanceBuilder { regionToPartitionsIndex[regionIndex] = (byte) pIndex; } } + // LSR.indexForRegion(ill-formed region) returns 0. + // Its regionToPartitionsIndex must also be 0 for the noPartitions value. + assert regionToPartitionsIndex[0] == 0; - // Turn the Collection of Collections into an array of arrays. + // Turn the Collection of Collections of single-character strings + // into an array of strings. Collection> list = partitionStrings.keySet(); - partitionArrays = new String[list.size()][]; + partitionArrays = new String[list.size()]; + StringBuilder sb = new StringBuilder(); int i = 0; for (Collection partitions : list) { - partitionArrays[i++] = partitions.toArray(new String[partitions.size()]); + assert !partitions.isEmpty(); + sb.setLength(0); + for (String p : partitions) { + assert p.length() == 1; + sb.append(p); + } + partitionArrays[i++] = sb.toString(); } } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java index 26b540fc23e..0873b6d3241 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java @@ -2,10 +2,9 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.locale; -import java.util.HashSet; -import java.util.LinkedHashMap; +import java.util.Locale; import java.util.Map; -import java.util.Set; +import java.util.TreeMap; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.ULocale; @@ -15,11 +14,14 @@ public final class XLikelySubtags { private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK - private static 
final boolean DEBUG_OUTPUT = false; + static final int SKIP_SCRIPT = 1; + + private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; // TODO: Load prebuilt data from a resource bundle // to avoid the dependency on the builder code. - static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build()); + // VisibleForTesting + public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build()); static final class Data { private final Map languageAliases; @@ -46,6 +48,7 @@ public final class XLikelySubtags { private final long trieUndState; private final long trieUndZzzzState; private final int defaultLsrIndex; + private final long[] trieFirstLetterStates = new long[26]; private final LSR[] lsrs; private XLikelySubtags(XLikelySubtags.Data data) { @@ -56,20 +59,24 @@ public final class XLikelySubtags { // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**"). BytesTrie.Result result = trie.next('*'); - assert result == BytesTrie.Result.INTERMEDIATE_VALUE; - int value = trie.getValue(); - assert value == 0; + assert result.hasNext(); trieUndState = trie.getState64(); result = trie.next('*'); - assert result == BytesTrie.Result.INTERMEDIATE_VALUE; - value = trie.getValue(); - assert value == 0; + assert result.hasNext(); trieUndZzzzState = trie.getState64(); result = trie.next('*'); assert result.hasValue(); defaultLsrIndex = trie.getValue(); trie.reset(); + for (char c = 'a'; c <= 'z'; ++c) { + result = trie.next(c); + if (result == BytesTrie.Result.NO_VALUE) { + trieFirstLetterStates[c - 'a'] = trie.getState64(); + } + trie.reset(); + } + if (DEBUG_OUTPUT) { System.out.println("*** likely subtags"); for (Map.Entry mapping : getTable().entrySet()) { @@ -83,19 +90,31 @@ public final class XLikelySubtags { return canonical == null ? 
alias : canonical; } - LSR makeMaximizedLsrFrom(ULocale locale) { + // VisibleForTesting + public LSR makeMaximizedLsrFrom(ULocale locale) { String name = locale.getName(); if (name.startsWith("@x=")) { // Private use language tag x-subtag-subtag... return new LSR(name, "", ""); } + return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), + locale.getVariant()); + } + LSR makeMaximizedLsrFrom(Locale locale) { + String tag = locale.toLanguageTag(); + if (tag.startsWith("x-")) { + // Private use language tag x-subtag-subtag... + return new LSR(tag, "", ""); + } + return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), + locale.getVariant()); + } + + private LSR makeMaximizedLsr(String language, String script, String region, String variant) { // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK. // They should match only themselves, // not other locales with what looks like the same language and script subtags. - String language = locale.getLanguage(); - String script = locale.getScript(); - String region = locale.getCountry(); if (region.length() == 2 && region.charAt(0) == 'X') { switch (region.charAt(1)) { case 'A': @@ -112,7 +131,6 @@ public final class XLikelySubtags { } } - String variant = locale.getVariant(); if (variant.startsWith("PS")) { switch (variant) { case "PSACCENT": @@ -130,7 +148,7 @@ public final class XLikelySubtags { } language = getCanonical(languageAliases, language); - // script is ok + // (We have no script mappings.) region = getCanonical(regionAliases, region); return INSTANCE.maximize(language, script, region); } @@ -139,14 +157,31 @@ public final class XLikelySubtags { * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN". 
*/ private LSR maximize(String language, String script, String region) { - int retainOldMask = 0; - BytesTrie iter = new BytesTrie(trie); - // language lookup if (language.equals("und")) { language = ""; } + if (script.equals("Zzzz")) { + script = ""; + } + if (region.equals("ZZ")) { + region = ""; + } + if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) { + return new LSR(language, script, region); // already maximized + } + + int retainOldMask = 0; + BytesTrie iter = new BytesTrie(trie); long state; - int value = trieNext(iter, language, false); + int value; + // Small optimization: Array lookup for first language letter. + int c0; + if (language.length() >= 2 && 0 <= (c0 = language.charAt(0) - 'a') && c0 <= 25 && + (state = trieFirstLetterStates[c0]) != 0) { + value = trieNext(iter.resetToState64(state), language, 1); + } else { + value = trieNext(iter, language, 0); + } if (value >= 0) { if (!language.isEmpty()) { retainOldMask |= 4; @@ -157,45 +192,54 @@ public final class XLikelySubtags { iter.resetToState64(trieUndState); // "und" ("*") state = 0; } - // script lookup - if (script.equals("Zzzz")) { - script = ""; - } - value = trieNext(iter, script, false); - if (value >= 0) { + + if (value > 0) { + // Intermediate or final value from just language. 
+ if (value == SKIP_SCRIPT) { + value = 0; + } if (!script.isEmpty()) { retainOldMask |= 2; } - state = iter.getState64(); } else { - retainOldMask |= 2; - if (state == 0) { - iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") - } else { - iter.resetToState64(state); - value = trieNext(iter, "", false); - assert value == 0; + value = trieNext(iter, script, 0); + if (value >= 0) { + if (!script.isEmpty()) { + retainOldMask |= 2; + } state = iter.getState64(); + } else { + retainOldMask |= 2; + if (state == 0) { + iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + assert value >= 0; + state = iter.getState64(); + } } } - // region lookup - if (region.equals("ZZ")) { - region = ""; - } - value = trieNext(iter, region, true); - if (value >= 0) { + + if (value > 0) { + // Final value from just language or language+script. if (!region.isEmpty()) { retainOldMask |= 1; } } else { - retainOldMask |= 1; - if (state == 0) { - value = defaultLsrIndex; + value = trieNext(iter, region, 0); + if (value >= 0) { + if (!region.isEmpty()) { + retainOldMask |= 1; + } } else { - iter.resetToState64(state); - value = trieNext(iter, "", true); - if (value < 0) { // TODO: should never happen?! just assert value >= 0? 
- return null; + retainOldMask |= 1; + if (state == 0) { + value = defaultLsrIndex; + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + assert value > 0; } } } @@ -220,34 +264,34 @@ public final class XLikelySubtags { return new LSR(language, script, region); } - private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) { + private static final int trieNext(BytesTrie iter, String s, int i) { BytesTrie.Result result; if (s.isEmpty()) { result = iter.next('*'); } else { int end = s.length() - 1; - for (int i = 0;; ++i) { - result = iter.next(s.charAt(i)); + for (;; ++i) { + int c = s.charAt(i); if (i < end) { - if (!result.hasNext()) { + if (!iter.next(c).hasNext()) { return -1; } } else { // last character of this subtag + result = iter.next(c | 0x80); break; } } } - if (!finalSubtag) { - if (result == BytesTrie.Result.INTERMEDIATE_VALUE) { - return 0; // value should be 0, don't care - } - } else { - if (result.hasValue()) { - return iter.getValue(); - } + switch (result) { + case NO_MATCH: return -1; + case NO_VALUE: return 0; + case INTERMEDIATE_VALUE: + assert iter.getValue() == SKIP_SCRIPT; + return SKIP_SCRIPT; + case FINAL_VALUE: return iter.getValue(); + default: return -1; } - return -1; } LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, @@ -263,11 +307,16 @@ public final class XLikelySubtags { // value00 = lookup(result.language, "", "") BytesTrie iter = new BytesTrie(trie); - int value = trieNext(iter, result.language, false); - assert value >= 0; - value = trieNext(iter, "", false); + int value = trieNext(iter, result.language, 0); assert value >= 0; - value = trieNext(iter, "", true); + if (value == 0) { + value = trieNext(iter, "", 0); + assert value >= 0; + if (value == 0) { + value = trieNext(iter, "", 0); + } + } + assert value > 0; LSR value00 = lsrs[value]; boolean favorRegionOk = false; if (result.script.equals(value00.script)) { //script is default @@ -292,26 +341,24 @@ 
public final class XLikelySubtags { } private Map getTable() { - Map map = new LinkedHashMap<>(); - Set prefixes = new HashSet<>(); + Map map = new TreeMap<>(); StringBuilder sb = new StringBuilder(); for (BytesTrie.Entry entry : trie) { sb.setLength(0); int length = entry.bytesLength(); for (int i = 0; i < length;) { byte b = entry.byteAt(i++); - sb.append((char) b); - if (i < length && prefixes.contains(sb.toString())) { - sb.append('-'); + if (b == '*') { + sb.append("*-"); + } else if (b >= 0) { + sb.append((char) b); + } else { // end of subtag + sb.append((char) (b & 0x7f)).append('-'); } } - String s = sb.toString(); - if (entry.value == 0) { - // intermediate match point - prefixes.add(s); - } else { - map.put(s, lsrs[entry.value]); - } + assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-'; + sb.setLength(sb.length() - 1); + map.put(sb.toString(), lsrs[entry.value]); } return map; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java index a527c6192f0..f7ffeb22e77 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java @@ -3,198 +3,485 @@ package com.ibm.icu.impl.locale; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; -import java.util.Set; -import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption; import com.ibm.icu.util.LocalePriorityList; -import com.ibm.icu.util.Output; import com.ibm.icu.util.ULocale; /** - * Immutable class that picks best match between user's desired locales and application's supported locales. 
+ * Immutable class that picks the best match between a user's desired locales and + * an application's supported locales. + * + *

If there are multiple supported locales with the same (language, script, region) + * likely subtags, then the current implementation returns the first of those locales. + * It ignores variant subtags (except for pseudolocale variants) and extensions. + * This may change in future versions. + * + *

For example, the current implementation does not distinguish between + * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. + * + *

If you prefer one equivalent locale over another, then provide only the preferred one, + * or place it earlier in the list of supported locales. + * + *

Otherwise, the order of supported locales may have no effect on the best-match results. + * The current implementation compares each desired locale with supported locales + * in the following order: + * 1. Default locale, if supported; + * 2. CLDR "paradigm locales" like en-GB and es-419; + * 3. other supported locales. + * This may change in future versions. + * + *

TODO: Migration notes. + * * @author markdavis */ public final class XLocaleMatcher { private static final LSR UND_LSR = new LSR("und","",""); - private static final ULocale UND_LOCALE = new ULocale("und"); - private static final Iterator NULL_ITERATOR = null; + private static final ULocale UND_ULOCALE = new ULocale("und"); + private static final Locale UND_LOCALE = new Locale("und"); // Activates debugging output to stderr with details of GetBestMatch. private static final boolean TRACE_MATCHER = false; - // List of indexes, optimized for one or two. - private static final class Indexes { - // Some indexes without further object creation and auto-boxing. - int first, second = -1; - // We could turn the List into an int array + length and manage its growth. - List remaining; + private static abstract class LsrIterator implements Iterator { + int bestDesiredIndex = -1; - Indexes(int firstIndex) { - first = firstIndex; - } - void add(int i) { - if (second < 0) { - second = i; - } else { - if (remaining == null) { - remaining = new ArrayList<>(); - } - remaining.add(i); - } - } - int getFirst() { return first; } - int get(int i) { // returns -1 when i >= length - if (i == 0) { - return first; - } else if (i == 1) { - return second; - } else if (remaining != null && (i -= 2) < remaining.size()) { - return remaining.get(i); - } else { - return -1; - } + @Override + public void remove() { + throw new UnsupportedOperationException(); } + + public abstract void rememberCurrent(int desiredIndex); + } + + /** + * Builder option for whether the language subtag or the script subtag is most important. + * + * @see Builder#setFavorSubtag(FavorSubtag) + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public enum FavorSubtag { + /** + * Language differences are most important, then script differences, then region differences. + * (This is the default behavior.) 
+ * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + LANGUAGE, + /** + * Makes script differences matter relatively more than language differences. + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + SCRIPT } - // TODO: Make public, and add public methods that return it. - private static final class Result { - private Result(ULocale desired, ULocale supported, - /* Locale jdesired, */ Locale jsupported, + /** + * Builder option for whether all desired locales are treated equally or + * earlier ones are preferred. + * + * @see Builder#setDemotionPerDesiredLocale(Demotion) + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public enum Demotion { + /** + * All desired locales are treated equally. + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + NONE, + /** + * Earlier desired locales are preferred. + * + *

From each desired locale to the next, + * the distance to any supported locale is increased by an additional amount + * which is at least as large as most region mismatches. + * A later desired locale has to have a better match with some supported locale + * due to more than merely having the same region subtag. + * + *

For example: Supported={en, sv} desired=[en-GB, sv] + * yields Result(en-GB, en) because + * with the demotion of sv its perfect match is no better than + * the region distance between the earlier desired locale en-GB and en=en-US. + * + *

Notes: + *

    + *
  • In some cases, language and/or script differences can be as small as + * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) + *
  • It is possible for certain region differences to be larger than usual, + * and larger than the demotion. + * (As of CLDR 35 there is no such case, but + * this is possible in future versions of the data.) + *
+ * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + REGION + } + + /** + * Data for the best-matching pair of a desired and a supported locale. + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public static final class Result { + private final ULocale desiredULocale; + private final ULocale supportedULocale; + private final Locale desiredLocale; + private final Locale supportedLocale; + private final int desiredIndex; + private final int supportedIndex; + + private Result(ULocale udesired, ULocale usupported, + Locale desired, Locale supported, int desIndex, int suppIndex) { + desiredULocale = udesired; + supportedULocale = usupported; desiredLocale = desired; supportedLocale = supported; - // desiredJavaLocale = jdesired; - supportedJavaLocale = jsupported; desiredIndex = desIndex; supportedIndex = suppIndex; } - ULocale desiredLocale; - ULocale supportedLocale; - // Locale desiredJavaLocale; - Locale supportedJavaLocale; - int desiredIndex; - @SuppressWarnings("unused") // unused until public, for other wrappers - int supportedIndex; - } + /** + * Returns the best-matching desired locale. + * null if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale getDesiredULocale() { + return desiredULocale == null && desiredLocale != null ? + ULocale.forLocale(desiredLocale) : desiredULocale; + } + /** + * Returns the best-matching desired locale. + * null if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale getDesiredLocale() { + return desiredLocale == null && desiredULocale != null ? 
+ desiredULocale.toLocale() : desiredLocale; + } - // normally the default values, but can be set via constructor + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is null if the list of supported locales is empty and + * no explicit default locale is set. + * + * @return the best-matching supported locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale getSupportedULocale() { return supportedULocale; } + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is null if the list of supported locales is empty and + * no explicit default locale is set. + * + * @return the best-matching supported locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale getSupportedLocale() { return supportedLocale; } - private final int thresholdDistance; - private final int demotionPerAdditionalDesiredLocale; - private final DistanceOption distanceOption; + /** + * Returns the index of the best-matching desired locale in the input Iterable order. + * -1 if the list of desired locales is empty or if none matched well enough. + * + * @return the index of the best-matching desired locale, or -1. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public int getDesiredIndex() { return desiredIndex; } + + /** + * Returns the index of the best-matching supported locale in the constructor’s or builder’s input order + * (“set” Collection plus “added” locales). + * If the matcher was built from a locale list string, then the iteration order is that + * of a LocalePriorityList built from the same string. + * -1 if the list of supported locales is empty or if none matched well enough. 
+ * + * @return the index of the best-matching supported locale, or -1. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public int getSupportedIndex() { return supportedIndex; } - // built based on application's supported languages in constructor + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for date and number formatting, collation, etc. + * + *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn + * + * @return the service locale, combining the best-matching desired and supported locales. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale makeServiceULocale() { + ULocale bestDesired = getDesiredULocale(); + ULocale serviceLocale = supportedULocale; + if (!serviceLocale.equals(bestDesired) && bestDesired != null) { + ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale); + + // Copy the region from bestDesired, if there is one. + // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty. + String region = bestDesired.getCountry(); + if (!region.isEmpty()) { + b.setRegion(region); + } - private final ULocale[] supportedLocales; - private final Locale[] supportedJavaLocales; - private final Map supportedToIndex; - private final Map supportedLsrToIndexes; - // Array versions of the supportedLsrToIndexes keys and values. + // Copy the variants from bestDesired, if there are any. + // Note that this will override any serviceLocale variants. + // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster). + // TODO: Why replace? Why not append? + String variants = bestDesired.getVariant(); + if (!variants.isEmpty()) { + b.setVariant(variants); + } + + // Copy the extensions from bestDesired, if there are any. + // Note that this will override any serviceLocale extensions. + // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native" + // (replacing calendar). + // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale?? + // (Unsure about this one.) 
+ for (char extensionKey : bestDesired.getExtensionKeys()) { + b.setExtension(extensionKey, bestDesired.getExtension(extensionKey)); + } + serviceLocale = b.build(); + } + return serviceLocale; + } + + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for date and number formatting, collation, etc. + * + *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn + * + * @return the service locale, combining the best-matching desired and supported locales. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale makeServiceLocale() { + return makeServiceULocale().toLocale(); + } + } + + private final int thresholdDistance; + private final int demotionPerDesiredLocale; + private final FavorSubtag favorSubtag; + + // These are in input order. + private final ULocale[] supportedULocales; + private final Locale[] supportedLocales; + // These are in preference order: 1. Default locale 2. paradigm locales 3. others. + private final Map supportedLsrToIndex; + // Array versions of the supportedLsrToIndex keys and values. // The distance lookup loops over the supportedLsrs and returns the index of the best match. private final LSR[] supportedLsrs; - private final Indexes[] supportedIndexes; - private final ULocale defaultLocale; - private final Locale defaultJavaLocale; + private final int[] supportedIndexes; + private final ULocale defaultULocale; + private final Locale defaultLocale; private final int defaultLocaleIndex; + /** + * LocaleMatcher Builder. + * + * @see XLocaleMatcher#builder() + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ public static class Builder { - /** - * Supported locales. A Set, to avoid duplicates. - * Maintains iteration order for consistent matching behavior (first best match wins). - */ - private Set supportedLocales; + private List supportedLocales; private int thresholdDistance = -1; - private int demotionPerAdditionalDesiredLocale = -1;; + private Demotion demotion; private ULocale defaultLocale; - private DistanceOption distanceOption; + private FavorSubtag favor; + /** + * Parses the string like {@link LocalePriorityList} does and + * sets the supported locales accordingly. 
+ * Clears any previously set/added supported locales first. + * * @param locales the languagePriorityList to set * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ public Builder setSupportedLocales(String locales) { - return setSupportedLocales(LocalePriorityList.add(locales).build()); + return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales()); } - public Builder setSupportedLocales(Iterable locales) { - supportedLocales = new LinkedHashSet<>(); // maintain order - for (ULocale locale : locales) { - supportedLocales.add(locale); - } - return this; - } - public Builder setSupportedLocales(Collection locales) { - supportedLocales = new LinkedHashSet<>(locales); // maintain order + + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * @param locales the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setSupportedULocales(Collection locales) { + supportedLocales = new ArrayList<>(locales); return this; } - public Builder setSupportedJavaLocales(Collection locales) { - supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order + + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * @param locales the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
+ */ + public Builder setSupportedLocales(Collection locales) { + supportedLocales = new ArrayList<>(locales.size()); for (Locale locale : locales) { supportedLocales.add(ULocale.forLocale(locale)); } return this; } - public Builder addSupportedLocale(ULocale locale) { + + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder addSupportedULocale(ULocale locale) { if (supportedLocales == null) { - supportedLocales = new LinkedHashSet<>(); + supportedLocales = new ArrayList<>(); } supportedLocales.add(locale); return this; } + + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ public Builder addSupportedLocale(Locale locale) { - return addSupportedLocale(ULocale.forLocale(locale)); + return addSupportedULocale(ULocale.forLocale(locale)); } /** - * @param thresholdDistance the thresholdDistance to set, with -1 = default + * Sets the default locale; if null, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * + * @param defaultLocale the default locale * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
*/ - public Builder setThresholdDistance(int thresholdDistance) { - if (thresholdDistance > 100) { - thresholdDistance = 100; - } - this.thresholdDistance = thresholdDistance; + public Builder setDefaultULocale(ULocale defaultLocale) { + this.defaultLocale = defaultLocale; return this; } + /** - * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default + * Sets the default locale; if null, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * + * @param defaultLocale the default locale * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) { - this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale; + public Builder setDefaultLocale(Locale defaultLocale) { + this.defaultLocale = ULocale.forLocale(defaultLocale); return this; } /** - * Set the default language, with null = default = first supported language - * @param defaultLocale the default language + * If SCRIPT, then the language differences are smaller than script differences. + * This is used in situations (such as maps) where + * it is better to fall back to the same script than a similar language. + * + * @param subtag the subtag to favor * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - public Builder setDefaultLanguage(ULocale defaultLocale) { - this.defaultLocale = defaultLocale; + public Builder setFavorSubtag(FavorSubtag subtag) { + this.favor = subtag; + return this; + } + + /** + * Option for whether all desired locales are treated equally or + * earlier ones are preferred (this is the default). + * + * @param demotion the demotion per desired locale to set. 
+ * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setDemotionPerDesiredLocale(Demotion demotion) { + this.demotion = demotion; return this; } /** - * If true, then the language differences are smaller than than script differences. - * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language. - * @param distanceOption the distance option + * Internal only! + * + * @param thresholdDistance the thresholdDistance to set, with -1 = default * @return this Builder object + * @internal + * @deprecated This API is ICU internal only. */ - public Builder setDistanceOption(DistanceOption distanceOption) { - this.distanceOption = distanceOption; + @Deprecated + public Builder internalSetThresholdDistance(int thresholdDistance) { + if (thresholdDistance > 100) { + thresholdDistance = 100; + } + this.thresholdDistance = thresholdDistance; return this; } + /** + * Builds and returns a new locale matcher. + * This builder can continue to be used. + * + * @return new XLocaleMatcher. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
+ */ public XLocaleMatcher build() { return new XLocaleMatcher(this); } @@ -208,22 +495,25 @@ public final class XLocaleMatcher { if (defaultLocale != null) { s.append(" default=").append(defaultLocale.toString()); } - if (distanceOption != null) { - s.append(" distance=").append(distanceOption.toString()); + if (favor != null) { + s.append(" distance=").append(favor.toString()); } if (thresholdDistance >= 0) { s.append(String.format(" threshold=%d", thresholdDistance)); } - if (demotionPerAdditionalDesiredLocale >= 0) { - s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale)); + if (demotion != null) { + s.append(" demotion=").append(demotion.toString()); } return s.append('}').toString(); } } /** - * Returns a builder used in chaining parameters for building a Locale Matcher. - * @return this Builder object + * Returns a builder used in chaining parameters for building a LocaleMatcher. + * + * @return a new Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ public static Builder builder() { return new Builder(); @@ -235,16 +525,9 @@ public final class XLocaleMatcher { } /** Convenience method */ public XLocaleMatcher(LocalePriorityList supportedLocales) { - this(builder().setSupportedLocales(supportedLocales)); - } - /** Convenience method */ - public XLocaleMatcher(Set supportedLocales) { - this(builder().setSupportedLocales(supportedLocales)); + this(builder().setSupportedULocales(supportedLocales.getULocales())); } - /** - * Creates a locale matcher with the given Builder parameters. - */ private XLocaleMatcher(Builder builder) { thresholdDistance = builder.thresholdDistance < 0 ? LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance; @@ -252,244 +535,139 @@ public final class XLocaleMatcher { // so that when different types are used (e.g., java.util.Locale) // we can return those by parallel index. 
int supportedLocalesLength = builder.supportedLocales.size(); - supportedLocales = new ULocale[supportedLocalesLength]; - supportedJavaLocales = new Locale[supportedLocalesLength]; - supportedToIndex = new HashMap<>(supportedLocalesLength); + supportedULocales = new ULocale[supportedLocalesLength]; + supportedLocales = new Locale[supportedLocalesLength]; + // Supported LSRs in input order. + LSR lsrs[] = new LSR[supportedLocalesLength]; + // Also find the first supported locale whose LSR is + // the same as that for the default locale. + ULocale udef = builder.defaultLocale; + Locale def = null; + LSR defLSR = null; + int idef = -1; + if (udef != null) { + def = udef.toLocale(); + defLSR = getMaximalLsrOrUnd(udef); + } + int i = 0; + for (ULocale locale : builder.supportedLocales) { + supportedULocales[i] = locale; + supportedLocales[i] = locale.toLocale(); + LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale); + if (idef < 0 && defLSR != null && lsr.equals(defLSR)) { + idef = i; + } + ++i; + } + // We need an unordered map from LSR to first supported locale with that LSR, // and an ordered list of (LSR, Indexes). // We use a LinkedHashMap for both, // and insert the supported locales in the following order: - // 1. First supported locale. + // 1. Default locale, if it is supported. // 2. Priority locales in builder order. // 3. Remaining locales in builder order.
- supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength); - Map otherLsrToIndexes = null; - LSR firstLSR = null; - int i = 0; - for (ULocale locale : builder.supportedLocales) { - supportedLocales[i] = locale; - supportedJavaLocales[i] = locale.toLocale(); - // supportedToIndex.putIfAbsent(locale, i) - Integer oldIndex = supportedToIndex.get(locale); - if (oldIndex == null) { - supportedToIndex.put(locale, i); - } - LSR lsr = getMaximalLsrOrUnd(locale); - if (i == 0) { - firstLSR = lsr; - supportedLsrToIndexes.put(lsr, new Indexes(0)); - } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) { - addIndex(supportedLsrToIndexes, lsr, i); + supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); + Map otherLsrToIndex = null; + if (idef >= 0) { + supportedLsrToIndex.put(defLSR, idef); + } + i = 0; + for (ULocale locale : supportedULocales) { + if (i == idef) { continue; } + LSR lsr = lsrs[i]; + if (defLSR == null) { + assert i == 0; + udef = locale; + def = supportedLocales[0]; + defLSR = lsr; + idef = 0; + supportedLsrToIndex.put(lsr, 0); + } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) { + putIfAbsent(supportedLsrToIndex, lsr, i); } else { - if (otherLsrToIndexes == null) { - otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength); + if (otherLsrToIndex == null) { + otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); } - addIndex(otherLsrToIndexes, lsr, i); + putIfAbsent(otherLsrToIndex, lsr, i); } ++i; } - if (otherLsrToIndexes != null) { - supportedLsrToIndexes.putAll(otherLsrToIndexes); - } - int numSuppLsrs = supportedLsrToIndexes.size(); - supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]); - supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]); - ULocale def; - Locale jdef = null; - int idef = -1; - if (builder.defaultLocale != null) { - def = builder.defaultLocale; - } else if (supportedLocalesLength > 0) { - 
def = supportedLocales[0]; // first language - jdef = supportedJavaLocales[0]; - idef = 0; - } else { - def = null; + if (otherLsrToIndex != null) { + supportedLsrToIndex.putAll(otherLsrToIndex); } - if (jdef == null && def != null) { - jdef = def.toLocale(); + int numSuppLsrs = supportedLsrToIndex.size(); + supportedLsrs = new LSR[numSuppLsrs]; + supportedIndexes = new int[numSuppLsrs]; + i = 0; + for (Map.Entry entry : supportedLsrToIndex.entrySet()) { + supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()] + supportedIndexes[i++] = entry.getValue(); } + + defaultULocale = udef; defaultLocale = def; - defaultJavaLocale = jdef; defaultLocaleIndex = idef; - demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? - LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 : - builder.demotionPerAdditionalDesiredLocale; - distanceOption = builder.distanceOption; + demotionPerDesiredLocale = + builder.demotion == Demotion.NONE ? 0 : + LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION + favorSubtag = builder.favor; } - private static final void addIndex(Map lsrToIndexes, LSR lsr, int i) { - Indexes indexes = lsrToIndexes.get(lsr); - if (indexes == null) { - lsrToIndexes.put(lsr, new Indexes(i)); - } else { - indexes.add(i); + private static final void putIfAbsent(Map lsrToIndex, LSR lsr, int i) { + Integer index = lsrToIndex.get(lsr); + if (index == null) { + lsrToIndex.put(lsr, i); } } private static final LSR getMaximalLsrOrUnd(ULocale locale) { - if (locale.equals(UND_LOCALE)) { + if (locale.equals(UND_ULOCALE)) { return UND_LSR; } else { return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); } } - /** Convenience method */ - public ULocale getBestMatch(ULocale ulocale) { - return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale; - } - /** Convenience method */ - public ULocale getBestMatch(String languageList) { - return getBestMatch(LocalePriorityList.add(languageList).build(), null); - } 
- /** Convenience method */ - public ULocale getBestMatch(ULocale... locales) { - return getBestMatch(Arrays.asList(locales), null); - } - /** Convenience method */ - public ULocale getBestMatch(Iterable desiredLocales) { - return getBestMatch(desiredLocales, null); - } - - /** - * Get the best match between the desired languages and supported languages - * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first. - * @param outputBestDesired The one of the desired languages that matched best (can be null). - * Set to null if the best match was not below the threshold distance. - * @return the best match. - */ - public ULocale getBestMatch(Iterable desiredLocales, Output outputBestDesired) { - Iterator desiredIter = desiredLocales.iterator(); - if (!desiredIter.hasNext()) { - if (outputBestDesired != null) { - outputBestDesired.value = null; - } - if (TRACE_MATCHER) { - System.err.printf("Returning default %s: no desired languages\n", defaultLocale); - } - return defaultLocale; + private static final LSR getMaximalLsrOrUnd(Locale locale) { + if (locale.equals(UND_LOCALE)) { + return UND_LSR; + } else { + return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); } - ULocale desiredLocale = desiredIter.next(); - return getBestMatch(desiredLocale, desiredIter, outputBestDesired); } - /** - * @param desiredLocale First desired locale. - * @param remainingIter Remaining desired locales, null or empty if none. - * @param outputBestDesired If not null, - * will be set to the desired locale that matches the best supported one. - * @return the best supported locale. 
- */ - private ULocale getBestMatch(ULocale desiredLocale, Iterator remainingIter, - Output outputBestDesired) { - Result result = getBestMatch(desiredLocale, remainingIter); - if (outputBestDesired != null) { - outputBestDesired.value = result.desiredLocale; - } - return result.supportedLocale; - } + private static final class ULocaleLsrIterator extends LsrIterator { + private Iterator locales; + private ULocale current, remembered; - private Result getBestMatch(ULocale desiredLocale, Iterator remainingIter) { - int desiredIndex = 0; - int bestDesiredIndex = -1; - ULocale bestDesiredLocale = null; - int bestSupportedLsrIndex = 0; - for (int bestDistance = thresholdDistance; bestDistance > 0; - bestDistance -= demotionPerAdditionalDesiredLocale) { - // Quick check for exact locale match. - Integer supportedIndex = supportedToIndex.get(desiredLocale); - if (supportedIndex != null) { - if (TRACE_MATCHER) { - System.err.printf("Returning %s: desired=supported\n", desiredLocale); - } - int suppIndex = supportedIndex; - return new Result(desiredLocale, supportedLocales[suppIndex], - supportedJavaLocales[suppIndex], desiredIndex, suppIndex); - } - // Quick check for exact maximized LSR. - LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); - Indexes indexes = supportedLsrToIndexes.get(desiredLSR); - if (indexes != null) { - // If this is a supported LSR, return the first locale. - // We already know the exact locale isn't there. 
- int suppIndex = indexes.getFirst(); - ULocale result = supportedLocales[suppIndex]; - if (TRACE_MATCHER) { - System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result); - } - return new Result(desiredLocale, result, - supportedJavaLocales[suppIndex], desiredIndex, suppIndex); - } - int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( - desiredLSR, supportedLsrs, bestDistance, distanceOption); - if (bestIndexAndDistance >= 0) { - bestDistance = bestIndexAndDistance & 0xff; - bestDesiredIndex = desiredIndex; - bestDesiredLocale = desiredLocale; - bestSupportedLsrIndex = bestIndexAndDistance >> 8; - if (bestDistance == 0) { - break; - } - } - if (remainingIter == null || !remainingIter.hasNext()) { - break; - } - desiredLocale = remainingIter.next(); - ++desiredIndex; + ULocaleLsrIterator(Iterator locales) { + this.locales = locales; } - if (bestDesiredIndex < 0) { - if (TRACE_MATCHER) { - System.err.printf("Returning default %s: no good match\n", defaultLocale); - } - return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex); - } - // Pick exact match if there is one. - // The length of the list is normally 1. - Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex]; - int suppIndex; - for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) { - ULocale locale = supportedLocales[suppIndex]; - if (bestDesiredLocale.equals(locale)) { - if (TRACE_MATCHER) { - System.err.printf("Returning %s: desired=best matching supported language\n", - bestDesiredLocale); - } - return new Result(bestDesiredLocale, locale, - supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex); - } + + @Override + public boolean hasNext() { + return locales.hasNext(); } - // Otherwise return the first of the supported languages that share the best-matching LSR. 
- suppIndex = bestSupportedIndexes.getFirst(); - ULocale result = supportedLocales[suppIndex]; - if (TRACE_MATCHER) { - System.err.printf("Returning %s: first best matching supported language\n", result); + + @Override + public LSR next() { + current = locales.next(); + return getMaximalLsrOrUnd(current); } - return new Result(bestDesiredLocale, result, - supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex); - } - /** - * Get the best match between the desired languages and supported languages - * @param desiredLocale the supplied user's language. - * @param outputBestDesired The one of the desired languages that matched best. - * Set to null if the best match was not below the threshold distance. - * @return the best match. - */ - public ULocale getBestMatch(ULocale desiredLocale, Output outputBestDesired) { - return getBestMatch(desiredLocale, null, outputBestDesired); + @Override + public void rememberCurrent(int desiredIndex) { + bestDesiredIndex = desiredIndex; + remembered = current; + } } - /** - * Converts Locales to ULocales on the fly. - */ - private static final class LocalesWrapper implements Iterator { + private static final class LocaleLsrIterator extends LsrIterator { private Iterator locales; - // Cache locales to avoid conversion of the result. 
- private Locale first, second; - private List remaining; + private Locale current, remembered; - LocalesWrapper(Iterator locales) { + LocaleLsrIterator(Iterator locales) { this.locales = locales; } @@ -499,148 +677,191 @@ public final class XLocaleMatcher { } @Override - public ULocale next() { - Locale locale = locales.next(); - if (first == null) { - first = locale; - } else if (second == null) { - second = locale; - } else { - if (remaining == null) { - remaining = new ArrayList<>(); - } - remaining.add(locale); - } - return ULocale.forLocale(locale); + public LSR next() { + current = locales.next(); + return getMaximalLsrOrUnd(current); } - Locale getJavaLocale(int i) { - if (i == 0) { - return first; - } else if (i == 1) { - return second; - } else { - // TODO: test code coverage - return remaining.get(i - 2); - } + @Override + public void rememberCurrent(int desiredIndex) { + bestDesiredIndex = desiredIndex; + remembered = current; } + } - @Override - public void remove() { - throw new UnsupportedOperationException(); + public ULocale getBestMatch(ULocale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale; + } + + public ULocale getBestMatch(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return defaultULocale; } + ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return suppIndex >= 0 ? 
supportedULocales[suppIndex] : defaultULocale; } - public Locale getBestJavaMatch(Iterable desiredLocales, Output outputBestDesired) { + public ULocale getBestMatch(String desiredLocaleList) { + return getBestMatch(LocalePriorityList.add(desiredLocaleList).build()); + } + + public Locale getBestLocale(Locale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; + } + + public Locale getBestLocale(Iterable desiredLocales) { Iterator desiredIter = desiredLocales.iterator(); if (!desiredIter.hasNext()) { - if (outputBestDesired != null) { - outputBestDesired.value = null; - } - if (TRACE_MATCHER) { - System.err.printf("Returning default %s: no desired languages\n", defaultLocale); - } - return defaultJavaLocale; - } - LocalesWrapper wrapper = new LocalesWrapper(desiredIter); - ULocale desiredLocale = wrapper.next(); - Result result = getBestMatch(desiredLocale, NULL_ITERATOR); - if (outputBestDesired != null) { - outputBestDesired.value = result.desiredIndex >= 0 ? - wrapper.getJavaLocale(result.desiredIndex) : null; + return defaultLocale; } - return result.supportedJavaLocale; + LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; } - public Locale getBestJavaMatch(Locale desiredLocale, Output outputBestDesired) { - ULocale desiredULocale = ULocale.forLocale(desiredLocale); - Result result = getBestMatch(desiredULocale, NULL_ITERATOR); - if (outputBestDesired != null) { - outputBestDesired.value = result.desiredIndex >= 0 ? 
desiredLocale : null; + private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) { + if (suppIndex < 0) { + return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); + } else if (desiredLocale != null) { + return new Result(desiredLocale, supportedULocales[suppIndex], + null, supportedLocales[suppIndex], 0, suppIndex); + } else { + return new Result(lsrIter.remembered, supportedULocales[suppIndex], + null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex); } - return result.supportedJavaLocale; } - /** Combine features of the desired locale into those of the supported, and return result. */ - public static ULocale combine(ULocale bestSupported, ULocale bestDesired) { - // for examples of extensions, variants, see - // http://unicode.org/repos/cldr/tags/latest/common/bcp47/ - // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml + private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) { + if (suppIndex < 0) { + return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); + } else if (desiredLocale != null) { + return new Result(null, supportedULocales[suppIndex], + desiredLocale, supportedLocales[suppIndex], 0, suppIndex); + } else { + return new Result(null, supportedULocales[suppIndex], + lsrIter.remembered, supportedLocales[suppIndex], + lsrIter.bestDesiredIndex, suppIndex); + } + } - if (!bestSupported.equals(bestDesired) && bestDesired != null) { - // add region, variants, extensions - ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported); + public Result getBestMatchResult(ULocale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return makeResult(desiredLocale, null, suppIndex); + } - // copy the region from the desired, if there is one - String region = bestDesired.getCountry(); - if (!region.isEmpty()) { - 
b.setRegion(region); - } + /** + * Returns the best match between the desired and supported locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @return the best-matching pair of a desired and a supported locale. + */ + public Result getBestMatchResult(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return makeResult(UND_ULOCALE, null, -1); + } + ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return makeResult(null, lsrIter, suppIndex); + } - // copy the variants from desired, if there is one - // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster) - String variants = bestDesired.getVariant(); - if (!variants.isEmpty()) { - b.setVariant(variants); - } + public Result getBestLocaleResult(Locale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return makeResult(desiredLocale, null, suppIndex); + } - // copy the extensions from desired, if there are any - // note that this will override any subkeys. 
Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar) - for (char extensionKey : bestDesired.getExtensionKeys()) { - b.setExtension(extensionKey, bestDesired.getExtension(extensionKey)); - } - bestSupported = b.build(); + public Result getBestLocaleResult(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return makeResult(UND_LOCALE, null, -1); } - return bestSupported; + LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return makeResult(null, lsrIter, suppIndex); } - /** Returns the distance between the two languages. The values are not necessarily symmetric. - * @param desired A locale desired by the user - * @param supported A locale supported by a program. - * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance). - * A language is first maximized with add likely subtags, then compared. + /** + * @param desiredLSR The first desired locale's LSR. + * @param remainingIter Remaining desired LSRs, null or empty if none. + * @return the index of the best-matching supported locale, or -1 if there is no good match. 
*/ - public int distance(ULocale desired, ULocale supported) { - return LocaleDistance.INSTANCE.getBestIndexAndDistance( - XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired), - new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) }, - thresholdDistance, distanceOption) & 0xff; - } - - /** Convenience method */ - public int distance(String desiredLanguage, String supportedLanguage) { - return LocaleDistance.INSTANCE.getBestIndexAndDistance( - XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)), - new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) }, - thresholdDistance, distanceOption) & 0xff; + private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) { + int desiredIndex = 0; + int bestSupportedLsrIndex = -1; + for (int bestDistance = thresholdDistance;;) { + // Quick check for exact maximized LSR. + Integer index = supportedLsrToIndex.get(desiredLSR); + if (index != null) { + int suppIndex = index; + if (TRACE_MATCHER) { + System.err.printf("Returning %s: desiredLSR=supportedLSR\n", + supportedULocales[suppIndex]); + } + if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } + return suppIndex; + } + int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( + desiredLSR, supportedLsrs, bestDistance, favorSubtag); + if (bestIndexAndDistance >= 0) { + bestDistance = bestIndexAndDistance & 0xff; + if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } + bestSupportedLsrIndex = bestIndexAndDistance >> 8; + } + if ((bestDistance -= demotionPerDesiredLocale) <= 0) { + break; + } + if (remainingIter == null || !remainingIter.hasNext()) { + break; + } + desiredLSR = remainingIter.next(); + } + if (bestSupportedLsrIndex < 0) { + if (TRACE_MATCHER) { + System.err.printf("Returning default %s: no good match\n", defaultULocale); + } + return -1; + } + int suppIndex = supportedIndexes[bestSupportedLsrIndex]; + if 
(TRACE_MATCHER) { + System.err.printf("Returning %s: best matching supported locale\n", + supportedULocales[suppIndex]); + } + return suppIndex; } @Override public String toString() { StringBuilder s = new StringBuilder().append("{XLocaleMatcher"); - if (supportedLocales.length > 0) { - s.append(" supported={").append(supportedLocales[0].toString()); - for (int i = 1; i < supportedLocales.length; ++i) { - s.append(", ").append(supportedLocales[1].toString()); + if (supportedULocales.length > 0) { + s.append(" supported={").append(supportedULocales[0].toString()); + for (int i = 1; i < supportedULocales.length; ++i) { + s.append(", ").append(supportedULocales[i].toString()); } s.append('}'); } - s.append(" default=").append(Objects.toString(defaultLocale)); - if (distanceOption != null) { - s.append(" distance=").append(distanceOption.toString()); + s.append(" default=").append(Objects.toString(defaultULocale)); + if (favorSubtag != null) { + s.append(" distance=").append(favorSubtag.toString()); } if (thresholdDistance >= 0) { s.append(String.format(" threshold=%d", thresholdDistance)); } - s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale)); + s.append(String.format(" demotion=%d", demotionPerDesiredLocale)); return s.append('}').toString(); } - /** Return the inverse of the distance: that is, 1-distance(desired, supported) */ - public double match(ULocale desired, ULocale supported) { - return (100-distance(desired, supported))/100.0; - } - /** * Returns a fraction between 0 and 1, where 1 means that the languages are a * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0. @@ -652,11 +873,16 @@ public final class XLocaleMatcher { * @param supported Supported locale * @param supportedMax Maximized locale (using likely subtags) * @return value between 0 and 1, inclusive. - * @deprecated Use the form with 2 parameters instead. 
+ * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales. */ @Deprecated public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { - return match(desired, supported); + // Returns the inverse of the distance: That is, 1-distance(desired, supported). + int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance( + XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired), + new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) }, + thresholdDistance, favorSubtag) & 0xff; + return (100 - distance) / 100.0; } /** @@ -671,11 +897,4 @@ public final class XLocaleMatcher { // TODO return null; } - - /** - * @return the thresholdDistance. Any distance above this value is treated as a match failure. - */ - public int getThresholdDistance() { - return thresholdDistance; - } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java index 7df1a857fdb..c1bf6af2f92 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java @@ -25,9 +25,9 @@ import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.Relation; import com.ibm.icu.impl.Row; import com.ibm.icu.impl.Row.R3; -import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption; import com.ibm.icu.impl.locale.XLocaleMatcher; import com.ibm.icu.impl.locale.XLocaleMatcher.Builder; +import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; /** * Provides a way to match the languages (locales) supported by a product to the @@ -864,30 +864,15 @@ public class LocaleMatcher { transient ULocale xDefaultLanguage = null; transient boolean xFavorScript = false; - /** - * Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch). - * The values are not necessarily symmetric. 
- * @param desired A locale desired by the user - * @param supported A locale supported by a program. - * @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance). - * A language is first maximized with add likely subtags, then compared. - * @internal - * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. - */ - @Deprecated - public int distance(ULocale desired, ULocale supported) { - return getLocaleMatcher().distance(desired, supported); - } - private synchronized XLocaleMatcher getLocaleMatcher() { if (xLocaleMatcher == null) { Builder builder = XLocaleMatcher.builder(); - builder.setSupportedLocales(languagePriorityList); + builder.setSupportedULocales(languagePriorityList.getULocales()); if (xDefaultLanguage != null) { - builder.setDefaultLanguage(xDefaultLanguage); + builder.setDefaultULocale(xDefaultLanguage); } if (xFavorScript) { - builder.setDistanceOption(DistanceOption.SCRIPT_FIRST); + builder.setFavorSubtag(FavorSubtag.SCRIPT); } xLocaleMatcher = builder.build(); } @@ -908,7 +893,13 @@ public class LocaleMatcher { */ @Deprecated public ULocale getBestMatch(LinkedHashSet desiredLanguages, Output outputBestDesired) { - return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired); + if (outputBestDesired == null) { + return getLocaleMatcher().getBestMatch(desiredLanguages); + } else { + XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages); + outputBestDesired.value = result.getDesiredULocale(); + return result.getSupportedULocale(); + } } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java index b8a1a7402d9..0726b1d79f1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java @@ -22,43 +22,45 @@ import 
java.util.regex.Matcher; import java.util.regex.Pattern; /** - * Provides an immutable list of languages (locales) in priority order. - * The string format is based on the Accept-Language format + * Provides an immutable list of languages/locales in priority order. + * The string format is based on the Accept-Language format * http://www.ietf.org/rfc/rfc2616.txt, such as * "af, en, fr;q=0.9". Syntactically it is slightly * more lenient, in allowing extra whitespace between elements, extra commas, * and more than 3 decimals (on input), and pins between 0 and 1. + * *

In theory, Accept-Language indicates the relative 'quality' of each item, - * but in practice, all of the browsers just take an ordered list, like + * but in practice, all of the browsers just take an ordered list, like * "en, fr, de", and synthesize arbitrary quality values that put these in the * right order, like: "en, fr;q=0.7, de;q=0.3". The quality values in these de facto * semantics thus have nothing to do with the relative qualities of the * original. Accept-Language also doesn't * specify the interpretation of multiple instances, eg what "en, fr, en;q=.5" * means. - *

There are various ways to build a LanguagePriorityList, such + *

There are various ways to build a LocalePriorityList, such * as using the following equivalent patterns: - * + * *

- * list = LanguagePriorityList.add("af, en, fr;q=0.9").build();
- * 
- * list2 = LanguagePriorityList
+ * list = LocalePriorityList.add("af, en, fr;q=0.9").build();
+ *
+ * list2 = LocalePriorityList
  *  .add(ULocale.forString("af"))
  *  .add(ULocale.ENGLISH)
  *  .add(ULocale.FRENCH, 0.9d)
  *  .build();
  * 
- * When the list is built, the internal values are sorted in descending order by - * weight, and then by input order. That is, if two languages have the same weight, the first one in the original order - * comes first. If exactly the same language tag appears multiple times, - * the last one wins. - * - * There are two options when building. If preserveWeights are on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following: + * When the list is built, the internal values are sorted in descending order by weight, + * and then by input order. + * That is, if two languages/locales have the same weight, the first one in the original order comes first. + * If exactly the same language tag appears multiple times, the last one wins. + * + *

There are two options when building. + * If preserveWeights is on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following: *

 en;q=1.0
  * de;q=1.0
  * fr;q=0.7
  * ja;q=0.3
- * If it is off (the default), then all weights are reset to 1.0 after reordering. + * If it is off (the default), then all weights are reset to 1.0 after reordering. * This is to match the effect of the Accept-Language semantics as used in browsers, and results in the following: * *
 en;q=1.0
  * de;q=1.0
@@ -73,49 +75,48 @@ public class LocalePriorityList implements Iterable {
 
     private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
     private static final Pattern weightSplitter = Pattern
-    .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
+            .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
     private final Map languagesAndWeights;
 
     /**
-     * Add a language code to the list being built, with weight 1.0.
-     * 
-     * @param languageCode locale/language to be added
-     * @return internal builder, for chaining
+     * Creates a Builder and adds locales, each with weight 1.0.
+     *
+     * @param locales locales/languages to be added
+     * @return a new builder with these locales, for chaining
      * @stable ICU 4.4
      */
-    public static Builder add(ULocale... languageCode) {
-        return new Builder().add(languageCode);
+    public static Builder add(ULocale... locales) {
+        return new Builder().add(locales);
     }
 
     /**
-     * Add a language code to the list being built, with specified weight.
-     * 
-     * @param languageCode locale/language to be added
+     * Creates a Builder and adds a locale with a specified weight.
+     *
+     * @param locale locale/language to be added
      * @param weight value from 0.0 to 1.0
-     * @return internal builder, for chaining
+     * @return a new builder with this locale, for chaining
      * @stable ICU 4.4
      */
-    public static Builder add(ULocale languageCode, final double weight) {
-        return new Builder().add(languageCode, weight);
+    public static Builder add(ULocale locale, final double weight) {
+        return new Builder().add(locale, weight);
     }
 
     /**
-     * Add a language priority list.
-     * 
-     * @param languagePriorityList list to add all the members of
-     * @return internal builder, for chaining
+     * Creates a Builder and adds locales with weights.
+     *
+     * @param list list of locales with weights
+     * @return a new builder with these locales, for chaining
      * @stable ICU 4.4
      */
-    public static Builder add(LocalePriorityList languagePriorityList) {
-        return new Builder().add(languagePriorityList);
+    public static Builder add(LocalePriorityList list) {
+        return new Builder().add(list);
     }
 
     /**
-     * Add language codes to the list being built, using a string in rfc2616
-     * (lenient) format, where each language is a valid {@link ULocale}.
-     * 
-     * @param acceptLanguageString String in rfc2616 format (but leniently parsed)
-     * @return internal builder, for chaining
+     * Creates a Builder, parses the RFC 2616 string, and adds locales with weights accordingly.
+     *
+     * @param acceptLanguageString String in RFC 2616 format (leniently parsed)
+     * @return a new builder with these locales, for chaining
      * @stable ICU 4.4
      */
     public static Builder add(String acceptLanguageString) {
@@ -123,15 +124,27 @@ public class LocalePriorityList implements Iterable {
     }
 
     /**
-     * Return the weight for a given language, or null if there is none. Note that
-     * the weights may be adjusted from those used to build the list.
-     * 
-     * @param language to get weight of
+     * Returns the weight for a given language/locale, or null if there is none.
+     * Note that the weights may be adjusted from those used to build the list.
+     *
+     * @param locale to get weight of
      * @return weight
      * @stable ICU 4.4
      */
-    public Double getWeight(ULocale language) {
-        return languagesAndWeights.get(language);
+    public Double getWeight(ULocale locale) {
+        return languagesAndWeights.get(locale);
+    }
+
+    /**
+     * Returns the locales as an immutable Set view.
+     * The set has the same iteration order as this object itself.
+     *
+     * @return the locales
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Set getULocales() {
+        return languagesAndWeights.keySet();
     }
 
     /**
@@ -158,6 +171,7 @@ public class LocalePriorityList implements Iterable {
      * {@inheritDoc}
      * @stable ICU 4.4
      */
+    @Override
     public Iterator iterator() {
         return languagesAndWeights.keySet().iterator();
     }
@@ -199,7 +213,7 @@ public class LocalePriorityList implements Iterable {
     }
 
     /**
-     * Class used for building LanguagePriorityLists
+     * Class used for building LocalePriorityLists.
      * @stable ICU 4.4
      */
     public static class Builder {
@@ -207,8 +221,8 @@ public class LocalePriorityList implements Iterable {
          * These store the input languages and weights, in chronological order,
          * where later additions override previous ones.
          */
-        private final Map languageToWeight 
-        = new LinkedHashMap();
+        private final Map languageToWeight
+        = new LinkedHashMap<>();
 
         /*
          * Private constructor, only used by LocalePriorityList
@@ -219,7 +233,7 @@ public class LocalePriorityList implements Iterable {
         /**
          * Creates a LocalePriorityList.  This is equivalent to
          * {@link Builder#build(boolean) Builder.build(false)}.
-         * 
+         *
          * @return A LocalePriorityList
          * @stable ICU 4.4
          */
@@ -229,27 +243,26 @@ public class LocalePriorityList implements Iterable {
 
         /**
          * Creates a LocalePriorityList.
-         * 
-         * @param preserveWeights when true, the weights originally came
-         * from a language priority list specified by add() are preserved.
+         *
+         * @param preserveWeights when true, each locale's given weight is preserved.
          * @return A LocalePriorityList
          * @stable ICU 4.4
          */
         public LocalePriorityList build(boolean preserveWeights) {
             // Walk through the input list, collecting the items with the same weights.
-            final Map> doubleCheck = new TreeMap>(
+            final Map> doubleCheck = new TreeMap<>(
                     myDescendingDouble);
             for (final ULocale lang : languageToWeight.keySet()) {
                 Double weight = languageToWeight.get(lang);
                 Set s = doubleCheck.get(weight);
                 if (s == null) {
-                    doubleCheck.put(weight, s = new LinkedHashSet());
+                    doubleCheck.put(weight, s = new LinkedHashSet<>());
                 }
                 s.add(lang);
             }
             // We now have a bunch of items sorted by weight, then chronologically.
             // We can now create a list in the right order
-            final Map temp = new LinkedHashMap();
+            final Map temp = new LinkedHashMap<>();
             for (Entry> langEntry : doubleCheck.entrySet()) {
                 final Double weight = langEntry.getKey();
                 for (final ULocale lang : langEntry.getValue()) {
@@ -260,73 +273,72 @@ public class LocalePriorityList implements Iterable {
         }
 
         /**
-         * Adds a LocalePriorityList
-         * 
-         * @param languagePriorityList a LocalePriorityList
+         * Adds locales with weights.
+         *
+         * @param list list of locales with weights
          * @return this, for chaining
          * @stable ICU 4.4
          */
-        public Builder add(
-                final LocalePriorityList languagePriorityList) {
-            for (final ULocale language : languagePriorityList.languagesAndWeights
+        public Builder add(final LocalePriorityList list) {
+            for (final ULocale language : list.languagesAndWeights
                     .keySet()) {
-                add(language, languagePriorityList.languagesAndWeights.get(language));
+                add(language, list.languagesAndWeights.get(language));
             }
             return this;
         }
 
         /**
-         * Adds a new language code, with weight = 1.0.
-         * 
-         * @param languageCode to add with weight 1.0
+         * Adds a locale with weight 1.0.
+         *
+         * @param locale to add with weight 1.0
          * @return this, for chaining
          * @stable ICU 4.4
          */
-        public Builder add(final ULocale languageCode) {
-            return add(languageCode, D1);
+        public Builder add(final ULocale locale) {
+            return add(locale, D1);
         }
 
         /**
-         * Adds language codes, with each having weight = 1.0.
-         * 
-         * @param languageCodes List of language codes.
+         * Adds locales, each with weight 1.0.
+         *
+         * @param locales locales/languages to be added
          * @return this, for chaining.
          * @stable ICU 4.4
          */
-        public Builder add(ULocale... languageCodes) {
-            for (final ULocale languageCode : languageCodes) {
+        public Builder add(ULocale... locales) {
+            for (final ULocale languageCode : locales) {
                 add(languageCode, D1);
             }
             return this;
         }
 
         /**
-         * Adds a new supported languageCode, with specified weight. Overrides any
-         * previous weight for the language.
-         * 
-         * @param languageCode language/locale to add
+         * Adds a locale with a specified weight.
+         * Overrides any previous weight for the locale.
+         * Removes the locale if the weight is zero or negative.
+         *
+         * @param locale language/locale to add
-         * @param weight value between 0.0 and 1.1
+         * @param weight value between 0.0 and 1.0
          * @return this, for chaining.
          * @stable ICU 4.4
          */
-        public Builder add(final ULocale languageCode,
-                double weight) {
-            if (languageToWeight.containsKey(languageCode)) {
-                languageToWeight.remove(languageCode);
+        public Builder add(final ULocale locale, double weight) {
+            if (languageToWeight.containsKey(locale)) {
+                languageToWeight.remove(locale);
             }
             if (weight <= D0) {
                 return this; // skip zeros
             } else if (weight > D1) {
                 weight = D1;
             }
-            languageToWeight.put(languageCode, weight);
+            languageToWeight.put(locale, weight);
             return this;
         }
 
         /**
-         * Adds rfc2616 list.
-         * 
-         * @param acceptLanguageList in rfc2616 format
+         * Parses the RFC 2616 string, and adds locales with weights accordingly.
+         *
+         * @param acceptLanguageList in RFC 2616 format (leniently parsed)
          * @return this, for chaining.
          * @stable ICU 4.4
          */
@@ -351,6 +363,7 @@ public class LocalePriorityList implements Iterable {
     }
 
     private static Comparator myDescendingDouble = new Comparator() {
+        @Override
         public int compare(Double o1, Double o2) {
             int result = o1.compareTo(o2);
             return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
index 0b1f7cda5b5..e80c7f5196e 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@@ -451,7 +451,7 @@ public class LocaleMatcherTest extends TestFmwk {
     @Test
     public void testExactMatches() {
         String lastBase = "";
-        TreeSet sorted = new TreeSet();
+        TreeSet sorted = new TreeSet<>();
         for (ULocale loc : ULocale.getAvailableLocales()) {
             String language = loc.getLanguage();
             if (!lastBase.equals(language)) {
@@ -650,10 +650,7 @@ public class LocaleMatcherTest extends TestFmwk {
         ULocale bulgarian = new ULocale("bg");
         ULocale russian = new ULocale("ru");
 
-        assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
-        assertEquals("es-ES/DE", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
-
-        Output outputBestDesired = new Output();
+        Output outputBestDesired = new Output<>();
 
         ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
         assertEquals(ULocale.ITALIAN, best);
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
index c5d57ca0534..cb32b1fbce5 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
@@ -4,9 +4,7 @@ package com.ibm.icu.dev.test.util;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.junit.Ignore;
 import org.junit.Test;
@@ -15,7 +13,7 @@ import org.junit.runners.JUnit4;
 
 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.LocaleMatcher;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
@@ -94,8 +92,8 @@ public class XLocaleDistanceTest extends TestFmwk {
                 newLikelyTime += System.nanoTime()-temp;
 
                 temp = System.nanoTime();
-                int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
-                int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
+                int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, FavorSubtag.LANGUAGE);
+                int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, FavorSubtag.LANGUAGE);
                 newTimeMinusLikely += System.nanoTime()-temp;
             }
         }
@@ -112,50 +110,6 @@ public class XLocaleDistanceTest extends TestFmwk {
         //logln("totalInt:\t" + (intTime)/maxIterations);
     }
 
-    @Test
-    public void testInternalTable() {
-        Set strings = localeDistance.testOnlyGetDistanceTable(false).keySet();
-        // Check that the table has a depth of exactly 3 (desired, supported) pairs everyplace
-        // by removing every prefix of a 6-subtag string from a copy of the set of strings.
-        // Any remaining string is not a prefix of a full-depth string.
-        Set remaining = new HashSet<>(strings);
-        // Check that ANY, ANY is always present.
-        assertTrue("*-*", strings.contains("*-*"));
-        for (String s : strings) {
-            int num = countSubtags(s);
-            assertTrue(s, 1 <= num && num <= 6);
-            if (num > 1) {
-                String oneShorter = removeLastSubtag(s);
-                assertTrue(oneShorter, strings.contains(oneShorter));
-            }
-            if (num == 2 || num == 4) {
-                String sPlusAnyAny = s + "-*-*";
-                assertTrue(sPlusAnyAny, strings.contains(sPlusAnyAny));
-            } else if (num == 6) {
-                for (;; --num) {
-                    remaining.remove(s);
-                    if (num == 1) { break; }
-                    s = removeLastSubtag(s);
-                }
-            }
-        }
-        assertTrue("strings that do not lead to 6-subtag matches", remaining.isEmpty());
-    }
-
-    private static final int countSubtags(String s) {
-        if (s.isEmpty()) { return 0; }
-        int num = 1;
-        for (int pos = 0; (pos = s.indexOf('-', pos)) >= 0; ++pos) {
-            ++num;
-        }
-        return num;
-    }
-
-    private static final String removeLastSubtag(String s) {
-        int last = s.lastIndexOf('-');
-        return s.substring(0, last);
-    }
-
     @Test
     public void testShowDistanceTable() {
         if (isVerbose()) {
@@ -173,7 +127,7 @@ public class XLocaleDistanceTest extends TestFmwk {
 
     class MyTestFileHandler extends DataDrivenTestHelper {
         Output bestDesired = new Output<>();
-        private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
+        private FavorSubtag favorSubtag = FavorSubtag.LANGUAGE;
         private Integer threshold = localeDistance.getDefaultScriptDistance();
 
         @Override
@@ -182,20 +136,21 @@ public class XLocaleDistanceTest extends TestFmwk {
                 breakpoint = false; // put debugger breakpoint here to break at @debug in test file
             }
             Arguments args = new Arguments(arguments);
-            int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
-            int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
             String desiredTag = args.desired.toLanguageTag();
             String supportedTag = args.supported.toLanguageTag();
             final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
-            if (assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment, args.desiredToSupported, desiredToSupportedActual)) {
-                assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment, args.supportedToDesired, supportedToDesiredActual);
-            }
+            int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, favorSubtag);
+            assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment,
+                    args.supportedToDesired, supportedToDesiredActual);
+            int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, favorSubtag);
+            assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment,
+                    args.desiredToSupported, desiredToSupportedActual);
         }
         @Override
         public void handleParams(String comment, List arguments) {
             String switchArg = arguments.get(0);
-            if (switchArg.equals("@DistanceOption")) {
-                distanceOption = DistanceOption.valueOf(arguments.get(1));
+            if (switchArg.equals("@FavorSubtag")) {
+                favorSubtag = FavorSubtag.valueOf(arguments.get(1));
             } else if (switchArg.equals("@Threshold")) {
                 threshold = Integer.valueOf(arguments.get(1));
             } else {
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
index 7a4df3b3061..f06e8be01c7 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
@@ -7,7 +7,6 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
 
@@ -16,12 +15,12 @@ import org.junit.runner.RunWith;
 
 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
 import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.impl.locale.XLocaleMatcher;
+import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.LocaleMatcher;
 import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
 
 import junitparams.JUnitParamsRunner;
@@ -51,8 +50,9 @@ public class XLocaleMatcherTest extends TestFmwk {
     }
 
     @SuppressWarnings("unused")
-    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
-        return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
+    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
+        return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
+                internalSetThresholdDistance(d).build();
     }
 
     //    public void testParentLocales() {
@@ -104,10 +104,6 @@ public class XLocaleMatcherTest extends TestFmwk {
 //    }
 
 
-    private void assertEquals(Object expected, Object string) {
-        assertEquals("", expected, string);
-    }
-
     /**
      * If all the base languages are the same, then each sublocale matches
      * itself most closely
@@ -139,40 +135,41 @@ public class XLocaleMatcherTest extends TestFmwk {
         check2(sorted);
     }
 
+    private static final ULocale posix = new ULocale("en_US_POSIX");
+
     /**
      * @param sorted
      */
     private void check2(Set sorted) {
-        // TODO Auto-generated method stub
         logln("Checking: " + sorted);
         XLocaleMatcher matcher = newXLocaleMatcher(
             LocalePriorityList.add(
                 sorted.toArray(new ULocale[sorted.size()]))
             .build());
         for (ULocale loc : sorted) {
-            String stringLoc = loc.toString();
-            assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+            // The result may not be the exact same locale, but it must be equivalent.
+            // Variants and extensions are ignored.
+            if (loc.equals(posix)) { continue; }
+            ULocale max = ULocale.addLikelySubtags(loc);
+            ULocale best = matcher.getBestMatch(loc);
+            ULocale maxBest = ULocale.addLikelySubtags(best);
+            assertEquals(loc.toString(), max, maxBest);
         }
     }
 
     @Test
-    public void testComputeDistance_monkeyTest() {
-        String[] codes = ULocale.getISOCountries();
-        Random random = new Random();
-        XLocaleMatcher lm = newXLocaleMatcher();
-        for (int i = 0; i < 1000; ++i) {
-            String x = codes[random.nextInt(codes.length)];
-            String y = codes[random.nextInt(codes.length)];
-            double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
-            if (x.equals("ZZ") || y.equals("ZZ")) {
-                assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
-            } else if (x.equals(y)) {
-                assertEquals("dist(x,x) = 0", 0.0, d);
-            } else {
-                assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
-                assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
-            }
-        }
+    public void testDemotion() {
+        LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
+        LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
+        XLocaleMatcher noDemotion = XLocaleMatcher.builder().
+                setSupportedULocales(supported.getULocales()).
+                setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build();
+        assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
+
+        XLocaleMatcher regionDemotion = XLocaleMatcher.builder().
+                setSupportedULocales(supported.getULocales()).
+                setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build();
+        assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
     }
 
     private static final class PerfCase {
@@ -304,9 +301,9 @@ public class XLocaleMatcherTest extends TestFmwk {
         for (PerfCase pc : pcs) {
             final ULocale desired = pc.desired;
 
-            assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
-            assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
-            assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+            assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
+            assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
+            assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
 
             timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
             timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
@@ -350,9 +347,11 @@ public class XLocaleMatcherTest extends TestFmwk {
                 String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
                         timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
                 timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
+
+        maximizePerf();
     }
 
-    private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
+    private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
         long start = System.nanoTime();
         for (int i = iterations; i > 0; --i) {
             matcher.getBestMatch(desired);
@@ -361,7 +360,7 @@ public class XLocaleMatcherTest extends TestFmwk {
         return (delta / iterations);
     }
 
-    private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
+    private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
         long start = System.nanoTime();
         for (int i = iterations; i > 0; --i) {
             matcher.getBestMatch(desired);
@@ -370,6 +369,37 @@ public class XLocaleMatcherTest extends TestFmwk {
         return (delta / iterations);
     }
 
+    private void maximizePerf() {
+        final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+                "zh-CN, zh-TW, zu";
+        LocalePriorityList list = LocalePriorityList.add(tags).build();
+        int few = 1000;
+        long t = timeMaximize(list, few);  // warm up
+        t = timeMaximize(list, few);  // measure for scale
+        long targetTime = 100000000L;  // 10^8 ns = 0.1s
+        int iterations = (int)((targetTime * few) / t);
+        t = timeMaximize(list, iterations);
+        int length = 0;
+        for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
+        System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
+                t + " ns / " + iterations + " iterations / " + length + " locales");
+    }
+
+    // Returns the total elapsed time in nanoseconds, not the time per iteration.
+    private  static long timeMaximize(Iterable list, int iterations) {
+        long start = System.nanoTime();
+        for (int i = iterations; i > 0; --i) {
+            for (ULocale locale : list) {
+                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+            }
+        }
+        return System.nanoTime() - start;
+    }
+
     private static final class TestCase implements Cloneable {
         private static final String ENDL = System.getProperties().getProperty("line.separator");
 
@@ -384,7 +414,7 @@ public class XLocaleMatcherTest extends TestFmwk {
 
         String supported = "";
         String def = "";
-        String distance = "";
+        String favor = "";
         String threshold = "";
         String desired = "";
         String expMatch = "";
@@ -405,12 +435,12 @@ public class XLocaleMatcherTest extends TestFmwk {
 
             supported = "";
             def = "";
-            distance = "";
+            favor = "";
             threshold = "";
         }
 
         String toInputsKey() {
-            return supported + '+' + def + '+' + distance + '+' + threshold + '+' + desired;
+            return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
         }
 
         private static void appendLine(StringBuilder sb, String line) {
@@ -471,9 +501,9 @@ public class XLocaleMatcherTest extends TestFmwk {
                 } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
                     test.defaultLine = line;
                     test.def = suffix;
-                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@distance=")) != null) {
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
                     test.distanceLine = line;
-                    test.distance = suffix;
+                    test.favor = suffix;
                 } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
                     test.thresholdLine = line;
                     test.threshold = suffix;
@@ -531,31 +561,31 @@ public class XLocaleMatcherTest extends TestFmwk {
     @Parameters(method = "readTestCases")
     public void dataDriven(TestCase test) {
         XLocaleMatcher matcher;
-        if (test.def.isEmpty() && test.distance.isEmpty() && test.threshold.isEmpty()) {
+        if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
             matcher = new XLocaleMatcher(test.supported);
         } else {
             XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
             builder.setSupportedLocales(test.supported);
             if (!test.def.isEmpty()) {
-                builder.setDefaultLanguage(new ULocale(test.def));
+                builder.setDefaultULocale(new ULocale(test.def));
             }
-            if (!test.distance.isEmpty()) {
-                DistanceOption distance;
-                switch (test.distance) {
+            if (!test.favor.isEmpty()) {
+                FavorSubtag favor;
+                switch (test.favor) {
                 case "normal":
-                    distance = DistanceOption.REGION_FIRST;
+                    favor = FavorSubtag.LANGUAGE;
                     break;
                 case "script":
-                    distance = DistanceOption.SCRIPT_FIRST;
+                    favor = FavorSubtag.SCRIPT;
                     break;
                 default:
-                    throw new IllegalArgumentException("unsupported distance value " + test.distance);
+                    throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
                 }
-                builder.setDistanceOption(distance);
+                builder.setFavorSubtag(favor);
             }
             if (!test.threshold.isEmpty()) {
                 int threshold = Integer.valueOf(test.threshold);
-                builder.setThresholdDistance(threshold);
+                builder.internalSetThresholdDistance(threshold);
             }
             matcher = builder.build();
         }
@@ -566,16 +596,15 @@ public class XLocaleMatcherTest extends TestFmwk {
             assertEquals("bestSupported", expMatch, bestSupported);
         } else {
             LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
-            Output bestDesired = new Output<>();
-            ULocale bestSupported = matcher.getBestMatch(desired, bestDesired);
-            assertEquals("bestSupported", expMatch, bestSupported);
+            XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
+            assertEquals("bestSupported", expMatch, result.getSupportedULocale());
             if (!test.expDesired.isEmpty()) {
                 ULocale expDesired = getULocaleOrNull(test.expDesired);
-                assertEquals("bestDesired", expDesired, bestDesired.value);
+                assertEquals("bestDesired", expDesired, result.getDesiredULocale());
             }
             if (!test.expCombined.isEmpty()) {
                 ULocale expCombined = getULocaleOrNull(test.expCombined);
-                ULocale combined = XLocaleMatcher.combine(bestSupported, bestDesired.value);
+                ULocale combined = result.makeServiceULocale();
                 assertEquals("combined", expCombined, combined);
             }
         }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
index bd653a7a59a..21c9b601410 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt
@@ -10,7 +10,7 @@
 # Lines starting with an '@' sign provide matcher parameters.
 # @supported=
 # @default=  # no value = no explicit default
-# @distance=[normal|script]  # no value = no explicit setting
+# @favor=[normal|script]  # no value = no explicit setting
 # @threshold=  # no value = no explicit setting
 #
 # A line with ">>" is a getBestMatch() test case:
@@ -93,7 +93,7 @@ zh-HK >> zh-MO
 @supported=zh, zh-MO
 zh-HK >> zh-MO
 
-@distance=script
+@favor=script
 @supported=es-419, es-ES
 es-AR >> es-419
 @supported=es-ES, es-419
@@ -153,7 +153,7 @@ zh-Hans-CN >> zh-CN
 zh-CN >> zh-CN
 zh >> zh-CN
 
-@distance=script
+@favor=script
 zh-Hant-TW >> zh-TW
 zh-Hant >> zh-TW
 zh-TW >> zh-TW
@@ -169,7 +169,7 @@ es-ES >> es
 es-AR >> es-419
 es-MX >> es-MX
 
-@distance=script
+@favor=script
 en-NZ >> en-GB
 es-ES >> es
 es-AR >> es-419
@@ -180,7 +180,7 @@ es-MX >> es-MX
 @supported=91, en, hi
 sa >> hi
 
-@distance=script
+@favor=script
 sa >> hi
 
 ** test: testBasics
@@ -191,7 +191,7 @@ en >> en
 fr >> fr
 ja >> fr # return first if no match
 
-@distance=script
+@favor=script
 en-GB >> en-GB
 en >> en
 fr >> fr
@@ -208,7 +208,7 @@ zh-Hans-CN >> zh-CN
 zh-Hant-HK >> zh-TW
 he-IT >> iw
 
-@distance=script
+@favor=script
 zh-Hant >> zh-TW
 zh >> zh-CN
 zh-Hans-CN >> zh-CN
@@ -228,7 +228,7 @@ nb >> nn
 
 ja >> en
 
-@distance=script
+@favor=script
 tl >> fil
 mo >> ro
 nb >> nn
@@ -243,7 +243,7 @@ es-MX >> es-419
 en-AU >> en-GB
 es-ES >> es
 
-@distance=script
+@favor=script
 es-MX >> es-419
 en-AU >> en-GB
 es-ES >> es
@@ -257,7 +257,7 @@ zh-HK >> zh-MO
 @supported=zh, zh-TW, zh-HK
 zh-MO >> zh-HK
 
-@distance=script
+@favor=script
 @supported=zh, zh-TW, zh-MO
 zh-HK >> zh-MO
 @supported=zh, zh-TW, zh-HK
@@ -272,7 +272,7 @@ und-TW >> zh-Hant # und-TW should be closer to zh-Hant than to zh
 zh-Hant >> und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
 zh >> und-TW # zh should be closer to und-TW than to en-Hant-TW
 
-@distance=script
+@favor=script
 @supported=zh, zh-Hant
 und-TW >> zh-Hant
 @supported=en-Hant-TW, und-TW
@@ -284,14 +284,14 @@ zh >> und-TW
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 
-@distance=script
+@favor=script
 en-GB-oed >> en-Latn-US
 
 ** test: testGetBestMatchForList-exactMatch
 @supported=fr, en-GB, ja, es-ES, es-MX
 ja, de >> ja
 
-@distance=script
+@favor=script
 ja, de >> ja
 
 ** test: testGetBestMatchForList-simpleVariantMatch
@@ -302,7 +302,7 @@ de, en-US >> en-GB # Intentionally avoiding a perfect-match or two candidates fo
 
 de, zh >> fr
 
-@distance=script
+@favor=script
 de, en-US >> en-GB
 de, zh >> fr
 
@@ -320,7 +320,7 @@ ja-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
 
 ja-Jpan-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
 
-@distance=script
+@favor=script
 ja-Jpan-JP, en-AU >> ja
 ja-JP, en-US >> ja
 ja-Jpan-JP, en-US >> ja
@@ -331,7 +331,7 @@ ja-Jpan-JP, en-US >> ja
 @supported=en, de, fr, ja
 de-CH, fr >> de
 
-@distance=script
+@favor=script
 de-CH, fr >> de
 
 ** test: testBestMatchForTraditionalChinese
@@ -357,7 +357,7 @@ zh-TW, en >> en-US
 zh-Hant-CN, en >> en-US
 zh-Hans, en >> zh-Hans-CN
 
-@distance=script
+@favor=script
 zh-TW >> zh-Hans-CN
 zh-Hant >> zh-Hans-CN
 zh-TW, en >> en-US
@@ -389,7 +389,7 @@ und >> it
 @supported=it, und
 en >> it
 
-@distance=script
+@favor=script
 @supported=it, fr
 und >> it
 @supported=it, und
@@ -408,7 +408,7 @@ en-CA >> en-GB
 @supported=de-AT, de-DE, de-CH
 de >> de-DE
 
-@distance=script
+@favor=script
 @supported=es-AR, es
 es-MX >> es-AR
 @supported=fr, en, en-GB
@@ -423,7 +423,7 @@ af >> nl # af => nl
 @supported=mul, af
 nl >> mul # but nl !=> af
 
-@distance=script
+@favor=script
 @supported=mul, nl
 af >> nl
 @supported=mul, af
@@ -440,7 +440,7 @@ ja-JP, en-GB >> ja # Match for ja-JP, with likely region subtag
 
 ja-Jpan-JP, en-GB >> ja # Match for ja-Jpan-JP (maximized already)
 
-@distance=script
+@favor=script
 ja-JP, en-GB >> ja
 ja-Jpan-JP, en-GB >> ja
 
@@ -450,7 +450,7 @@ ja-Jpan-JP, en-GB >> ja
 de-CH, fr >> de
 en-US, ar, nl, de, ja >> en
 
-@distance=script
+@favor=script
 de-CH, fr >> de
 en-US, ar, nl, de, ja >> en
 
@@ -487,7 +487,7 @@ pt-US, pt-PT >> pt-BR
 @supported=pt-PT, pt, es, es-419
 pt-US, pt-PT, pt >> pt # pt-BR implicit
 
-@distance=script
+@favor=script
 @supported=pt-PT, pt-BR, es, es-419
 pt-PT, es, pt >> pt-PT
 @supported=pt-PT, pt, es, es-419
@@ -515,7 +515,7 @@ en-GB >> en
 @supported=en, sv
 en-GB, sv >> en
 
-@distance=script
+@favor=script
 @supported=fr, en, sv
 en-GB >> en
 @supported=en, sv
@@ -532,7 +532,7 @@ sv >> sv
 @supported=af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, 
fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, 
sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA
 sv >> sv
 
-@distance=script
+@favor=script
 @supported=en, sv
 sv >> sv
 
@@ -552,7 +552,7 @@ und, en >> en
 # http://unicode.org/repos/cldr/tags/latest/common/bcp47/
 # http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
 
-@distance=script
+@favor=script
 und >> it
 und, en >> en
 
@@ -561,7 +561,7 @@ und, en >> en
 @supported=en-NZ, en-IT
 en-US >> en-NZ
 
-@distance=script
+@favor=script
 en-US >> en-NZ
 
 ** test: testEmptySupported => null
@@ -587,7 +587,7 @@ fr-PSCRACK >> fr-PSCRACK
 fr >> en-PSCRACK
 de-CH >> en-PSCRACK
 
-@distance=script
+@favor=script
 @supported=und, fr
 fr-BE-fonipa >> fr
 @supported=und, fr-CA
@@ -649,7 +649,7 @@ en-VI >> en-GU
 @supported=und, en-GU, en-GB, en-IN
 en-VI >> en-GU
 
-@distance=script
+@favor=script
 @supported=und, es, es-MA, es-MX, es-419
 es-AR >> es-419
 @supported=und, es-MA, es, es-419, es-MX
@@ -695,12 +695,12 @@ fr-BE-fonipa >> fr-Cyrl-CA-fonupa | | fr-Cyrl-BE-fonipa
 @threshold=50
 fr-BE-fonipa >> und
 
-@distance=script
+@favor=script
 @supported=50, und, fr-CA-fonupa
 @threshold=
 fr-BE-fonipa >> fr-CA-fonupa | | fr-BE-fonipa
 @supported=und, fr-Cyrl-CA-fonupa
-fr-BE-fonipa >> fr-Cyrl-CA-fonupa | fr-BE-fonipa
+fr-BE-fonipa >> und
 
 ** test: testScriptFirst
 @supported=ru, fr
@@ -711,7 +711,7 @@ sr >> hr
 @supported=da, ru, hr
 sr >> da
 
-@distance=script
+@favor=script
 @supported=ru, fr
 zh, pl >> fr
 zh-Cyrl, pl >> ru
@@ -730,11 +730,11 @@ en-US >> en
 fr >> fr
 ja >> fr
 
-@distance=script
+@favor=script
 en-GB >> en-GB
 en-US >> en
 fr >> en-GB
-ja >> en-GB
+ja >> fr
 
 ** test: testEmptyWithDefault
 @default=en
@@ -765,7 +765,7 @@ ja-JP >> fr
 zu >> en-GB
 zxx >> fr
 
-@distance=script
+@favor=script
 en-GB >> en-GB
 en-US >> en
 fr-FR >> fr
@@ -792,7 +792,7 @@ ja-Jpan-JP, en-GB >> ja
 @supported=fr, zh-Hant, en
 zh, en >> en
 
-@distance=script
+@favor=script
 zh, en >> en
 
 ** test: TestCloseEnoughMatchOnMaximized
@@ -829,7 +829,7 @@ pt-US, pt-PT >> pt-BR
 @supported=pt-PT, pt, es, es-419
 pt-US, pt-PT >> pt
 
-@distance=script
+@favor=script
 @supported=pt-BR, es, es-419
 pt-PT, es, pt >> pt-BR
 @supported=pt-PT, pt, es, es-419
@@ -844,7 +844,7 @@ fr-CA, en-CA >> fr
 @supported=zh-Hant, zh-TW
 zh-HK >> zh-Hant
 
-@distance=script
+@favor=script
 @supported=en-GB, en
 en-CA >> en-GB
 @supported=fr, en-GB, en
@@ -871,7 +871,7 @@ zh-Hans-CN >> zh-CN
 zh-Hant-HK >> zh-TW
 he-IT >> iw
 
-@distance=script
+@favor=script
 zh-Hant >> zh-TW
 zh >> zh-CN
 zh-Hans-CN >> zh-CN
@@ -894,7 +894,7 @@ en-AU >> en-GB
 es-MX >> es-419
 es-PT >> es-ES
 
-@distance=script
+@favor=script
 en-AU >> en-GB
 es-MX >> es-419
 es-PT >> es-ES
@@ -930,7 +930,7 @@ en >> it
 en-GB >> en
 en-GB, sv >> en
 
-@distance=script
+@favor=script
 en-GB, sv >> en
 
 ** test: Serbian
@@ -951,7 +951,7 @@ sr >> sr-Latn
 @supported=und, sr
 sr-Latn >> sr
 
-@distance=script
+@favor=script
 sr-ME >> sr
 @supported=und, sr-ME
 sr >> sr-ME
@@ -976,7 +976,7 @@ x-bork >> x-bork
 x-piglatin >> fr
 x-bork >> x-bork
 
-@distance=script
+@favor=script
 @supported=fr, x-bork, en-Latn-US
 x-piglatin >> x-bork
 x-bork >> x-bork
@@ -989,7 +989,7 @@ x-bork >> x-bork
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh
 
-@distance=script
+@favor=script
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh
 
@@ -1007,7 +1007,7 @@ pt-BR >> pt
 pt-PT-PSCRACK >> pt-PT-PSCRACK
 zh-Hans-PSCRACK >> zh-Hans-PSCRACK
 
-@distance=script
+@favor=script
 de >> fr
 en-US >> fr
 en >> fr
@@ -1030,7 +1030,7 @@ en-XC >> en-XC
 pt-BR >> pt
 zh-Hans-XC >> zh-Hans-XC
 
-@distance=script
+@favor=script
 de >> fr
 en-US >> fr
 en >> fr
@@ -1052,20 +1052,20 @@ en >> en-DE
 ar-EG >> ar-SY
 pt-BR >> pt
 ar-XB >> ar-XB
-ar-PSBIDI >> ar-PSBIDI
+ar-PSBIDI >> ar-XB  # These are equivalent.
 en-XA >> en-XA
-en-PSACCENT >> en-PSACCENT
+en-PSACCENT >> en-XA  # These are equivalent.
 ar-PSCRACK >> ar-PSCRACK
 
-@distance=script
+@favor=script
 de >> en-DE
 en >> en-DE
 ar-EG >> ar-SY
 pt-BR >> pt
 ar-XB >> ar-XB
-ar-PSBIDI >> ar-PSBIDI
+ar-PSBIDI >> ar-XB  # These are equivalent.
 en-XA >> en-XA
-en-PSACCENT >> en-PSACCENT
+en-PSACCENT >> en-XA  # These are equivalent.
 ar-PSCRACK >> ar-PSCRACK
 
 ** test: BestMatchForTraditionalChinese
@@ -1095,7 +1095,7 @@ zh-Hans, en >> zh-Hans-CN
 @supported=en, fr-CA
 en-US, fr-CA >> en
 
-@distance=script
+@favor=script
 en-US, fr-CA >> en
 
 ** test: SiblingDefaultRegion
@@ -1111,15 +1111,15 @@ de >> und
 @default=und
 hi >> und
 
-@distance=script
-hi >> de
+@favor=script
+hi >> und
 
 ** test: MatchedLanguageIgnoresDefault
 @supported=de, en, fr
 @default=und
 fr >> fr
 
-@distance=script
+@favor=script
 fr >> fr
 
 ## GenX
@@ -1168,9 +1168,9 @@ es-US >> es-MX
 es-UY >> es-MX
 es-VE >> es-MX
 
-@distance=script
+@favor=script
 es-001 >> es
-und >> es
+und >> und
 ca >> es
 gl-ES >> es
 es >> es
@@ -1254,9 +1254,9 @@ es-US >> es-419
 es-UY >> es-419
 es-VE >> es-419
 
-@distance=script
+@favor=script
 es-001 >> es
-und >> es
+und >> und
 ca >> es
 gl-ES >> es
 es >> es
@@ -1319,9 +1319,9 @@ en-ZA >> en-GB
 en-US >> en-US
 en >> en-US
 
-@distance=script
-und >> en-GB
-ja >> en-GB
+@favor=script
+und >> und
+ja >> und
 fr-CA >> en-GB
 en-AU >> en-GB
 en-BZ >> en-GB
@@ -1355,10 +1355,10 @@ fr >> und
 @supported=pl, ja, ca
 fr >> und
 
-@distance=script
+@favor=script
 @supported=en-GB, en-US, en, en-AU
-und >> en-GB
-ja >> en-GB
+und >> und
+ja >> und
 fr-CA >> en-GB
 fr >> en-GB
 @supported=en-AU, ja, ca
@@ -1384,7 +1384,7 @@ zh-Hant-HK >> zh-TW
 @default=iw
 he-IT >> iw
 
-@distance=script
+@favor=script
 he-IT >> iw
 
 ** test: language-specific script fallbacks 1
@@ -1395,7 +1395,7 @@ hr >> en
 bs >> en
 nl-Cyrl >> en # Mark: Expected value should be en not sr. Script difference exceeds threshold, so can't be nl
 
-@distance=script
+@favor=script
 sr-Latn >> sr
 hr >> en
 bs >> en
@@ -1408,7 +1408,7 @@ sr-Cyrl >> sr-Latn
 @default=und
 hr >> und
 
-@distance=script
+@favor=script
 @default=
 sr >> sr-Latn
 sr-Cyrl >> sr-Latn
@@ -1419,45 +1419,45 @@ hr >> en
 @supported=en, sr-Latn
 hr >> en
 
-@distance=script
+@favor=script
 hr >> en
 
 ** test: both deprecated and not
 @supported=fil, tl, iw, he
 he-IT >> iw
-he >> he
+he >> iw
 iw >> iw
 fil-IT >> fil
 fil >> fil
-tl >> tl
+tl >> fil
 
-@distance=script
+@favor=script
 he-IT >> iw
-he >> he
+he >> iw
 iw >> iw
 fil-IT >> fil
 fil >> fil
-tl >> tl
+tl >> fil
 
 ** test: nearby languages: Nynorsk to Bokmål
 @supported=en, nb
 nn >> nb
 
-@distance=script
+@favor=script
 nn >> nb
 
 ** test: nearby languages: Danish does not match nn
 @supported=en, nn
 da >> en
 
-@distance=script
+@favor=script
 da >> en
 
 ** test: nearby languages: Danish matches no
 @supported=en, no
 da >> no
 
-@distance=script
+@favor=script
 da >> no
 
 ** test: nearby languages: Danish matches nb
@@ -1469,7 +1469,7 @@ da >> nb
 no, en-US >> nn
 nb, en-US >> nn
 
-@distance=script
+@favor=script
 no, en-US >> nn
 nb, en-US >> nn
 
@@ -1477,7 +1477,7 @@ nb, en-US >> nn
 @supported=nl, he, en-GB
 iw, en-US >> he
 
-@distance=script
+@favor=script
 iw, en-US >> he
 
 ** test: macro equivalent is closer than same language with other differences
@@ -1485,7 +1485,7 @@ iw, en-US >> he
 cmn, en-US >> zh
 nb, en-US >> no
 
-@distance=script
+@favor=script
 cmn, en-US >> zh
 nb, en-US >> no
 
@@ -1493,18 +1493,18 @@ nb, en-US >> no
 @supported=nl, fil, en-GB
 tl, en-US >> fil
 
-@distance=script
+@favor=script
 tl, en-US >> fil
 
 ** test: distinguish near equivalents
 @supported=en, ro, mo, ro-MD
 ro >> ro
-mo >> mo
+mo >> ro # ro=mo for the locale matcher
 ro-MD >> ro-MD
 
-@distance=script
+@favor=script
 ro >> ro
-mo >> mo
+mo >> ro # ro=mo for the locale matcher
 ro-MD >> ro-MD
 
 ** test: maximization of legacy
@@ -1512,7 +1512,7 @@ ro-MD >> ro-MD
 sh >> sr-Latn
 mo >> ro
 
-@distance=script
+@favor=script
 sh >> sr-Latn
 mo >> ro
 
@@ -1544,31 +1544,50 @@ zh-TW, en >> en-US
 zh-Hant-CN, en >> en-US
 zh-Hans, en >> zh-Hans-CN
 
-** test: more specific script should win in case regions are identical
+** test: return first among likely-subtags equivalent locales
+# Was: more specific script should win in case regions are identical
+# with some different results.
 @supported=af, af-Latn, af-Arab
 af >> af
 af-ZA >> af
 af-Latn-ZA >> af
-af-Latn >> af-Latn
+af-Latn >> af
 
-@distance=script
+@favor=script
 af >> af
 af-ZA >> af
 af-Latn-ZA >> af
-af-Latn >> af-Latn
+af-Latn >> af
 
-** test: more specific region should win
+# Was: more specific region should win
+# with some different results.
 @supported=nl, nl-NL, nl-BE
+@favor=
+nl >> nl
+nl-Latn >> nl
+nl-Latn-NL >> nl
+nl-NL >> nl
+
+@favor=script
+nl >> nl
+nl-Latn >> nl
+nl-Latn-NL >> nl
+nl-NL >> nl
+
+# Was: more specific region wins over more specific script
+# with some different results.
+@supported=nl, nl-Latn, nl-NL, nl-BE
+@favor=
 nl >> nl
 nl-Latn >> nl
+nl-NL >> nl
 nl-Latn-NL >> nl
-nl-NL >> nl-NL
 
-@distance=script
+@favor=script
 nl >> nl
 nl-Latn >> nl
+nl-NL >> nl
 nl-Latn-NL >> nl
-nl-NL >> nl-NL
 
 ** test: region may replace matched if matched is enclosing
 @supported=es-419, es
@@ -1577,37 +1596,24 @@ es-MX >> es-419
 @default=
 es-SG >> es
 
-@distance=script
+@favor=script
 @default=es-MX
 es-MX >> es-419
 @default=
 es-SG >> es
 
-** test: more specific region wins over more specific script
-@supported=nl, nl-Latn, nl-NL, nl-BE
-nl >> nl
-nl-Latn >> nl-Latn
-nl-NL >> nl-NL
-nl-Latn-NL >> nl
-
-@distance=script
-nl >> nl
-nl-Latn >> nl-Latn
-nl-NL >> nl-NL
-nl-Latn-NL >> nl
-
 ** test: region distance Portuguese
 @supported=pt, pt-PT
 pt-ES >> pt-PT
 
-@distance=script
+@favor=script
 pt-ES >> pt-PT
 
 ** test: if no preferred locale specified, pick top language, not regional
 @supported=en, fr, fr-CA, fr-CH
 fr-US >> fr
 
-@distance=script
+@favor=script
 fr-US >> fr
 
 ** test: region distance German
@@ -1622,7 +1628,7 @@ es-MX >> es-419
 @default=
 es-PT >> es-ES
 
-@distance=script
+@favor=script
 en-AU >> en-GB
 es-MX >> es-419
 @default=
@@ -1649,7 +1655,7 @@ und-Hans >> zh
 und-Hant >> zh
 und-Latn >> it
 
-@distance=script
+@favor=script
 und-FR >> fr
 und-CN >> zh
 und-Hans >> zh
@@ -1664,22 +1670,22 @@ ja-Jpan-JP, en-GB >> ja
 ** test: pick best maximized tag
 @supported=ja, ja-Jpan-US, ja-JP, en, ru
 ja-Jpan, ru >> ja
-ja-JP, ru >> ja-JP
+ja-JP, ru >> ja
 ja-US, ru >> ja-Jpan-US
 
-@distance=script
+@favor=script
 ja-Jpan, ru >> ja
-ja-JP, ru >> ja-JP
+ja-JP, ru >> ja
 ja-US, ru >> ja-Jpan-US
 
 ** test: termination: pick best maximized match
 @supported=ja, ja-Jpan, ja-JP, en, ru
 ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja-Jpan
+ja-Jpan, ru >> ja
 
-@distance=script
+@favor=script
 ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja-Jpan
+ja-Jpan, ru >> ja
 
 ** test: same language over exact, but distinguish when user is explicit
 @supported=fr, en-GB, ja, es-ES, es-MX
@@ -1690,7 +1696,7 @@ de-CH, fr >> de
 en, nl >> en-GB
 en, nl, en-GB >> en-GB
 
-@distance=script
+@favor=script
 @supported=fr, en-GB, ja, es-ES, es-MX
 ja, de >> ja
 @supported=en, de, fr, ja
@@ -1767,7 +1773,7 @@ pt-MZ >> pt-PT
 pt-ST >> pt-PT
 pt-TL >> pt-PT
 
-@distance=script
+@favor=script
 en-150 >> en-GB
 en-AU >> en-GB
 en-BE >> en-GB
@@ -1845,7 +1851,7 @@ sl-HR-NEDIS-u-cu-eur >> sl-NEDIS
 @default=de-t-m0-iso-i0-pinyin
 de-t-m0-iso-i0-pinyin >> de
 
-@distance=script
+@favor=script
 @default=de-u-co-phonebk
 de-FR-u-co-phonebk >> de
 @default=sl-NEDIS-u-cu-eur
@@ -1865,28 +1871,28 @@ de-t-m0-iso-i0-pinyin >> de
 @supported=de
 fr >> de
 
-@distance=script
+@favor=script
 fr >> de
 
 ** test: testLooseMatchForGeneral_getBestMatches
 @supported=es-419
 es-MX >> es-419
 
-@distance=script
+@favor=script
 es-MX >> es-419
 
 ** test: testLooseMatchForEnglish_getBestMatches
 @supported=en, en-GB
 en-CA >> en-GB
 
-@distance=script
+@favor=script
 en-CA >> en-GB
 
 ** test: testLooseMatchForChinese_getBestMatches
 @supported=zh
 zh-TW >> zh
 
-@distance=script
+@favor=script
 zh-TW >> zh
 
 ## Geo
@@ -1894,7 +1900,7 @@ zh-TW >> zh
 ** test: testGetBestMatchWithMinMatchScore
 @supported=fr-FR, fr, fr-CA, en
 @default=und
-fr >> fr # Exact match is chosen.
+fr >> fr-FR # First likely-subtags equivalent match is chosen.
 @supported=en, fr, fr-CA
 fr-FR >> fr # Parent match is chosen.
 @supported=en, fr-CA
@@ -1922,9 +1928,9 @@ zh-CN >> zh-TW
 @supported=ja
 ru >> und
 
-@distance=script
+@favor=script
 @supported=fr-FR, fr, fr-CA, en
-fr >> fr
+fr >> fr-FR
 @supported=en, fr, fr-CA
 fr-FR >> fr
 @supported=en, fr-CA
@@ -1935,19 +1941,19 @@ fr-SN >> fr-CA
 @supported=en, fr-FR
 fr >> fr-FR
 @supported=de, en, it
-fr >> de
+fr >> en
 @supported=iw, en
 iw-Latn >> en
 @supported=iw, no
-ru >> iw
+ru >> und
 @supported=iw-Latn, iw-Cyrl, iw
 ru >> iw-Cyrl
 @supported=iw, iw-Latn
-ru >> iw
+ru >> und
 en >> iw-Latn
 @supported=en, uk
 ru >> uk
 @supported=zh-TW, en
 zh-CN >> zh-TW
 @supported=ja
-ru >> ja
+ru >> und
-- 
2.40.0