/*
****************************************************************************************
- * Copyright (C) 2009-2013, Google, Inc.; International Business Machines Corporation *
+ * Copyright (C) 2009-2014, Google, Inc.; International Business Machines Corporation *
* and others. All Rights Reserved. *
****************************************************************************************
*/
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
+import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R3;
* @stable ICU 4.4
*/
public class LocaleMatcher {
- private static final boolean DEBUG = false;
+
+ private static boolean DEBUG = false; // NOTE(review): no longer final, yet no assignment is visible in this change — confirm something reassigns it
+
+ private static final ULocale UNKNOWN_LOCALE = new ULocale("und"); // the "und" locale; addLikelySubtags returns it unchanged
/**
* Threshold for falling back to the default (first) language. May make this
*/
private final ULocale defaultLanguage;
+ /**
+ * Minimum weighted match score. When the best score found is below this value,
+ * the default language is returned instead of the best table match.
+ */
+ private final double threshold;
+
/**
* Create a new language matcher. The highest-weighted language is the
* default. That means that if no other language is matches closer than a given
* @deprecated This API is ICU internal only.
*/
public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
+ this(languagePriorityList, matcherData, DEFAULT_THRESHOLD); // delegate to the three-argument form with the default threshold
+ }
+
+ /**
+ * Internal testing function; may expose API later.
+ * Passes every language of the priority list, with its weight, to add(...),
+ * takes the first language of the list as the default (null when the list is
+ * empty), and records the threshold.
+ * @param languagePriorityList LocalePriorityList to match
+ * @param matcherData Internal matching data
+ * @param threshold minimum weighted match score; when the best match scores
+ * below it, the default language is returned instead
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
this.matcherData = matcherData;
for (final ULocale language : languagePriorityList) {
add(language, languagePriorityList.getWeight(language));
}
Iterator<ULocale> it = languagePriorityList.iterator();
defaultLanguage = it.hasNext() ? it.next() : null;
+ this.threshold = threshold;
}
lang2 == null ? lang : lang2,
script2 == null ? script : script2,
region2 == null ? region : region2
- );
+ );
}
return ulocale;
}
bestTableMatch = matchRow.get0();
}
}
- if (bestWeight < DEFAULT_THRESHOLD) {
+ if (bestWeight < threshold) {
bestTableMatch = defaultLanguage;
}
return bestTableMatch;
return getBestMatchInternal(ulocale).get0();
}
+ /**
+ * Convenience overload: builds a LocalePriorityList from the given locales and
+ * delegates to the getBestMatch overload that takes a LocalePriorityList.
+ * @param ulocales the desired locales
+ * @return whatever the LocalePriorityList overload returns for the built list
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public ULocale getBestMatch(ULocale... ulocales) {
+ return getBestMatch(LocalePriorityList.add(ulocales).build());
+ }
+
/**
* {@inheritDoc}
* @stable ICU 4.4
@Override
public String toString() {
return "{" + defaultLanguage + ", "
- + maximizedLanguageToWeight + "}";
+ + maximizedLanguageToWeight + "}";
}
// ================= Privates =====================
R2<ULocale, Double> row = maximizedLanguageToWeight.get(tableKey);
final double match = match(languageCode, maximized, tableKey, row.get0());
if (DEBUG) {
- System.out.println("\t" + tableKey + ";\t" + row.toString() + ";\t" + match);
+ System.out.println("\t" + tableKey + ";\t" + row.toString() + ";\t" + match + "\n");
}
final double weight = match * row.get1();
if (weight > bestWeight) {
bestTableMatch = tableKey;
}
}
- if (bestWeight < DEFAULT_THRESHOLD) {
+ if (bestWeight < threshold) {
bestTableMatch = defaultLanguage;
}
return Row.R2.of(bestTableMatch, bestWeight);
*/
// TODO(markdavis): update the above when CLDR 1.6 is final.
private ULocale addLikelySubtags(ULocale languageCode) {
+ // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
+ // language would normally match English. But that would produce the counterintuitive results
+ // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
+ // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
+ //
+ // To avoid that, we change the matcher's definition of max (AddLikelySubtagsWithDefaults)
+ // so that max("und")="und": the undefined language is returned unchanged below.
+ if (languageCode.equals(UNKNOWN_LOCALE)) {
+ return UNKNOWN_LOCALE;
+ }
final ULocale result = ULocale.addLikelySubtags(languageCode);
// should have method on getLikelySubtags for this
if (result == null || result.equals(languageCode)) {
private String region;
private Level level;
static Pattern pattern = Pattern.compile(
- "([a-zA-Z]{1,8}|\\*)" +
- "(?:-([a-zA-Z]{4}|\\*))?" +
- "(?:-([a-zA-Z]{2}|[0-9]{3}|\\*))?");
+ "([a-z]{1,8}|\\*)"
+ + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
+ + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
public LocalePatternMatcher(String toMatch) {
Matcher matcher = pattern.matcher(toMatch);
}
}
- enum Level {language, script, region}
+ enum Level {
+ language(0.99), // each constant carries the value stored in 'worst'
+ script(0.2),
+ region(0.04);
+
+ final double worst; // returned by ScoreData.getRawScore when no score entry matches
+
+ Level(double d) {
+ worst = d;
+ }
+ }
private static class ScoreData implements Freezable<ScoreData> {
+ /**
+ * Multiplier for the "maxD≠maxS, changeD, !changeS" case of this class's distance
+ * computation. In the code visible here it is referenced only from commented-out lines.
+ */
+ private static final double maxUnequal_changeD_sameS = 0.5;
+ /**
+ * Multiplier for the "maxD≠maxS, changeD=changeS" case of this class's distance
+ * computation. In the code visible here it is referenced only from commented-out lines.
+ */
+ private static final double maxUnequal_changeEqual = 0.75;
LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>();
- final double worst;
final Level level;
public ScoreData(Level level) {
this.level = level;
- this.worst = (1-(level == Level.language ? 90 : level == Level.script ? 20 : 4))/100.0;
}
void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) {
* else
* rd = 0.25*StdRDiff // lines 2,5
*/
+
+ // example: input en-GB, supported en en-GB
+ // we want to have a closer match with
boolean desiredChange = desiredRaw.equals(desiredMax);
boolean supportedChange = supportedRaw.equals(supportedMax);
- double distance;
+ double distance = 0;
if (!desiredMax.equals(supportedMax)) {
// Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desiredMax);
// if (lang_result == null) {
// } else {
distance = getRawScore(dMax, sMax);
// }
- if (desiredChange == supportedChange) {
- distance *= 0.75;
- } else if (desiredChange) {
- distance *= 0.5;
- }
- } else if (desiredChange == supportedChange) { // maxes are equal, changes are equal
- distance = 0;
+// if (desiredChange == supportedChange) {
+// distance *= maxUnequal_changeEqual;
+// if (DEBUG) {
+// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, changeD=changeS)\t" + distance);
+// }
+// } else if (desiredChange) {
+// distance *= maxUnequal_changeD_sameS;
+// if (DEBUG) {
+// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, changeD, !changeS)\t" + distance);
+// }
+// } else {
+// if (DEBUG) {
+// System.out.println("\t\t\t" + level + " Distance (maxD≠maxS, !changeD, changeS)\t" + distance);
+// }
+// }
+ } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, raw values differ
+ distance += 0.001;
+// if (DEBUG) {
+// System.out.println("\t\t\t" + level + " Distance (maxD=maxS, changeD=changeS)\t" + distance);
+// }
} else { // maxes are equal, changes are different
- distance = 0.25*worst;
+// distance = 0.25*level.worst;
+// if (DEBUG) {
+// System.out.println("\t\t\t" + level + " Distance (maxD=maxS, changeD≠changeS)\t" + distance);
+// }
}
return distance;
}
private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
if (DEBUG) {
- System.out.println("\t\t\tRaw Score:\t" + desiredLocale + ";\t" + supportedLocale);
+ System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale); // debug output now names the level
}
for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
if (datum.get0().matches(desiredLocale)
&& datum.get1().matches(supportedLocale)) {
if (DEBUG) {
- System.out.println("\t\t\tFOUND\t" + datum);
+ System.out.println("\t\t\t\tFOUND\t" + datum);
}
return datum.get2();
}
}
if (DEBUG) {
- System.out.println("\t\t\tNOTFOUND\t" + worst);
+ System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
}
- return worst;
+ return level.worst; // no entry in scores matched this pair: fall back to the level's value
}
public String toString() {
- return level + ", " + scores;
+ StringBuilder result = new StringBuilder().append(level); // level first
+ for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
+ result.append("\n\t\t").append(score); // then one score row per line
+ }
+ return result.toString();
}
+
@SuppressWarnings("unchecked")
public ScoreData cloneAsThawed() {
try {
public LanguageMatcherData() {
}
+ /**
+ * Returns the language, script and region score tables, in that order,
+ * each separated from the next by a newline and a tab.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public String toString() {
+ return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
+ }
+
/**
* @internal
* @deprecated This API is ICU internal only.
diff += regionScores.getScore(a, aMax, a.getCountry(), aMax.getCountry(), b, bMax, b.getCountry(), bMax.getCountry());
if (!a.getVariant().equals(b.getVariant())) {
- diff += 1;
+ diff += 0.01;
}
if (diff < 0.0d) {
diff = 0.0d;
} else if (diff > 1.0d) {
diff = 1.0d;
}
+ if (DEBUG) {
+ System.out.println("\t\t\tTotal Distance\t" + diff);
+ }
return 1.0 - diff;
}
LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
Level supportedLen = supportedMatcher.getLevel();
if (desiredLen != supportedLen) {
- throw new IllegalArgumentException();
+ throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
}
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
LanguageMatcherData matcherData;
- private static LanguageMatcherData defaultWritten = new LanguageMatcherData()
- // TODO get data from CLDR
- .addDistance("no", "nb", 100, "The language no is normally taken as nb in content; we might alias this for lookup.")
- .addDistance("nn", "nb", 96)
- .addDistance("nn", "no", 96)
- .addDistance("da", "no", 90, "Danish and norwegian are reasonably close.")
- .addDistance("da", "nb", 90)
- .addDistance("hr", "br", 96, "Serbo-croatian variants are all very close.")
- .addDistance("sh", "br", 96)
- .addDistance("sr", "br", 96)
- .addDistance("sh", "hr", 96)
- .addDistance("sr", "hr", 96)
- .addDistance("sh", "sr", 96)
- .addDistance("sr-Latn", "sr-Cyrl", 90, "Most serbs can read either script.")
- .addDistance("*-Hans", "*-Hant", 85, true, "Readers of simplified can read traditional much better than reverse.")
- .addDistance("*-Hant", "*-Hans", 75, true)
- .addDistance("en-*-US", "en-*-CA", 98, "US is different than others, and Canadian is inbetween.")
- .addDistance("en-*-US", "en-*-*", 97)
- .addDistance("en-*-CA", "en-*-*", 98)
- .addDistance("en-*-*", "en-*-*", 99)
- .addDistance("es-*-ES", "es-*-ES", 100, "Latin American Spanishes are closer to each other. Approximate by having es-ES be further from everything else.")
- .addDistance("es-*-ES", "es-*-*", 93)
- .addDistance("*", "*", 1, "[Default value -- must be at end!] Normally there is no comprehension of different languages.")
- .addDistance("*-*", "*-*", 20, "[Default value -- must be at end!] Normally there is little comprehension of different scripts.")
- .addDistance("*-*-*", "*-*-*", 96, "[Default value -- must be at end!] Normally there are small differences across regions.")
- .freeze();
+ private static final LanguageMatcherData defaultWritten;
+// = new LanguageMatcherData()
+// // TODO get data from CLDR
+// .addDistance("no", "nb", 100, "The language no is normally taken as nb in content; we might alias this for lookup.")
+// .addDistance("nn", "nb", 96)
+// .addDistance("nn", "no", 96)
+// .addDistance("da", "no", 90, "Danish and norwegian are reasonably close.")
+// .addDistance("da", "nb", 90)
+// .addDistance("hr", "br", 96, "Serbo-croatian variants are all very close.")
+// .addDistance("sh", "br", 96)
+// .addDistance("sr", "br", 96)
+// .addDistance("sh", "hr", 96)
+// .addDistance("sr", "hr", 96)
+// .addDistance("sh", "sr", 96)
+// .addDistance("sr-Latn", "sr-Cyrl", 90, "Most serbs can read either script.")
+// .addDistance("*-Hans", "*-Hant", 85, true, "Readers of simplified can read traditional much better than reverse.")
+// .addDistance("*-Hant", "*-Hans", 75, true)
+// .addDistance("en-*-US", "en-*-*", 97, "Non-US English variants are closer to each other (written). Make en-US be further from everything else.")
+// .addDistance("en-*-*", "en-*-*", 99)
+// .addDistance("es-*-ES", "es-*-*", 97, "Latin American Spanishes are closer to each other. Make es-ES be further from everything else.")
+// .addDistance("es-*-419", "es-*-*", 99, "Have es-MX, es-AR, etc be closer to es-419 than to each other")
+// .addDistance("es-*-*", "es-*-*", 97)
+// .addDistance("*", "*", 1, "[Default value -- must be at end!] Normally there is no comprehension of different languages.")
+// .addDistance("*-*", "*-*", 20, "[Default value -- must be at end!] Normally there is little comprehension of different scripts.")
+// .addDistance("*-*-*", "*-*-*", 96, "[Default value -- must be at end!] Normally there are small differences across regions.")
+// .freeze();
private static HashMap<String,String> canonicalMap = new HashMap<String, String>();
+ /**
+ * One (source, target, percent) language-matching entry. Instances are collected in a
+ * TreeSet so that the entries are re-ordered by compareTo before being added to the
+ * matcher data.
+ */
+ static class DataHack implements Comparable<DataHack>{
+ final String source;
+ final String target;
+ final int percent;
+ /**
+ * @param source desired-locale pattern, e.g. "en_*_US"
+ * @param target supported-locale pattern; "de_CH" is rewritten to "de"
+ * @param percent match percentage for the pair
+ */
+ public DataHack(String source, String target, int percent) {
+ this.source = source;
+ this.target = target.equals("de_CH") ? "de" : target; // hack to fix bad data
+ this.percent = percent;
+ }
+ // Not referenced by any live code in this class; kept because it is package-visible.
+ static final Pattern STAR_KEEP = Pattern.compile("([^_]+)(?:_[^_]+(?:_[^_]+)?)?");
+ /**
+ * Orders entries by the number of underscores in the source (fewer first), then by
+ * source and by target with '*' treated as sorting after the letters, and finally by
+ * the unmodified source and target.
+ * @param other the entry to compare with
+ * @return negative, zero or positive, as for Comparable
+ */
+ public int compareTo(DataHack other) {
+ // this is just a one-time hack so we don't need to optimize
+ int diff = getUnderbars(source) - getUnderbars(other.source);
+ if (0 != diff) {
+ return diff;
+ }
+ String thisSource = source.replace('*', 'þ'); // just something after Z
+ String otherSource = other.source.replace('*', 'þ'); // just something after Z
+ diff = thisSource.compareTo(otherSource);
+ if (0 != diff) {
+ return diff;
+ }
+ String thisTarget = target.replace('*', 'þ'); // just something after Z
+ String otherTarget = other.target.replace('*', 'þ'); // just something after Z
+ diff = thisTarget.compareTo(otherTarget);
+ if (0 != diff) {
+ return diff;
+ }
+ diff = source.compareTo(other.source);
+ if (0 != diff) {
+ return diff;
+ }
+ return target.compareTo(other.target);
+ }
+ /**
+ * Counts the underscores in a locale pattern, capped at two.
+ * @param source2 locale pattern such as "en_*_US"
+ * @return 0, 1 or 2
+ */
+ private static int getUnderbars(String source2) {
+ int pos = source2.indexOf('_');
+ if (pos < 0) {
+ return 0;
+ }
+ pos = source2.indexOf('_',pos+1);
+ return pos < 0 ? 1 : 2;
+ }
+ @Override
+ public String toString() {
+ return source + ", " + target + " => " + percent;
+ }
+ }
+
static {
// TODO get data from CLDR
canonicalMap.put("iw", "he");
canonicalMap.put("mo", "ro");
canonicalMap.put("tl", "fil");
+
+ ICUResourceBundle suppData = getICUSupplementalData();
+ ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
+ ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
+ defaultWritten = new LanguageMatcherData();
+ // HACK
+ // The data coming from ICU may be old, and badly ordered.
+ TreeSet<DataHack> hack = new TreeSet<DataHack>(); // iterated below in DataHack.compareTo order
+ defaultWritten.addDistance("en_*_US", "en_*_*", 97); // these hard-coded entries are added before any bundle entry
+ defaultWritten.addDistance("en_*_GB", "en_*_*", 98);
+ defaultWritten.addDistance("es_*_ES", "es_*_*", 97);
+ defaultWritten.addDistance("es_*_419", "es_*_*", 99);
+ defaultWritten.addDistance("es_*_*", "es_*_*", 98);
+
+ for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
+ ICUResourceBundle item = (ICUResourceBundle) iter.next();
+ /*
+ "*_*_*",
+ "*_*_*",
+ "96",
+ */
+ hack.add(new DataHack(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)))); // strings 0..2 are source, target, percent
+ }
+ for (DataHack dataHack : hack) {
+ defaultWritten.addDistance(dataHack.source, dataHack.target, dataHack.percent);
+ }
+ defaultWritten.freeze(); // freeze once every entry has been added
+ }
+
+ /**
+ * Loads the "supplementalData" bundle from ICUResourceBundle.ICU_BASE_NAME using
+ * ICUResourceBundle.ICU_DATA_CLASS_LOADER.
+ * @return the loaded bundle, cast to ICUResourceBundle
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static ICUResourceBundle getICUSupplementalData() {
+ ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+ ICUResourceBundle.ICU_BASE_NAME,
+ "supplementalData",
+ ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+ return suppData;
+ }
+
+ /**
+ * Scores how well two locales match, using a matcher constructed from an empty
+ * language list. Each locale is passed to the instance-level match together with
+ * the result of this matcher's addLikelySubtags for it.
+ * @param a first locale
+ * @param b second locale
+ * @return the value returned by the instance-level match
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static double match(ULocale a, ULocale b) {
+ final LocaleMatcher matcher = new LocaleMatcher("");
+ return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
}
}
/*
******************************************************************************************
- * Copyright (C) 2009-2010, Google, Inc.; International Business Machines Corporation and *
+ * Copyright (C) 2009-2014, Google, Inc.; International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************************
*/
package com.ibm.icu.dev.test.util;
+import java.util.Set;
+import java.util.TreeSet;
+
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
new LocaleMatcherTest().run(args);
}
+ public void testenGB() {
+ final LocaleMatcher matcher = new LocaleMatcher("fr, en, en_GB, es_MX, es_419, es");
+ assertEquals("en_GB", matcher.getBestMatch("en_NZ").toString()); // en_NZ is expected to pick en_GB over plain en
+ assertEquals("es", matcher.getBestMatch("es_ES").toString()); // es_ES is expected to pick plain es
+ assertEquals("es_419", matcher.getBestMatch("es_AR").toString()); // es_AR is expected to pick es_419
+ assertEquals("es_MX", matcher.getBestMatch("es_MX").toString()); // a listed locale is expected to match itself
+ }
+
+ public void testFallbacks() {
+ final LocaleMatcher matcher = new LocaleMatcher("en, hi");
+ if (!logKnownIssue("10705", "Need new data from CLDR for languageMatching")) { // the assertion runs only when logKnownIssue returns false for ticket 10705
+ assertEquals("hi", matcher.getBestMatch("sa").toString());
+ }
+ }
+
+ public void testOverrideData() {
+ double threshold = 0.05; // handed to the three-argument LocaleMatcher constructor below
+ LanguageMatcherData localeMatcherData = new LanguageMatcherData()
+ .addDistance("br", "fr", 10, true) // custom data: only these two entries are supplied
+ .addDistance("es", "cy", 10, true)
+ ;
+ logln(localeMatcherData.toString());
+
+ final LocaleMatcher matcher = new LocaleMatcher(
+ LocalePriorityList
+ .add(ULocale.ENGLISH)
+ .add(ULocale.FRENCH)
+ .add(ULocale.UK)
+ .build(), localeMatcherData , threshold);
+ logln(matcher.toString());
+
+ assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br"))); // br -> fr has an explicit entry above
+ assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one way
+ }
+
public void testBasics() {
final LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
.add(ULocale.ENGLISH).build());
private void assertEquals(Object expected, Object string) {
assertEquals("", expected, string);
}
+ private void assertNull(Object bestMatch) {
+ assertNull("", bestMatch);
+ }
+
+ public void testEmpty() {
+ final LocaleMatcher matcher = new LocaleMatcher("");
+ assertNull(matcher.getBestMatch(ULocale.FRENCH)); // a matcher built from an empty list is expected to return null
+ }
+
+ static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
+
+ public void testMatch_exact() {
+ assertEquals(1.0,
+ LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
+ }
+
+ /**
+ * Checks that the score for ar_MK against en_CA lies in the range [0, 0.2).
+ * The second failure message now names the bound that the assertion actually tests.
+ */
+ public void testMatch_none() {
+ double match = LocaleMatcher.match(
+ new ULocale("ar_MK"),
+ ENGLISH_CANADA);
+ assertTrue("Actual < 0: " + match, 0 <= match);
+ assertTrue("Actual >= 0.2 (~ language + script distance): " + match, 0.2 > match);
+ }
+
+ public void testMatch_matchOnMazimized() {
+ ULocale undTw = new ULocale("und_TW");
+ ULocale zhHant = new ULocale("zh_Hant");
+ double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
+ double matchZhHant = LocaleMatcher.match(undTw, zhHant);
+ assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
+ ") than to zh (" + matchZh + ")",
+ matchZh < matchZhHant);
+ double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
+ zhHant);
+ assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
+ ") than to en_Hant_TW (" + matchEnHantTw + ")",
+ matchEnHantTw < matchZhHant);
+ assertTrue("zh should be closer to und_TW (" + matchZh +
+ ") than to en_Hant_TW (" + matchEnHantTw + ")",
+ matchEnHantTw < matchZh);
+ }
+
+ public void testMatchGrandfatheredCode() {
+ final LocaleMatcher matcher = new LocaleMatcher("fr, i_klingon, en_Latn_US");
+ assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
+ //assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());
+ }
+
+ public void testGetBestMatchForList_exactMatch() {
+ final LocaleMatcher matcher = new LocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
+ assertEquals("ja", matcher.getBestMatch("ja, de").toString());
+ }
+
+ public void testGetBestMatchForList_simpleVariantMatch() {
+ final LocaleMatcher matcher = new LocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
+ // Intentionally avoiding a perfect_match or two candidates for variant matches.
+ assertEquals("en_GB", matcher.getBestMatch("de, en_US").toString());
+ // Fall back.
+ assertEquals("fr", matcher.getBestMatch("de, zh").toString());
+ }
+
+ public void testGetBestMatchForList_matchOnMaximized() {
+ final LocaleMatcher matcher = new LocaleMatcher("en, ja");
+ //final LocaleMatcher matcher = new LocaleMatcher("fr, en, ja, es_ES, es_MX");
+ // Check that if the preference is maximized already, it works as well.
+ assertEquals("Match for ja_Jpan_JP (maximized already)",
+ "ja", matcher.getBestMatch("ja_Jpan_JP, en-AU").toString());
+ if (true) return; // NOTE(review): the assertions below never run — confirm whether they are meant to stay disabled
+ // ja_JP matches ja on likely subtags, and it's listed first,
+ // thus it wins over the second preference en_US.
+ assertEquals("Match for ja_JP, with likely region subtag",
+ "ja", matcher.getBestMatch("ja_JP, en_US").toString());
+ // Check that if the preference is maximized already, it works as well.
+ assertEquals("Match for ja_Jpan_JP (maximized already)",
+ "ja", matcher.getBestMatch("ja_Jpan_JP, en_US").toString());
+ }
+
+ public void testGetBestMatchForList_noMatchOnMaximized() {
+ // Regression test for http://b/5714572 .
+ final LocaleMatcher matcher = new LocaleMatcher("en, de, fr, ja");
+ // de maximizes to de_DE. Pick the exact match for the secondary language instead.
+ assertEquals("fr", matcher.getBestMatch("de_CH, fr").toString());
+ }
+
+ public void testBestMatchForTraditionalChinese() {
+ // Scenario: An application that only supports Simplified Chinese (and some other languages),
+ // but does not support Traditional Chinese. zh_Hans_CN could be replaced with zh_CN, zh, or
+ // zh_Hans, it wouldn't make much of a difference.
+ final LocaleMatcher matcher = new LocaleMatcher("fr, zh_Hans_CN, en_US");
+
+ // The script distance (simplified vs. traditional Han) is considered small enough
+ // to be an acceptable match. The regional difference is considered almost insignificant.
+ assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_TW").toString());
+ assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_Hant").toString());
+
+ // For geo_political reasons, you might want to avoid a zh_Hant -> zh_Hans match.
+ // In this case, if zh_TW, zh_HK or a tag starting with zh_Hant is requested, you can
+ // change your call to getBestMatch to include a 2nd language preference.
+ // "en" is a better match since its distance to "en_US" is closer than the distance
+ // from "zh_TW" to "zh_CN" (script distance).
+ assertEquals("en_US", matcher.getBestMatch("zh_TW, en").toString());
+ assertEquals("en_US", matcher.getBestMatch("zh_Hant_CN, en").toString());
+ assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_Hans, en").toString());
+ }
+
+ public void testUndefined() {
+ // When the undefined language doesn't match anything in the list, getBestMatch returns
+ // the default, as usual.
+ LocaleMatcher matcher = new LocaleMatcher("it,fr");
+ assertEquals("it", matcher.getBestMatch("und").toString());
+
+ // When it *does* occur in the list, BestMatch returns it, as expected.
+ matcher = new LocaleMatcher("it,und");
+ assertEquals("und", matcher.getBestMatch("und").toString());
+
+ // The unusual part:
+ // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
+ // language would normally match English. But that would produce the counterintuitive results
+ // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
+ // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
+ //
+ // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
+ // so that max("und")="und". That produces the following, more desirable results:
+ matcher = new LocaleMatcher("it,en");
+ assertEquals("it", matcher.getBestMatch("und").toString());
+ matcher = new LocaleMatcher("it,und");
+ assertEquals("it", matcher.getBestMatch("en").toString());
+ }
+
+ // public void testGetBestMatch_emptyList() {
+ // final LocaleMatcher matcher = new LocaleMatcher(
+ // new LocalePriorityList(new HashMap()));
+ // assertNull(matcher.getBestMatch(ULocale.ENGLISH));
+ // }
+
+ public void testGetBestMatch_googlePseudoLocales() {
+ // Google pseudo locales are primarily based on variant subtags.
+ // See http://sites/intl_eng/pseudo_locales.
+ // (See below for the region code based fall back options.)
+ // NOTE(review): no pseudo locale or variant subtag appears in this test as written — confirm the intent.
+ final LocaleMatcher matcher = new LocaleMatcher(
+ "fr, pt");
+ assertEquals("fr", matcher.getBestMatch("de").toString()); // fr is the first listed language
+ assertEquals("fr", matcher.getBestMatch("en_US").toString());
+ assertEquals("fr", matcher.getBestMatch("en").toString());
+ assertEquals("pt", matcher.getBestMatch("pt_BR").toString());
+ }
+
+ public void testGetBestMatch_regionDistance() {
+ LocaleMatcher matcher = new LocaleMatcher("es_AR, es");
+ assertEquals("es_AR", matcher.getBestMatch("es_MX").toString());
+
+ matcher = new LocaleMatcher("fr, en, en_CA");
+ assertEquals("en_CA", matcher.getBestMatch("en_GB").toString());
+
+ matcher = new LocaleMatcher("de_AT, de_DE, de_CH");
+ assertEquals("de_DE", matcher.getBestMatch("de").toString());
+ }
+
+ /**
+ * If all the base languages are the same, then each sublocale matches itself most closely.
+ * Walks ULocale.getAvailableLocales(), collecting consecutive locales that share a
+ * language code and checking each group whenever the language code changes, plus the
+ * final group after the loop.
+ */
+ public void testExactMatches() {
+ String lastBase = "";
+ TreeSet<ULocale> sorted = new TreeSet<ULocale>();
+ for (ULocale loc : ULocale.getAvailableLocales()) {
+ String language = loc.getLanguage();
+ if (!lastBase.equals(language)) {
+ check(sorted); // check returns immediately for the initial empty set
+ sorted.clear();
+ lastBase = language;
+ }
+ sorted.add(loc);
+ }
+ check(sorted);
+ }
+
+ private void check(Set<ULocale> sorted) {
+ if (sorted.isEmpty()) {
+ return;
+ }
+ check2(sorted); // first pass: the group exactly as collected
+ ULocale first = sorted.iterator().next();
+ ULocale max = ULocale.addLikelySubtags(first);
+ sorted.add(max); // second pass also includes the maximized form of the first locale (mutates the caller's set)
+ check2(sorted);
+ }
+ /**
+ * Builds a matcher whose priority list is exactly the given locales and asserts that
+ * each locale's string form comes back as its own best match.
+ * @param sorted the locales to check against one another
+ */
+ private void check2(Set<ULocale> sorted) {
+ // Log the group, then match every member against a matcher built from the group.
+ logln("Checking: " + sorted);
+ LocaleMatcher matcher = new LocaleMatcher(
+ LocalePriorityList.add(
+ sorted.toArray(new ULocale[sorted.size()]))
+ .build());
+ for (ULocale loc : sorted) {
+ String stringLoc = loc.toString();
+ assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+ }
+ }
+
+ // public void testComputeDistance_monkeyTest() {
+ // RegionCode[] codes = RegionCode.values();
+ // Random random = new Random();
+ // for (int i = 0; i < 1000; ++i) {
+ // RegionCode x = codes[random.nextInt(codes.length)];
+ // RegionCode y = codes[random.nextInt(codes.length)];
+ // double d = LocaleMatcher.getRegionDistance(x, y, null, null);
+ // if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
+ // assertEquals(LocaleMatcher.REGION_DISTANCE, d);
+ // } else if (x == y) {
+ // assertEquals(0.0, d);
+ // } else {
+ // assertTrue(d > 0);
+ // assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
+ // }
+ // }
+ // }
}