From 76cf9f508ac1bc2d4360dd838d765560796fbe89 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Thu, 11 Oct 2012 12:41:08 +0000 Subject: [PATCH] ICU-9638 Make LocaleDisplayNamesImpl actually use context info; add test; make DisplayContext constructor private X-SVN-Rev: 32596 --- .../ibm/icu/impl/LocaleDisplayNamesImpl.java | 153 ++++++++++++++++-- .../src/com/ibm/icu/text/DisplayContext.java | 2 +- .../ibm/icu/dev/test/util/ULocaleTest.java | 56 +++++++ 3 files changed, 198 insertions(+), 13 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java index 36c6ac9e797..333ebabaf85 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java @@ -6,15 +6,21 @@ */ package com.ibm.icu.impl; +import java.util.HashMap; import java.util.Iterator; import java.util.Locale; +import java.util.Map; +import java.util.MissingResourceException; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UScript; import com.ibm.icu.text.LocaleDisplayNames; import com.ibm.icu.text.DisplayContext; import com.ibm.icu.text.MessageFormat; import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.UResourceBundleIterator; public class LocaleDisplayNamesImpl extends LocaleDisplayNames { private final ULocale locale; @@ -28,6 +34,37 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { private static final Cache cache = new Cache(); + /** + * Capitalization context usage types for locale display names + */ + private enum CapitalizationContextUsage { + LANGUAGE, + SCRIPT, + TERRITORY, + VARIANT, + KEY, + TYPE + } + /** + * Capitalization transforms. 
For each usage type, the first array element indicates + * whether to titlecase for uiListOrMenu context, the second indicates whether to + * titlecase for stand-alone context. + */ + private Map capitalizationUsage = null; + /** + * Map from resource key to CapitalizationContextUsage value + */ + private static final Map contextUsageTypeMap; + static { + contextUsageTypeMap=new HashMap(); + contextUsageTypeMap.put("languages", CapitalizationContextUsage.LANGUAGE); + contextUsageTypeMap.put("script", CapitalizationContextUsage.SCRIPT); + contextUsageTypeMap.put("territory", CapitalizationContextUsage.TERRITORY); + contextUsageTypeMap.put("variant", CapitalizationContextUsage.VARIANT); + contextUsageTypeMap.put("key", CapitalizationContextUsage.KEY); + contextUsageTypeMap.put("type", CapitalizationContextUsage.TYPE); + } + public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) { synchronized (cache) { return cache.get(locale, dialectHandling); @@ -90,6 +127,45 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { keyTypePattern = "{0}={1}"; } this.keyTypeFormat = new MessageFormat(keyTypePattern); + + // Get values from the contextTransforms data + // (copied from DateFormatSymbols) + if (capitalization == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU || + capitalization == DisplayContext.CAPITALIZATION_FOR_STANDALONE) { + capitalizationUsage = new HashMap(); + boolean[] noTransforms = new boolean[2]; + noTransforms[0] = false; + noTransforms[1] = false; + CapitalizationContextUsage allUsages[] = CapitalizationContextUsage.values(); + for (CapitalizationContextUsage usage: allUsages) { + capitalizationUsage.put(usage, noTransforms); + } + ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, locale); + UResourceBundle contextTransformsBundle = null; + try { + contextTransformsBundle = (UResourceBundle)rb.getWithFallback("contextTransforms"); + } + catch 
(MissingResourceException e) { + contextTransformsBundle = null; // probably redundant + } + if (contextTransformsBundle != null) { + UResourceBundleIterator ctIterator = contextTransformsBundle.getIterator(); + while ( ctIterator.hasNext() ) { + UResourceBundle contextTransformUsage = ctIterator.next(); + int[] intVector = contextTransformUsage.getIntVector(); + if (intVector.length >= 2) { + String usageKey = contextTransformUsage.getKey(); + CapitalizationContextUsage usage = contextUsageTypeMap.get(usageKey); + if (usage != null) { + boolean[] transforms = new boolean[2]; + transforms[0] = (intVector[0] != 0); + transforms[1] = (intVector[1] != 0); + capitalizationUsage.put(usage, transforms); + } + } + } + } + } } @Override @@ -119,6 +195,60 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { return result; } + private String adjustForUsageAndContext(CapitalizationContextUsage usage, String name) { + String result = name; + boolean titlecase = false; + switch (capitalization) { + case CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE: + titlecase = true; + break; + case CAPITALIZATION_FOR_UI_LIST_OR_MENU: + case CAPITALIZATION_FOR_STANDALONE: + if (capitalizationUsage != null) { + boolean[] transforms = capitalizationUsage.get(usage); + titlecase = (capitalization==DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU)? + transforms[0]: transforms[1]; + } + break; + default: + break; + } + if (titlecase) { + // TODO: Fix this titlecase hack when we figure out something better to do. + // We don't want to titlecase the whole text, only something like the first word, + // or the first segment long enough to have a complete cluster, whichever is + // shorter. We could have kept a word break iterator around, but I am not sure + // that will do the right thing for the purposes here. 
For now we assume that in + // languages for which titlecasing makes a difference, we can stop at non-letter + // characters in 0x0000-0x00FF and only titlecase up to the first occurrence of + // any of those, or to a small number of chars, whichever comes first. + int stopPos, stopPosLimit = 8, len = name.length(); + if ( stopPosLimit > len ) { + stopPosLimit = len; + } + for ( stopPos = 0; stopPos < stopPosLimit; stopPos++ ) { + int ch = name.codePointAt(stopPos); + if ( (ch < 0x41) || (ch > 0x5A && ch < 0x61) || (ch > 0x7A && ch < 0xC0) ) { + break; + } + if (ch >= 0x10000) { + stopPos++; + } + } + if ( stopPos > 0 && stopPos < len ) { + String firstWord = name.substring(0, stopPos); + String firstWordTitleCase = UCharacter.toTitleCase(locale, firstWord, null, + UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT); + result = firstWordTitleCase.concat(name.substring(stopPos)); + } else { + // no stopPos, titlecase the whole text + result = UCharacter.toTitleCase(locale, name, null, + UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT); + } + } + return result; + } + @Override public String localeDisplayName(ULocale locale) { return localeDisplayNameInternal(locale); @@ -235,10 +365,10 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { } if (resultRemainder != null) { - return format.format(new Object[] {resultName, resultRemainder}); + resultName = format.format(new Object[] {resultName, resultRemainder}); } - return resultName; + return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, resultName); } private String localeIdName(String localeId) { @@ -251,47 +381,46 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { if (lang.equals("root") || lang.indexOf('_') != -1) { return lang; } - return langData.get("Languages", lang); + return adjustForUsageAndContext(CapitalizationContextUsage.LANGUAGE, langData.get("Languages", lang)); } @Override public String 
scriptDisplayName(String script) { String str = langData.get("Scripts%stand-alone", script); if (str.equals(script) ) { - return langData.get("Scripts", script); - } else { - return str; + str = langData.get("Scripts", script); } + return adjustForUsageAndContext(CapitalizationContextUsage.SCRIPT, str); } @Override public String scriptDisplayNameInContext(String script) { - return langData.get("Scripts", script); + return adjustForUsageAndContext(CapitalizationContextUsage.SCRIPT, langData.get("Scripts", script)); } @Override public String scriptDisplayName(int scriptCode) { - return scriptDisplayName(UScript.getShortName(scriptCode)); + return adjustForUsageAndContext(CapitalizationContextUsage.SCRIPT, scriptDisplayName(UScript.getShortName(scriptCode))); } @Override public String regionDisplayName(String region) { - return regionData.get("Countries", region); + return adjustForUsageAndContext(CapitalizationContextUsage.TERRITORY, regionData.get("Countries", region)); } @Override public String variantDisplayName(String variant) { - return langData.get("Variants", variant); + return adjustForUsageAndContext(CapitalizationContextUsage.VARIANT, langData.get("Variants", variant)); } @Override public String keyDisplayName(String key) { - return langData.get("Keys", key); + return adjustForUsageAndContext(CapitalizationContextUsage.KEY, langData.get("Keys", key)); } @Override public String keyValueDisplayName(String key, String value) { - return langData.get("Types", key, value); + return adjustForUsageAndContext(CapitalizationContextUsage.TYPE, langData.get("Types", key, value)); } public static class DataTable { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DisplayContext.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DisplayContext.java index df9b8f7144f..af457d55ca3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DisplayContext.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DisplayContext.java @@ -90,7 +90,7 @@ public enum 
DisplayContext { private final Type type; private final int value; - DisplayContext(Type type, int value) { + private DisplayContext(Type type, int value) { this.type = type; this.value = value; } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java index b3ac00de48d..8a69f614bf5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -24,6 +24,7 @@ import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.DateFormat; import com.ibm.icu.text.DecimalFormat; +import com.ibm.icu.text.DisplayContext; import com.ibm.icu.text.LocaleDisplayNames; import com.ibm.icu.text.LocaleDisplayNames.DialectHandling; import com.ibm.icu.text.NumberFormat; @@ -1025,6 +1026,61 @@ public class ULocaleTest extends TestFmwk { } } } + // test use of context + { + class TestContextItem { + public String displayLocale; + public DisplayContext dialectHandling; + public DisplayContext capitalization; + public String localeToBeNamed; + public String result; + public TestContextItem(String dLoc, DisplayContext dia, DisplayContext cap, String locToName, String res) { + displayLocale = dLoc; + dialectHandling = dia; + capitalization = cap; + localeToBeNamed = locToName; + result = res; + } + }; + final TestContextItem[] items = { + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en", "engelsk" ), + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en", "Engelsk" ), + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en", "engelsk" ), + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en_US", "engelsk (USA)" 
), + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en_US", "Engelsk (USA)" ), + new TestContextItem( "da", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en_US", "engelsk (USA)" ), + new TestContextItem( "da", DisplayContext.DIALECT_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en_US", "amerikansk engelsk" ), + new TestContextItem( "da", DisplayContext.DIALECT_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en_US", "Amerikansk engelsk" ), + new TestContextItem( "da", DisplayContext.DIALECT_NAMES, DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en_US", "amerikansk engelsk" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en", "ingl\u00E9s" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en", "Ingl\u00E9s" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en", "Ingl\u00E9s" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en_US", "ingl\u00E9s (Estados Unidos)" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en_US", "Ingl\u00E9s (Estados Unidos)" ), + new TestContextItem( "es", DisplayContext.STANDARD_NAMES, DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en_US", "Ingl\u00E9s (Estados Unidos)" ), + new TestContextItem( "es", DisplayContext.DIALECT_NAMES, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "en_US", "ingl\u00E9s estadounidense" ), + new TestContextItem( "es", DisplayContext.DIALECT_NAMES, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "en_US", "Ingl\u00E9s estadounidense" ), + new TestContextItem( "es", DisplayContext.DIALECT_NAMES, 
DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU, "en_US", "Ingl\u00E9s estadounidense" ), + }; + for (TestContextItem item: items) { + ULocale locale = new ULocale(item.displayLocale); + LocaleDisplayNames ldn = LocaleDisplayNames.getInstance(locale, item.dialectHandling, item.capitalization); + DisplayContext dialectHandling = ldn.getContext(DisplayContext.Type.DIALECT_HANDLING); + DisplayContext capitalization = ldn.getContext(DisplayContext.Type.CAPITALIZATION); + if (dialectHandling != item.dialectHandling || capitalization != item.capitalization) { + errln("FAIL: displayLocale: " + item.displayLocale + ", dialectHandling: " + item.dialectHandling + + ", capitalization: " + item.capitalization + ", localeToName: " + item.localeToBeNamed + + ", => read back dialectHandling: " + dialectHandling + ", capitalization: " + capitalization); + } else { + String result = ldn.localeDisplayName(item.localeToBeNamed); + if (!result.equals(item.result)) { + errln("FAIL: displayLocale: " + item.displayLocale + ", dialectHandling: " + item.dialectHandling + + ", capitalization: " + item.capitalization + ", localeToName: " + item.localeToBeNamed + + ", => expected result: " + item.result + ", got: " + result); + } + } + } + } } private boolean checkName(String name, String language, String script, String country, String variant, ULocale dl) { -- 2.40.0