From 7f0deb8734cdabf2bae55f9db4e0598d98e172be Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Wed, 31 Aug 2011 07:02:05 +0000 Subject: [PATCH] ICU-8774 Support locale category in com.ibm.icu.base plug-in. Also fixed a Java 5 support problem in DateFormatSymbols in com.ibm.icu.base. X-SVN-Rev: 30602 --- .gitignore | 1 + .../src/com/ibm/icu/tests/ULocaleTest.java | 99 + .../com.ibm.icu.base/.classpath | 2 +- .../com/ibm/icu/impl/locale/AsciiUtil.java | 3 + .../com/ibm/icu/impl/locale/BaseLocale.java | 257 ++ .../com/ibm/icu/impl/locale/Extension.java | 38 + .../impl/locale/InternalLocaleBuilder.java | 684 +++++ .../com/ibm/icu/impl/locale/LanguageTag.java | 720 +++++ .../ibm/icu/impl/locale/LocaleExtensions.java | 221 ++ .../icu/impl/locale/LocaleObjectCache.java | 83 + .../impl/locale/LocaleSyntaxException.java | 27 + .../com/ibm/icu/impl/locale/ParseStatus.java | 35 + .../icu/impl/locale/StringTokenIterator.java | 93 + .../impl/locale/UnicodeLocaleExtension.java | 102 + .../src/com/ibm/icu/text/DateFormat.java | 9 +- .../com/ibm/icu/text/DateFormatSymbols.java | 31 +- .../src/com/ibm/icu/text/NumberFormat.java | 15 +- .../src/com/ibm/icu/util/Calendar.java | 3 +- .../icu/util/IllformedLocaleException.java | 73 + .../src/com/ibm/icu/util/TimeZone.java | 4 +- .../src/com/ibm/icu/util/ULocale.java | 2538 +++++++++++++++-- .../plugins.template/com.ibm.icu/.classpath | 2 +- 22 files changed, 4707 insertions(+), 333 deletions(-) create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/BaseLocale.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/Extension.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java create mode 100644 
icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleExtensions.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleObjectCache.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/ParseStatus.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/StringTokenIterator.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java create mode 100644 icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/IllformedLocaleException.java diff --git a/.gitignore b/.gitignore index 9f9c8dd6ed3..f2c6c3f4169 100644 --- a/.gitignore +++ b/.gitignore @@ -920,6 +920,7 @@ icu4j/*.jar icu4j/.project icu4j/demos/out icu4j/doc +icu4j/eclipse-build/out icu4j/main/classes/charset/out icu4j/main/classes/collate/out icu4j/main/classes/core/out diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java index d8a2b85d80d..5523115246c 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java @@ -11,6 +11,7 @@ import java.util.Iterator; import java.util.Locale; import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.ULocale.Category; public class ULocaleTest extends ICUTestCase { private String sampleName; @@ -745,4 +746,102 @@ public class ULocaleTest extends ICUTestCase { assertNotNull(result); // actual result depends on jdk assertTrue(fallback[0]); } + + /* + * Test 
method for 'com.ibm.icu.x.util.ULocale.toLanguageTag()' + */ + public void testToLanguageTag() { + ULocale[] test_ulocales = { + new ULocale("en_US"), + new ULocale(""), + new ULocale("de_DE@collation=phonebook"), + new ULocale("en_Latn_US_POSIX"), + new ULocale("th_TH@numbers=thai;calendar=buddhist"), + new ULocale("und_CN@timezone=PRC"), + new ULocale("iw_IL"), + }; + + String[] expected = { + "en-US", + "und", + "de-DE-u-co-phonebk", + "en-Latn-US-u-va-posix", + "th-TH-u-ca-buddhist-nu-thai", + "und-CN-u-tz-cnsha", + "he-IL", + }; + + for (int i = 0; i < test_ulocales.length; i++) { + String result = test_ulocales[i].toLanguageTag(); + assertEquals(expected[i], result); + } + } + + /* + * Test method for 'com.ibm.icu.x.util.ULocale.forLanguageTag()' + */ + public void testForLanguageTag() { + String[] test_tags = { + "en-us", + "Und-Us", + "ja-jp-u-ca-japanese", + "fr-FR-u-tz-frpar-ca-gregory", + }; + + ULocale[] expected = { + new ULocale("en_US"), + new ULocale("und_US"), + new ULocale("ja_JP@calendar=japanese"), + new ULocale("fr_FR@calendar=gregorian;timezone=Europe/Paris"), + }; + + for (int i = 0; i < test_tags.length; i++) { + ULocale result = ULocale.forLanguageTag(test_tags[i]); + assertEquals(expected[i], result); + } + } + + /* + * Test method for 'com.ibm.icu.x.util.ULocale.getDefault(Category)' + */ + public void testGetDefaultCategory() { + ULocale dispLoc = ULocale.getDefault(Category.DISPLAY); + assertNotNull(dispLoc); + ULocale formLoc = ULocale.getDefault(Category.FORMAT); + assertNotNull(formLoc); + } + + /* + * Test method for 'com.ibm.icu.x.util.ULocale.setDefault(Category, ULocale)' + */ + public void testSetDefaultCategoryULocale() { + ULocale orgDefault = ULocale.getDefault(); + ULocale orgDisplay = ULocale.getDefault(Category.DISPLAY); + ULocale orgFormat = ULocale.getDefault(Category.FORMAT); + + ULocale jaUS = new ULocale("ja_US"); + ULocale.setDefault(jaUS); + + // setDefault(ULocale) updates category defaults + 
assertEquals(ULocale.getDefault(), jaUS); + assertEquals(ULocale.getDefault(Category.DISPLAY), jaUS); + assertEquals(ULocale.getDefault(Category.FORMAT), jaUS); + + ULocale frDE = new ULocale("fr_DE"); + ULocale.setDefault(Category.DISPLAY, frDE); + + // setDefault(Category, ULocale) only updates the category default + assertEquals(ULocale.getDefault(), jaUS); + assertEquals(ULocale.getDefault(Category.DISPLAY), frDE); + assertEquals(ULocale.getDefault(Category.FORMAT), jaUS); + + // restore the original + ULocale.setDefault(orgDefault); + ULocale.setDefault(Category.DISPLAY, orgDisplay); + ULocale.setDefault(Category.FORMAT, orgFormat); + + assertEquals(ULocale.getDefault(), orgDefault); + assertEquals(ULocale.getDefault(Category.DISPLAY), orgDisplay); + assertEquals(ULocale.getDefault(Category.FORMAT), orgFormat); + } } diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/.classpath b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/.classpath index 02159672985..45f024e850e 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/.classpath +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/.classpath @@ -1,7 +1,7 @@ - + diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java index 7600914c5f3..2f751d9f4c1 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java @@ -167,6 +167,9 @@ public final class AsciiUtil { } public boolean equals(Object o) { + if (this == o) { + return true; + } if (o instanceof CaseInsensitiveKey) { return AsciiUtil.caseIgnoreMatch(_key, ((CaseInsensitiveKey)o)._key); } diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/BaseLocale.java 
b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/BaseLocale.java new file mode 100644 index 00000000000..be24cff4ecc --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/BaseLocale.java @@ -0,0 +1,257 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.locale; + + +public final class BaseLocale { + + private static final boolean JDKIMPL = false; + + public static final String SEP = "_"; + + private static final Cache CACHE = new Cache(); + public static final BaseLocale ROOT = BaseLocale.getInstance("", "", "", ""); + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + + private transient volatile int _hash = 0; + + private BaseLocale(String language, String script, String region, String variant) { + if (language != null) { + _language = AsciiUtil.toLowerString(language).intern(); + } + if (script != null) { + _script = AsciiUtil.toTitleString(script).intern(); + } + if (region != null) { + _region = AsciiUtil.toUpperString(region).intern(); + } + if (variant != null) { + if (JDKIMPL) { + // preserve upper/lower cases + _variant = variant.intern(); + } else { + _variant = AsciiUtil.toUpperString(variant).intern(); + } + } + } + + public static BaseLocale getInstance(String language, String script, String region, String variant) { + if (JDKIMPL) { + // JDK uses deprecated ISO639.1 language codes for he, yi and id + if (AsciiUtil.caseIgnoreMatch(language, "he")) { + language = "iw"; + } else if (AsciiUtil.caseIgnoreMatch(language, "yi")) { + language = "ji"; + } else if (AsciiUtil.caseIgnoreMatch(language, "id")) { + language = "in"; + } + } + Key 
key = new Key(language, script, region, variant); + BaseLocale baseLocale = CACHE.get(key); + return baseLocale; + } + + public String getLanguage() { + return _language; + } + + public String getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public String getVariant() { + return _variant; + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof BaseLocale)) { + return false; + } + BaseLocale other = (BaseLocale)obj; + return hashCode() == other.hashCode() + && _language.equals(other._language) + && _script.equals(other._script) + && _region.equals(other._region) + && _variant.equals(other._variant); + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + if (_language.length() > 0) { + buf.append("language="); + buf.append(_language); + } + if (_script.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("script="); + buf.append(_script); + } + if (_region.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("region="); + buf.append(_region); + } + if (_variant.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("variant="); + buf.append(_variant); + } + return buf.toString(); + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _language.length(); i++) { + h = 31*h + _language.charAt(i); + } + for (int i = 0; i < _script.length(); i++) { + h = 31*h + _script.charAt(i); + } + for (int i = 0; i < _region.length(); i++) { + h = 31*h + _region.charAt(i); + } + for (int i = 0; i < _variant.length(); i++) { + h = 31*h + _variant.charAt(i); + } + _hash = h; + } + return h; + } + + private static class Key implements Comparable { + private String _lang = ""; + private String _scrt = ""; + private String _regn = ""; + private String _vart = ""; + + private volatile int _hash; 
// Default to 0 + + public Key(String language, String script, String region, String variant) { + if (language != null) { + _lang = language; + } + if (script != null) { + _scrt = script; + } + if (region != null) { + _regn = region; + } + if (variant != null) { + _vart = variant; + } + } + + public boolean equals(Object obj) { + if (JDKIMPL) { + return (this == obj) || + (obj instanceof Key) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn) + && ((Key)obj)._vart.equals(_vart); // variant is case sensitive in JDK! + } + return (this == obj) || + (obj instanceof Key) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._vart, this._vart); + } + + public int compareTo(Key other) { + int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._scrt, other._scrt); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._regn, other._regn); + if (res == 0) { + if (JDKIMPL) { + res = this._vart.compareTo(other._vart); + } else { + res = AsciiUtil.caseIgnoreCompare(this._vart, other._vart); + } + } + } + } + return res; + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _lang.length(); i++) { + h = 31*h + AsciiUtil.toLower(_lang.charAt(i)); + } + for (int i = 0; i < _scrt.length(); i++) { + h = 31*h + AsciiUtil.toLower(_scrt.charAt(i)); + } + for (int i = 0; i < _regn.length(); i++) { + h = 31*h + AsciiUtil.toLower(_regn.charAt(i)); + } + for (int i = 0; i < _vart.length(); i++) { + if (JDKIMPL) { + h = 31*h + _vart.charAt(i); + } else { + h = 31*h + AsciiUtil.toLower(_vart.charAt(i)); + } + } + _hash = 
h; + } + return h; + } + + public static Key normalize(Key key) { + String lang = AsciiUtil.toLowerString(key._lang).intern(); + String scrt = AsciiUtil.toTitleString(key._scrt).intern(); + String regn = AsciiUtil.toUpperString(key._regn).intern(); + String vart; + if (JDKIMPL) { + // preserve upper/lower cases + vart = key._vart.intern(); + } else { + vart = AsciiUtil.toUpperString(key._vart).intern(); + } + return new Key(lang, scrt, regn, vart); + } + } + + private static class Cache extends LocaleObjectCache { + + public Cache() { + } + + protected Key normalizeKey(Key key) { + return Key.normalize(key); + } + + protected BaseLocale createObject(Key key) { + return new BaseLocale(key._lang, key._scrt, key._regn, key._vart); + } + + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/Extension.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/Extension.java new file mode 100644 index 00000000000..dec331cd162 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/Extension.java @@ -0,0 +1,38 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + + +public class Extension { + private char _key; + protected String _value; + + protected Extension(char key) { + _key = key; + } + + Extension(char key, String value) { + _key = key; + _value = value; + } + + public char getKey() { + return _key; + } + + public String getValue() { + return _value; + } + + public String getID() { + return _key + LanguageTag.SEP + _value; + } + + public String toString() { + return getID(); + } +} \ No newline at end of file diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java new file mode 100644 index 00000000000..27d0ef886e5 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java @@ -0,0 +1,684 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public final class InternalLocaleBuilder { + + private static final boolean JDKIMPL = false; + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + + private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0)); + + private HashMap _extensions; + private HashSet _uattributes; + private HashMap _ukeywords; + + + public InternalLocaleBuilder() { + } + + public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException { + if (language == null || language.length() == 0) { + _language = ""; + } else { + if (!LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language, 0); + } + _language = language; + } + return this; + } + + public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException { + if (script == null || script.length() == 0) { + _script = ""; + } else { + if (!LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script, 0); + } + _script = script; + } + return this; + } + + public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { + if (region == null || region.length() == 0) { + _region = ""; + } else { + if (!LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region, 0); + } + _region = region; + } + return this; + } + + public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException { + if (variant == null || variant.length() == 0) { + _variant = ""; + } else { + // normalize separators to "_" + String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP); + int 
errIdx = checkVariants(var, BaseLocale.SEP); + if (errIdx != -1) { + throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); + } + _variant = var; + } + return this; + } + + public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { + if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); + } + // Use case insensitive string to prevent duplication + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(attribute)); + return this; + } + + public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { + if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); + } + if (_uattributes != null) { + _uattributes.remove(new CaseInsensitiveString(attribute)); + } + return this; + } + + public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException { + if (!UnicodeLocaleExtension.isKey(key)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key); + } + + CaseInsensitiveString cikey = new CaseInsensitiveString(key); + if (type == null) { + if (_ukeywords != null) { + // null type is used for remove the key + _ukeywords.remove(cikey); + } + } else { + if (type.length() != 0) { + // normalize separator to "-" + String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + // validate + StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP); + while (!itr.isDone()) { + String s = itr.current(); + if (!UnicodeLocaleExtension.isTypeSubtag(s)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart()); + } + itr.next(); + } + } + if (_ukeywords == null) { + _ukeywords = new 
HashMap(4); + } + _ukeywords.put(cikey, type); + } + return this; + } + + public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException { + // validate key + boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton); + if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) { + throw new LocaleSyntaxException("Ill-formed extension key: " + singleton); + } + + boolean remove = (value == null || value.length() == 0); + CaseInsensitiveChar key = new CaseInsensitiveChar(singleton); + + if (remove) { + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + // clear entire Unicode locale extension + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + } else { + if (_extensions != null && _extensions.containsKey(key)) { + _extensions.remove(key); + } + } + } else { + // validate value + String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP); + while (!itr.isDone()) { + String s = itr.current(); + boolean validSubtag; + if (isBcpPrivateuse) { + validSubtag = LanguageTag.isPrivateuseSubtag(s); + } else { + validSubtag = LanguageTag.isExtensionSubtag(s); + } + if (!validSubtag) { + throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart()); + } + itr.next(); + } + + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + setUnicodeLocaleExtension(val); + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(key, val); + } + } + return this; + } + + /* + * Set extension/private subtags in a single string representation + */ + public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException { + if (subtags == null || subtags.length() == 0) { + clearExtensions(); + return this; + } + subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + StringTokenIterator 
itr = new StringTokenIterator(subtags, LanguageTag.SEP); + + List extensions = null; + String privateuse = null; + + int parsed = 0; + int start; + + // Make a list of extension subtags + while (!itr.isDone()) { + String s = itr.current(); + if (LanguageTag.isExtensionSingleton(s)) { + start = itr.currentStart(); + String singleton = s; + StringBuilder sb = new StringBuilder(singleton); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (LanguageTag.isExtensionSubtag(s)) { + sb.append(LanguageTag.SEP).append(s); + parsed = itr.currentEnd(); + } else { + break; + } + itr.next(); + } + + if (parsed <= start) { // no subtag followed the singleton; "<=" also rejects a bare singleton at index 0, matching the privateuse check below + throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start); + } + + if (extensions == null) { + extensions = new ArrayList(4); + } + extensions.add(sb.toString()); + } else { + break; + } + } + if (!itr.isDone()) { + String s = itr.current(); + if (LanguageTag.isPrivateusePrefix(s)) { + start = itr.currentStart(); + StringBuilder sb = new StringBuilder(s); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (!LanguageTag.isPrivateuseSubtag(s)) { + break; + } + sb.append(LanguageTag.SEP).append(s); + parsed = itr.currentEnd(); + + itr.next(); + } + if (parsed <= start) { + throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start); + } else { + privateuse = sb.toString(); + } + } + } + + if (!itr.isDone()) { + throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart()); + } + + return setExtensions(extensions, privateuse); + } + + /* + * Set a list of BCP47 extensions and private use subtags + * BCP47 extensions are already validated and well-formed, but may contain duplicates + */ + private InternalLocaleBuilder setExtensions(List bcpExtensions, String privateuse) { + clearExtensions(); + + if (bcpExtensions != null && bcpExtensions.size() > 0) { + HashSet processedExtensions = new 
HashSet(bcpExtensions.size()); + for (String bcpExt : bcpExtensions) { + CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0)); + // ignore duplicates: add() returns false when this singleton was already processed, so the first occurrence wins + if (processedExtensions.add(key)) { + // each extension string contains singleton, e.g. "a-abc-def" + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + setUnicodeLocaleExtension(bcpExt.substring(2)); + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(key, bcpExt.substring(2)); + } + } + } + } + if (privateuse != null && privateuse.length() > 0) { + // privateuse string contains prefix, e.g. "x-abc-def" + if (_extensions == null) { + _extensions = new HashMap(1); + } + _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2)); + } + + return this; + } + + /* + * Reset Builder's internal state with the given language tag + */ + public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) { + clear(); + if (langtag.getExtlangs().size() > 0) { + _language = langtag.getExtlangs().get(0); + } else { + String language = langtag.getLanguage(); + if (!language.equals(LanguageTag.UNDETERMINED)) { + _language = language; + } + } + _script = langtag.getScript(); + _region = langtag.getRegion(); + + List bcpVariants = langtag.getVariants(); + if (bcpVariants.size() > 0) { + StringBuilder var = new StringBuilder(bcpVariants.get(0)); + for (int i = 1; i < bcpVariants.size(); i++) { + var.append(BaseLocale.SEP).append(bcpVariants.get(i)); + } + _variant = var.toString(); + } + + setExtensions(langtag.getExtensions(), langtag.getPrivateuse()); + + return this; + } + + public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException { + String language = base.getLanguage(); + String script = base.getScript(); + String region = base.getRegion(); + String variant = base.getVariant(); + + if (JDKIMPL) { + // Special backward compatibility support + + // Exception 1 - ja_JP_JP + if 
(language.equals("ja") && region.equals("JP") && variant.equals("JP")) { + // When locale ja_JP_JP is created, ca-japanese is always there. + // The builder ignores the variant "JP" + assert("japanese".equals(extensions.getUnicodeLocaleType("ca"))); + variant = ""; + } + // Exception 2 - th_TH_TH + else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) { + // When locale th_TH_TH is created, nu-thai is always there. + // The builder ignores the variant "TH" + assert("thai".equals(extensions.getUnicodeLocaleType("nu"))); + variant = ""; + } + // Exception 3 - no_NO_NY + else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) { + // no_NO_NY is a valid locale and used by Java 6 or older versions. + // The build ignores the variant "NY" and change the language to "nn". + language = "nn"; + variant = ""; + } + } + + // Validate base locale fields before updating internal state. + // LocaleExtensions always store validated/canonicalized values, + // so no checks are necessary. + if (language.length() > 0 && !LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language); + } + + if (script.length() > 0 && !LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script); + } + + if (region.length() > 0 && !LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region); + } + + if (variant.length() > 0) { + int errIdx = checkVariants(variant, BaseLocale.SEP); + if (errIdx != -1) { + throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); + } + } + + // The input locale is validated at this point. + // Now, updating builder's internal fields. + _language = language; + _script = script; + _region = region; + _variant = variant; + clearExtensions(); + + Set extKeys = (extensions == null) ? 
null : extensions.getKeys(); + if (extKeys != null) { + // map extensions back to builder's internal format + for (Character key : extKeys) { + Extension e = extensions.getExtension(key); + if (e instanceof UnicodeLocaleExtension) { + UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e; + for (String uatr : ue.getUnicodeLocaleAttributes()) { + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(uatr)); + } + for (String ukey : ue.getUnicodeLocaleKeys()) { + if (_ukeywords == null) { + _ukeywords = new HashMap(4); + } + _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey)); + } + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue()); + } + } + } + return this; + } + + public InternalLocaleBuilder clear() { + _language = ""; + _script = ""; + _region = ""; + _variant = ""; + clearExtensions(); + return this; + } + + public InternalLocaleBuilder clearExtensions() { + if (_extensions != null) { + _extensions.clear(); + } + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + return this; + } + + public BaseLocale getBaseLocale() { + String language = _language; + String script = _script; + String region = _region; + String variant = _variant; + + // Special private use subtag sequence identified by "lvariant" will be + // interpreted as Java variant. 
+ if (_extensions != null) { + String privuse = _extensions.get(PRIVUSE_KEY); + if (privuse != null) { + StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP); + boolean sawPrefix = false; + int privVarStart = -1; + while (!itr.isDone()) { + if (sawPrefix) { + privVarStart = itr.currentStart(); + break; + } + if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { + sawPrefix = true; + } + itr.next(); + } + if (privVarStart != -1) { + StringBuilder sb = new StringBuilder(variant); + if (sb.length() != 0) { + sb.append(BaseLocale.SEP); + } + sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP)); + variant = sb.toString(); + } + } + } + + return BaseLocale.getInstance(language, script, region, variant); + } + + public LocaleExtensions getLocaleExtensions() { + if ((_extensions == null || _extensions.size() == 0) + && (_uattributes == null || _uattributes.size() == 0) + && (_ukeywords == null || _ukeywords.size() == 0)) { + return LocaleExtensions.EMPTY_EXTENSIONS; + } + + return new LocaleExtensions(_extensions, _uattributes, _ukeywords); + } + + /* + * Remove special private use subtag sequence identified by "lvariant" + * and return the rest. Only used by LocaleExtensions + */ + static String removePrivateuseVariant(String privuseVal) { + StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP); + + // Note: privateuse value "abc-lvariant" is unchanged + // because no subtags after "lvariant". + + int prefixStart = -1; + boolean sawPrivuseVar = false; + while (!itr.isDone()) { + if (prefixStart != -1) { + // Note: privateuse value "abc-lvariant" is unchanged + // because no subtags after "lvariant". 
+ sawPrivuseVar = true; + break; + } + if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { + prefixStart = itr.currentStart(); + } + itr.next(); + } + if (!sawPrivuseVar) { + return privuseVal; + } + + assert(prefixStart == 0 || prefixStart > 1); + return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1); + } + + /* + * Check if the given variant subtags separated by the given + * separator(s) are valid + */ + private int checkVariants(String variants, String sep) { + StringTokenIterator itr = new StringTokenIterator(variants, sep); + while (!itr.isDone()) { + String s = itr.current(); + if (!LanguageTag.isVariant(s)) { + return itr.currentStart(); + } + itr.next(); + } + return -1; + } + + /* + * Private methods parsing Unicode Locale Extension subtags. + * Duplicated attributes/keywords will be ignored. + * The input must be a valid extension subtags (excluding singleton). + */ + private void setUnicodeLocaleExtension(String subtags) { + // wipe out existing attributes/keywords + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + + StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); + + // parse attributes + while (!itr.isDone()) { + if (!UnicodeLocaleExtension.isAttribute(itr.current())) { + break; + } + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(itr.current())); + itr.next(); + } + + // parse keywords + CaseInsensitiveString key = null; + String type; + int typeStart = -1; + int typeEnd = -1; + while (!itr.isDone()) { + if (key != null) { + if (UnicodeLocaleExtension.isKey(itr.current())) { + // next keyword - emit previous one + assert(typeStart == -1 || typeEnd != -1); + type = (typeStart == -1) ? 
"" : subtags.substring(typeStart, typeEnd); + if (_ukeywords == null) { + _ukeywords = new HashMap(4); + } + _ukeywords.put(key, type); + + // reset keyword info + CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current()); + key = _ukeywords.containsKey(tmpKey) ? null : tmpKey; + typeStart = typeEnd = -1; + } else { + if (typeStart == -1) { + typeStart = itr.currentStart(); + } + typeEnd = itr.currentEnd(); + } + } else if (UnicodeLocaleExtension.isKey(itr.current())) { + // 1. first keyword or + // 2. next keyword, but previous one was duplicate + key = new CaseInsensitiveString(itr.current()); + if (_ukeywords != null && _ukeywords.containsKey(key)) { + // duplicate + key = null; + } + } + + if (!itr.hasNext()) { + if (key != null) { + // last keyword + assert(typeStart == -1 || typeEnd != -1); + type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd); + if (_ukeywords == null) { + _ukeywords = new HashMap(4); + } + _ukeywords.put(key, type); + } + break; + } + + itr.next(); + } + } + + static class CaseInsensitiveString { + private String _s; + + CaseInsensitiveString(String s) { + _s = s; + } + + public String value() { + return _s; + } + + public int hashCode() { + return AsciiUtil.toLowerString(_s).hashCode(); + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof CaseInsensitiveString)) { + return false; + } + return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value()); + } + } + + static class CaseInsensitiveChar { + private char _c; + + CaseInsensitiveChar(char c) { + _c = c; + } + + public char value() { + return _c; + } + + public int hashCode() { + return AsciiUtil.toLower(_c); + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof CaseInsensitiveChar)) { + return false; + } + return AsciiUtil.toLower(_c) == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value()); // fold both sides so equals is symmetric and consistent with hashCode + } + + } +} diff --git 
a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java new file mode 100644 index 00000000000..e889a7dc281 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java @@ -0,0 +1,720 @@ +/* + ******************************************************************************* + * Copyright (C) 2010-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class LanguageTag { + private static final boolean JDKIMPL = false; + + // + // static fields + // + public static final String SEP = "-"; + public static final String PRIVATEUSE = "x"; + public static String UNDETERMINED = "und"; + public static final String PRIVUSE_VARIANT_PREFIX = "lvariant"; + + // + // Language subtag fields + // + private String _language = ""; // language subtag + private String _script = ""; // script subtag + private String _region = ""; // region subtag + private String _privateuse = ""; // privateuse + + private List _extlangs = Collections.emptyList(); // extlang subtags + private List _variants = Collections.emptyList(); // variant subtags + private List _extensions = Collections.emptyList(); // extensions + + // Map contains grandfathered tags and its preferred mappings from + // http://www.ietf.org/rfc/rfc5646.txt + private static final Map GRANDFATHERED = + new HashMap(); + + static { + // grandfathered = irregular ; non-redundant tags registered + // / regular ; during the RFC 3066 era + // + // irregular = "en-GB-oed" ; irregular tags do not match + // / "i-ami" ; 
the 'langtag' production and + // / "i-bnn" ; would not otherwise be + // / "i-default" ; considered 'well-formed' + // / "i-enochian" ; These tags are all valid, + // / "i-hak" ; but most are deprecated + // / "i-klingon" ; in favor of more modern + // / "i-lux" ; subtags or subtag + // / "i-mingo" ; combination + // / "i-navajo" + // / "i-pwn" + // / "i-tao" + // / "i-tay" + // / "i-tsu" + // / "sgn-BE-FR" + // / "sgn-BE-NL" + // / "sgn-CH-DE" + // + // regular = "art-lojban" ; these tags match the 'langtag' + // / "cel-gaulish" ; production, but their subtags + // / "no-bok" ; are not extended language + // / "no-nyn" ; or variant subtags: their meaning + // / "zh-guoyu" ; is defined by their registration + // / "zh-hakka" ; and all of these are deprecated + // / "zh-min" ; in favor of a more modern + // / "zh-min-nan" ; subtag or sequence of subtags + // / "zh-xiang" + + final String[][] entries = { + //{"tag", "preferred"}, + {"art-lojban", "jbo"}, + {"cel-gaulish", "xtg-x-cel-gaulish"}, // fallback + {"en-GB-oed", "en-GB-x-oed"}, // fallback + {"i-ami", "ami"}, + {"i-bnn", "bnn"}, + {"i-default", "en-x-i-default"}, // fallback + {"i-enochian", "und-x-i-enochian"}, // fallback + {"i-hak", "hak"}, + {"i-klingon", "tlh"}, + {"i-lux", "lb"}, + {"i-mingo", "see-x-i-mingo"}, // fallback + {"i-navajo", "nv"}, + {"i-pwn", "pwn"}, + {"i-tao", "tao"}, + {"i-tay", "tay"}, + {"i-tsu", "tsu"}, + {"no-bok", "nb"}, + {"no-nyn", "nn"}, + {"sgn-BE-FR", "sfb"}, + {"sgn-BE-NL", "vgt"}, + {"sgn-CH-DE", "sgg"}, + {"zh-guoyu", "cmn"}, + {"zh-hakka", "hak"}, + {"zh-min", "nan-x-zh-min"}, // fallback + {"zh-min-nan", "nan"}, + {"zh-xiang", "hsn"}, + }; + for (String[] e : entries) { + GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e); + } + } + + private LanguageTag() { + } + + /* + * BNF in RFC5464 + * + * Language-Tag = langtag ; normal language tags + * / privateuse ; private use tag + * / grandfathered ; grandfathered tags + * + * + * langtag = language + * ["-" 
script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + * + * language = 2*3ALPHA ; shortest ISO 639 code + * ["-" extlang] ; sometimes followed by + * ; extended language subtags + * / 4ALPHA ; or reserved for future use + * / 5*8ALPHA ; or registered language subtag + * + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + * + * script = 4ALPHA ; ISO 15924 code + * + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + * + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + * + * extension = singleton 1*("-" (2*8alphanum)) + * + * ; Single alphanumerics + * ; "x" reserved for private use + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + * + * privateuse = "x" 1*("-" (1*8alphanum)) + * + */ + public static LanguageTag parse(String languageTag, ParseStatus sts) { + if (sts == null) { + sts = new ParseStatus(); + } else { + sts.reset(); + } + + StringTokenIterator itr; + + // Check if the tag is grandfathered + String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag)); + if (gfmap != null) { + // use preferred mapping + itr = new StringTokenIterator(gfmap[1], SEP); + } else { + itr = new StringTokenIterator(languageTag, SEP); + } + + LanguageTag tag = new LanguageTag(); + + // langtag must start with either language or privateuse + if (tag.parseLanguage(itr, sts)) { + tag.parseExtlangs(itr, sts); + tag.parseScript(itr, sts); + tag.parseRegion(itr, sts); + tag.parseVariants(itr, sts); + tag.parseExtensions(itr, sts); + } + tag.parsePrivateuse(itr, sts); + + if (!itr.isDone() && !sts.isError()) { + String s = itr.current(); + sts._errorIndex = itr.currentStart(); + if (s.length() == 0) { + sts._errorMsg = "Empty subtag"; + } else { + sts._errorMsg = "Invalid subtag: " + s; + } + } + + return tag; + } + + // + // Language subtag parsers + // + + private boolean 
parseLanguage(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isLanguage(s)) { + found = true; + _language = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (!isExtlang(s)) { + break; + } + found = true; + if (_extlangs.isEmpty()) { + _extlangs = new ArrayList(3); + } + _extlangs.add(s); + sts._parseLength = itr.currentEnd(); + itr.next(); + + if (_extlangs.size() == 3) { + // Maximum 3 extlangs + break; + } + } + + return found; + } + + private boolean parseScript(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isScript(s)) { + found = true; + _script = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isRegion(s)) { + found = true; + _region = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (!isVariant(s)) { + break; + } + found = true; + if (_variants.isEmpty()) { + _variants = new ArrayList(3); + } + _variants.add(s); + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || 
sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (isExtensionSingleton(s)) { + int start = itr.currentStart(); + String singleton = s; + StringBuilder sb = new StringBuilder(singleton); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (isExtensionSubtag(s)) { + sb.append(SEP).append(s); + sts._parseLength = itr.currentEnd(); + } else { + break; + } + itr.next(); + } + + if (sts._parseLength <= start) { + sts._errorIndex = start; + sts._errorMsg = "Incomplete extension '" + singleton + "'"; + break; + } + + if (_extensions.size() == 0) { + _extensions = new ArrayList(4); + } + _extensions.add(sb.toString()); + found = true; + } else { + break; + } + } + return found; + } + + private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isPrivateusePrefix(s)) { + int start = itr.currentStart(); + StringBuilder sb = new StringBuilder(s); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (!isPrivateuseSubtag(s)) { + break; + } + sb.append(SEP).append(s); + sts._parseLength = itr.currentEnd(); + + itr.next(); + } + + if (sts._parseLength <= start) { + // need at least 1 private subtag + sts._errorIndex = start; + sts._errorMsg = "Incomplete privateuse"; + } else { + _privateuse = sb.toString(); + found = true; + } + } + + return found; + } + + public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { + LanguageTag tag = new LanguageTag(); + + String language = baseLocale.getLanguage(); + String script = baseLocale.getScript(); + String region = baseLocale.getRegion(); + String variant = baseLocale.getVariant(); + + boolean hasSubtag = false; + + String privuseVar = null; // store ill-formed variant subtags + + if (language.length() > 0 && isLanguage(language)) { + // Convert a 
deprecated language code used by Java to + // a new code + if (language.equals("iw")) { + language = "he"; + } else if (language.equals("ji")) { + language = "yi"; + } else if (language.equals("in")) { + language = "id"; + } + tag._language = language; + } + + if (script.length() > 0 && isScript(script)) { + tag._script = canonicalizeScript(script); + hasSubtag = true; + } + + if (region.length() > 0 && isRegion(region)) { + tag._region = canonicalizeRegion(region); + hasSubtag = true; + } + + if (JDKIMPL) { + // Special handling for no_NO_NY - use nn_NO for language tag + if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) { + tag._language = "nn"; + variant = ""; + } + } + + if (variant.length() > 0) { + List variants = null; + StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); + while (!varitr.isDone()) { + String var = varitr.current(); + if (!isVariant(var)) { + break; + } + if (variants == null) { + variants = new ArrayList(); + } + if (JDKIMPL) { + variants.add(var); // Do not canonicalize! 
+ } else { + variants.add(canonicalizeVariant(var)); + } + varitr.next(); + } + if (variants != null) { + tag._variants = variants; + hasSubtag = true; + } + if (!varitr.isDone()) { + // ill-formed variant subtags + StringBuilder buf = new StringBuilder(); + while (!varitr.isDone()) { + String prvv = varitr.current(); + if (!isPrivateuseSubtag(prvv)) { + // cannot use private use subtag - truncated + break; + } + if (buf.length() > 0) { + buf.append(SEP); + } + if (!JDKIMPL) { + prvv = AsciiUtil.toLowerString(prvv); + } + buf.append(prvv); + varitr.next(); + } + if (buf.length() > 0) { + privuseVar = buf.toString(); + } + } + } + + List extensions = null; + String privateuse = null; + + Set locextKeys = localeExtensions.getKeys(); + for (Character locextKey : locextKeys) { + Extension ext = localeExtensions.getExtension(locextKey); + if (isPrivateusePrefixChar(locextKey.charValue())) { + privateuse = ext.getValue(); + } else { + if (extensions == null) { + extensions = new ArrayList(); + } + extensions.add(locextKey.toString() + SEP + ext.getValue()); + } + } + + if (extensions != null) { + tag._extensions = extensions; + hasSubtag = true; + } + + // append ill-formed variant subtags to private use + if (privuseVar != null) { + if (privateuse == null) { + privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; + } else { + privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); + } + } + + if (privateuse != null) { + tag._privateuse = privateuse; + } + + if (tag._language.length() == 0 && (hasSubtag || privateuse == null)) { + // use lang "und" when 1) no language is available AND + // 2) any of other subtags other than private use are available or + // no private use tag is available + tag._language = UNDETERMINED; + } + + return tag; + } + + // + // Getter methods for language subtag fields + // + + public String getLanguage() { + return _language; + } + + public List getExtlangs() { + return 
Collections.unmodifiableList(_extlangs); + } + + public String getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public List getVariants() { + return Collections.unmodifiableList(_variants); + } + + public List getExtensions() { + return Collections.unmodifiableList(_extensions); + } + + public String getPrivateuse() { + return _privateuse; + } + + // + // Language subtag syntax checking methods + // + + public static boolean isLanguage(String s) { + // language = 2*3ALPHA ; shortest ISO 639 code + // ["-" extlang] ; sometimes followed by + // ; extended language subtags + // / 4ALPHA ; or reserved for future use + // / 5*8ALPHA ; or registered language subtag + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); + } + + public static boolean isExtlang(String s) { + // extlang = 3ALPHA ; selected ISO 639 codes + // *2("-" 3ALPHA) ; permanently reserved + return (s.length() == 3) && AsciiUtil.isAlphaString(s); + } + + public static boolean isScript(String s) { + // script = 4ALPHA ; ISO 15924 code + return (s.length() == 4) && AsciiUtil.isAlphaString(s); + } + + public static boolean isRegion(String s) { + // region = 2ALPHA ; ISO 3166-1 code + // / 3DIGIT ; UN M.49 code + return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) + || ((s.length() == 3) && AsciiUtil.isNumericString(s)); + } + + public static boolean isVariant(String s) { + // variant = 5*8alphanum ; registered variants + // / (DIGIT 3alphanum) + int len = s.length(); + if (len >= 5 && len <= 8) { + return AsciiUtil.isAlphaNumericString(s); + } + if (len == 4) { + return AsciiUtil.isNumeric(s.charAt(0)) + && AsciiUtil.isAlphaNumeric(s.charAt(1)) + && AsciiUtil.isAlphaNumeric(s.charAt(2)) + && AsciiUtil.isAlphaNumeric(s.charAt(3)); + } + return false; + } + + public static boolean isExtensionSingleton(String s) { + // singleton = DIGIT ; 0 - 9 + // / %x41-57 ; A - W + // / %x59-5A ; Y - Z + // / %x61-77 ; a - w + // / %x79-7A ; y - z 
+ + return (s.length() == 1) + && AsciiUtil.isAlphaString(s) + && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isExtensionSingletonChar(char c) { + return isExtensionSingleton(String.valueOf(c)); + } + + public static boolean isExtensionSubtag(String s) { + // extension = singleton 1*("-" (2*8alphanum)) + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isPrivateusePrefix(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() == 1) + && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isPrivateusePrefixChar(char c) { + return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c))); + } + + public static boolean isPrivateuseSubtag(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + // + // Language subtag canonicalization methods + // + + public static String canonicalizeLanguage(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtlang(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeScript(String s) { + return AsciiUtil.toTitleString(s); + } + + public static String canonicalizeRegion(String s) { + return AsciiUtil.toUpperString(s); + } + + public static String canonicalizeVariant(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtension(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSingleton(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSubtag(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizePrivateuse(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizePrivateuseSubtag(String s) { + return AsciiUtil.toLowerString(s); + } + + 
/* Builds the tag as language, extlangs, script, region, variants and extensions (all only when a language is present), followed by private use; subtags are joined with SEP. */ public String toString() { + StringBuilder sb = new StringBuilder(); + + if (_language.length() > 0) { + sb.append(_language); + + for (String extlang : _extlangs) { + sb.append(SEP).append(extlang); + } + + if (_script.length() > 0) { + sb.append(SEP).append(_script); + } + + if (_region.length() > 0) { + sb.append(SEP).append(_region); + } + + for (String variant : _variants) { + sb.append(SEP).append(variant); + } + + for (String extension : _extensions) { + sb.append(SEP).append(extension); + } + } + if (_privateuse.length() > 0) { + if (sb.length() > 0) { + sb.append(SEP); + } + sb.append(_privateuse); + } + + return sb.toString(); + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleExtensions.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleExtensions.java new file mode 100644 index 00000000000..fc81e0d55f1 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleExtensions.java @@ -0,0 +1,221 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.Collections; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; + +import com.ibm.icu.impl.locale.InternalLocaleBuilder.CaseInsensitiveChar; +import com.ibm.icu.impl.locale.InternalLocaleBuilder.CaseInsensitiveString; + + +public class LocaleExtensions { + + private SortedMap _map; + private String _id; + + private static final SortedMap EMPTY_MAP = + Collections.unmodifiableSortedMap(new TreeMap()); + + public static final LocaleExtensions EMPTY_EXTENSIONS; + public static final LocaleExtensions CALENDAR_JAPANESE; + public static final LocaleExtensions NUMBER_THAI; + + static { + EMPTY_EXTENSIONS = new LocaleExtensions(); + EMPTY_EXTENSIONS._id = ""; + EMPTY_EXTENSIONS._map = EMPTY_MAP; + + CALENDAR_JAPANESE = new LocaleExtensions(); + CALENDAR_JAPANESE._id = "u-ca-japanese"; + CALENDAR_JAPANESE._map = new TreeMap(); + CALENDAR_JAPANESE._map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), UnicodeLocaleExtension.CA_JAPANESE); + + NUMBER_THAI = new LocaleExtensions(); + NUMBER_THAI._id = "u-nu-thai"; + NUMBER_THAI._map = new TreeMap(); + NUMBER_THAI._map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), UnicodeLocaleExtension.NU_THAI); + } + + private LocaleExtensions() { + } + + /* + * Package local constructor, only used by InternalLocaleBuilder. 
+ */ + LocaleExtensions(Map extensions, + Set uattributes, Map ukeywords) { + boolean hasExtension = (extensions != null && extensions.size() > 0); + boolean hasUAttributes = (uattributes != null && uattributes.size() > 0); + boolean hasUKeywords = (ukeywords != null && ukeywords.size() > 0); + + if (!hasExtension && !hasUAttributes && !hasUKeywords) { + _map = EMPTY_MAP; + _id = ""; + return; + } + + // Build extension map + _map = new TreeMap(); + if (hasExtension) { + for (Entry ext : extensions.entrySet()) { + char key = AsciiUtil.toLower(ext.getKey().value()); + String value = ext.getValue(); + + if (LanguageTag.isPrivateusePrefixChar(key)) { + // we need to exclude special variant in privuateuse, e.g. "x-abc-lvariant-DEF" + value = InternalLocaleBuilder.removePrivateuseVariant(value); + if (value == null) { + continue; + } + } + + Extension e = new Extension(key, AsciiUtil.toLowerString(value)); + _map.put(Character.valueOf(key), e); + } + } + + if (hasUAttributes || hasUKeywords) { + TreeSet uaset = null; + TreeMap ukmap = null; + + if (hasUAttributes) { + uaset = new TreeSet(); + for (CaseInsensitiveString cis : uattributes) { + uaset.add(AsciiUtil.toLowerString(cis.value())); + } + } + + if (hasUKeywords) { + ukmap = new TreeMap(); + for (Entry kwd : ukeywords.entrySet()) { + String key = AsciiUtil.toLowerString(kwd.getKey().value()); + String type = AsciiUtil.toLowerString(kwd.getValue()); + ukmap.put(key, type); + } + } + + UnicodeLocaleExtension ule = new UnicodeLocaleExtension(uaset, ukmap); + _map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), ule); + } + + if (_map.size() == 0) { + // this could happen when only privuateuse with special variant + _map = EMPTY_MAP; + _id = ""; + } else { + _id = toID(_map); + } + } + + public Set getKeys() { + return Collections.unmodifiableSet(_map.keySet()); + } + + public Extension getExtension(Character key) { + return _map.get(Character.valueOf(AsciiUtil.toLower(key.charValue()))); + } + + public String 
getExtensionValue(Character key) { + Extension ext = _map.get(Character.valueOf(AsciiUtil.toLower(key.charValue()))); + if (ext == null) { + return null; + } + return ext.getValue(); + } + + public Set getUnicodeLocaleAttributes() { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return Collections.emptySet(); + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleAttributes(); + } + + public Set getUnicodeLocaleKeys() { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return Collections.emptySet(); + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleKeys(); + } + + public String getUnicodeLocaleType(String unicodeLocaleKey) { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return null; + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleType(AsciiUtil.toLowerString(unicodeLocaleKey)); + } + + public boolean isEmpty() { + return _map.isEmpty(); + } + + public static boolean isValidKey(char c) { + return LanguageTag.isExtensionSingletonChar(c) || LanguageTag.isPrivateusePrefixChar(c); + } + + public static boolean isValidUnicodeLocaleKey(String ukey) { + return UnicodeLocaleExtension.isKey(ukey); + } + + private static String toID(SortedMap map) { + StringBuilder buf = new StringBuilder(); + Extension privuse = null; + for (Entry entry : map.entrySet()) { + char singleton = entry.getKey().charValue(); + Extension extension = entry.getValue(); + if (LanguageTag.isPrivateusePrefixChar(singleton)) { + privuse = extension; + } else { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(extension); + } + } + if (privuse != null) { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(privuse); + } + 
return buf.toString(); + } + + + public String toString() { + return _id; + } + + public String getID() { + return _id; + } + + public int hashCode() { + return _id.hashCode(); + } + + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof LocaleExtensions)) { + return false; + } + return this._id.equals(((LocaleExtensions)other)._id); + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleObjectCache.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleObjectCache.java new file mode 100644 index 00000000000..d75519ca440 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleObjectCache.java @@ -0,0 +1,83 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.util.concurrent.ConcurrentHashMap; + +public abstract class LocaleObjectCache { + private ConcurrentHashMap> _map; + private ReferenceQueue _queue = new ReferenceQueue(); + + public LocaleObjectCache() { + this(16, 0.75f, 16); + } + + public LocaleObjectCache(int initialCapacity, float loadFactor, int concurrencyLevel) { + _map = new ConcurrentHashMap>(initialCapacity, loadFactor, concurrencyLevel); + } + + public V get(K key) { + V value = null; + + cleanStaleEntries(); + CacheEntry entry = _map.get(key); + if (entry != null) { + value = entry.get(); + } + if (value == null) { + key = normalizeKey(key); + V newVal = createObject(key); + if (key == null || newVal == null) { + // subclass must return non-null key/value object + return null; + } + + CacheEntry newEntry = new CacheEntry(key, newVal, _queue); + + while (value == null) { + cleanStaleEntries(); + entry = _map.putIfAbsent(key, newEntry); + if (entry == null) { + value = newVal; + break; + } else { + value = entry.get(); + } + } + } + return value; + } + + @SuppressWarnings("unchecked") + private void cleanStaleEntries() { + CacheEntry entry; + while ((entry = (CacheEntry)_queue.poll()) != null) { + _map.remove(entry.getKey()); + } + } + + protected abstract V createObject(K key); + + protected K normalizeKey(K key) { + return key; + } + + private static class CacheEntry extends SoftReference { + private K _key; + + CacheEntry(K key, V value, ReferenceQueue queue) { + super(value, queue); + _key = key; + } + + K getKey() { + return _key; + } + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java new file mode 100644 
index 00000000000..bceba10d6cd --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java @@ -0,0 +1,27 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public class LocaleSyntaxException extends Exception { + + private static final long serialVersionUID = 1L; + + private int _index = -1; + + public LocaleSyntaxException(String msg) { + this(msg, 0); + } + + public LocaleSyntaxException(String msg, int errorIndex) { + super(msg); + _index = errorIndex; + } + + public int getErrorIndex() { + return _index; + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/ParseStatus.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/ParseStatus.java new file mode 100644 index 00000000000..6eb5d82773b --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/ParseStatus.java @@ -0,0 +1,35 @@ +/* + ******************************************************************************* + * Copyright (C) 2010-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public class ParseStatus { + int _parseLength = 0; + int _errorIndex = -1; + String _errorMsg = null; + + public void reset() { + _parseLength = 0; + _errorIndex = -1; + _errorMsg = null; + } + + public boolean isError() { + return (_errorIndex >= 0); + } + + public int getErrorIndex() { + return _errorIndex; + } + + public int getParseLength() { + return _parseLength; + } + + public String getErrorMessage() { + return _errorMsg; + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/StringTokenIterator.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/StringTokenIterator.java new file mode 100644 index 00000000000..b25146d33b3 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/StringTokenIterator.java @@ -0,0 +1,93 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public class StringTokenIterator { + private String _text; + private String _dlms; + + private String _token; + private int _start; + private int _end; + private boolean _done; + + public StringTokenIterator(String text, String dlms) { + _text = text; + _dlms = dlms; + setStart(0); + } + + public String first() { + setStart(0); + return _token; + } + + public String current() { + return _token; + } + + public int currentStart() { + return _start; + } + + public int currentEnd() { + return _end; + } + + public boolean isDone() { + return _done; + } + + public String next() { + if (hasNext()) { + _start = _end + 1; + _end = nextDelimiter(_start); + _token = _text.substring(_start, _end); + } else { + _start = _end; + _token = null; + _done = true; + } + return _token; + } + + public boolean hasNext() { + return (_end < _text.length()); + } + + public StringTokenIterator setStart(int offset) { + if (offset > _text.length()) { + throw new IndexOutOfBoundsException(); + } + _start = offset; + _end = nextDelimiter(_start); + _token = _text.substring(_start, _end); + _done = false; + return this; + } + + public StringTokenIterator setText(String text) { + _text = text; + setStart(0); + return this; + } + + private int nextDelimiter(int start) { + int idx = start; + outer: while (idx < _text.length()) { + char c = _text.charAt(idx); + for (int i = 0; i < _dlms.length(); i++) { + if (c == _dlms.charAt(i)) { + break outer; + } + } + idx++; + } + return idx; + } +} + diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java new file mode 100644 index 00000000000..8f9dd9dc3c3 --- /dev/null +++ 
b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java @@ -0,0 +1,102 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.Collections; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +public class UnicodeLocaleExtension extends Extension { + public static final char SINGLETON = 'u'; + + private static final SortedSet EMPTY_SORTED_SET = new TreeSet(); + private static final SortedMap EMPTY_SORTED_MAP = new TreeMap(); + + private SortedSet _attributes = EMPTY_SORTED_SET; + private SortedMap _keywords = EMPTY_SORTED_MAP; + + public static final UnicodeLocaleExtension CA_JAPANESE; + public static final UnicodeLocaleExtension NU_THAI; + + static { + CA_JAPANESE = new UnicodeLocaleExtension(); + CA_JAPANESE._keywords = new TreeMap(); + CA_JAPANESE._keywords.put("ca", "japanese"); + CA_JAPANESE._value = "ca-japanese"; + + NU_THAI = new UnicodeLocaleExtension(); + NU_THAI._keywords = new TreeMap(); + NU_THAI._keywords.put("nu", "thai"); + NU_THAI._value = "nu-thai"; + } + + private UnicodeLocaleExtension() { + super(SINGLETON); + } + + UnicodeLocaleExtension(SortedSet attributes, SortedMap keywords) { + this(); + if (attributes != null && attributes.size() > 0) { + _attributes = attributes; + } + if (keywords != null && keywords.size() > 0) { + _keywords = keywords; + } + + if (_attributes.size() > 0 || _keywords.size() > 0) { + StringBuilder sb = new StringBuilder(); + for (String attribute : _attributes) { + sb.append(LanguageTag.SEP).append(attribute); + } + for (Entry keyword : _keywords.entrySet()) { + String 
key = keyword.getKey(); + String value = keyword.getValue(); + + sb.append(LanguageTag.SEP).append(key); + if (value.length() > 0) { + sb.append(LanguageTag.SEP).append(value); + } + } + _value = sb.substring(1); // skip leading '-' + } + } + + public Set getUnicodeLocaleAttributes() { + return Collections.unmodifiableSet(_attributes); + } + + public Set getUnicodeLocaleKeys() { + return Collections.unmodifiableSet(_keywords.keySet()); + } + + public String getUnicodeLocaleType(String unicodeLocaleKey) { + return _keywords.get(unicodeLocaleKey); + } + + public static boolean isSingletonChar(char c) { + return (SINGLETON == AsciiUtil.toLower(c)); + } + + public static boolean isAttribute(String s) { + // 3*8alphanum + return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isKey(String s) { + // 2alphanum + return (s.length() == 2) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isTypeSubtag(String s) { + // 3*8alphanum + return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java index 730d3f7df4e..14d0c9ab000 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java @@ -18,6 +18,7 @@ import java.util.Map; import com.ibm.icu.util.Calendar; import com.ibm.icu.util.TimeZone; import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.ULocale.Category; /** * {@icuenhanced java.text.DateFormat}.{@icu _usage_} @@ -1417,7 +1418,7 @@ public class DateFormat extends Format { * @stable ICU 2.0 */ static final public DateFormat getInstance(Calendar cal) { - return getInstance(cal, ULocale.getDefault()); + return getInstance(cal, 
ULocale.getDefault(Category.FORMAT)); } /** @@ -1425,7 +1426,7 @@ public class DateFormat extends Format { * @stable ICU 2.0 */ static final public DateFormat getDateInstance(Calendar cal, int dateStyle) { - return getDateInstance(cal, dateStyle, ULocale.getDefault()); + return getDateInstance(cal, dateStyle, ULocale.getDefault(Category.FORMAT)); } /** @@ -1433,7 +1434,7 @@ public class DateFormat extends Format { * @stable ICU 2.0 */ static final public DateFormat getTimeInstance(Calendar cal, int timeStyle) { - return getTimeInstance(cal, timeStyle, ULocale.getDefault()); + return getTimeInstance(cal, timeStyle, ULocale.getDefault(Category.FORMAT)); } /** @@ -1441,7 +1442,7 @@ public class DateFormat extends Format { * @stable ICU 2.0 */ static final public DateFormat getDateTimeInstance(Calendar cal, int dateStyle, int timeStyle) { - return getDateTimeInstance(cal, dateStyle, timeStyle, ULocale.getDefault()); + return getDateTimeInstance(cal, dateStyle, timeStyle, ULocale.getDefault(Category.FORMAT)); } /** diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java index 7077e4ba689..2cff97c65cc 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java @@ -8,6 +8,8 @@ package com.ibm.icu.text; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.Locale; import java.util.MissingResourceException; import java.util.ResourceBundle; @@ -174,7 +176,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * @stable ICU 3.8 */ public static DateFormatSymbols getInstance() { - return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance()); + return new DateFormatSymbols(new 
java.text.DateFormatSymbols()); } /** @@ -190,7 +192,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * @stable ICU 3.8 */ public static DateFormatSymbols getInstance(Locale locale) { - return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance(locale)); + return new DateFormatSymbols(new java.text.DateFormatSymbols(locale)); } /** @@ -206,7 +208,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * @stable ICU 3.8 */ public static DateFormatSymbols getInstance(ULocale locale) { - return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance(locale.toLocale())); + return new DateFormatSymbols(new java.text.DateFormatSymbols(locale.toLocale())); } /** @@ -223,7 +225,26 @@ public class DateFormatSymbols implements Serializable, Cloneable { * @stable ICU 3.8 */ public static Locale[] getAvailableLocales() { - return java.text.DateFormatSymbols.getAvailableLocales(); + Locale[] avlocs = null; + boolean isJava5 = true; + try { + Method mGetAvailableLocales = java.text.DateFormatSymbols.class.getMethod("getAvailableLocales", (Class[])null); + avlocs = (Locale[]) mGetAvailableLocales.invoke(null, (Object[]) null); + isJava5 = false; + } catch (NoSuchMethodException nsme) { + // fall through + } catch (InvocationTargetException ite) { + // fall through + } catch (IllegalAccessException iae) { + // fall through + } + + if (isJava5) { + // Use DateFormat's getAvailableLocales as fallback + avlocs = DateFormat.getAvailableLocales(); + } + + return avlocs; } /** @@ -241,7 +262,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * @provisional This API might change or be removed in a future release. 
*/ public static ULocale[] getAvailableULocales() { - Locale[] locales = java.text.DateFormatSymbols.getAvailableLocales(); + Locale[] locales = getAvailableLocales(); ULocale[] ulocales = new ULocale[locales.length]; for (int i = 0; i < locales.length; ++i) { ulocales[i] = ULocale.forLocale(locales[i]); diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java index 9f808836fb3..831e146198d 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java @@ -19,6 +19,7 @@ import java.util.Set; import com.ibm.icu.util.Currency; import com.ibm.icu.util.CurrencyAmount; import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.ULocale.Category; /** * {@icuenhanced java.text.NumberFormat}.{@icu _usage_} @@ -514,7 +515,7 @@ public class NumberFormat extends Format { */ //Bug 4408066 [Richard/GCL] public final static NumberFormat getInstance() { - return getInstance(ULocale.getDefault(), NUMBERSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), NUMBERSTYLE); } /** @@ -545,7 +546,7 @@ public class NumberFormat extends Format { * @stable ICU 4.2 */ public final static NumberFormat getInstance(int style) { - return getInstance(ULocale.getDefault(), style); + return getInstance(ULocale.getDefault(Category.FORMAT), style); } /** @@ -564,7 +565,7 @@ public class NumberFormat extends Format { * @stable ICU 2.0 */ public final static NumberFormat getNumberInstance() { - return getInstance(ULocale.getDefault(), NUMBERSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), NUMBERSTYLE); } /** @@ -596,7 +597,7 @@ public class NumberFormat extends Format { */ //Bug 4408066 [Richard/GCL] public final static NumberFormat getIntegerInstance() { - return 
getInstance(ULocale.getDefault(), INTEGERSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), INTEGERSTYLE); } /** @@ -638,7 +639,7 @@ public class NumberFormat extends Format { * @stable ICU 2.0 */ public final static NumberFormat getCurrencyInstance() { - return getInstance(ULocale.getDefault(), CURRENCYSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), CURRENCYSTYLE); } /** @@ -665,7 +666,7 @@ public class NumberFormat extends Format { * @stable ICU 2.0 */ public final static NumberFormat getPercentInstance() { - return getInstance(ULocale.getDefault(), PERCENTSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), PERCENTSTYLE); } /** @@ -692,7 +693,7 @@ public class NumberFormat extends Format { * @stable ICU 2.0 */ public final static NumberFormat getScientificInstance() { - return getInstance(ULocale.getDefault(), SCIENTIFICSTYLE); + return getInstance(ULocale.getDefault(Category.FORMAT), SCIENTIFICSTYLE); } /** diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java index b09abaef91b..d7b4730749f 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java @@ -11,6 +11,7 @@ import java.util.GregorianCalendar; import java.util.Locale; import com.ibm.icu.text.DateFormat; +import com.ibm.icu.util.ULocale.Category; /** * {@icuenhanced java.util.Calendar}.{@icu _usage_} @@ -1139,7 +1140,7 @@ public class Calendar implements Serializable, Cloneable, Comparable { */ protected Calendar() { - this(TimeZone.getDefault(), ULocale.getDefault()); + this(TimeZone.getDefault(), ULocale.getDefault(Category.FORMAT)); } /** diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/IllformedLocaleException.java 
b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/IllformedLocaleException.java new file mode 100644 index 00000000000..be0f48d3625 --- /dev/null +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/IllformedLocaleException.java @@ -0,0 +1,73 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.util; + +/** + * Thrown by methods in {@link ULocale} and {@link ULocale.Builder} to + * indicate that an argument is not a well-formed BCP 47 tag. + * + * @see ULocale + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ +public class IllformedLocaleException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + private int _errIdx = -1; + + /** + * Constructs a new IllformedLocaleException with no + * detail message and -1 as the error index. + * @draft ICU 4.6 + * @provisional This API might change or be removed in a future release. + */ + public IllformedLocaleException() { + super(); + } + + /** + * Constructs a new IllformedLocaleException with the + * given message and -1 as the error index. + * + * @param message the message + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public IllformedLocaleException(String message) { + super(message); + } + + /** + * Constructs a new IllformedLocaleException with the + * given message and the error index. The error index is the approximate + * offset from the start of the ill-formed value to the point where the + * parse first detected an error. A negative error index value indicates + * either the error index is not applicable or unknown. 
+ * + * @param message the message + * @param errorIndex the index + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public IllformedLocaleException(String message, int errorIndex) { + super(message + ((errorIndex < 0) ? "" : " [at index " + errorIndex + "]")); + _errIdx = errorIndex; + } + + /** + * Returns the index where the error was found. A negative value indicates + * either the error index is not applicable or unknown. + * + * @return the error index + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public int getErrorIndex() { + return _errIdx; + } +} diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java index 6f5d08c20c8..0e35274753d 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java @@ -12,6 +12,8 @@ import java.util.Date; import java.util.Locale; import java.util.MissingResourceException; +import com.ibm.icu.util.ULocale.Category; + /** * {@icuenhanced java.util.TimeZone}.{@icu _usage_} * @@ -368,7 +370,7 @@ public class TimeZone implements Serializable, Cloneable { * @stable ICU 2.0 */ public final String getDisplayName(boolean daylight, int style) { - return getDisplayName(daylight, style, ULocale.getDefault()); + return getDisplayName(daylight, style, ULocale.getDefault(Category.DISPLAY)); } /** diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java index 4d49adf54df..c98b65587af 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java +++ 
b/icu4j/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java @@ -8,12 +8,18 @@ package com.ibm.icu.util; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.text.ParseException; import java.util.Iterator; +import java.util.List; import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; import java.util.MissingResourceException; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import com.ibm.icu.impl.ICUCache; import com.ibm.icu.impl.LocaleIDParser; @@ -21,6 +27,14 @@ import com.ibm.icu.impl.LocaleIDs; import com.ibm.icu.impl.LocaleUtility; import com.ibm.icu.impl.SimpleCache; import com.ibm.icu.impl.locale.AsciiUtil; +import com.ibm.icu.impl.locale.BaseLocale; +import com.ibm.icu.impl.locale.Extension; +import com.ibm.icu.impl.locale.InternalLocaleBuilder; +import com.ibm.icu.impl.locale.LanguageTag; +import com.ibm.icu.impl.locale.LocaleExtensions; +import com.ibm.icu.impl.locale.LocaleSyntaxException; +import com.ibm.icu.impl.locale.ParseStatus; +import com.ibm.icu.impl.locale.UnicodeLocaleExtension; /** * {@icuenhanced java.util.Locale}.{@icu _usage_} @@ -87,7 +101,8 @@ import com.ibm.icu.impl.locale.AsciiUtil; * @stable ICU 2.8 */ public final class ULocale implements Serializable { - private static final long serialVersionUID = 1L; + // using serialver from jdk1.4.2_05 + private static final long serialVersionUID = 3715177670352309217L; /** * Useful constant for language. @@ -226,12 +241,33 @@ public final class ULocale implements Serializable { // default empty locale private static final Locale EMPTY_LOCALE = new Locale("", ""); + // special keyword key for Unicode locale attributes + private static final String LOCALE_ATTRIBUTE_KEY = "attribute"; + /** * The root ULocale. * @stable ICU 2.8 */ public static final ULocale ROOT = new ULocale("", EMPTY_LOCALE); + /** + * Enum for locale categories. 
These locale categories are used to get/set the default locale for + * the specific functionality represented by the category. + * @stable ICU 49 + */ + public enum Category { + /** + * Category used to represent the default locale for displaying user interfaces. + * @stable ICU 49 + */ + DISPLAY, + /** + * Category used to represent the default locale for formatting date, number and/or currency. + * @stable ICU 49 + */ + FORMAT + } + private static final SimpleCache CACHE = new SimpleCache(); /** @@ -244,6 +280,14 @@ public final class ULocale implements Serializable { */ private String localeID; + /** + * Cache the locale data container fields. + * In future, we want to use them as the primary locale identifier storage. + */ + private transient volatile BaseLocale baseLocale; + private transient volatile LocaleExtensions extensions; + + private static String[][] CANONICALIZE_MAP; private static String[][] variantsToKeywords; @@ -334,21 +378,6 @@ public final class ULocale implements Serializable { } } - /* - * This table is used for mapping between ICU and special Java - * locales. When an ICU locale matches with - * /, the ICU locale is mapped to locale. - * For example, both ja_JP@calendar=japanese and ja@calendar=japanese - * are mapped to Java locale "ja_JP_JP". ICU locale "nn" is mapped - * to Java locale "no_NO_NY". - */ - private static final String[][] _javaLocaleMap = { - // { , , , , - { "ja_JP_JP", "ja_JP", "calendar", "japanese", "ja"}, - { "no_NO_NY", "nn_NO", null, null, "nn"}, - { "th_TH_TH", "th_TH", "numbers", "thai", "th"}, - }; - /** * Private constructor used by static initializers. 
*/ @@ -378,24 +407,7 @@ public final class ULocale implements Serializable { } ULocale result = CACHE.get(loc); if (result == null) { - if (defaultULocale != null && loc == defaultULocale.locale) { - result = defaultULocale; - } else { - String locStr = loc.toString(); - if (locStr.length() == 0) { - result = ROOT; - } else { - for (int i = 0; i < _javaLocaleMap.length; i++) { - if (_javaLocaleMap[i][0].equals(locStr)) { - LocaleIDParser p = new LocaleIDParser(_javaLocaleMap[i][1]); - p.setKeywordValue(_javaLocaleMap[i][2], _javaLocaleMap[i][3]); - locStr = p.getName(); - break; - } - } - result = new ULocale(locStr, loc); - } - } + result = JDKLocaleHelper.toULocale(loc); CACHE.put(loc, result); } return result; @@ -501,45 +513,63 @@ public final class ULocale implements Serializable { */ public Locale toLocale() { if (locale == null) { - LocaleIDParser p = new LocaleIDParser(localeID); - String base = p.getBaseName(); - for (int i = 0; i < _javaLocaleMap.length; i++) { - if (base.equals(_javaLocaleMap[i][1]) || base.equals(_javaLocaleMap[i][4])) { - if (_javaLocaleMap[i][2] != null) { - String val = p.getKeywordValue(_javaLocaleMap[i][2]); - if (val != null && val.equals(_javaLocaleMap[i][3])) { - p = new LocaleIDParser(_javaLocaleMap[i][0]); - break; - } - } else { - p = new LocaleIDParser(_javaLocaleMap[i][0]); - break; - } - } - } - String[] names = p.getLanguageScriptCountryVariant(); - locale = new Locale(names[0], names[2], names[3]); + locale = JDKLocaleHelper.toLocale(this); } return locale; } private static ICUCache nameCache = new SimpleCache(); + /** * Keep our own default ULocale. 
*/ private static Locale defaultLocale = Locale.getDefault(); - private static ULocale defaultULocale = new ULocale(defaultLocale); + private static ULocale defaultULocale = forLocale(defaultLocale); + + private static Locale[] defaultCategoryLocales = new Locale[Category.values().length]; + private static ULocale[] defaultCategoryULocales = new ULocale[Category.values().length]; + + static { + for (Category cat: Category.values()) { + int idx = cat.ordinal(); + defaultCategoryLocales[idx] = JDKLocaleHelper.getDefault(cat); + defaultCategoryULocales[idx] = forLocale(defaultCategoryLocales[idx]); + } + } /** * Returns the current default ULocale. + * @return the default ULocale. * @stable ICU 2.8 */ public static ULocale getDefault() { synchronized (ULocale.class) { + if (defaultULocale == null) { + // When Java's default locale has extensions (such as ja-JP-u-ca-japanese), + // Locale -> ULocale mapping requires BCP47 keyword mapping data that is currently + // stored in a resource bundle. However, UResourceBundle currently requires + // non-null default ULocale. For now, this implementation returns ULocale.ROOT + // to avoid the problem. + + // TODO: Consider moving BCP47 mapping data out of resource bundle later. + + return ULocale.ROOT; + } Locale currentDefault = Locale.getDefault(); if (!defaultLocale.equals(currentDefault)) { defaultLocale = currentDefault; - defaultULocale = new ULocale(defaultLocale); + defaultULocale = forLocale(currentDefault); + + if (!JDKLocaleHelper.isJava7orNewer()) { + // Detected Java default Locale change. + // We need to update category defaults to match the + // Java 7's behavior on Java 6 or older environment. 
+ for (Category cat : Category.values()) { + int idx = cat.ordinal(); + defaultCategoryLocales[idx] = currentDefault; + defaultCategoryULocales[idx] = forLocale(currentDefault); + } + } } return defaultULocale; } @@ -550,17 +580,101 @@ public final class ULocale implements Serializable { * If the caller does not have write permission to the * user.language property, a security exception will be thrown, * and the default ULocale will remain unchanged. + *

+ * By setting the default ULocale with this method, all of the default category locales + * are also set to the specified default ULocale. * @param newLocale the new default locale * @throws SecurityException if a security manager exists and its * checkPermission method doesn't allow the operation. * @throws NullPointerException if newLocale is null * @see SecurityManager#checkPermission(java.security.Permission) * @see java.util.PropertyPermission + * @see ULocale#setDefault(Category, ULocale) * @stable ICU 3.0 */ public static synchronized void setDefault(ULocale newLocale){ - Locale.setDefault(newLocale.toLocale()); + defaultLocale = newLocale.toLocale(); + Locale.setDefault(defaultLocale); defaultULocale = newLocale; + // This method also updates all category default locales + for (Category cat : Category.values()) { + setDefault(cat, newLocale); + } + } + + /** + * Returns the current default ULocale for the specified category. + * + * @param category the category + * @return the default ULocale for the specified category. + * @stable ICU 49 + */ + public static ULocale getDefault(Category category) { + synchronized (ULocale.class) { + int idx = category.ordinal(); + if (defaultCategoryULocales[idx] == null) { + // Just in case this method is called during ULocale class + // initialization. Unlike getDefault(), we do not have + // cyclic dependency for category default. + return ULocale.ROOT; + } + if (JDKLocaleHelper.isJava7orNewer()) { + Locale currentCategoryDefault = JDKLocaleHelper.getDefault(category); + if (!defaultCategoryLocales[idx].equals(currentCategoryDefault)) { + defaultCategoryLocales[idx] = currentCategoryDefault; + defaultCategoryULocales[idx] = forLocale(currentCategoryDefault); + } + } else { + // java.util.Locale.setDefault(Locale) in Java 7 updates + // category locale defaults. On Java 6 or older environment, + // ICU4J checks if the default locale has changed and updates + // category ULocales here if necessary. 
+ + // Note: When java.util.Locale.setDefault(Locale) is called + // with the same Locale as the previous one, Java 7 still + // updates category locale defaults. On Java 6 or older env, + // there is no good way to detect the event, so ICU4J simply + // checks if the default Java Locale has changed since last + // time. + + Locale currentDefault = Locale.getDefault(); + if (!defaultLocale.equals(currentDefault)) { + defaultLocale = currentDefault; + defaultULocale = forLocale(currentDefault); + + for (Category cat : Category.values()) { + int tmpIdx = cat.ordinal(); + defaultCategoryLocales[tmpIdx] = currentDefault; + defaultCategoryULocales[tmpIdx] = forLocale(currentDefault); + } + } + + // No synchronization with JDK Locale, because category default + // is not supported in Java 6 or older versions + } + return defaultCategoryULocales[idx]; + } + } + + /** + * Sets the default ULocale for the specified Category. + * This also sets the default Locale for the specified Category + * of the JVM. If the caller does not have write permission to the + * user.language property, a security exception will be thrown, + * and the default ULocale for the specified Category will remain unchanged. 
+ * + * @param category the specified category to set the default locale + * @param newLocale the new default locale + * @see SecurityManager#checkPermission(java.security.Permission) + * @see java.util.PropertyPermission + * @stable ICU 49 + */ + public static synchronized void setDefault(Category category, ULocale newLocale) { + Locale newJavaDefault = newLocale.toLocale(); + int idx = category.ordinal(); + defaultCategoryULocales[idx] = newLocale; + defaultCategoryLocales[idx] = newJavaDefault; + JDKLocaleHelper.setDefault(category, newJavaDefault); } /** @@ -608,6 +722,7 @@ public final class ULocale implements Serializable { * @stable ICU 3.0 */ public static ULocale[] getAvailableLocales() { + //#com.ibm.icu.base if (availableLocales == null) { synchronized (ULocale.class) { if (availableLocales == null) { @@ -806,6 +921,36 @@ public final class ULocale implements Serializable { public String getName() { return localeID; // always normalized } + + /** + * Gets the shortest length subtag's size. + * + * @param localeID + * @return The size of the shortest length subtag + **/ + private static int getShortestSubtagLength(String localeID) { + int localeIDLength = localeID.length(); + int length = localeIDLength; + boolean reset = true; + int tmpLength = 0; + + for (int i = 0; i < localeIDLength; i++) { + if (localeID.charAt(i) != '_' && localeID.charAt(i) != '-') { + if (reset) { + reset = false; + tmpLength = 0; + } + tmpLength++; + } else { + if (tmpLength != 0 && tmpLength < length) { + length = tmpLength; + } + reset = true; + } + } + + return length; + } /** * {@icu} Returns the (normalized) full name for the specified locale. 
@@ -815,10 +960,20 @@ public final class ULocale implements Serializable { * @stable ICU 3.0 */ public static String getName(String localeID){ - String name = nameCache.get(localeID); + String tmpLocaleID; + // Convert BCP47 id if necessary + if (localeID != null && !localeID.contains("@") && getShortestSubtagLength(localeID) == 1) { + tmpLocaleID = forLanguageTag(localeID).getName(); + if (tmpLocaleID.length() == 0) { + tmpLocaleID = localeID; + } + } else { + tmpLocaleID = localeID; + } + String name = nameCache.get(tmpLocaleID); if (name == null) { - name = new LocaleIDParser(localeID).getName(); - nameCache.put(localeID, name); + name = new LocaleIDParser(tmpLocaleID).getName(); + nameCache.put(tmpLocaleID, name); } return name; } @@ -1043,12 +1198,13 @@ public final class ULocale implements Serializable { // display names /** - * Returns this locale's language localized for display in the default locale. + * Returns this locale's language localized for display in the default DISPLAY locale. * @return the localized language name. + * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayLanguage() { - return getDisplayLanguageInternal(this, getDefault(), false); + return getDisplayLanguageInternal(this, getDefault(Category.DISPLAY), false); } /** @@ -1086,14 +1242,14 @@ public final class ULocale implements Serializable { return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, false); } /** - * {@icu} Returns this locale's language localized for display in the default locale. + * {@icu} Returns this locale's language localized for display in the default DISPLAY locale. * If a dialect name is present in the data, then it is returned. * @return the localized language name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. 
+ * @see Category#DISPLAY + * @stable ICU 4.4 */ public String getDisplayLanguageWithDialect() { - return getDisplayLanguageInternal(this, getDefault(), true); + return getDisplayLanguageInternal(this, getDefault(Category.DISPLAY), true); } /** @@ -1101,8 +1257,7 @@ public final class ULocale implements Serializable { * If a dialect name is present in the data, then it is returned. * @param displayLocale the locale in which to display the name. * @return the localized language name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. + * @stable ICU 4.4 */ public String getDisplayLanguageWithDialect(ULocale displayLocale) { return getDisplayLanguageInternal(this, displayLocale, true); @@ -1115,8 +1270,7 @@ public final class ULocale implements Serializable { * @param localeID the id of the locale whose language will be displayed * @param displayLocaleID the id of the locale in which to display the name. * @return the localized language name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. + * @stable ICU 4.4 */ public static String getDisplayLanguageWithDialect(String localeID, String displayLocaleID) { return getDisplayLanguageInternal(new ULocale(localeID), new ULocale(displayLocaleID), @@ -1130,8 +1284,7 @@ public final class ULocale implements Serializable { * @param localeID the id of the locale whose language will be displayed. * @param displayLocale the locale in which to display the name. * @return the localized language name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. 
+ * @stable ICU 4.4 */ public static String getDisplayLanguageWithDialect(String localeID, ULocale displayLocale) { return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, true); @@ -1139,17 +1292,19 @@ public final class ULocale implements Serializable { private static String getDisplayLanguageInternal(ULocale locale, ULocale displayLocale, boolean useDialect) { + //#com.ibm.icu.base // No dialect support return locale.toLocale().getDisplayLanguage(displayLocale.toLocale()); } /** - * {@icu} Returns this locale's script localized for display in the default locale. + * {@icu} Returns this locale's script localized for display in the default DISPLAY locale. * @return the localized script name. + * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayScript() { - return getDisplayScriptInternal(this, getDefault()); + return getDisplayScriptInternal(this, getDefault(Category.DISPLAY)); } /** @@ -1187,17 +1342,32 @@ public final class ULocale implements Serializable { // displayLocaleID is canonical, localeID need not be since parsing will fix this. private static String getDisplayScriptInternal(ULocale locale, ULocale displayLocale) { - // No localization, just return the script code - return locale.getScript(); + //#com.ibm.icu.base + String dispScript = null; + try { + // Calling Locale#getDisplayScript on Java 7 or later + Method mGetDisplayScript = Locale.class.getMethod("getDisplayScript", Locale.class); + dispScript = (String) mGetDisplayScript.invoke(locale.toLocale(), displayLocale.toLocale()); + + } catch (NoSuchMethodException e) { + } catch (InvocationTargetException e) { + } catch (IllegalAccessException e) { + } + + if (dispScript == null) { + dispScript = locale.getScript(); + } + return dispScript; } /** - * Returns this locale's country localized for display in the default locale. + * Returns this locale's country localized for display in the default DISPLAY locale. * @return the localized country name. 
+ * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayCountry() { - return getDisplayCountryInternal(this, getDefault()); + return getDisplayCountryInternal(this, getDefault(Category.DISPLAY)); } /** @@ -1236,16 +1406,18 @@ public final class ULocale implements Serializable { // displayLocaleID is canonical, localeID need not be since parsing will fix this. private static String getDisplayCountryInternal(ULocale locale, ULocale displayLocale) { + //#com.ibm.icu.base return locale.toLocale().getDisplayCountry(displayLocale.toLocale()); } /** - * Returns this locale's variant localized for display in the default locale. + * Returns this locale's variant localized for display in the default DISPLAY locale. * @return the localized variant name. + * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayVariant() { - return getDisplayVariantInternal(this, getDefault()); + return getDisplayVariantInternal(this, getDefault(Category.DISPLAY)); } /** @@ -1283,18 +1455,20 @@ public final class ULocale implements Serializable { } private static String getDisplayVariantInternal(ULocale locale, ULocale displayLocale) { + //#com.ibm.icu.base return locale.toLocale().getDisplayVariant(displayLocale.toLocale()); } /** - * {@icu} Returns a keyword localized for display in the default locale. + * {@icu} Returns a keyword localized for display in the default DISPLAY locale. * @param keyword the keyword to be displayed. * @return the localized keyword name. 
* @see #getKeywords() + * @see Category#DISPLAY * @stable ICU 3.0 */ public static String getDisplayKeyword(String keyword) { - return getDisplayKeywordInternal(keyword, getDefault()); + return getDisplayKeywordInternal(keyword, getDefault(Category.DISPLAY)); } /** @@ -1322,18 +1496,20 @@ public final class ULocale implements Serializable { } private static String getDisplayKeywordInternal(String keyword, ULocale displayLocale) { + //#com.ibm.icu.base // No localization return keyword; } /** - * {@icu} Returns a keyword value localized for display in the default locale. + * {@icu} Returns a keyword value localized for display in the default DISPLAY locale. * @param keyword the keyword whose value is to be displayed. * @return the localized value name. + * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayKeywordValue(String keyword) { - return getDisplayKeywordValueInternal(this, keyword, getDefault()); + return getDisplayKeywordValueInternal(this, keyword, getDefault(Category.DISPLAY)); } /** @@ -1379,18 +1555,21 @@ public final class ULocale implements Serializable { // displayLocaleID is canonical, localeID need not be since parsing will fix this. private static String getDisplayKeywordValueInternal(ULocale locale, String keyword, ULocale displayLocale) { + //#com.ibm.icu.base keyword = AsciiUtil.toLowerString(keyword.trim()); String value = locale.getKeywordValue(keyword); + // No localization return value; } /** - * Returns this locale name localized for display in the default locale. + * Returns this locale name localized for display in the default DISPLAY locale. * @return the localized locale name. 
+ * @see Category#DISPLAY * @stable ICU 3.0 */ public String getDisplayName() { - return getDisplayNameInternal(this, getDefault()); + return getDisplayNameInternal(this, getDefault(Category.DISPLAY)); } /** @@ -1428,19 +1607,19 @@ public final class ULocale implements Serializable { } private static String getDisplayNameInternal(ULocale locale, ULocale displayLocale) { - // No localization, no script and keywords + //#com.ibm.icu.base return locale.toLocale().getDisplayName(displayLocale.toLocale()); } /** - * {@icu} Returns this locale name localized for display in the default locale. + * {@icu} Returns this locale name localized for display in the default DISPLAY locale. * If a dialect name is present in the locale data, then it is returned. * @return the localized locale name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. + * @see Category#DISPLAY + * @stable ICU 4.4 */ public String getDisplayNameWithDialect() { - return getDisplayNameWithDialectInternal(this, getDefault()); + return getDisplayNameWithDialectInternal(this, getDefault(Category.DISPLAY)); } /** @@ -1448,8 +1627,7 @@ public final class ULocale implements Serializable { * If a dialect name is present in the locale data, then it is returned. * @param displayLocale the locale in which to display the locale name. * @return the localized locale name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. + * @stable ICU 4.4 */ public String getDisplayNameWithDialect(ULocale displayLocale) { return getDisplayNameWithDialectInternal(this, displayLocale); @@ -1462,8 +1640,7 @@ public final class ULocale implements Serializable { * @param localeID the locale whose name is to be displayed. * @param displayLocaleID the id of the locale in which to display the locale name. * @return the localized locale name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. 
+ * @stable ICU 4.4 */ public static String getDisplayNameWithDialect(String localeID, String displayLocaleID) { return getDisplayNameWithDialectInternal(new ULocale(localeID), @@ -1477,15 +1654,15 @@ public final class ULocale implements Serializable { * @param localeID the locale whose name is to be displayed. * @param displayLocale the locale in which to display the locale name. * @return the localized locale name. - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. + * @stable ICU 4.4 */ public static String getDisplayNameWithDialect(String localeID, ULocale displayLocale) { return getDisplayNameWithDialectInternal(new ULocale(localeID), displayLocale); } private static String getDisplayNameWithDialectInternal(ULocale locale, ULocale displayLocale) { - // No dialect support, no script and keyword support + //#com.ibm.icu.base + // No dialect handling return locale.toLocale().getDisplayName(displayLocale.toLocale()); } @@ -1497,7 +1674,17 @@ public final class ULocale implements Serializable { * @stable ICU 4.0 */ public String getCharacterOrientation() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + //#com.ibm.icu.base + // Hardcoded + String lang = getLanguage(); + if (lang.equals("ar") || lang.equals("fa") || lang.equals("he") || lang.equals("ps") || lang.equals("ur")) { + return "right-to-left"; + } + String script = getScript(); + if (script.equals("Arab")) { + return "right-to-left"; + } + return "left-to-right"; } /** @@ -1508,7 +1695,8 @@ public final class ULocale implements Serializable { * @stable ICU 4.0 */ public String getLineOrientation() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + //#com.ibm.icu.base + return "top-to-bottom"; } /** @@ -1619,6 +1807,21 @@ public final class ULocale implements Serializable { } return availableLocales[j]; } + // compare to scriptless alias, so locales such as + // zh_TW, zh_CN are 
considered as available locales - see #7190 + if (aLocale.getScript().length() == 0 + && availableLocales[j].getScript().length() > 0 + && availableLocales[j].getLanguage().equals(aLocale.getLanguage()) + && availableLocales[j].getCountry().equals(aLocale.getCountry()) + && availableLocales[j].getVariant().equals(aLocale.getVariant())) { + ULocale minAvail = ULocale.minimizeSubtags(availableLocales[j]); + if (minAvail.getScript().length() == 0) { + if(setFallback != null) { + setFallback[0] = false; // not a fallback. + } + return aLocale; + } + } } Locale loc = aLocale.toLocale(); Locale parent = LocaleUtility.fallback(loc); @@ -1933,6 +2136,8 @@ public final class ULocale implements Serializable { return acceptList; } + private static final String UNDEFINED_LANGUAGE = "und"; + /** * {@icu} Adds the likely subtags for a provided locale ID, per the algorithm * described in the following CLDR technical report: @@ -1961,7 +2166,8 @@ public final class ULocale implements Serializable { * @stable ICU 4.0 */ public static ULocale addLikelySubtags(ULocale loc) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + //#com.ibm.icu.base + return loc; } /** @@ -1992,7 +2198,194 @@ public final class ULocale implements Serializable { * @stable ICU 4.0 */ public static ULocale minimizeSubtags(ULocale loc) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + //#com.ibm.icu.base + return loc; + } + + /** + * A trivial utility function that checks for a null + * reference or checks the length of the supplied String. + * + * @param string The string to check + * + * @return true if the String is empty, or if the reference is null. + */ + private static boolean isEmptyString(String string) { + return string == null || string.length() == 0; + } + + /** + * Append a tag to a StringBuilder, adding the separator if necessary.The tag must + * not be a zero-length string. + * + * @param tag The tag to add. 
+ * @param buffer The output buffer. + **/ + private static void appendTag(String tag, StringBuilder buffer) { + if (buffer.length() != 0) { + buffer.append(UNDERSCORE); + } + + buffer.append(tag); + } + + /** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be null references. + * + * If any of the language, script or region parameters are empty, and the alternateTags + * parameter is not null, it will be parsed for potential language, script and region tags + * to be used when constructing the new tag. If the alternateTags parameter is null, or + * it contains no language tag, the default tag for the unknown language is used. + * + * @param lang The language tag to use. + * @param script The script tag to use. + * @param region The region tag to use. + * @param trailing Any trailing data to append to the new tag. + * @param alternateTags A string containing any alternate tags. + * @return The new tag string. + **/ + private static String createTagString(String lang, String script, String region, + String trailing, String alternateTags) { + + LocaleIDParser parser = null; + boolean regionAppended = false; + + StringBuilder tag = new StringBuilder(); + + if (!isEmptyString(lang)) { + appendTag( + lang, + tag); + } + else if (isEmptyString(alternateTags)) { + /* + * Append the value for an unknown language, if + * we found no language. + */ + appendTag( + UNDEFINED_LANGUAGE, + tag); + } + else { + parser = new LocaleIDParser(alternateTags); + + String alternateLang = parser.getLanguage(); + + /* + * Append the value for an unknown language, if + * we found no language. + */ + appendTag( + !isEmptyString(alternateLang) ? alternateLang : UNDEFINED_LANGUAGE, + tag); + } + + if (!isEmptyString(script)) { + appendTag( + script, + tag); + } + else if (!isEmptyString(alternateTags)) { + /* + * Parse the alternateTags string for the script. 
+ */ + if (parser == null) { + parser = new LocaleIDParser(alternateTags); + } + + String alternateScript = parser.getScript(); + + if (!isEmptyString(alternateScript)) { + appendTag( + alternateScript, + tag); + } + } + + if (!isEmptyString(region)) { + appendTag( + region, + tag); + + regionAppended = true; + } + else if (!isEmptyString(alternateTags)) { + /* + * Parse the alternateTags string for the region. + */ + if (parser == null) { + parser = new LocaleIDParser(alternateTags); + } + + String alternateRegion = parser.getCountry(); + + if (!isEmptyString(alternateRegion)) { + appendTag( + alternateRegion, + tag); + + regionAppended = true; + } + } + + if (trailing != null && trailing.length() > 1) { + /* + * The current ICU format expects two underscores + * will separate the variant from the preceding + * parts of the tag, if there is no region. + */ + int separators = 0; + + if (trailing.charAt(0) == UNDERSCORE) { + if (trailing.charAt(1) == UNDERSCORE) { + separators = 2; + } + } + else { + separators = 1; + } + + if (regionAppended) { + /* + * If we appended a region, we may need to strip + * the extra separator from the variant portion. + */ + if (separators == 2) { + tag.append(trailing.substring(1)); + } + else { + tag.append(trailing); + } + } + else { + /* + * If we did not append a region, we may need to add + * an extra separator to the variant portion. + */ + if (separators == 1) { + tag.append(UNDERSCORE); + } + tag.append(trailing); + } + } + + return tag.toString(); + } + + /** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be null references. If the lang parameter is an empty string, the + * default value for an unknown language is written to the output buffer. + * + * @param lang The language tag to use. + * @param script The script tag to use. + * @param region The region tag to use. + * @param trailing Any trailing data to append to the new tag. + * @return The new String. 
+ **/ + static String createTagString(String lang, String script, String region, String trailing) { + return createTagString(lang, script, region, trailing, null); } // -------------------------------- @@ -2023,15 +2416,15 @@ public final class ULocale implements Serializable { /** * {@icu} Returns the extension (or private use) value associated with - * the specified singleton key, or null if there is no extension - * associated with the key. To be valid, the key must be one - * of [0-9A-Za-z]. Keys are case-insensitive, so + * the specified key, or null if there is no extension + * associated with the key. To be well-formed, the key must be one + * of [0-9A-Za-z]. Keys are case-insensitive, so * for example 'z' and 'Z' represent the same extension. * * @param key the extension key - * @return the extension, or null if this locale defines no - * extension for the specified key - * @throws IllegalArgumentException if the key is not valid + * @return The extension, or null if this locale defines no + * extension for the specified key. + * @throws IllegalArgumentException if key is not well-formed * @see #PRIVATE_USE_EXTENSION * @see #UNICODE_LOCALE_EXTENSION * @@ -2039,104 +2432,125 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. */ public String getExtension(char key) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + if (!LocaleExtensions.isValidKey(key)) { + throw new IllegalArgumentException("Invalid extension key: " + key); + } + return extensions().getExtensionValue(key); } /** * {@icu} Returns the set of extension keys associated with this locale, or the - * empty set if it has no extensions. The returned set is unmodifiable. + * empty set if it has no extensions. The returned set is unmodifiable. + * The keys will all be lower-case. 
* * @return the set of extension keys, or the empty set if this locale has * no extensions - * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Set getExtensionKeys() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + return extensions().getKeys(); } /** - * {@icu} Returns the Unicode locale type associated with the specified Unicode - * locale key for this locale. Unicode locale keywrods are specified - * by the 'u' extension and consist of key/type pairs. The key must be - * two alphanumeric characters in length, or an IllegalArgumentException - * is thrown. - * @param key the Unicode locale key - * @return the Unicode locale type associated with the key, or null if the - * locale does not define a value for the key. - * @throws IllegalArgumentException if the key is not valid. + * {@icu} Returns the set of unicode locale attributes associated with + * this locale, or the empty set if it has no attributes. The + * returned set is unmodifiable. + * + * @return The set of attributes. + * @draft ICU 4.6 + * @provisional This API might change or be removed in a future release. + */ + public Set getUnicodeLocaleAttributes() { + return extensions().getUnicodeLocaleAttributes(); + } + + /** + * {@icu} Returns the Unicode locale type associated with the specified Unicode locale key + * for this locale. Returns the empty string for keys that are defined with no type. + * Returns null if the key is not defined. Keys are case-insensitive. The key must + * be two alphanumeric characters ([0-9a-zA-Z]), or an IllegalArgumentException is + * thrown. * + * @param key the Unicode locale key + * @return The Unicode locale type associated with the key, or null if the + * locale does not define the key. 
+ * @throws IllegalArgumentException if the key is not well-formed + * @throws NullPointerException if key is null + * * @draft ICU 4.4 * @provisional This API might change or be removed in a future release. */ public String getUnicodeLocaleType(String key) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + if (!LocaleExtensions.isValidUnicodeLocaleKey(key)) { + throw new IllegalArgumentException("Invalid Unicode locale key: " + key); + } + return extensions().getUnicodeLocaleType(key); } /** - * {@icu} Returns the set of keys for Unicode locale keywords defined by this locale, - * or null if this locale has no locale extension. The returned set is - * immutable. - * - * @return the set of the Unicode locale keys, or null + * {@icu} Returns the set of Unicode locale keys defined by this locale, or the empty set if + * this locale has none. The returned set is immutable. Keys are all lower case. * + * @return The set of Unicode locale keys, or the empty set if this locale has + * no Unicode locale keywords. + * * @draft ICU 4.4 * @provisional This API might change or be removed in a future release. */ public Set getUnicodeLocaleKeys() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + return extensions().getUnicodeLocaleKeys(); } /** * {@icu} Returns a well-formed IETF BCP 47 language tag representing * this locale. * - *

- * If this ULocale object has language, country, or variant - * that does not satisfy the IETF BCP 47 language tag syntax requirements, - * this method handles these fields as described below: - *

- * Language: If language is empty or ill-formed (for example "a" or "e2"), - * it will be emitted as "und" (Undetermined). - *

- * Country: If country is ill-formed (for example "12" or "USA"), it - * will be omitted. - *

- * Variant: Variant is treated as consisting of subtags separated by - * underscore and converted to lower case letters. 'Well-formed' subtags - * consist of either an ASCII letter followed by 4-7 ASCII characters, or an - * ASCII digit followed by 3-7 ASCII characters. If well-formed, the variant - * is emitted as each subtag in order (separated by hyphen). Otherwise: + *

If this ULocale has a language, script, country, or + * variant that does not satisfy the IETF BCP 47 language tag + * syntax requirements, this method handles these fields as + * described below: + * + *

Language: If language is empty, or not well-formed + * (for example "a" or "e2"), it will be emitted as "und" (Undetermined). + * + *

Script: If script is not well-formed (for example "12" + * or "Latin"), it will be omitted. + * + *

Country: If country is not well-formed (for example "12" + * or "USA"), it will be omitted. + * + *

Variant: If variant is well-formed, each sub-segment + * (delimited by '-' or '_') is emitted as a subtag. Otherwise: *

    - *
  • if all sub-segments consist of 1 to 8 ASCII alphanumerics (for example - * "WIN", "WINDOWS_XP", "SOLARIS_10"), the first ill-formed variant subtag - * and all following sub-segments will be emitted as private use subtags prefixed - * by the special private use subtag "variant" followed by each subtag in order - * (separated by hyphen). For example, locale "en_US_WIN" is converted to language - * tag "en-US-x-variant-win", locale "de_WINDOWS_XP" is converted to language tag - * "de-windows-x-variant-xp". If this locale has a private use extension value, - * the special private use subtags prefixed by "variant" are appended after the - * locale's private use value. - *
  • if any subtag does not consist of 1 to 8 ASCII alphanumerics, the - * variant will be truncated and the problematic subtag and all following - * sub-segments will be omitted. If the remainder is non-empty, it will be - * emitted as a private use subtag as above (even if the remainder turns out - * to be well-formed). For example, "Solaris_isjustthecoolestthing" is emitted - * as "x-jvariant-Solaris", not as "solaris".
  • - *
- * - *

Note: Although the language tag created by this method - * satisfies the syntax requirements defined by the IETF BCP 47 - * specification, it is not always a valid BCP 47 language tag. - * For example, + * + *

  • if all sub-segments match [0-9a-zA-Z]{1,8} + * (for example "WIN" or "Oracle_JDK_Standard_Edition"), the first + * ill-formed sub-segment and all following will be appended to + * the private use subtag. The first appended subtag will be + * "lvariant", followed by the sub-segments in order, separated by + * hyphen. For example, "x-lvariant-WIN", + * "Oracle-x-lvariant-JDK-Standard-Edition". + * + *
  • if any sub-segment does not match + * [0-9a-zA-Z]{1,8}, the variant will be truncated + * and the problematic sub-segment and all following sub-segments + * will be omitted. If the remainder is non-empty, it will be + * emitted as a private use subtag as above (even if the remainder + * turns out to be well-formed). For example, + * "Solaris_isjustthecoolestthing" is emitted as + * "x-lvariant-Solaris", not as "solaris".
  • + * + *

    Note: Although the language tag created by this + * method is well-formed (satisfies the syntax requirements + * defined by the IETF BCP 47 specification), it is not + * necessarily a valid BCP 47 language tag. For example, *

    -     *   new ULocale("xx_YY").toLanguageTag();
    -     * 
    - * will return "xx-YY", but the language subtag "xx" and the region subtag "YY" - * are invalid because they are not registered in the - * - * IANA Language Subtag Registry. + * new Locale("xx", "YY").toLanguageTag(); + * + * will return "xx-YY", but the language subtag "xx" and the + * region subtag "YY" are invalid because they are not registered + * in the IANA Language Subtag Registry. * * @return a BCP47 language tag representing the locale * @see #forLanguageTag(String) @@ -2145,71 +2559,204 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. */ public String toLanguageTag() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + BaseLocale base = base(); + LocaleExtensions exts = extensions(); + + if (base.getVariant().equalsIgnoreCase("POSIX")) { + // special handling for variant POSIX + base = BaseLocale.getInstance(base.getLanguage(), base.getScript(), base.getRegion(), ""); + if (exts.getUnicodeLocaleType("va") == null) { + // add va-posix + InternalLocaleBuilder ilocbld = new InternalLocaleBuilder(); + try { + ilocbld.setLocale(BaseLocale.ROOT, exts); + ilocbld.setUnicodeLocaleKeyword("va", "posix"); + exts = ilocbld.getLocaleExtensions(); + } catch (LocaleSyntaxException e) { + // this should not happen + throw new RuntimeException(e); + } + } + } + + LanguageTag tag = LanguageTag.parseLocale(base, exts); + + StringBuilder buf = new StringBuilder(); + String subtag = tag.getLanguage(); + if (subtag.length() > 0) { + buf.append(LanguageTag.canonicalizeLanguage(subtag)); + } + + subtag = tag.getScript(); + if (subtag.length() > 0) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeScript(subtag)); + } + + subtag = tag.getRegion(); + if (subtag.length() > 0) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeRegion(subtag)); + } + + Listsubtags = tag.getVariants(); + for (String s : subtags) { + 
buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeVariant(s)); + } + + subtags = tag.getExtensions(); + for (String s : subtags) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeExtension(s)); + } + + subtag = tag.getPrivateuse(); + if (subtag.length() > 0) { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(LanguageTag.PRIVATEUSE).append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizePrivateuse(subtag)); + } + + return buf.toString(); } /** * {@icu} Returns a locale for the specified IETF BCP 47 language tag string. - * If the specified language tag contains any ill-formed subtags, - * the first such subtag and all following subtags are ignored. + * + *

    If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. Compare + * to {@link ULocale.Builder#setLanguageTag} which throws an exception + * in this case. + * + *

    The following conversions are performed:

      + * + *
    • The language code "und" is mapped to language "". + * + *
    • The portion of a private use subtag prefixed by "lvariant", + * if any, is removed and appended to the variant field in the + * result locale (without case normalization). If it is then + * empty, the private use subtag is discarded: + * + *
      +     *     ULocale loc;
+     *     loc = ULocale.forLanguageTag("en-US-x-lvariant-icu4j");
      +     *     loc.getVariant(); // returns "ICU4J"
      +     *     loc.getExtension('x'); // returns null
      +     *
+     *     loc = ULocale.forLanguageTag("de-icu4j-x-URP-lvariant-Abc-Def");
      +     *     loc.getVariant(); // returns "ICU4J_ABC_DEF"
      +     *     loc.getExtension('x'); // returns "urp"
      +     * 
      + * + *
    • When the languageTag argument contains an extlang subtag, + * the first such subtag is used as the language, and the primary + * language subtag and other extlang subtags are ignored: + * + *
      +     *     ULocale.forLanguageTag("ar-aao").getLanguage(); // returns "aao"
      +     *     ULocale.forLanguageTag("en-abc-def-us").toString(); // returns "abc_US"
      +     * 
      + * + *
    • Case is normalized. Language is normalized to lower case, + * script to title case, country to upper case, variant to upper case, + * and extensions to lower case. * *

      This implements the 'Language-Tag' production of BCP47, and * so supports grandfathered (regular and irregular) as well as * private use language tags. Stand alone private use tags are * represented as empty language and extension 'x-whatever', * and grandfathered tags are converted to their canonical replacements - * where they exist. Note that a few grandfathered tags have no - * modern replacement; these will be converted using the fallback - * described above so some information might be lost. + * where they exist. * - *

      For a list of grandfathered tags, see the - * - * IANA Language Subtag Registry. + *

      Grandfathered tags with canonical replacements are as follows: * - *

      Notes: This method converts private use subtags prefixed - * by "variant" to variant field in the result locale. For example, - * the code below will return "POSIX". - *

      -     *   ULocale.forLanguageTag("en-US-x-variant-posix).getVariant();
      -     * 
      + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
      grandfathered tag modern replacement
      art-lojban jbo
      i-ami ami
      i-bnn bnn
      i-hak hak
      i-klingon tlh
      i-lux lb
      i-navajo nv
      i-pwn pwn
      i-tao tao
      i-tay tay
      i-tsu tsu
      no-bok nb
      no-nyn nn
      sgn-BE-FR sfb
      sgn-BE-NL vgt
      sgn-CH-DE sgg
      zh-guoyu cmn
      zh-hakka hak
      zh-min-nan nan
      zh-xiang hsn
      + * + *

      Grandfathered tags with no modern replacement will be + * converted as follows: + * + * + * + * + * + * + * + * + * + * + * + *
      grandfathered tag converts to
      cel-gaulish xtg-x-cel-gaulish
      en-GB-oed en-GB-x-oed
      i-default en-x-i-default
      i-enochian und-x-i-enochian
      i-mingo see-x-i-mingo
      zh-min nan-x-zh-min
      + * + *

      For a list of all grandfathered tags, see the + * IANA Language Subtag Registry (search for "Type: grandfathered"). + * + *

      Note: there is no guarantee that toLanguageTag + * and forLanguageTag will round-trip. * * @param languageTag the language tag - * @return the locale that best represents the language tag - * @exception NullPointerException if languageTag is null + * @return The locale that best represents the language tag. + * @throws NullPointerException if languageTag is null * @see #toLanguageTag() + * @see ULocale.Builder#setLanguageTag(String) * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public static ULocale forLanguageTag(String languageTag) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + LanguageTag tag = LanguageTag.parse(languageTag, null); + InternalLocaleBuilder bldr = new InternalLocaleBuilder(); + bldr.setLanguageTag(tag); + return getInstance(bldr.getBaseLocale(), bldr.getLocaleExtensions()); } /** * Builder is used to build instances of ULocale - * from values configured by the setter. Unlike the ULocale + * from values configured by the setters. Unlike the ULocale * constructors, the Builder checks if a value configured by a - * setter satisfies the syntactical requirements defined by the ULocale + * setter satisfies the syntax requirements defined by the ULocale * class. A ULocale object created by a Builder is * well-formed and can be transformed to a well-formed IETF BCP 47 language tag * without losing information. * - *

      - * Note: The ULocale class does not provide - * any syntactical restrictions on variant, while BCP 47 - * requires each variant subtag to be 5 to 8 alphanumeric letters or a single - * numeric letter followed by 3 alphanumeric letters. By default, - * the setVariant method throws IllformedLocaleException - * for a variant that does not satisfy the syntax above. If it is - * necessary to support such a variant, you could use the constructor - * Builder(boolean isLenientVariant) passing true to - * skip the syntax validation for variant. However, you should keep in - * mind that a Locale object created this way might lose - * the variant information when transformed to a BCP 47 language tag. + *

      Note: The ULocale class does not provide any + * syntactic restrictions on variant, while BCP 47 requires each variant + * subtag to be 5 to 8 alphanumerics or a single numeric followed by 3 + * alphanumerics. The method setVariant throws + * IllformedLocaleException for a variant that does not satisfy + * this restriction. If it is necessary to support such a variant, use a + * ULocale constructor. However, keep in mind that a ULocale + * object created this way might lose the variant information when + * transformed to a BCP 47 language tag. * - *

      - * The following example shows how to create a ULocale object + *

The following example shows how to create a ULocale object * with the Builder. *

      *
      @@ -2227,6 +2774,8 @@ public final class ULocale implements Serializable {
            */
           public static final class Builder {
       
      +        private final InternalLocaleBuilder _locbld;
      +
               /**
                * Constructs an empty Builder. The default value of all
                * fields, extensions, and private use information is the
      @@ -2236,187 +2785,188 @@ public final class ULocale implements Serializable {
                * @provisional This API might change or be removed in a future release.
                */
               public Builder() {
      -            throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
      +            _locbld = new InternalLocaleBuilder();
               }
       
               /**
      -         * Constructs an empty Builder with an option whether to allow
      -         * setVariant to accept a value that does not
      -         * conform to the IETF BCP 47 variant subtag's syntax requirements.
      +         * Resets the Builder to match the provided
      +         * locale.  Existing state is discarded.
                *
      -         * @param isLenientVariant When true, this Builder
      -         * will accept an ill-formed variant.
      -         * @see #setVariant(String)
      +         * 

      All fields of the locale must be well-formed, see {@link Locale}. * - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. - */ - public Builder(boolean isLenientVariant) { - throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base"); - } - - /** - * Returns true if this Builder accepts a value that does - * not conform to the IETF BCP 47 variant subtag's syntax requirements - * in setVariant - * - * @return true if this Build accepts an ill-formed variant. - * - * @draft ICU 4.4 - * @provisional This API might change or be removed in a future release. - */ - public boolean isLenientVariant() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); - } - - - /** - * Resets the Builder to match the provided locale. - * The previous state of the builder is discarded. Fields that do - * not conform to the ULocale class specification, for example, - * a single letter language, are ill-formed. + *

      Locales with any ill-formed fields cause + * IllformedLocaleException to be thrown. * * @param locale the locale - * @return this builder + * @return This builder. * @throws IllformedLocaleException if locale has * any ill-formed fields. + * @throws NullPointerException if locale is null. * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Builder setLocale(ULocale locale) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setLocale(locale.base(), locale.extensions()); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Resets the builder to match the provided IETF BCP 47 language tag. - * The previous state of the builder is discarded. + * Resets the Builder to match the provided IETF BCP 47 + * language tag. Discards the existing state. Null and the + * empty string cause the builder to be reset, like {@link + * #clear}. Grandfathered tags (see {@link + * ULocale#forLanguageTag}) are converted to their canonical + * form before being processed. Otherwise, the language tag + * must be well-formed (see {@link ULocale}) or an exception is + * thrown (unlike ULocale.forLanguageTag, which + * just discards ill-formed and following portions of the + * tag). * * @param languageTag the language tag - * @return this builder - * @throws IllformedLocaleException if languageTag is ill-formed. - * @throws NullPointerException if languageTag is null. + * @return This builder. + * @throws IllformedLocaleException if languageTag is ill-formed * @see ULocale#forLanguageTag(String) * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. 
*/ public Builder setLanguageTag(String languageTag) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + ParseStatus sts = new ParseStatus(); + LanguageTag tag = LanguageTag.parse(languageTag, sts); + if (sts.isError()) { + throw new IllformedLocaleException(sts.getErrorMessage(), sts.getErrorIndex()); + } + _locbld.setLanguageTag(tag); + + return this; } /** - * Sets the language. If language is the empty string, - * the language in this Builder will be removed. - * Typical language value is a two or three-letter language + * Sets the language. If language is the empty string or + * null, the language in this Builder is removed. Otherwise, + * the language must be well-formed + * or an exception is thrown. + * + *

      The typical language value is a two or three-letter language * code as defined in ISO639. - * Well-formed values are any string of two to eight alpha - * letters. This method accepts upper case alpha letters - * [A-Z], but the language value in the ULocale - * created by the Builder is always normalized - * to lower case letters. * * @param language the language - * @return this builder + * @return This builder. * @throws IllformedLocaleException if language is ill-formed * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Builder setLanguage(String language) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setLanguage(language); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Sets the script. If script is the empty string, + * Sets the script. If script is null or the empty string, * the script in this Builder is removed. - * Typical script value is a four-letter script code as defined by ISO 15924. - * Well-formed values are any string of four alpha letters. - * This method accepts both upper and lower case alpha letters [a-zA-Z], - * but the script value in the ULocale created by the - * Builder is always normalized to title case - * (the first letter is upper case and the rest of letters are lower case). + * Otherwise, the script must be well-formed or an exception is thrown. + * + *

      The typical script value is a four-letter script code as defined by ISO 15924. * * @param script the script - * @return this builder + * @return This builder. * @throws IllformedLocaleException if script is ill-formed * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Builder setScript(String script) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setScript(script); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Sets the region. If region is the empty string, the region - * in this Builder is removed. - * Typical region value is a two-letter ISO 3166 code or a three-digit UN M.49 - * area code. Well-formed values are any two-letter or three-digit string. - * This method accepts lower case letters [a-z], but the country value in - * the ULocale created by the Builder is always - * normalized to upper case. + * Sets the region. If region is null or the empty string, the region + * in this Builder is removed. Otherwise, + * the region must be well-formed or an exception is thrown. + * + *

      The typical region value is a two-letter ISO 3166 code or a + * three-digit UN M.49 area code. + * + *

      The country value in the Locale created by the + * Builder is always normalized to upper case. * * @param region the region - * @return this builder + * @return This builder. * @throws IllformedLocaleException if region is ill-formed * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Builder setRegion(String region) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setRegion(region); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Sets the variant. If variant is the empty string, the - * variant in this Builder is removed. - *

      - * Note: By default, this method checks if variant - * satisfies the IETF BCP 47 variant subtag's syntax requirements. - * However, the ULocale class itself does not impose any syntactical - * restriction on variant. When a Builder is created by the - * constructor Builder(boolean isLenientVariant) - * with true, this method skips the syntax check. + * Sets the variant. If variant is null or the empty string, the + * variant in this Builder is removed. Otherwise, it + * must consist of one or more well-formed subtags, or an exception is thrown. + * + *

      Note: This method checks if variant + * satisfies the IETF BCP 47 variant subtag's syntax requirements, + * and normalizes the value to lowercase letters. However, + * the ULocale class does not impose any syntactic + * restriction on variant. To set such a variant, + * use a ULocale constructor. * * @param variant the variant - * @return this builder + * @return This builder. * @throws IllformedLocaleException if variant is ill-formed * * @draft ICU 4.2 * @provisional This API might change or be removed in a future release. */ public Builder setVariant(String variant) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setVariant(variant); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Sets the extension for the given key. If the value is the - * empty string, the extension is removed. Legal keys are - * characters in the ranges [0-9A-Za-z]. Keys - * are case-insensitive, so for example 'z' and 'Z' represent - * the same extension. In general, well-formed values are any - * series of fields of two to eight alphanumeric characters, - * separated by hyphen or underscore. + * Sets the extension for the given key. If the value is null or the + * empty string, the extension is removed. Otherwise, the extension + * must be well-formed or an exception is thrown. * *

      Note: The key {@link ULocale#UNICODE_LOCALE_EXTENSION * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension. * Setting a value for this key replaces any existing Unicode locale key/type * pairs with those defined in the extension. - * To be well-formed, a value for this extension must meet the additional - * constraints that each locale key is two alphanumeric characters, - * followed by at least one locale type subtag represented by - * three to eight alphanumeric characters, and that the keys and types - * be legal Unicode locale keys and values. * *

      Note: The key {@link ULocale#PRIVATE_USE_EXTENSION * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be - * well-formed, the value for this key needs only to have fields of one to + * well-formed, the value for this key needs only to have subtags of one to * eight alphanumeric characters, not two to eight as in the general case. * * @param key the extension key * @param value the extension value - * @return this builder + * @return This builder. * @throws IllformedLocaleException if key is illegal * or value is ill-formed * @see #setUnicodeLocaleKeyword(String, String) @@ -2425,30 +2975,92 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. */ public Builder setExtension(char key, String value) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setExtension(key, value); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** - * Sets the Unicode locale keyword type for the given key. If the - * value is the empty string, the Unicode keyword is removed. - * Well-formed keys are strings of two alphanumeric characters. - * Well-formed types are one or more subtags where each of them is - * three to eight alphanumeric characters. - *

      - * Note:Setting the 'u' extension replaces all Unicode locale - * keywords with those defined in the extension. + * Sets the Unicode locale keyword type for the given key. If the type + * is null, the Unicode keyword is removed. Otherwise, the key must be + * non-null and both key and type must be well-formed or an exception + * is thrown. + * + *

      Keys and types are converted to lower case. + * + *

      Note:Setting the 'u' extension via {@link #setExtension} + * replaces all Unicode locale keywords with those defined in the + * extension. + * * @param key the Unicode locale key * @param type the Unicode locale type - * @return this builder + * @return This builder. * @throws IllformedLocaleException if key or type * is ill-formed + * @throws NullPointerException if key is null * @see #setExtension(char, String) * * @draft ICU 4.4 * @provisional This API might change or be removed in a future release. */ public Builder setUnicodeLocaleKeyword(String key, String type) { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + try { + _locbld.setUnicodeLocaleKeyword(key, type); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Adds a unicode locale attribute, if not already present, otherwise + * has no effect. The attribute must not be null and must be well-formed + * or an exception is thrown. + * + * @param attribute the attribute + * @return This builder. + * @throws NullPointerException if attribute is null + * @throws IllformedLocaleException if attribute is ill-formed + * @see #setExtension(char, String) + * + * @draft ICU 4.6 + * @provisional This API might change or be removed in a future release. + */ + public Builder addUnicodeLocaleAttribute(String attribute) { + try { + _locbld.addUnicodeLocaleAttribute(attribute); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Removes a unicode locale attribute, if present, otherwise has no + * effect. The attribute must not be null and must be well-formed + * or an exception is thrown. + * + *

      Attribute comparision for removal is case-insensitive. + * + * @param attribute the attribute + * @return This builder. + * @throws NullPointerException if attribute is null + * @throws IllformedLocaleException if attribute is ill-formed + * @see #setExtension(char, String) + * + * @draft ICU 4.6 + * @provisional This API might change or be removed in a future release. + */ + public Builder removeUnicodeLocaleAttribute(String attribute) { + try { + _locbld.removeUnicodeLocaleAttribute(attribute); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; } /** @@ -2460,7 +3072,8 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. */ public Builder clear() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + _locbld.clear(); + return this; } /** @@ -2474,11 +3087,12 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. */ public Builder clearExtensions() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + _locbld.clearExtensions(); + return this; } /** - * Returns an instance of Locale created from the fields set + * Returns an instance of ULocale created from the fields set * on this builder. * * @return a new Locale @@ -2487,7 +3101,1305 @@ public final class ULocale implements Serializable { * @provisional This API might change or be removed in a future release. 
*/ public ULocale build() { - throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); + return getInstance(_locbld.getBaseLocale(), _locbld.getLocaleExtensions()); + } + } + + private static ULocale getInstance(BaseLocale base, LocaleExtensions exts) { + String id = lscvToID(base.getLanguage(), base.getScript(), base.getRegion(), + base.getVariant()); + + Set extKeys = exts.getKeys(); + if (!extKeys.isEmpty()) { + // legacy locale ID assume Unicode locale keywords and + // other extensions are at the same level. + // e.g. @a=ext-for-aa;calendar=japanese;m=ext-for-mm;x=priv-use + + TreeMap kwds = new TreeMap(); + for (Character key : extKeys) { + Extension ext = exts.getExtension(key); + if (ext instanceof UnicodeLocaleExtension) { + UnicodeLocaleExtension uext = (UnicodeLocaleExtension)ext; + Set ukeys = uext.getUnicodeLocaleKeys(); + for (String bcpKey : ukeys) { + String bcpType = uext.getUnicodeLocaleType(bcpKey); + // convert to legacy key/type + String lkey = bcp47ToLDMLKey(bcpKey); + String ltype = bcp47ToLDMLType(lkey, ((bcpType.length() == 0) ? 
"true" : bcpType)); // use "true" as the value of typeless keywords + // special handling for u-va-posix, since this is a variant, not a keyword + if (lkey.equals("va") && ltype.equals("posix") && base.getVariant().length() == 0) { + id = id + "_POSIX"; + } else { + kwds.put(lkey, ltype); + } + } + // Mapping Unicode locale attribute to the special keyword, attribute=xxx-yyy + Set uattributes = uext.getUnicodeLocaleAttributes(); + if (uattributes.size() > 0) { + StringBuilder attrbuf = new StringBuilder(); + for (String attr : uattributes) { + if (attrbuf.length() > 0) { + attrbuf.append('-'); + } + attrbuf.append(attr); + } + kwds.put(LOCALE_ATTRIBUTE_KEY, attrbuf.toString()); + } + } else { + kwds.put(String.valueOf(key), ext.getValue()); + } + } + + if (!kwds.isEmpty()) { + StringBuilder buf = new StringBuilder(id); + buf.append("@"); + Set> kset = kwds.entrySet(); + boolean insertSep = false; + for (Map.Entry kwd : kset) { + if (insertSep) { + buf.append(";"); + } else { + insertSep = true; + } + buf.append(kwd.getKey()); + buf.append("="); + buf.append(kwd.getValue()); + } + + id = buf.toString(); + } + } + return new ULocale(id); + } + + private BaseLocale base() { + if (baseLocale == null) { + String language = getLanguage(); + if (equals(ULocale.ROOT)) { + language = ""; + } + baseLocale = BaseLocale.getInstance(language, getScript(), getCountry(), getVariant()); + } + return baseLocale; + } + + private LocaleExtensions extensions() { + if (extensions == null) { + Iterator kwitr = getKeywords(); + if (kwitr == null) { + extensions = LocaleExtensions.EMPTY_EXTENSIONS; + } else { + InternalLocaleBuilder intbld = new InternalLocaleBuilder(); + while (kwitr.hasNext()) { + String key = kwitr.next(); + if (key.equals(LOCALE_ATTRIBUTE_KEY)) { + // special keyword used for representing Unicode locale attributes + String[] uattributes = getKeywordValue(key).split("[-_]"); + for (String uattr : uattributes) { + try { + intbld.addUnicodeLocaleAttribute(uattr); + } 
catch (LocaleSyntaxException e) { + // ignore and fall through + } + } + } else if (key.length() >= 2) { + String bcpKey = ldmlKeyToBCP47(key); + String bcpType = ldmlTypeToBCP47(key, getKeywordValue(key)); + if (bcpKey != null && bcpType != null) { + try { + intbld.setUnicodeLocaleKeyword(bcpKey, bcpType); + } catch (LocaleSyntaxException e) { + // ignore and fall through + } + } + } else if (key.length() == 1 && (key.charAt(0) != UNICODE_LOCALE_EXTENSION)) { + try { + intbld.setExtension(key.charAt(0), getKeywordValue(key).replace("_", + LanguageTag.SEP)); + } catch (LocaleSyntaxException e) { + // ignore and fall through + } + } + } + extensions = intbld.getLocaleExtensions(); + } + } + return extensions; + } + + // + // LDML legacy/BCP47 key and type mapping functions + // + private static String ldmlKeyToBCP47(String key) { + //#com.ibm.icu.base + // normalize key to lowercase + key = AsciiUtil.toLowerString(key); + String bcpKey = null; + + for (int i = 0; i < KEYMAP.length; i += 2) { + if (key.equals(KEYMAP[i])) { + bcpKey = KEYMAP[i + 1]; + break; + } + } + + if (bcpKey == null) { + if (key.length() == 2 && LanguageTag.isExtensionSubtag(key)) { + return key; + } + return null; + } + + return bcpKey; + } + + private static String bcp47ToLDMLKey(String bcpKey) { + //#com.ibm.icu.base + // normalize bcp key to lowercase + bcpKey = AsciiUtil.toLowerString(bcpKey); + String key = null; + + for (int i = 0; i < KEYMAP.length; i += 2) { + if (bcpKey.equals(KEYMAP[i + 1])) { + key = KEYMAP[i]; + break; + } + } + + if (key == null) { + return bcpKey; + } + + return key; + } + + private static String ldmlTypeToBCP47(String key, String type) { + //#com.ibm.icu.base + + // keys are case-insensitive, while types are case-sensitive + key = AsciiUtil.toLowerString(key); + String bcpType = null; + String[] map = null; + String[] aliasMap = null; + + if (key.equals("calendar")) { + map = TYPEMAP_CALENDAR; + } else if (key.equals("colalternate")) { + map = 
TYPEMAP_COLALTERNATE; + } else if (key.equals("colbackwards")) { + map = TYPEMAP_COLBACKWARDS; + } else if (key.equals("colcasefirst")) { + map = TYPEMAP_COLCASEFIRST; + } else if (key.equals("colcaselevel")) { + map = TYPEMAP_COLCASELEVEL; + } else if (key.equals("colhiraganaquaternary")) { + map = TYPEMAP_COLHIRAGANAQUATERNARY; + } else if (key.equals("collation")) { + map = TYPEMAP_COLLATION; + } else if (key.equals("colnormalization")) { + map = TYPEMAP_COLNORMALIZATION; + } else if (key.equals("colnumeric")) { + map = TYPEMAP_COLNUMERIC; + } else if (key.equals("colstrength")) { + map = TYPEMAP_COLSTRENGTH; + aliasMap = TYPEALIAS_COLSTRENGTH; + } else if (key.equals("timezone")) { + map = TYPEMAP_TIMEZONE; + aliasMap = TYPEALIAS_TIMEZONE; + } + + // LDML alias -> LDML canonical + if (aliasMap != null) { + for (int i = 0; i < aliasMap.length; i += 2) { + if (type.equals(aliasMap[i])) { + type = aliasMap[i + 1]; + break; + } + } + } + + // LDML type -> BCP47 type + if (map != null) { + for (int i = 0; i < map.length; i += 2) { + if (type.equals(map[i])) { + bcpType = map[i + 1]; + break; + } + } + } + + if (bcpType == null) { + int typeLen = type.length(); + if (typeLen >= 3 && typeLen <= 8 && LanguageTag.isExtensionSubtag(type)) { + return type; + } + return null; + } + return bcpType; + } + + private static String bcp47ToLDMLType(String key, String bcpType) { + //#com.ibm.icu.base + + // normalize key/bcpType to lowercase + key = AsciiUtil.toLowerString(key); + bcpType = AsciiUtil.toLowerString(bcpType); + String type = null; + String[] map = null; + + if (key.equals("calendar")) { + map = TYPEMAP_CALENDAR; + } else if (key.equals("colalternate")) { + map = TYPEMAP_COLALTERNATE; + } else if (key.equals("colbackwards")) { + map = TYPEMAP_COLBACKWARDS; + } else if (key.equals("colcasefirst")) { + map = TYPEMAP_COLCASEFIRST; + } else if (key.equals("colcaselevel")) { + map = TYPEMAP_COLCASELEVEL; + } else if (key.equals("colhiraganaquaternary")) { + map = 
TYPEMAP_COLHIRAGANAQUATERNARY; + } else if (key.equals("collation")) { + map = TYPEMAP_COLLATION; + } else if (key.equals("colnormalization")) { + map = TYPEMAP_COLNORMALIZATION; + } else if (key.equals("colnumeric")) { + map = TYPEMAP_COLNUMERIC; + } else if (key.equals("colstrength")) { + map = TYPEMAP_COLSTRENGTH; + } else if (key.equals("timezone")) { + map = TYPEMAP_TIMEZONE; + } + + if (map != null) { + for (int i = 0; i < map.length; i += 2) { + if (bcpType.equals(map[i + 1])) { + type = map[i]; + break; + } + } + } + + return (type != null) ? type : bcpType; + } + + /* + * JDK Locale Helper + */ + private static final class JDKLocaleHelper { + private static boolean isJava7orNewer = false; + + /* + * New methods in Java 7 Locale class + */ + private static Method mGetScript; + private static Method mGetExtensionKeys; + private static Method mGetExtension; + private static Method mGetUnicodeLocaleKeys; + private static Method mGetUnicodeLocaleAttributes; + private static Method mGetUnicodeLocaleType; + private static Method mForLanguageTag; + + private static Method mGetDefault; + private static Method mSetDefault; + private static Object eDISPLAY; + private static Object eFORMAT; + + /* + * This table is used for mapping between ICU and special Java + * 6 locales. When an ICU locale matches the ICU base or the minimum base with + * keyword/value, the ICU locale is mapped to the Java locale. + * For example, both ja_JP@calendar=japanese and ja@calendar=japanese + * are mapped to Java locale "ja_JP_JP". ICU locale "nn" is mapped + * to Java locale "no_NO_NY". 
+ */ + private static final String[][] JAVA6_MAPDATA = { + // { Java, ICU base, keyword, value, minimum base + { "ja_JP_JP", "ja_JP", "calendar", "japanese", "ja"}, + { "no_NO_NY", "nn_NO", null, null, "nn"}, + { "th_TH_TH", "th_TH", "numbers", "thai", "th"}, + }; + + static { + do { + try { + mGetScript = Locale.class.getMethod("getScript", (Class[]) null); + mGetExtensionKeys = Locale.class.getMethod("getExtensionKeys", (Class[]) null); + mGetExtension = Locale.class.getMethod("getExtension", char.class); + mGetUnicodeLocaleKeys = Locale.class.getMethod("getUnicodeLocaleKeys", (Class[]) null); + mGetUnicodeLocaleAttributes = Locale.class.getMethod("getUnicodeLocaleAttributes", (Class[]) null); + mGetUnicodeLocaleType = Locale.class.getMethod("getUnicodeLocaleType", String.class); + mForLanguageTag = Locale.class.getMethod("forLanguageTag", String.class); + + Class cCategory = null; + Class[] classes = Locale.class.getDeclaredClasses(); + for (Class c : classes) { + if (c.getName().equals("java.util.Locale$Category")) { + cCategory = c; + break; + } + } + if (cCategory == null) { + break; + } + mGetDefault = Locale.class.getDeclaredMethod("getDefault", cCategory); + mSetDefault = Locale.class.getDeclaredMethod("setDefault", cCategory, Locale.class); + + Method mName = cCategory.getMethod("name", (Class[]) null); + Object[] enumConstants = cCategory.getEnumConstants(); + for (Object e : enumConstants) { + String catVal = (String)mName.invoke(e, (Object[])null); + if (catVal.equals("DISPLAY")) { + eDISPLAY = e; + } else if (catVal.equals("FORMAT")) { + eFORMAT = e; + } + } + if (eDISPLAY == null || eFORMAT == null) { + break; + } + isJava7orNewer = true; + } catch (NoSuchMethodException e) { + } catch (IllegalArgumentException e) { + } catch (IllegalAccessException e) { + } catch (InvocationTargetException e) { + } catch (SecurityException e) { + // TODO : report? 
+ } + } while (false); + } + + private JDKLocaleHelper() { + } + + public static boolean isJava7orNewer() { + return isJava7orNewer; + } + + public static ULocale toULocale(Locale loc) { + return isJava7orNewer ? toULocale7(loc) : toULocale6(loc); + } + + public static Locale toLocale(ULocale uloc) { + return isJava7orNewer ? toLocale7(uloc) : toLocale6(uloc); + } + + private static ULocale toULocale7(Locale loc) { + String language = loc.getLanguage(); + String script = ""; + String country = loc.getCountry(); + String variant = loc.getVariant(); + + Set attributes = null; + Map keywords = null; + + try { + script = (String) mGetScript.invoke(loc, (Object[]) null); + @SuppressWarnings("unchecked") + Set extKeys = (Set) mGetExtensionKeys.invoke(loc, (Object[]) null); + if (!extKeys.isEmpty()) { + for (Character extKey : extKeys) { + if (extKey.charValue() == 'u') { + // Found Unicode locale extension + + // attributes + @SuppressWarnings("unchecked") + Set uAttributes = (Set) mGetUnicodeLocaleAttributes.invoke(loc, (Object[]) null); + if (!uAttributes.isEmpty()) { + attributes = new TreeSet(); + for (String attr : uAttributes) { + attributes.add(attr); + } + } + + // keywords + @SuppressWarnings("unchecked") + Set uKeys = (Set) mGetUnicodeLocaleKeys.invoke(loc, (Object[]) null); + for (String kwKey : uKeys) { + String kwVal = (String) mGetUnicodeLocaleType.invoke(loc, kwKey); + if (kwVal != null) { + if (kwKey.equals("va")) { + // va-* is interpreted as a variant + variant = (variant.length() == 0) ? 
kwVal : kwVal + "_" + variant; + } else { + if (keywords == null) { + keywords = new TreeMap(); + } + keywords.put(kwKey, kwVal); + } + } + } + } else { + String extVal = (String) mGetExtension.invoke(loc, extKey); + if (extVal != null) { + if (keywords == null) { + keywords = new TreeMap(); + } + keywords.put(String.valueOf(extKey), extVal); + } + } + } + } + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + throw new RuntimeException(e); + } + + // JDK locale no_NO_NY is not interpreted as Nynorsk by ICU, + // and it should be transformed to nn_NO. + + // Note: JDK7+ understands both no_NO_NY and nn_NO. When converting an + // ICU locale to JDK, we do not need to map nn_NO back to no_NO_NY. + + if (language.equals("no") && country.equals("NO") && variant.equals("NY")) { + language = "nn"; + variant = ""; + } + + // Constructing ID + StringBuilder buf = new StringBuilder(language); + + if (script.length() > 0) { + buf.append('_'); + buf.append(script); + } + + if (country.length() > 0) { + buf.append('_'); + buf.append(country); + } + + if (variant.length() > 0) { + if (country.length() == 0) { + buf.append('_'); + } + buf.append('_'); + buf.append(variant); + } + + if (attributes != null) { + // transform Unicode attributes into a keyword + StringBuilder attrBuf = new StringBuilder(); + for (String attr : attributes) { + if (attrBuf.length() != 0) { + attrBuf.append('-'); + } + attrBuf.append(attr); + } + if (keywords == null) { + keywords = new TreeMap(); + } + keywords.put(LOCALE_ATTRIBUTE_KEY, attrBuf.toString()); + } + + if (keywords != null) { + buf.append('@'); + boolean addSep = false; + for (Entry kwEntry : keywords.entrySet()) { + String kwKey = kwEntry.getKey(); + String kwVal = kwEntry.getValue(); + + if (kwKey.length() != 1) { + // Unicode locale key + kwKey = bcp47ToLDMLKey(kwKey); + // use "true" as the value of typeless keywords + kwVal = bcp47ToLDMLType(kwKey, ((kwVal.length() == 0) ? 
"true" : kwVal)); + } + + if (addSep) { + buf.append(';'); + } else { + addSep = true; + } + buf.append(kwKey); + buf.append('='); + buf.append(kwVal); + } + } + + return new ULocale(getName(buf.toString()), loc); + } + + private static ULocale toULocale6(Locale loc) { + ULocale uloc = null; + String locStr = loc.toString(); + if (locStr.length() == 0) { + uloc = ULocale.ROOT; + } else { + for (int i = 0; i < JAVA6_MAPDATA.length; i++) { + if (JAVA6_MAPDATA[i][0].equals(locStr)) { + LocaleIDParser p = new LocaleIDParser(JAVA6_MAPDATA[i][1]); + p.setKeywordValue(JAVA6_MAPDATA[i][2], JAVA6_MAPDATA[i][3]); + locStr = p.getName(); + break; + } + } + uloc = new ULocale(getName(locStr), loc); + } + return uloc; + } + + private static Locale toLocale7(ULocale uloc) { + Locale loc = null; + String ulocStr = uloc.getName(); + if (uloc.getScript().length() > 0 || ulocStr.contains("@")) { + // With script or keywords available, the best way + // to get a mapped Locale is to go through a language tag. + // A Locale with script or keywords can only have variants + // that is 1 to 8 alphanum. If this ULocale has a variant + // subtag not satisfying the criteria, the variant subtag + // will be lost. + String tag = uloc.toLanguageTag(); + + // Workaround for variant casing problem: + // + // The variant field in ICU is case insensitive and normalized + // to upper case letters by getVariant(), while + // the variant field in JDK Locale is case sensitive. + // ULocale#toLanguageTag use lower case characters for + // BCP 47 variant and private use x-lvariant. + // + // Locale#forLanguageTag in JDK preserves character casing + // for variant. Because ICU always normalizes variant to + // upper case, we convert language tag to upper case here. 
+ tag = AsciiUtil.toUpperString(tag); + + try { + loc = (Locale)mForLanguageTag.invoke(null, tag); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + throw new RuntimeException(e); + } + } + if (loc == null) { + // Without script or keywords, use a Locale constructor, + // so we can preserve any ill-formed variants. + loc = new Locale(uloc.getLanguage(), uloc.getCountry(), uloc.getVariant()); + } + return loc; + } + + private static Locale toLocale6(ULocale uloc) { + String locstr = uloc.getBaseName(); + for (int i = 0; i < JAVA6_MAPDATA.length; i++) { + if (locstr.equals(JAVA6_MAPDATA[i][1]) || locstr.equals(JAVA6_MAPDATA[i][4])) { + if (JAVA6_MAPDATA[i][2] != null) { + String val = uloc.getKeywordValue(JAVA6_MAPDATA[i][2]); + if (val != null && val.equals(JAVA6_MAPDATA[i][3])) { + locstr = JAVA6_MAPDATA[i][0]; + break; + } + } else { + locstr = JAVA6_MAPDATA[i][0]; + break; + } + } + } + LocaleIDParser p = new LocaleIDParser(locstr); + String[] names = p.getLanguageScriptCountryVariant(); + return new Locale(names[0], names[2], names[3]); + } + + public static Locale getDefault(Category category) { + Locale loc = Locale.getDefault(); + if (isJava7orNewer) { + Object cat = null; + switch (category) { + case DISPLAY: + cat = eDISPLAY; + break; + case FORMAT: + cat = eFORMAT; + break; + } + if (cat != null) { + try { + loc = (Locale)mGetDefault.invoke(null, cat); + } catch (InvocationTargetException e) { + // fall through - use the base default + } catch (IllegalArgumentException e) { + // fall through - use the base default + } catch (IllegalAccessException e) { + // fall through - use the base default + } + } + } + return loc; + } + + public static void setDefault(Category category, Locale newLocale) { + if (isJava7orNewer) { + Object cat = null; + switch (category) { + case DISPLAY: + cat = eDISPLAY; + break; + case FORMAT: + cat = eFORMAT; + break; + } + if (cat != null) { + try { + 
mSetDefault.invoke(null, cat, newLocale); + } catch (InvocationTargetException e) { + // fall through - no effects + } catch (IllegalArgumentException e) { + // fall through - no effects + } catch (IllegalAccessException e) { + // fall through - no effects + } + } + } } } + + private static final String[] KEYMAP = { + "calendar", "ca", + "colalternate", "ka", + "colbackwards", "kb", + "colcasefirst", "kf", + "colcaselevel", "kc", + "colhiraganaquaternary", "kh", + "collation", "co", + "colnormalization", "kk", + "colnumeric", "kn", + "colstrength", "ks", + "currency", "cu", + "numbers", "nu", + "timezone", "tz", + "variabletop", "vt", + }; + + private static final String[] TYPEMAP_CALENDAR = { + "ethiopic-amete-alem", "ethioaa", + "gregorian", "gregory", + "islamic-civil", "islamicc", + }; + + private static final String[] TYPEMAP_COLALTERNATE = { + "non-ignorable", "noignore", + }; + + private static final String[] TYPEMAP_COLBACKWARDS = { + "no", "false", + "yes", "true", + }; + + private static final String[] TYPEMAP_COLCASEFIRST = { + "no", "false", + }; + + private static final String[] TYPEMAP_COLCASELEVEL = { + "no", "false", + "yes", "true", + }; + + private static final String[] TYPEMAP_COLHIRAGANAQUATERNARY = { + "no", "false", + "yes", "true", + }; + + private static final String[] TYPEMAP_COLLATION = { + "dictionary", "dict", + "gb2312han", "gb2312", + "phonebook", "phonebk", + "traditional", "trad", + }; + + private static final String[] TYPEMAP_COLNORMALIZATION = { + "no", "false", + "yes", "true", + }; + + private static final String[] TYPEMAP_COLNUMERIC = { + "no", "false", + "yes", "true", + }; + + private static final String[] TYPEMAP_COLSTRENGTH = { + "identical", "identic", + "primary", "level1", + "quaternary", "level4", + "secondary", "level2", + "tertiary", "level3", + }; + + private static final String[] TYPEMAP_TIMEZONE = { + "Africa/Abidjan", "ciabj", + "Africa/Accra", "ghacc", + "Africa/Addis_Ababa", "etadd", + "Africa/Algiers", "dzalg", 
+ "Africa/Asmera", "erasm", + "Africa/Bamako", "mlbko", + "Africa/Bangui", "cfbgf", + "Africa/Banjul", "gmbjl", + "Africa/Bissau", "gwoxb", + "Africa/Blantyre", "mwblz", + "Africa/Brazzaville", "cgbzv", + "Africa/Bujumbura", "bibjm", + "Africa/Cairo", "egcai", + "Africa/Casablanca", "macas", + "Africa/Ceuta", "esceu", + "Africa/Conakry", "gncky", + "Africa/Dakar", "sndkr", + "Africa/Dar_es_Salaam", "tzdar", + "Africa/Djibouti", "djjib", + "Africa/Douala", "cmdla", + "Africa/El_Aaiun", "eheai", + "Africa/Freetown", "slfna", + "Africa/Gaborone", "bwgbe", + "Africa/Harare", "zwhre", + "Africa/Johannesburg", "zajnb", + "Africa/Kampala", "ugkla", + "Africa/Khartoum", "sdkrt", + "Africa/Kigali", "rwkgl", + "Africa/Kinshasa", "cdfih", + "Africa/Lagos", "nglos", + "Africa/Libreville", "galbv", + "Africa/Lome", "tglfw", + "Africa/Luanda", "aolad", + "Africa/Lubumbashi", "cdfbm", + "Africa/Lusaka", "zmlun", + "Africa/Malabo", "gqssg", + "Africa/Maputo", "mzmpm", + "Africa/Maseru", "lsmsu", + "Africa/Mbabane", "szqmn", + "Africa/Mogadishu", "somgq", + "Africa/Monrovia", "lrmlw", + "Africa/Nairobi", "kenbo", + "Africa/Ndjamena", "tdndj", + "Africa/Niamey", "nenim", + "Africa/Nouakchott", "mrnkc", + "Africa/Ouagadougou", "bfoua", + "Africa/Porto-Novo", "bjptn", + "Africa/Sao_Tome", "sttms", + "Africa/Tripoli", "lytip", + "Africa/Tunis", "tntun", + "Africa/Windhoek", "nawdh", + "America/Adak", "usadk", + "America/Anchorage", "usanc", + "America/Anguilla", "aiaxa", + "America/Antigua", "aganu", + "America/Araguaina", "braux", + "America/Argentina/La_Rioja", "arirj", + "America/Argentina/Rio_Gallegos", "arrgl", + "America/Argentina/Salta", "arsla", + "America/Argentina/San_Juan", "aruaq", + "America/Argentina/San_Luis", "arluq", + "America/Argentina/Tucuman", "artuc", + "America/Argentina/Ushuaia", "arush", + "America/Aruba", "awaua", + "America/Asuncion", "pyasu", + "America/Bahia", "brssa", + "America/Bahia_Banderas", "mxpvr", + "America/Barbados", "bbbgi", + "America/Belem", 
"brbel", + "America/Belize", "bzbze", + "America/Blanc-Sablon", "caybx", + "America/Boa_Vista", "brbvb", + "America/Bogota", "cobog", + "America/Boise", "usboi", + "America/Buenos_Aires", "arbue", + "America/Cambridge_Bay", "caycb", + "America/Campo_Grande", "brcgr", + "America/Cancun", "mxcun", + "America/Caracas", "veccs", + "America/Catamarca", "arctc", + "America/Cayenne", "gfcay", + "America/Cayman", "kygec", + "America/Chicago", "uschi", + "America/Chihuahua", "mxchi", + "America/Coral_Harbour", "cayzs", + "America/Cordoba", "arcor", + "America/Costa_Rica", "crsjo", + "America/Cuiaba", "brcgb", + "America/Curacao", "ancur", + "America/Danmarkshavn", "gldkshvn", + "America/Dawson", "cayda", + "America/Dawson_Creek", "caydq", + "America/Denver", "usden", + "America/Detroit", "usdet", + "America/Dominica", "dmdom", + "America/Edmonton", "caedm", + "America/Eirunepe", "brern", + "America/El_Salvador", "svsal", + "America/Fortaleza", "brfor", + "America/Glace_Bay", "caglb", + "America/Godthab", "glgoh", + "America/Goose_Bay", "cagoo", + "America/Grand_Turk", "tcgdt", + "America/Grenada", "gdgnd", + "America/Guadeloupe", "gpbbr", + "America/Guatemala", "gtgua", + "America/Guayaquil", "ecgye", + "America/Guyana", "gygeo", + "America/Halifax", "cahal", + "America/Havana", "cuhav", + "America/Hermosillo", "mxhmo", + "America/Indiana/Knox", "usknx", + "America/Indiana/Marengo", "usaeg", + "America/Indiana/Petersburg", "uswsq", + "America/Indiana/Tell_City", "ustel", + "America/Indiana/Vevay", "usinvev", + "America/Indiana/Vincennes", "usoea", + "America/Indiana/Winamac", "uswlz", + "America/Indianapolis", "usind", + "America/Inuvik", "cayev", + "America/Iqaluit", "caiql", + "America/Jamaica", "jmkin", + "America/Jujuy", "arjuj", + "America/Juneau", "usjnu", + "America/Kentucky/Monticello", "usmoc", + "America/La_Paz", "bolpb", + "America/Lima", "pelim", + "America/Los_Angeles", "uslax", + "America/Louisville", "uslui", + "America/Maceio", "brmcz", + "America/Managua", 
"nimga", + "America/Manaus", "brmao", + "America/Marigot", "gpmsb", + "America/Martinique", "mqfdf", + "America/Matamoros", "mxmam", + "America/Mazatlan", "mxmzt", + "America/Mendoza", "armdz", + "America/Menominee", "usmnm", + "America/Merida", "mxmid", + "America/Metlakatla", "usmtm", + "America/Mexico_City", "mxmex", + "America/Miquelon", "pmmqc", + "America/Moncton", "camon", + "America/Monterrey", "mxmty", + "America/Montevideo", "uymvd", + "America/Montreal", "camtr", + "America/Montserrat", "msmni", + "America/Nassau", "bsnas", + "America/New_York", "usnyc", + "America/Nipigon", "canpg", + "America/Nome", "usome", + "America/Noronha", "brfen", + "America/North_Dakota/Beulah", "usxul", + "America/North_Dakota/Center", "usndcnt", + "America/North_Dakota/New_Salem", "usndnsl", + "America/Ojinaga", "mxoji", + "America/Panama", "papty", + "America/Pangnirtung", "capnt", + "America/Paramaribo", "srpbm", + "America/Phoenix", "usphx", + "America/Port-au-Prince", "htpap", + "America/Port_of_Spain", "ttpos", + "America/Porto_Velho", "brpvh", + "America/Puerto_Rico", "prsju", + "America/Rainy_River", "caffs", + "America/Rankin_Inlet", "cayek", + "America/Recife", "brrec", + "America/Regina", "careg", + "America/Resolute", "careb", + "America/Rio_Branco", "brrbr", + "America/Santa_Isabel", "mxstis", + "America/Santarem", "brstm", + "America/Santiago", "clscl", + "America/Santo_Domingo", "dosdq", + "America/Sao_Paulo", "brsao", + "America/Scoresbysund", "globy", + "America/Shiprock", "usnavajo", + "America/Sitka", "ussit", + "America/St_Barthelemy", "gpsbh", + "America/St_Johns", "casjf", + "America/St_Kitts", "knbas", + "America/St_Lucia", "lccas", + "America/St_Thomas", "vistt", + "America/St_Vincent", "vcsvd", + "America/Swift_Current", "cayyn", + "America/Tegucigalpa", "hntgu", + "America/Thule", "glthu", + "America/Thunder_Bay", "cathu", + "America/Tijuana", "mxtij", + "America/Toronto", "cator", + "America/Tortola", "vgtov", + "America/Vancouver", "cavan", + 
"America/Whitehorse", "cayxy", + "America/Winnipeg", "cawnp", + "America/Yakutat", "usyak", + "America/Yellowknife", "cayzf", + "Antarctica/Casey", "aqcas", + "Antarctica/Davis", "aqdav", + "Antarctica/DumontDUrville", "aqddu", + "Antarctica/Macquarie", "aumqi", + "Antarctica/Mawson", "aqmaw", + "Antarctica/McMurdo", "aqmcm", + "Antarctica/Palmer", "aqplm", + "Antarctica/Rothera", "aqrot", + "Antarctica/South_Pole", "aqams", + "Antarctica/Syowa", "aqsyw", + "Antarctica/Vostok", "aqvos", + "Arctic/Longyearbyen", "sjlyr", + "Asia/Aden", "yeade", + "Asia/Almaty", "kzala", + "Asia/Amman", "joamm", + "Asia/Anadyr", "rudyr", + "Asia/Aqtau", "kzaau", + "Asia/Aqtobe", "kzakx", + "Asia/Ashgabat", "tmasb", + "Asia/Baghdad", "iqbgw", + "Asia/Bahrain", "bhbah", + "Asia/Baku", "azbak", + "Asia/Bangkok", "thbkk", + "Asia/Beirut", "lbbey", + "Asia/Bishkek", "kgfru", + "Asia/Brunei", "bnbwn", + "Asia/Calcutta", "inccu", + "Asia/Choibalsan", "mncoq", + "Asia/Chongqing", "cnckg", + "Asia/Colombo", "lkcmb", + "Asia/Damascus", "sydam", + "Asia/Dhaka", "bddac", + "Asia/Dili", "tldil", + "Asia/Dubai", "aedxb", + "Asia/Dushanbe", "tjdyu", + "Asia/Gaza", "gaza", + "Asia/Harbin", "cnhrb", + "Asia/Hong_Kong", "hkhkg", + "Asia/Hovd", "mnhvd", + "Asia/Irkutsk", "ruikt", + "Asia/Jakarta", "idjkt", + "Asia/Jayapura", "iddjj", + "Asia/Jerusalem", "jeruslm", + "Asia/Kabul", "afkbl", + "Asia/Kamchatka", "rupkc", + "Asia/Karachi", "pkkhi", + "Asia/Kashgar", "cnkhg", + "Asia/Katmandu", "npktm", + "Asia/Krasnoyarsk", "rukra", + "Asia/Kuala_Lumpur", "mykul", + "Asia/Kuching", "mykch", + "Asia/Kuwait", "kwkwi", + "Asia/Macau", "momfm", + "Asia/Magadan", "rugdx", + "Asia/Makassar", "idmak", + "Asia/Manila", "phmnl", + "Asia/Muscat", "ommct", + "Asia/Nicosia", "cynic", + "Asia/Novokuznetsk", "runoz", + "Asia/Novosibirsk", "ruovb", + "Asia/Omsk", "ruoms", + "Asia/Oral", "kzura", + "Asia/Phnom_Penh", "khpnh", + "Asia/Pontianak", "idpnk", + "Asia/Pyongyang", "kpfnj", + "Asia/Qatar", "qadoh", + 
"Asia/Qyzylorda", "kzkzo", + "Asia/Rangoon", "mmrgn", + "Asia/Riyadh", "saruh", + "Asia/Saigon", "vnsgn", + "Asia/Sakhalin", "ruuus", + "Asia/Samarkand", "uzskd", + "Asia/Seoul", "krsel", + "Asia/Shanghai", "cnsha", + "Asia/Singapore", "sgsin", + "Asia/Taipei", "twtpe", + "Asia/Tashkent", "uztas", + "Asia/Tbilisi", "getbs", + "Asia/Tehran", "irthr", + "Asia/Thimphu", "btthi", + "Asia/Tokyo", "jptyo", + "Asia/Ulaanbaatar", "mnuln", + "Asia/Urumqi", "cnurc", + "Asia/Vientiane", "lavte", + "Asia/Vladivostok", "ruvvo", + "Asia/Yakutsk", "ruyks", + "Asia/Yekaterinburg", "ruyek", + "Asia/Yerevan", "amevn", + "Atlantic/Azores", "ptpdl", + "Atlantic/Bermuda", "bmbda", + "Atlantic/Canary", "eslpa", + "Atlantic/Cape_Verde", "cvrai", + "Atlantic/Faeroe", "fotho", + "Atlantic/Madeira", "ptfnc", + "Atlantic/Reykjavik", "isrey", + "Atlantic/South_Georgia", "gsgrv", + "Atlantic/St_Helena", "shshn", + "Atlantic/Stanley", "fkpsy", + "Australia/Adelaide", "auadl", + "Australia/Brisbane", "aubne", + "Australia/Broken_Hill", "aubhq", + "Australia/Currie", "aukns", + "Australia/Darwin", "audrw", + "Australia/Eucla", "aueuc", + "Australia/Hobart", "auhba", + "Australia/Lindeman", "auldc", + "Australia/Lord_Howe", "auldh", + "Australia/Melbourne", "aumel", + "Australia/Perth", "auper", + "Australia/Sydney", "ausyd", + "CST6CDT", "cst6cdt", + "EST5EDT", "est5edt", + "Etc/GMT", "utc", + "Etc/GMT+1", "utcw01", + "Etc/GMT+10", "utcw10", + "Etc/GMT+11", "utcw11", + "Etc/GMT+12", "utcw12", + "Etc/GMT+2", "utcw02", + "Etc/GMT+3", "utcw03", + "Etc/GMT+4", "utcw04", + "Etc/GMT+5", "utcw05", + "Etc/GMT+6", "utcw06", + "Etc/GMT+7", "utcw07", + "Etc/GMT+8", "utcw08", + "Etc/GMT+9", "utcw09", + "Etc/GMT-1", "utce01", + "Etc/GMT-10", "utce10", + "Etc/GMT-11", "utce11", + "Etc/GMT-12", "utce12", + "Etc/GMT-13", "utce13", + "Etc/GMT-14", "utce14", + "Etc/GMT-2", "utce02", + "Etc/GMT-3", "utce03", + "Etc/GMT-4", "utce04", + "Etc/GMT-5", "utce05", + "Etc/GMT-6", "utce06", + "Etc/GMT-7", "utce07", + 
"Etc/GMT-8", "utce08", + "Etc/GMT-9", "utce09", + "Etc/Unknown", "unk", + "Europe/Amsterdam", "nlams", + "Europe/Andorra", "adalv", + "Europe/Athens", "grath", + "Europe/Belgrade", "rsbeg", + "Europe/Berlin", "deber", + "Europe/Bratislava", "skbts", + "Europe/Brussels", "bebru", + "Europe/Bucharest", "robuh", + "Europe/Budapest", "hubud", + "Europe/Chisinau", "mdkiv", + "Europe/Copenhagen", "dkcph", + "Europe/Dublin", "iedub", + "Europe/Gibraltar", "gigib", + "Europe/Guernsey", "gggci", + "Europe/Helsinki", "fihel", + "Europe/Isle_of_Man", "imdgs", + "Europe/Istanbul", "trist", + "Europe/Jersey", "jesth", + "Europe/Kaliningrad", "rukgd", + "Europe/Kiev", "uaiev", + "Europe/Lisbon", "ptlis", + "Europe/Ljubljana", "silju", + "Europe/London", "gblon", + "Europe/Luxembourg", "lulux", + "Europe/Madrid", "esmad", + "Europe/Malta", "mtmla", + "Europe/Mariehamn", "fimhq", + "Europe/Minsk", "bymsq", + "Europe/Monaco", "mcmon", + "Europe/Moscow", "rumow", + "Europe/Oslo", "noosl", + "Europe/Paris", "frpar", + "Europe/Podgorica", "metgd", + "Europe/Prague", "czprg", + "Europe/Riga", "lvrix", + "Europe/Rome", "itrom", + "Europe/Samara", "rukuf", + "Europe/San_Marino", "smsai", + "Europe/Sarajevo", "basjj", + "Europe/Simferopol", "uasip", + "Europe/Skopje", "mkskp", + "Europe/Sofia", "bgsof", + "Europe/Stockholm", "sesto", + "Europe/Tallinn", "eetll", + "Europe/Tirane", "altia", + "Europe/Uzhgorod", "uauzh", + "Europe/Vaduz", "livdz", + "Europe/Vatican", "vavat", + "Europe/Vienna", "atvie", + "Europe/Vilnius", "ltvno", + "Europe/Volgograd", "ruvog", + "Europe/Warsaw", "plwaw", + "Europe/Zagreb", "hrzag", + "Europe/Zaporozhye", "uaozh", + "Europe/Zurich", "chzrh", + "Indian/Antananarivo", "mgtnr", + "Indian/Chagos", "iodga", + "Indian/Christmas", "cxxch", + "Indian/Cocos", "cccck", + "Indian/Comoro", "kmyva", + "Indian/Kerguelen", "tfpfr", + "Indian/Mahe", "scmaw", + "Indian/Maldives", "mvmle", + "Indian/Mauritius", "muplu", + "Indian/Mayotte", "ytmam", + "Indian/Reunion", 
"rereu", + "MST7MDT", "mst7mdt", + "PST8PDT", "pst8pdt", + "Pacific/Apia", "wsapw", + "Pacific/Auckland", "nzakl", + "Pacific/Chatham", "nzcht", + "Pacific/Easter", "clipc", + "Pacific/Efate", "vuvli", + "Pacific/Enderbury", "kipho", + "Pacific/Fakaofo", "tkfko", + "Pacific/Fiji", "fjsuv", + "Pacific/Funafuti", "tvfun", + "Pacific/Galapagos", "ecgps", + "Pacific/Gambier", "pfgmr", + "Pacific/Guadalcanal", "sbhir", + "Pacific/Guam", "gugum", + "Pacific/Honolulu", "ushnl", + "Pacific/Johnston", "umjon", + "Pacific/Kiritimati", "kicxi", + "Pacific/Kosrae", "fmksa", + "Pacific/Kwajalein", "mhkwa", + "Pacific/Majuro", "mhmaj", + "Pacific/Marquesas", "pfnhv", + "Pacific/Midway", "ummdy", + "Pacific/Nauru", "nrinu", + "Pacific/Niue", "nuiue", + "Pacific/Norfolk", "nfnlk", + "Pacific/Noumea", "ncnou", + "Pacific/Pago_Pago", "asppg", + "Pacific/Palau", "pwror", + "Pacific/Pitcairn", "pnpcn", + "Pacific/Ponape", "fmpni", + "Pacific/Port_Moresby", "pgpom", + "Pacific/Rarotonga", "ckrar", + "Pacific/Saipan", "mpspn", + "Pacific/Tahiti", "pfppt", + "Pacific/Tarawa", "kitrw", + "Pacific/Tongatapu", "totbu", + "Pacific/Truk", "fmtkk", + "Pacific/Wake", "umawk", + "Pacific/Wallis", "wfmau", + }; + + private static final String[] TYPEALIAS_COLSTRENGTH = { + "quarternary", "quaternary", + }; + + private static final String[] TYPEALIAS_TIMEZONE = { + "Africa/Asmara", "Africa/Asmera", + "Africa/Timbuktu", "Africa/Bamako", + "America/Argentina/Buenos_Aires", "America/Buenos_Aires", + "America/Argentina/Catamarca", "America/Catamarca", + "America/Argentina/ComodRivadavia", "America/Catamarca", + "America/Argentina/Cordoba", "America/Cordoba", + "America/Argentina/Jujuy", "America/Jujuy", + "America/Argentina/Mendoza", "America/Mendoza", + "America/Atikokan", "America/Coral_Harbour", + "America/Atka", "America/Adak", + "America/Ensenada", "America/Tijuana", + "America/Fort_Wayne", "America/Indianapolis", + "America/Indiana/Indianapolis", "America/Indianapolis", + 
"America/Kentucky/Louisville", "America/Louisville", + "America/Knox_IN", "America/Indiana/Knox", + "America/Porto_Acre", "America/Rio_Branco", + "America/Rosario", "America/Cordoba", + "America/Virgin", "America/St_Thomas", + "Asia/Ashkhabad", "Asia/Ashgabat", + "Asia/Chungking", "Asia/Chongqing", + "Asia/Dacca", "Asia/Dhaka", + "Asia/Ho_Chi_Minh", "Asia/Saigon", + "Asia/Istanbul", "Europe/Istanbul", + "Asia/Kathmandu", "Asia/Katmandu", + "Asia/Kolkata", "Asia/Calcutta", + "Asia/Macao", "Asia/Macau", + "Asia/Tel_Aviv", "Asia/Jerusalem", + "Asia/Thimbu", "Asia/Thimphu", + "Asia/Ujung_Pandang", "Asia/Makassar", + "Asia/Ulan_Bator", "Asia/Ulaanbaatar", + "Atlantic/Faroe", "Atlantic/Faeroe", + "Atlantic/Jan_Mayen", "Arctic/Longyearbyen", + "Australia/ACT", "Australia/Sydney", + "Australia/Canberra", "Australia/Sydney", + "Australia/LHI", "Australia/Lord_Howe", + "Australia/NSW", "Australia/Sydney", + "Australia/North", "Australia/Darwin", + "Australia/Queensland", "Australia/Brisbane", + "Australia/South", "Australia/Adelaide", + "Australia/Tasmania", "Australia/Hobart", + "Australia/Victoria", "Australia/Melbourne", + "Australia/West", "Australia/Perth", + "Australia/Yancowinna", "Australia/Broken_Hill", + "Brazil/Acre", "America/Rio_Branco", + "Brazil/DeNoronha", "America/Noronha", + "Brazil/East", "America/Sao_Paulo", + "Brazil/West", "America/Manaus", + "Canada/Atlantic", "America/Halifax", + "Canada/Central", "America/Winnipeg", + "Canada/East-Saskatchewan", "America/Regina", + "Canada/Eastern", "America/Toronto", + "Canada/Mountain", "America/Edmonton", + "Canada/Newfoundland", "America/St_Johns", + "Canada/Pacific", "America/Vancouver", + "Canada/Saskatchewan", "America/Regina", + "Canada/Yukon", "America/Whitehorse", + "Chile/Continental", "America/Santiago", + "Chile/EasterIsland", "Pacific/Easter", + "Cuba", "America/Havana", + "EST", "Etc/GMT+5", + "Egypt", "Africa/Cairo", + "Eire", "Europe/Dublin", + "Etc/GMT+0", "Etc/GMT", + "Etc/GMT-0", "Etc/GMT", + 
"Etc/GMT0", "Etc/GMT", + "Etc/Greenwich", "Etc/GMT", + "Etc/UCT", "Etc/GMT", + "Etc/UTC", "Etc/GMT", + "Etc/Universal", "Etc/GMT", + "Etc/Zulu", "Etc/GMT", + "Europe/Belfast", "Europe/London", + "Europe/Nicosia", "Asia/Nicosia", + "Europe/Tiraspol", "Europe/Chisinau", + "GB", "Europe/London", + "GB-Eire", "Europe/London", + "GMT", "Etc/GMT", + "GMT+0", "Etc/GMT", + "GMT-0", "Etc/GMT", + "GMT0", "Etc/GMT", + "Greenwich", "Etc/GMT", + "HST", "Etc/GMT+10", + "Hongkong", "Asia/Hong_Kong", + "Iceland", "Atlantic/Reykjavik", + "Iran", "Asia/Tehran", + "Israel", "Asia/Jerusalem", + "Jamaica", "America/Jamaica", + "Japan", "Asia/Tokyo", + "Kwajalein", "Pacific/Kwajalein", + "Libya", "Africa/Tripoli", + "MST", "Etc/GMT+7", + "Mexico/BajaNorte", "America/Tijuana", + "Mexico/BajaSur", "America/Mazatlan", + "Mexico/General", "America/Mexico_City", + "NZ", "Pacific/Auckland", + "NZ-CHAT", "Pacific/Chatham", + "Navajo", "America/Shiprock", + "PRC", "Asia/Shanghai", + "Pacific/Chuuk", "Pacific/Truk", + "Pacific/Pohnpei", "Pacific/Ponape", + "Pacific/Samoa", "Pacific/Pago_Pago", + "Pacific/Yap", "Pacific/Truk", + "Poland", "Europe/Warsaw", + "Portugal", "Europe/Lisbon", + "ROC", "Asia/Taipei", + "ROK", "Asia/Seoul", + "Singapore", "Asia/Singapore", + "Turkey", "Europe/Istanbul", + "UCT", "Etc/GMT", + "US/Alaska", "America/Anchorage", + "US/Aleutian", "America/Adak", + "US/Arizona", "America/Phoenix", + "US/Central", "America/Chicago", + "US/East-Indiana", "America/Indianapolis", + "US/Eastern", "America/New_York", + "US/Hawaii", "Pacific/Honolulu", + "US/Indiana-Starke", "America/Indiana/Knox", + "US/Michigan", "America/Detroit", + "US/Mountain", "America/Denver", + "US/Pacific", "America/Los_Angeles", + "US/Pacific-New", "America/Los_Angeles", + "US/Samoa", "Pacific/Pago_Pago", + "UTC", "Etc/GMT", + "Universal", "Etc/GMT", + "W-SU", "Europe/Moscow", + "Zulu", "Etc/GMT", + }; } diff --git a/icu4j/eclipse-build/plugins.template/com.ibm.icu/.classpath 
b/icu4j/eclipse-build/plugins.template/com.ibm.icu/.classpath index 751c8f2e504..304e86186aa 100644 --- a/icu4j/eclipse-build/plugins.template/com.ibm.icu/.classpath +++ b/icu4j/eclipse-build/plugins.template/com.ibm.icu/.classpath @@ -1,7 +1,7 @@ - + -- 2.40.0