2 *******************************************************************************
3 * Copyright (C) 2009-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
7 package com.ibm.icu.impl.locale;
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
// Builder holding the base locale fields plus the extension collections that are
// later turned into a BaseLocale (getBaseLocale) and LocaleExtensions (getLocaleExtensions).
// NOTE(review): this listing omits some original lines (the embedded line numbers skip),
// so closing braces and several statements are not shown.
15 public final class InternalLocaleBuilder {
// NOTE(review): presumably distinguishes a JDK build from the ICU build; no use is visible here - TODO confirm.
17 private static final boolean JDKIMPL = false;
// Base locale fields; each starts out as the empty string.
19 private String _language = "";
20 private String _script = "";
21 private String _region = "";
22 private String _variant = "";
// Key under which the private use value is looked up in _extensions
// (built from the first character of LanguageTag.PRIVATEUSE).
24 private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0));
// Extension values keyed case-insensitively by singleton character; also holds private use.
// The Unicode locale extension is kept separately in _uattributes and _ukeywords.
// All three collections stay null until a setter allocates them.
26 private HashMap<CaseInsensitiveChar, String> _extensions;
// Unicode locale attributes, compared case-insensitively.
27 private HashSet<CaseInsensitiveString> _uattributes;
// Unicode locale keywords: case-insensitive key mapped to its type string.
28 private HashMap<CaseInsensitiveString, String> _ukeywords;
// Creates a builder. The constructor body is not visible in this listing.
31 public InternalLocaleBuilder() {
// Sets the language subtag.
// A null or empty argument takes the first branch (its body is not visible in this listing).
// A value that fails LanguageTag.isLanguage raises LocaleSyntaxException.
34 public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
35 if (language == null || language.length() == 0) {
38 if (!LanguageTag.isLanguage(language)) {
// second argument 0: presumably the error index, i.e. the start of the argument - TODO confirm
39 throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
// Sets the script subtag.
// A null or empty argument takes the first branch (its body is not visible in this listing).
// A value that fails LanguageTag.isScript raises LocaleSyntaxException.
46 public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
47 if (script == null || script.length() == 0) {
50 if (!LanguageTag.isScript(script)) {
// second argument 0: presumably the error index, i.e. the start of the argument - TODO confirm
51 throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
// Sets the region subtag.
// A null or empty argument takes the first branch (its body is not visible in this listing).
// A value that fails LanguageTag.isRegion raises LocaleSyntaxException.
58 public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
59 if (region == null || region.length() == 0) {
62 if (!LanguageTag.isRegion(region)) {
// second argument 0: presumably the error index, i.e. the start of the argument - TODO confirm
63 throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
// Sets the variant.
// A null or empty argument takes the first branch (its body is not visible in this listing).
// Otherwise the separators are normalized and every variant subtag is checked by checkVariants;
// a failing subtag raises LocaleSyntaxException carrying the offset reported by checkVariants.
70 public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
71 if (variant == null || variant.length() == 0) {
74 // normalize separators to "_"
// String.replaceAll interprets its first argument as a regular expression
75 String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
// offset within var of the first subtag rejected by LanguageTag.isVariant
76 int errIdx = checkVariants(var, BaseLocale.SEP);
// the condition guarding this throw is not visible in this listing;
// the message reports the caller-supplied string, not the normalized copy
78 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
// Adds a Unicode locale attribute to the builder.
// Throws LocaleSyntaxException when attribute is null or fails UnicodeLocaleExtension.isAttribute.
85 public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
86 if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
87 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
89 // Use case insensitive string to prevent duplication
// the attribute set is allocated on first use
90 if (_uattributes == null) {
91 _uattributes = new HashSet<CaseInsensitiveString>(4);
93 _uattributes.add(new CaseInsensitiveString(attribute));
// Removes a Unicode locale attribute, matching case-insensitively.
// Throws LocaleSyntaxException when attribute is null or fails UnicodeLocaleExtension.isAttribute.
// Nothing is removed when no attribute set has been allocated yet.
97 public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
98 if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
99 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
101 if (_uattributes != null) {
102 _uattributes.remove(new CaseInsensitiveString(attribute));
// Sets or removes one Unicode locale keyword.
// key must satisfy UnicodeLocaleExtension.isKey, and every subtag of a non-empty type must
// satisfy UnicodeLocaleExtension.isTypeSubtag; otherwise LocaleSyntaxException is thrown.
// NOTE(review): the test that selects the removal branch (between original lines 112 and 114)
// is not visible in this listing.
107 public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
108 if (!UnicodeLocaleExtension.isKey(key)) {
109 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
// keys are compared case-insensitively in _ukeywords
112 CaseInsensitiveString cikey = new CaseInsensitiveString(key);
114 if (_ukeywords != null) {
115 // null type is used to remove the key
116 _ukeywords.remove(cikey);
// an empty type skips subtag validation
119 if (type.length() != 0) {
120 // normalize separator to "-"
121 String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
// validate each subtag of the normalized type
123 StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
124 while (!itr.isDone()) {
125 String s = itr.current();
126 if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
127 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart());
// the keyword map is allocated on first use
132 if (_ukeywords == null) {
133 _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
// NOTE(review): the caller-supplied type is stored, not the separator-normalized copy tp
// that was validated above - confirm this is intended.
135 _ukeywords.put(cikey, type);
// Sets, replaces or removes the value stored for one extension singleton or for the private use prefix.
// singleton must be the private use prefix character or a valid extension singleton character,
// otherwise LocaleSyntaxException is thrown. A null or empty value requests removal.
// Every subtag of a non-empty value is validated; an invalid subtag raises LocaleSyntaxException.
140 public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
142 boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
143 if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
144 throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
// presumably selects the removal branch below; the if line that uses it is not visible in this listing
147 boolean remove = (value == null || value.length() == 0);
148 CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);
// removal: the Unicode locale extension lives in _uattributes and _ukeywords ...
151 if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
152 // clear entire Unicode locale extension
153 if (_uattributes != null) {
154 _uattributes.clear();
156 if (_ukeywords != null) {
// ... every other key is removed from the _extensions map
160 if (_extensions != null && _extensions.containsKey(key)) {
161 _extensions.remove(key);
// set: normalize separators, then validate each subtag
166 String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
167 StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
168 while (!itr.isDone()) {
169 String s = itr.current();
// private use subtags and extension subtags are validated by different predicates
171 if (isBcpPrivateuse) {
172 validSubtag = LanguageTag.isPrivateuseSubtag(s);
174 validSubtag = LanguageTag.isExtensionSubtag(s);
177 throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart());
// the Unicode locale extension is parsed into attributes and keywords instead of being stored as a string
182 if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
183 setUnicodeLocaleExtension(val);
// the extension map is allocated on first use
185 if (_extensions == null) {
186 _extensions = new HashMap<CaseInsensitiveChar, String>(4);
188 _extensions.put(key, val);
195 * Set extension/private subtags in a single string representation
// Parses a string of extension and private use subtags and hands the result to
// setExtensions(List, String).
// A null or empty argument takes the first branch (its body is not visible in this listing).
// Throws LocaleSyntaxException for an incomplete extension, an incomplete private use
// sequence, or ill-formed subtags.
197 public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
198 if (subtags == null || subtags.length() == 0) {
// normalize separators before tokenizing
202 subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
203 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
// extensions collects one string per singleton; privateuse holds the private use sequence
205 List<String> extensions = null;
206 String privateuse = null;
211 // Make a list of extension subtags
212 while (!itr.isDone()) {
213 String s = itr.current();
214 if (LanguageTag.isExtensionSingleton(s)) {
// start: offset of the singleton; sb accumulates the singleton plus its subtags
215 start = itr.currentStart();
216 String singleton = s;
217 StringBuilder sb = new StringBuilder(singleton);
// consume the extension subtags that follow the singleton
220 while (!itr.isDone()) {
222 if (LanguageTag.isExtensionSubtag(s)) {
223 sb.append(LanguageTag.SEP).append(s);
// parsed: end offset of the last accepted subtag
224 parsed = itr.currentEnd();
// NOTE(review): this check uses < while the private use check below uses <= ;
// the initial value of parsed is not visible in this listing, so confirm both are as intended.
231 if (parsed < start) {
232 throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start);
235 if (extensions == null) {
236 extensions = new ArrayList<String>(4);
238 extensions.add(sb.toString());
// private use part: the prefix followed by its subtags
244 String s = itr.current();
245 if (LanguageTag.isPrivateusePrefix(s)) {
246 start = itr.currentStart();
247 StringBuilder sb = new StringBuilder(s);
250 while (!itr.isDone()) {
252 if (!LanguageTag.isPrivateuseSubtag(s)) {
255 sb.append(LanguageTag.SEP).append(s);
256 parsed = itr.currentEnd();
// no subtag was accepted after the prefix
260 if (parsed <= start) {
261 throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start);
263 privateuse = sb.toString();
// the condition guarding this throw is not visible in this listing; the message reports the
// text from the current iterator position onward
269 throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart());
272 return setExtensions(extensions, privateuse);
276 * Set a list of BCP47 extensions and private use subtags
277 * BCP47 extensions are already validated and well-formed, but may contain duplicates
// Stores extension strings and a private use string in the builder.
// Each list element starts with its singleton character followed by a separator;
// elements whose singleton is already in processedExtensions are skipped.
// Either argument may be null or empty, in which case that part is skipped.
279 private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
282 if (bcpExtensions != null && bcpExtensions.size() > 0) {
283 HashSet<CaseInsensitiveChar> processedExtensions = new HashSet<CaseInsensitiveChar>(bcpExtensions.size());
284 for (String bcpExt : bcpExtensions) {
// the first character is the singleton
285 CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0));
287 if (!processedExtensions.contains(key)) {
288 // each extension string contains singleton, e.g. "a-abc-def"
289 if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
// substring(2) drops the singleton and the separator after it
290 setUnicodeLocaleExtension(bcpExt.substring(2));
292 if (_extensions == null) {
293 _extensions = new HashMap<CaseInsensitiveChar, String>(4);
295 _extensions.put(key, bcpExt.substring(2));
300 if (privateuse != null && privateuse.length() > 0) {
301 // privateuse string contains prefix, e.g. "x-abc-def"
302 if (_extensions == null) {
303 _extensions = new HashMap<CaseInsensitiveChar, String>(1);
// stored under the prefix character, without the prefix and its separator
305 _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2));
312 * Reset Builder's internal state with the given language tag
// Loads language, script, region, variant, extensions and private use from a LanguageTag.
// When the tag has extlangs, the first one is used as the language; the primary language is
// used when it differs from LanguageTag.UNDETERMINED (the line joining these two cases is
// not visible in this listing).
314 public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
316 if (langtag.getExtlangs().size() > 0) {
317 _language = langtag.getExtlangs().get(0);
319 String language = langtag.getLanguage();
320 if (!language.equals(LanguageTag.UNDETERMINED)) {
321 _language = language;
324 _script = langtag.getScript();
325 _region = langtag.getRegion();
// join all variant subtags with BaseLocale.SEP into the single variant field
327 List<String> bcpVariants = langtag.getVariants();
328 if (bcpVariants.size() > 0) {
329 StringBuilder var = new StringBuilder(bcpVariants.get(0));
330 for (int i = 1; i < bcpVariants.size(); i++) {
331 var.append(BaseLocale.SEP).append(bcpVariants.get(i));
333 _variant = var.toString();
// extensions and private use go through the list-based overload
336 setExtensions(langtag.getExtensions(), langtag.getPrivateuse());
// Loads the builder from a BaseLocale and its LocaleExtensions.
// Three legacy locales (ja_JP_JP, th_TH_TH, no_NO_NY) get special handling first; the
// statements inside those branches are only partly visible in this listing.
// The base fields are then validated (LocaleSyntaxException on an ill-formed language, script,
// region or variant) before the builder fields are updated and the extensions are copied.
341 public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException {
342 String language = base.getLanguage();
343 String script = base.getScript();
344 String region = base.getRegion();
345 String variant = base.getVariant();
348 // Special backward compatibility support
350 // Exception 1 - ja_JP_JP
351 if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
352 // When locale ja_JP_JP is created, ca-japanese is always there.
353 // The builder ignores the variant "JP"
// NOTE(review): extensions is dereferenced here (when assertions are enabled) although the
// extension-copy code further down tolerates a null extensions argument.
354 assert("japanese".equals(extensions.getUnicodeLocaleType("ca")));
357 // Exception 2 - th_TH_TH
358 else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
359 // When locale th_TH_TH is created, nu-thai is always there.
360 // The builder ignores the variant "TH"
361 assert("thai".equals(extensions.getUnicodeLocaleType("nu")));
364 // Exception 3 - no_NO_NY
365 else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
366 // no_NO_NY is a valid locale and used by Java 6 or older versions.
367 // The builder ignores the variant "NY" and changes the language to "nn".
373 // Validate base locale fields before updating internal state.
374 // LocaleExtensions always store validated/canonicalized values,
375 // so no checks are necessary.
// empty fields are accepted without validation
376 if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
377 throw new LocaleSyntaxException("Ill-formed language: " + language);
380 if (script.length() > 0 && !LanguageTag.isScript(script)) {
381 throw new LocaleSyntaxException("Ill-formed script: " + script);
384 if (region.length() > 0 && !LanguageTag.isRegion(region)) {
385 throw new LocaleSyntaxException("Ill-formed region: " + region);
388 if (variant.length() > 0) {
389 int errIdx = checkVariants(variant, BaseLocale.SEP);
// the condition guarding this throw is not visible in this listing
391 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
395 // The input locale is validated at this point.
396 // Now, updating builder's internal fields.
397 _language = language;
// a null extensions argument yields no keys and the copy below is skipped
403 Set<Character> extKeys = (extensions == null) ? null : extensions.getKeys();
404 if (extKeys != null) {
405 // map extensions back to builder's internal format
406 for (Character key : extKeys) {
407 Extension e = extensions.getExtension(key);
// the Unicode locale extension is expanded into the attribute set and the keyword map
408 if (e instanceof UnicodeLocaleExtension) {
409 UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
410 for (String uatr : ue.getUnicodeLocaleAttributes()) {
411 if (_uattributes == null) {
412 _uattributes = new HashSet<CaseInsensitiveString>(4);
414 _uattributes.add(new CaseInsensitiveString(uatr));
416 for (String ukey : ue.getUnicodeLocaleKeys()) {
417 if (_ukeywords == null) {
418 _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
420 _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
// presumably the else branch (line not visible here): other extensions are stored by value
423 if (_extensions == null) {
424 _extensions = new HashMap<CaseInsensitiveChar, String>(4);
426 _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue());
// NOTE(review): presumably resets all builder fields; the method body is not visible in this listing.
433 public InternalLocaleBuilder clear() {
// Clears the extension state of the builder. Each collection is touched only when it has been
// allocated. Only the clear call for _uattributes is visible in this listing; the statements
// inside the _extensions and _ukeywords guards are not shown.
442 public InternalLocaleBuilder clearExtensions() {
443 if (_extensions != null) {
446 if (_uattributes != null) {
447 _uattributes.clear();
449 if (_ukeywords != null) {
// Builds a BaseLocale from the current language, script, region and variant.
// When the private use value contains the subtag LanguageTag.PRIVUSE_VARIANT_PREFIX, the
// subtags from privVarStart onward are appended to the variant, with separators converted
// to BaseLocale.SEP. The builder fields themselves are not modified; only local copies are.
455 public BaseLocale getBaseLocale() {
456 String language = _language;
457 String script = _script;
458 String region = _region;
459 String variant = _variant;
461 // Special private use subtag sequence identified by "lvariant" will be
462 // interpreted as Java variant.
463 if (_extensions != null) {
464 String privuse = _extensions.get(PRIVUSE_KEY);
465 if (privuse != null) {
466 StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
// sawPrefix: the marker subtag has been seen; privVarStart: offset where the variant part starts
467 boolean sawPrefix = false;
468 int privVarStart = -1;
469 while (!itr.isDone()) {
// the condition guarding this assignment is not visible in this listing
471 privVarStart = itr.currentStart();
474 if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
// -1 means no variant part was found in the private use value
479 if (privVarStart != -1) {
480 StringBuilder sb = new StringBuilder(variant);
// separate from an existing variant only when there is one
481 if (sb.length() != 0) {
482 sb.append(BaseLocale.SEP);
484 sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP));
485 variant = sb.toString();
490 return BaseLocale.getInstance(language, script, region, variant);
// Returns the extensions held by the builder.
// LocaleExtensions.EMPTY_EXTENSIONS is returned when all three collections are null or empty;
// otherwise a new LocaleExtensions is created from them.
493 public LocaleExtensions getLocaleExtensions() {
494 if ((_extensions == null || _extensions.size() == 0)
495 && (_uattributes == null || _uattributes.size() == 0)
496 && (_ukeywords == null || _ukeywords.size() == 0)) {
497 return LocaleExtensions.EMPTY_EXTENSIONS;
500 return new LocaleExtensions(_extensions, _uattributes, _ukeywords);
504 * Remove special private use subtag sequence identified by "lvariant"
505 * and return the rest. Only used by LocaleExtensions
// Strips the variant marker subtag (LanguageTag.PRIVUSE_VARIANT_PREFIX) and everything after it
// from a private use value. Returns null when the marker is the first subtag, otherwise the
// text before the separator that precedes the marker.
// NOTE(review): the body of the branch taken when no marker-plus-subtag sequence was seen is
// not visible in this listing.
507 static String removePrivateuseVariant(String privuseVal) {
508 StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);
510 // Note: privateuse value "abc-lvariant" is unchanged
511 // because no subtags after "lvariant".
// prefixStart: offset of the marker subtag, -1 while not found
513 int prefixStart = -1;
// sawPrivuseVar: at least one subtag follows the marker
514 boolean sawPrivuseVar = false;
515 while (!itr.isDone()) {
516 if (prefixStart != -1) {
517 // Note: privateuse value "abc-lvariant" is unchanged
518 // because no subtags after "lvariant".
519 sawPrivuseVar = true;
522 if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
523 prefixStart = itr.currentStart();
527 if (!sawPrivuseVar) {
531 assert(prefixStart == 0 || prefixStart > 1);
// prefixStart - 1 also drops the separator in front of the marker
532 return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1);
536 * Check if the given variant subtags separated by the given
537 * separator(s) are valid
// Walks the subtags of variants, split on sep, and returns the start offset of the first one
// that fails LanguageTag.isVariant.
// NOTE(review): the value returned when every subtag is valid is not visible in this listing.
539 private int checkVariants(String variants, String sep) {
540 StringTokenIterator itr = new StringTokenIterator(variants, sep);
541 while (!itr.isDone()) {
542 String s = itr.current();
543 if (!LanguageTag.isVariant(s)) {
544 return itr.currentStart();
552 * Private method that parses Unicode Locale Extension subtags.
553 * Duplicated attributes/keywords will be ignored.
554 * The input must be valid extension subtags (excluding the singleton).
// Replaces the Unicode locale attributes and keywords with those parsed from subtags.
// The first loop handles the leading attribute subtags, the second loop the key and type
// subtags. A keyword is emitted when the next key is met or when the last token is reached.
// NOTE(review): iterator advancement, loop exits and several guards are not visible in this
// listing, so the comments below describe only the visible statements.
556 private void setUnicodeLocaleExtension(String subtags) {
557 // wipe out existing attributes/keywords
558 if (_uattributes != null) {
559 _uattributes.clear();
561 if (_ukeywords != null) {
565 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);
// attribute section: presumably ends at the first subtag that is not an attribute
568 while (!itr.isDone()) {
569 if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
572 if (_uattributes == null) {
573 _uattributes = new HashSet<CaseInsensitiveString>(4);
575 _uattributes.add(new CaseInsensitiveString(itr.current()));
// key: the keyword currently being collected; null when there is none or it was a duplicate
580 CaseInsensitiveString key = null;
584 while (!itr.isDone()) {
586 if (UnicodeLocaleExtension.isKey(itr.current())) {
587 // next keyword - emit previous one
588 assert(typeStart == -1 || typeEnd != -1);
// a keyword without type subtags is stored with an empty type
589 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
590 if (_ukeywords == null) {
591 _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
593 _ukeywords.put(key, type);
595 // reset keyword info
596 CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
// a key that is already stored is a duplicate and is dropped
597 key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
598 typeStart = typeEnd = -1;
// type subtag: remember where the type begins and extend its end offset
600 if (typeStart == -1) {
601 typeStart = itr.currentStart();
603 typeEnd = itr.currentEnd();
605 } else if (UnicodeLocaleExtension.isKey(itr.current())) {
606 // 1. first keyword or
607 // 2. next keyword, but previous one was duplicate
608 key = new CaseInsensitiveString(itr.current());
609 if (_ukeywords != null && _ukeywords.containsKey(key)) {
// last token: emit the keyword still being collected
615 if (!itr.hasNext()) {
618 assert(typeStart == -1 || typeEnd != -1);
619 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
620 if (_ukeywords == null) {
621 _ukeywords = new HashMap<CaseInsensitiveString, String>(4);
623 _ukeywords.put(key, type);
// String wrapper whose hashCode and equals ignore ASCII case, so it can serve as a key in the
// builder's hash collections. The field declaration and several method bodies are not visible
// in this listing.
632 static class CaseInsensitiveString {
635 CaseInsensitiveString(String s) {
639 public String value() {
643 public int hashCode() {
// hash of the lower-cased form, so strings differing only in ASCII case hash alike
644 return AsciiUtil.toLowerString(_s).hashCode();
647 public boolean equals(Object obj) {
651 if (!(obj instanceof CaseInsensitiveString)) {
654 return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
// Character wrapper used as a case-insensitive key for extension singletons.
// The field declaration and several method bodies are not visible in this listing.
658 static class CaseInsensitiveChar {
661 CaseInsensitiveChar(char c) {
665 public char value() {
669 public int hashCode() {
670 return AsciiUtil.toLower(_c);
673 public boolean equals(Object obj) {
677 if (!(obj instanceof CaseInsensitiveChar)) {
// NOTE(review): only the other character is lower-cased; _c is compared as stored. This is
// symmetric and consistent with hashCode only if _c is already lower case - the constructor
// body is not visible here, so confirm.
680 return _c == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value());