if (!isValidT(locale.getExtension(c), where)) return false;
break;
case u:
- if (!isValidU(locale.getExtension(c), where)) return false;
+ if (!isValidU(locale, locale.getExtension(c), where)) return false;
break;
}
} catch (Exception e) {
}
enum SpecialCase {
- normal, anything, reorder, codepoints;
+ normal, anything, reorder, codepoints, subdivision;
static SpecialCase get(String key) {
if (key.equals("kr")) {
return SpecialCase.reorder;
} else if (key.equals("vt")) {
return SpecialCase.codepoints;
+ } else if (key.equals("sd")) {
+ return subdivision;
} else if (key.equals("x0")) {
return anything;
} else {
}
}
/**
+ * @param locale
* @param extension
* @param where
* @return
*/
- private boolean isValidU(String extensionString, Where where) {
+ private boolean isValidU(ULocale locale, String extensionString, Where where) {
String key = "";
int typeCount = 0;
ValueType valueType = null;
SpecialCase specialCase = null;
+ StringBuilder prefix = new StringBuilder();
// TODO: is empty -u- valid?
for (String subtag : SEPARATOR.split(extensionString)) {
if (subtag.length() == 2) {
typeCount = 0;
} else {
++typeCount;
- if (valueType == ValueType.single && typeCount > 1) {
- return where.set(Datatype.u, key+"-"+subtag);
+ switch (valueType) {
+ case single:
+ if (typeCount > 1) {
+ return where.set(Datatype.u, key+"-"+subtag);
+ }
+ break;
+ case incremental:
+ if (typeCount == 1) {
+ prefix.setLength(0);
+ prefix.append(subtag);
+ } else {
+ prefix.append('-').append(subtag);
+ subtag = prefix.toString();
+ }
}
switch (specialCase) {
case anything:
return where.set(Datatype.u, key+"-"+subtag);
}
continue;
+ case subdivision:
+ if (!isSubdivision(locale, subtag)) {
+ return where.set(Datatype.u, key+"-"+subtag);
+ }
+ continue;
}
-
+
// en-u-sd-usca
// en-US-u-sd-usca
Output<Boolean> isKnownKey = new Output<Boolean>();
return true;
}
+ /**
+ * Checks whether the value of the "sd" key (for example "usca" in en-u-sd-usca) is acceptable for the given locale.
+ * The subtag is split into a region prefix and a subdivision suffix; the pair must be accepted by
+ * ValidIdentifiers.isValid for Datatype.subdivision under this checker's datasubtypes, and the region prefix must
+ * match the locale's region, ignoring case. When the locale has no explicit region, the region produced by
+ * ULocale.addLikelySubtags is used for the comparison instead.
+ * @param locale the locale the subtag belongs to; its region (explicit or likely) must agree with the subtag's region prefix
+ * @param subtag the subdivision value to check; values shorter than 3 characters are rejected
+ * @return true if the subtag is a valid subdivision and its region prefix matches the locale's region, false otherwise
+ */
+ private boolean isSubdivision(ULocale locale, String subtag) {
+ // First check if the subtag is valid: it needs at least 3 characters to be split into region + subdivision
+ if (subtag.length() < 3) {
+ return false;
+ }
+ String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2); // 3-char prefix if the first char is <= '9' (presumably a numeric region code - confirm), else 2-char
+ String subdivision = subtag.substring(region.length()); // remainder after the region prefix; empty for a 3-char subtag with a 3-char prefix
+ if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) { // a null result is treated as "not valid"
+ return false;
+ }
+ // Then check for consistency with the locale's region
+ String localeRegion = locale.getCountry();
+ if (localeRegion.isEmpty()) {
+ ULocale max = ULocale.addLikelySubtags(locale); // no explicit region: fall back to the likely region
+ localeRegion = max.getCountry();
+ }
+ if (!region.equalsIgnoreCase(localeRegion)) { // case-insensitive region comparison
+ return false;
+ }
+ return true;
+ }
+
static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others"));
static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
/**
return false;
}
return ValidIdentifiers.isValid(Datatype.script, datasubtypes, subtag) != null;
-// space, punct, symbol, currency, digit - core groups of characters below 'a'
-// any script code except Common and Inherited.
-// sc ; Zinh ; Inherited ; Qaai
-// sc ; Zyyy ; Common
-// Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are are always reordered with Hiragana.
-// others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others. return false;
+ // space, punct, symbol, currency, digit - core groups of characters below 'a'
+ // any script code except Common and Inherited.
+ // sc ; Zinh ; Inherited ; Qaai
+ // sc ; Zyyy ; Common
+ // Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are always reordered with Hiragana.
+ // others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others. return false;
}
/**
}
public enum ValueType {
- single, multiple, specific;
+ single, multiple, incremental;
private static Set<String> multipleValueTypes = new HashSet<String>(Arrays.asList("x0", "kr", "vt"));
private static Set<String> specificValueTypes = new HashSet<String>(Arrays.asList("ca"));
static ValueType get(String key) {
if (multipleValueTypes.contains(key)) {
return multiple;
} else if (specificValueTypes.contains(key)) {
- return specific;
+ return incremental;
} else {
return single;
}
public void testBasic() {
String[][] tests = {
- {"OK", "en-u-kr-latn-digit"},
- {"Incomplete extension 'u' [at index 3]", "en-u"},
- {"Incomplete extension 't' [at index 3]", "en-t"},
{"OK", "en-u-ca-chinese"},
{"OK", "en-x-abcdefg"},
{"OK", "x-abcdefg"},
{"OK", "en-u-sd-usca"},
{"OK", "en-US-u-sd-usca"},
- {"OK", "en-AQ-u-sd-usca"},
{"OK", "en-t-it"},
{"OK", "und-Cyrl-t-und-latn"},
{"OK", "root"},
{"OK", "zh-Hant"},
{"OK", "zh-Hant-AQ"},
{"OK", "x-abcdefg-g-foobar"},
- {"Empty subtag [at index 0]", ""},
- {"{u, ca-chinesx}", "en-u-ca-chinesx"},
- {"{illegal, q}", "en-q-abcdefg"},
- {"Incomplete privateuse [at index 0]", "x-abc$defg"},
- {"{script, Latx}", "und-Cyrl-t-und-latx"},
- {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
- {"{region, AB}", "zh-Hant-AB"},
- {"{language, ex}", "ex"},
- {"{script, Hanx}", "zh-Hanx"},
- {"{language, qaa}", "qaa"},
- {"Invalid subtag: $ [at index 3]", "EN-$"},
- {"Invalid subtag: $ [at index 0]", "$"},
- // too many items
- {"{u, cu-usd}", "en-u-cu-adp-usd"},
{"OK", "en-u-ca-buddhist"},
+ {"OK", "en-u-ca-islamic-umalqura"}, // additive
{"OK", "en-u-cf-account"},
{"OK", "en-u-co-big5han"},
{"OK", "en-u-cu-adp"},
{"OK", "en-u-kf-false"},
{"OK", "en-u-kk-false"},
{"OK", "en-u-kn-false"},
- {"OK", "en-u-kr-latn-digit-symbol"},
+ {"OK", "en-u-kr-latn-digit-symbol"}, // reorder codes, multiple
{"OK", "en-u-ks-identic"},
{"OK", "en-u-kv-currency"},
{"OK", "en-u-nu-ahom"},
{"OK", "en-u-sd-usny"},
{"OK", "en-u-tz-adalv"},
{"OK", "en-u-va-posix"},
- {"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated
+
+ // really long case
+
+ {"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"},
+
+ // deprecated, but turned into valid by ULocale.Builder()
+ {"OK", "en-u-ca-islamicc"}, // deprecated
+ {"OK", "en-u-tz-aqams"}, // deprecated
+
+ // Bad syntax (caught by ULocale.Builder())
+
+ {"Incomplete extension 'u' [at index 3]", "en-u"},
+ {"Incomplete extension 't' [at index 3]", "en-t"},
+ {"Empty subtag [at index 0]", ""},
+ {"Incomplete privateuse [at index 0]", "x-abc$defg"},
+ {"Invalid subtag: $ [at index 3]", "EN-$"},
+ {"Invalid subtag: $ [at index 0]", "$"},
+
+ // bad extension
+
+ {"{illegal, q}", "en-q-abcdefg"},
+
+ // bad subtags
+
+ {"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
+ {"{region, AB}", "zh-Hant-AB"},
+ {"{language, ex}", "ex"},
+ {"{script, Hanx}", "zh-Hanx"},
+ {"{language, qaa}", "qaa"},
+
+ // bad types for keys
+
+ {"{u, ca-chinesx}", "en-u-ca-chinesx"},
+ {"{script, Latx}", "und-Cyrl-t-und-latx"},
+ {"{u, sd-usca}", "en-AQ-u-sd-usca"},
+
+ {"{u, ca-buddhisx}", "en-u-ca-buddhisx"},
+ {"{u, ca-islamic-umalqurx}", "en-u-ca-islamic-umalqurx"}, // additive
+ {"{u, cf-accounx}", "en-u-cf-accounx"},
+ {"{u, co-big5hax}", "en-u-co-big5hax"},
+ {"{u, cu-adx}", "en-u-cu-adx"},
+ {"{u, fw-frx}", "en-u-fw-frx"},
+ {"{u, hc-h1x}", "en-u-hc-h1x"},
+ {"{u, ka-noignorx}", "en-u-ka-noignorx"},
+ {"{u, kb-falsx}", "en-u-kb-falsx"},
+ {"{u, kc-falsx}", "en-u-kc-falsx"},
+ {"{u, kf-falsx}", "en-u-kf-falsx"},
+ {"{u, kk-falsx}", "en-u-kk-falsx"},
+ {"{u, kn-falsx}", "en-u-kn-falsx"},
+ {"{u, kr-symbox}", "en-u-kr-latn-digit-symbox"}, // reorder codes, multiple
+ {"{u, ks-identix}", "en-u-ks-identix"},
+ {"{u, kv-currencx}", "en-u-kv-currencx"},
+ {"{u, nu-ahox}", "en-u-nu-ahox"},
+ {"{u, sd-usnx}", "en-u-sd-usnx"},
+ {"{u, tz-adalx}", "en-u-tz-adalx"},
+ {"{u, va-posit}", "en-u-va-posit"},
+
+
+ // too many items
+
+ {"{u, cu-usd}", "en-u-cu-adp-usd"},
+
+ // use deprecated subtags. testDeprecated checks if they work when Datasubtype.deprecated is added
+ //{"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated, but turns into valid
{"{u, co-direct}", "en-u-co-direct"}, // deprecated
{"{u, kh}", "en-u-kh-false"}, // deprecated
- {"{u, tz-aqams}", "en-u-tz-aqams"}, // deprecated
+ {"{u, tz-camtr}", "en-u-tz-camtr"}, // deprecated
{"{u, vt}", "en-u-vt-0020-0041"}, // deprecated
};
check(tests, Datasubtype.regular, Datasubtype.unknown);
}
public void testDeprecated() {
- LocaleValidityChecker regularAndDeprecated = new LocaleValidityChecker(EnumSet.of(Datasubtype.regular, Datasubtype.deprecated));
String[][] tests = {
- {"OK", "en-u-ca-islamicc"}, // deprecated
{"OK", "en-u-co-direct"}, // deprecated
{"OK", "en-u-kh-false"}, // deprecated
- {"OK", "en-u-tz-aqams"}, // deprecated
+ {"OK", "en-u-tz-camtr"}, // deprecated
{"OK", "en-u-vt-0020"}, // deprecated
};
check(tests, Datasubtype.regular, Datasubtype.unknown, Datasubtype.deprecated);
private void check(String[][] tests, Datasubtype... datasubtypes) {
int count = 0;
- LocaleValidityChecker regularAndUnknown = new LocaleValidityChecker(datasubtypes);
+ LocaleValidityChecker localeValidityChecker = new LocaleValidityChecker(datasubtypes);
for (String[] test : tests) {
- check(++count, regularAndUnknown, test[0], test[1]);
+ check(++count, localeValidityChecker, test[0], test[1]);
}
}