#include "uhash.h"
#include "uvector.h"
-#define UND_LSR LSR("und", "", "")
+#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
/**
* Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
// 3. Remaining locales in builder order.
// In Java, we use a LinkedHashMap for both map & ordered lists.
// In C++, we use separate structures.
- // We over-allocate arrays of LSRs and indexes for simplicity.
- // We reserve slots at the array starts for the default and paradigm locales,
- // plus enough for all supported locales.
- // If there are few paradigm locales and few duplicate supported LSRs,
- // then the amount of wasted space is small.
+ //
+ // We allocate arrays of LSRs and indexes,
+ // with as many slots as supported locales, for simplicity.
+ // We write the default and paradigm LSRs starting from the front of the arrays,
+ // and others starting from the back.
+ // At the end we reverse the non-paradigm LSRs.
+ // We end up wasting as many array slots as there are duplicate supported LSRs,
+ // but the amount of wasted space is small as long as there are few duplicates.
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
supportedLocalesLength, &errorCode);
if (U_FAILURE(errorCode)) { return; }
- int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
- int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
supportedLSRs = static_cast<const LSR **>(
- uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
+ uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
supportedIndexes = static_cast<int32_t *>(
- uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
+ uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t paradigmIndex = 0;
- int32_t otherIndex = paradigmLimit;
+ int32_t otherIndex = supportedLocalesLength;
if (idef >= 0) {
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
supportedLSRs[0] = defLSR;
supportedLSRs[paradigmIndex] = &lsr;
supportedIndexes[paradigmIndex++] = i;
} else {
- supportedLSRs[otherIndex] = &lsr;
- supportedIndexes[otherIndex++] = i;
+ supportedLSRs[--otherIndex] = &lsr;
+ supportedIndexes[otherIndex] = i;
}
}
}
if (U_FAILURE(errorCode)) { return; }
}
- // Squeeze out unused array slots.
- if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
- uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
- (otherIndex - paradigmLimit) * sizeof(const LSR *));
- uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
- (otherIndex - paradigmLimit) * sizeof(int32_t));
+ // Reverse the non-paradigm LSRs to be in order, right after the paradigm LSRs.
+ // First fill the unused slots between paradigm LSRs and other LSRs.
+ // This gap is as large as the number of locales with duplicate LSRs.
+ int32_t i = paradigmIndex;
+ int32_t j = supportedLocalesLength - 1;
+ while (i < otherIndex && otherIndex <= j) {
+ supportedLSRs[i] = supportedLSRs[j];
+ supportedIndexes[i++] = supportedIndexes[j--];
}
- supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
+ // Swap remaining non-paradigm LSRs in place.
+ while (i < j) {
+ const LSR *tempLSR = supportedLSRs[i];
+ supportedLSRs[i] = supportedLSRs[j];
+ supportedLSRs[j] = tempLSR;
+ int32_t tempIndex = supportedIndexes[i];
+ supportedIndexes[i++] = supportedIndexes[j];
+ supportedIndexes[j--] = tempIndex;
+ }
+ supportedLSRsLength = supportedLocalesLength - (otherIndex - paradigmIndex);
}
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
if (U_FAILURE(errorCode)) { return -1; }
int32_t desiredIndex = 0;
int32_t bestSupportedLsrIndex = -1;
- for (int32_t bestDistance = thresholdDistance;;) {
+ for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
// Quick check for exact maximized LSR.
// Returns suppIndex+1 where 0 means not found.
if (supportedLsrToIndex != nullptr) {
}
}
int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
- desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
+ desiredLSR, supportedLSRs, supportedLSRsLength, bestShiftedDistance, favorSubtag);
if (bestIndexAndDistance >= 0) {
- bestDistance = bestIndexAndDistance & 0xff;
+ bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
if (remainingIter != nullptr) {
remainingIter->rememberCurrent(desiredIndex, errorCode);
if (U_FAILURE(errorCode)) { return -1; }
}
- bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? bestIndexAndDistance >> 8 : -1;
+ bestSupportedLsrIndex = bestIndexAndDistance >= 0 ?
+ LocaleDistance::getIndex(bestIndexAndDistance) : -1;
}
- if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+ if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
break;
}
if (remainingIter == nullptr || !remainingIter->hasNext()) {
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
if (U_FAILURE(errorCode)) { return 0; }
const LSR *pSuppLSR = &suppLSR;
- int32_t distance = localeDistance.getBestIndexAndDistance(
+ int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
&pSuppLSR, 1,
- thresholdDistance, favorSubtag) & 0xff;
- return (100 - distance) / 100.0;
+ LocaleDistance::shiftDistance(thresholdDistance), favorSubtag);
+ double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
+ return (100.0 - distance) / 100.0;
}
U_NAMESPACE_END
// a mere region difference for one desired locale
// is as good as a perfect match for the next following desired locale.
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
- LSR en("en", "Latn", "US");
- LSR enGB("en", "Latn", "GB");
+ LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR);
+ LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR);
const LSR *p_enGB = &enGB;
- defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, &p_enGB, 1,
- 50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff;
+ int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1,
+ shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE);
+ defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
}
int32_t LocaleDistance::getBestIndexAndDistance(
const LSR &desired,
const LSR **supportedLSRs, int32_t supportedLSRsLength,
- int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
+ int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
+ // Round up the shifted threshold (if fraction bits are not 0)
+ // for comparison with un-shifted distances until we need fraction bits.
+ // (If we simply shifted non-zero fraction bits away, then we might ignore a language
+ // when it's really still a micro distance below the threshold.)
+ int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
BytesTrie iter(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
distance >>= 2;
}
- if (distance >= threshold) {
+ if (distance >= roundedThreshold) {
continue;
}
scriptDistance &= ~DISTANCE_IS_FINAL;
}
distance += scriptDistance;
- if (distance >= threshold) {
+ if (distance >= roundedThreshold) {
continue;
}
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
distance += defaultRegionDistance;
} else {
- int32_t remainingThreshold = threshold - distance;
+ int32_t remainingThreshold = roundedThreshold - distance;
if (minRegionDistance >= remainingThreshold) {
continue;
}
partitionsForRegion(supported),
remainingThreshold);
}
- if (distance < threshold) {
- if (distance == 0) {
- return slIndex << 8;
+ int32_t shiftedDistance = shiftDistance(distance);
+ if (shiftedDistance == 0) {
+ // Distinguish between equivalent but originally unequal locales via an
+ // additional micro distance.
+ shiftedDistance |= (desired.flags ^ supported.flags);
+ }
+ if (shiftedDistance < shiftedThreshold) {
+ if (shiftedDistance == 0) {
+ return slIndex << INDEX_SHIFT;
}
bestIndex = slIndex;
- threshold = distance;
+ shiftedThreshold = shiftedDistance;
}
}
- return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+ return bestIndex >= 0 ?
+ (bestIndex << INDEX_SHIFT) | shiftedThreshold :
+ INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
}
int32_t LocaleDistance::getDesSuppScriptDistance(
}
UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
- // Linear search for a very short list (length 6 as of 2019).
- // If there are many paradigm LSRs we should use a hash set.
+ // Reports whether lsr is equivalent (LSR::isEquivalentTo) to one of the paradigm LSRs.
+ // This is a linear search over a very short list (length 6 as of 2019),
+ // because we look for equivalence not equality, and because it's easy.
+ // If there are many paradigm LSRs we should use a hash set
+ // with custom comparator and hasher.
U_ASSERT(paradigmLSRsLength <= 15);  // guards the short-list assumption behind the linear search
for (int32_t i = 0; i < paradigmLSRsLength; ++i) {
- if (lsr == paradigmLSRs[i]) { return true; }
+ if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; }
}
return false;
}
public:
static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+ static int32_t shiftDistance(int32_t distance) {  // moves a whole-number distance left by DISTANCE_SHIFT bits to make room for fraction bits
+ return distance << DISTANCE_SHIFT;
+ }
+
+ static int32_t getShiftedDistance(int32_t indexAndDistance) {  // extracts the shifted distance (the DISTANCE_MASK bits) from a packed index-and-distance value
+ return indexAndDistance & DISTANCE_MASK;
+ }
+
+ static double getDistanceDouble(int32_t indexAndDistance) {  // packed distance converted back to distance units, keeping the fraction
+ double shiftedDistance = getShiftedDistance(indexAndDistance);
+ return shiftedDistance / (1 << DISTANCE_SHIFT);  // undo shiftDistance() without losing the fraction bits
+ }
+
+ static int32_t getIndex(int32_t indexAndDistance) {  // extracts the supported-LSR index stored above the distance bits
+ // assert indexAndDistance >= 0;  (a negative packed value means no supported LSR was below the threshold)
+ return indexAndDistance >> INDEX_SHIFT;
+ }
+
/**
* Finds the supported LSR with the smallest distance from the desired one.
* Equivalent LSR subtags must be normalized into a canonical form.
*
- * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+ * <p>Returns the index of the lowest-distance supported LSR in the high bits
* (negative if none has a distance below the threshold),
- * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+ * and its shifted distance (0..shiftDistance(ABOVE_THRESHOLD)) in the low bits.
*/
int32_t getBestIndexAndDistance(const LSR &desired,
const LSR **supportedLSRs, int32_t supportedLSRsLength,
- int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
-
- int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
+ int32_t shiftedThreshold,
+ ULocMatchFavorSubtag favorSubtag) const;
UBool isParadigmLSR(const LSR &lsr) const;
}
private:
+ // The distance is shifted left to gain some fraction bits.
+ static constexpr int32_t DISTANCE_SHIFT = 3;
+ static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
+ // 7 bits for 0..100
+ static constexpr int32_t DISTANCE_INT_SHIFT = 7;
+ static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
+ static constexpr int32_t DISTANCE_MASK = 0x3ff;
+ // static constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
+ static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
+
+ static int32_t getDistanceFloor(int32_t indexAndDistance) {  // whole-number part of the packed distance; the fraction bits are dropped
+ return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
+ }
+
LocaleDistance(const LocaleDistanceData &data);
LocaleDistance(const LocaleDistance &other) = delete;
LocaleDistance &operator=(const LocaleDistance &other) = delete;
for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
strings.get(lsrSubtagIndexes[i + 1]),
- strings.get(lsrSubtagIndexes[i + 2]));
+ strings.get(lsrSubtagIndexes[i + 2]),
+ LSR::IMPLICIT_LSR);
}
if (partitionsLength > 0) {
for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
strings.get(paradigmSubtagIndexes[i + 1]),
- strings.get(paradigmSubtagIndexes[i + 2]));
+ strings.get(paradigmSubtagIndexes[i + 2]),
+ LSR::DONT_CARE_FLAGS);
}
distanceData.paradigms = paradigms;
}
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
// Private use language tag x-subtag-subtag...
- return LSR(name, "", "");
+ return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant(), errorCode);
if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
switch (c1) {
case 'A':
- return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode);
+ return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
+ LSR::EXPLICIT_LSR, errorCode);
case 'B':
- return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode);
+ return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
+ LSR::EXPLICIT_LSR, errorCode);
case 'C':
- return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode);
+ return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
+ LSR::EXPLICIT_LSR, errorCode);
default: // normal locale
break;
}
}
if (variant[0] == 'P' && variant[1] == 'S') {
+ int32_t lsrFlags = *region == 0 ?
+ LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
if (uprv_strcmp(variant, "PSACCENT") == 0) {
return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
- *region == 0 ? "XA" : region, errorCode);
+ *region == 0 ? "XA" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSBIDI") == 0) {
return LSR(PSEUDO_BIDI_PREFIX, language, script,
- *region == 0 ? "XB" : region, errorCode);
+ *region == 0 ? "XB" : region, lsrFlags, errorCode);
} else if (uprv_strcmp(variant, "PSCRACK") == 0) {
return LSR(PSEUDO_CRACKED_PREFIX, language, script,
- *region == 0 ? "XC" : region, errorCode);
+ *region == 0 ? "XC" : region, lsrFlags, errorCode);
}
// else normal locale
}
region = "";
}
if (*script != 0 && *region != 0 && *language != 0) {
- return LSR(language, script, region); // already maximized
+ return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
}
uint32_t retainOldMask = 0;
if (retainOldMask == 0) {
// Quickly return a copy of the lookup-result LSR
// without new allocation of the subtags.
- return LSR(result.language, result.script, result.region);
+ return LSR(result.language, result.script, result.region, result.flags);
}
if ((retainOldMask & 4) == 0) {
language = result.language;
if ((retainOldMask & 1) == 0) {
region = result.region;
}
- return LSR(language, script, region);
+ // retainOldMask flags = LSR explicit-subtag flags
+ return LSR(language, script, region, retainOldMask);
}
int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
if (result.region.equals(value00.region)) {
- return new LSR(result.language, "", "");
+ return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
- return new LSR(result.language, "", result.region);
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
} else {
favorRegionOk = true;
}
// Maybe do later, but for now use the straightforward code.
LSR result2 = maximize(languageIn, scriptIn, "");
if (result2.equals(result)) {
- return new LSR(result.language, result.script, "");
+ return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
} else if (favorRegionOk) {
- return new LSR(result.language, "", result.region);
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
}
return result;
}
U_NAMESPACE_BEGIN
-LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
+LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
+ UErrorCode &errorCode) :
language(nullptr), script(nullptr), region(r),
- regionIndex(indexForRegion(region)) {
+ regionIndex(indexForRegion(region)), flags(f) {
if (U_SUCCESS(errorCode)) {
CharString langScript;
langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode);
LSR::LSR(LSR &&other) U_NOEXCEPT :
language(other.language), script(other.script), region(other.region), owned(other.owned),
- regionIndex(other.regionIndex), hashCode(other.hashCode) {
+ regionIndex(other.regionIndex), flags(other.flags),
+ hashCode(other.hashCode) {
if (owned != nullptr) {
other.language = other.script = "";
other.owned = nullptr;
script = other.script;
region = other.region;
regionIndex = other.regionIndex;
+ flags = other.flags;
owned = other.owned;
hashCode = other.hashCode;
if (owned != nullptr) {
return *this;
}
-UBool LSR::operator==(const LSR &other) const {
+// Reports whether both LSRs name the same language, script and region.
+// Unlike operator==, the flags are not compared, so LSRs that differ only
+// in their flags are still equivalent.
+UBool LSR::isEquivalentTo(const LSR &other) const {
return
uprv_strcmp(language, other.language) == 0 &&
uprv_strcmp(script, other.script) == 0 &&
+ // The region indexes must agree; without this check a well-formed region
+ // (regionIndex > 0) would short-circuit the test below and match any other region.
+ regionIndex == other.regionIndex &&
+ // Compare regions if both are ill-formed (and their indexes are 0).
(regionIndex > 0 || uprv_strcmp(region, other.region) == 0);
}
+UBool LSR::operator==(const LSR &other) const {  // exact equality: language, script, region and flags must all match
+ return
+ uprv_strcmp(language, other.language) == 0 &&
+ uprv_strcmp(script, other.script) == 0 &&
+ regionIndex == other.regionIndex &&
+ // Compare regions if both are ill-formed (and their indexes are 0).
+ (regionIndex > 0 || uprv_strcmp(region, other.region) == 0) &&
+ flags == other.flags;  // isEquivalentTo() does not compare the flags; equality does
+}
+
int32_t LSR::indexForRegion(const char *region) {
int32_t c = region[0];
int32_t a = c - '0';
LSR &LSR::setHashCode() {
if (hashCode == 0) {
- hashCode =
- (ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language))) * 37 +
- ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)))) * 37 +
- regionIndex;
+ int32_t h = ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
+ h = h * 37 + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
+ h = h * 37 + regionIndex;
+ hashCode = h * 37 + flags;
}
return *this;
}
struct LSR final : public UMemory {
static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
+ static constexpr int32_t EXPLICIT_LSR = 7;
+ static constexpr int32_t EXPLICIT_LANGUAGE = 4;
+ static constexpr int32_t EXPLICIT_SCRIPT = 2;
+ static constexpr int32_t EXPLICIT_REGION = 1;
+ static constexpr int32_t IMPLICIT_LSR = 0;
+ static constexpr int32_t DONT_CARE_FLAGS = 0;
+
const char *language;
const char *script;
const char *region;
char *owned = nullptr;
/** Index for region, 0 if ill-formed. @see indexForRegion */
int32_t regionIndex = 0;
+ int32_t flags = 0;
/** Only set for LSRs that will be used in a hash table. */
int32_t hashCode = 0;
LSR() : language("und"), script(""), region("") {}
/** Constructor which aliases all subtag pointers. */
- LSR(const char *lang, const char *scr, const char *r) :
+ LSR(const char *lang, const char *scr, const char *r, int32_t f) :
language(lang), script(scr), region(r),
- regionIndex(indexForRegion(region)) {}
+ regionIndex(indexForRegion(region)), flags(f) {}
/**
* Constructor which prepends the prefix to the language and script,
* copies those into owned memory, and aliases the region.
*/
- LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
+ LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
+ UErrorCode &errorCode);
LSR(LSR &&other) U_NOEXCEPT;
LSR(const LSR &other) = delete;
inline ~LSR() {
*/
static int32_t indexForRegion(const char *region);
+ UBool isEquivalentTo(const LSR &other) const;
UBool operator==(const LSR &other) const;
inline UBool operator!=(const LSR &other) const {
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-XB # These are equivalent.
+ar-PSBIDI >> ar-PSBIDI
en-XA >> en-XA
-en-PSACCENT >> en-XA # These are equivalent.
+en-PSACCENT >> en-PSACCENT
ar-PSCRACK >> ar-PSCRACK
@favor=script
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-XB # These are equivalent.
+ar-PSBIDI >> ar-PSBIDI
en-XA >> en-XA
-en-PSACCENT >> en-XA # These are equivalent.
+en-PSACCENT >> en-PSACCENT
ar-PSCRACK >> ar-PSCRACK
** test: BestMatchForTraditionalChinese
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
-** test: return first among likely-subtags equivalent locales
-# Was: more specific script should win in case regions are identical
-# with some different results.
+** test: return most originally similar among likely-subtags equivalent locales
@supported=af, af-Latn, af-Arab
af >> af
af-ZA >> af
-af-Latn-ZA >> af
-af-Latn >> af
+af-Latn-ZA >> af-Latn
+af-Latn >> af-Latn
@favor=script
af >> af
af-ZA >> af
-af-Latn-ZA >> af
-af-Latn >> af
+af-Latn-ZA >> af-Latn
+af-Latn >> af-Latn
-# Was: more specific region should win
-# with some different results.
@supported=nl, nl-NL, nl-BE
@favor=
nl >> nl
nl-Latn >> nl
-nl-Latn-NL >> nl
-nl-NL >> nl
+nl-Latn-NL >> nl-NL
+nl-NL >> nl-NL
@favor=script
nl >> nl
nl-Latn >> nl
-nl-Latn-NL >> nl
-nl-NL >> nl
+nl-Latn-NL >> nl-NL
+nl-NL >> nl-NL
-# Was: more specific region wins over more specific script
-# with some different results.
@supported=nl, nl-Latn, nl-NL, nl-BE
@favor=
nl >> nl
-nl-Latn >> nl
-nl-NL >> nl
-nl-Latn-NL >> nl
+nl-Latn >> nl-Latn
+nl-NL >> nl-NL
+nl-Latn-NL >> nl-Latn
@favor=script
nl >> nl
-nl-Latn >> nl
-nl-NL >> nl
-nl-Latn-NL >> nl
+nl-Latn >> nl-Latn
+nl-NL >> nl-NL
+nl-Latn-NL >> nl-Latn
** test: region may replace matched if matched is enclosing
@supported=es-419, es
** test: pick best maximized tag
@supported=ja, ja-Jpan-US, ja-JP, en, ru
ja-Jpan, ru >> ja
-ja-JP, ru >> ja
+ja-JP, ru >> ja-JP
ja-US, ru >> ja-Jpan-US
@favor=script
ja-Jpan, ru >> ja
-ja-JP, ru >> ja
+ja-JP, ru >> ja-JP
ja-US, ru >> ja-Jpan-US
** test: termination: pick best maximized match
@supported=ja, ja-Jpan, ja-JP, en, ru
-ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja
+ja-Jpan-JP, ru >> ja-Jpan
+ja-Jpan, ru >> ja-Jpan
@favor=script
-ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja
+ja-Jpan-JP, ru >> ja-Jpan
+ja-Jpan, ru >> ja-Jpan
** test: same language over exact, but distinguish when user is explicit
@supported=fr, en-GB, ja, es-ES, es-MX
** test: testGetBestMatchWithMinMatchScore
@supported=fr-FR, fr, fr-CA, en
@default=und
-fr >> fr-FR # First likely-subtags equivalent match is chosen.
+fr >> fr
@supported=en, fr, fr-CA
fr-FR >> fr # Parent match is chosen.
@supported=en, fr-CA
@favor=script
@supported=fr-FR, fr, fr-CA, en
-fr >> fr-FR
+fr >> fr
@supported=en, fr, fr-CA
fr-FR >> fr
@supported=en, fr-CA
public final class LSR {
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
+ public static final int EXPLICIT_LSR = 7;
+ public static final int EXPLICIT_LANGUAGE = 4;
+ public static final int EXPLICIT_SCRIPT = 2;
+ public static final int EXPLICIT_REGION = 1;
+ public static final int IMPLICIT_LSR = 0;
+ public static final int DONT_CARE_FLAGS = 0;
+
public static final boolean DEBUG_OUTPUT = false;
public final String language;
public final String region;
/** Index for region, negative if ill-formed. @see indexForRegion */
final int regionIndex;
+ public final int flags;
- public LSR(String language, String script, String region) {
+ public LSR(String language, String script, String region, int flags) {
this.language = language;
this.script = script;
this.region = region;
regionIndex = indexForRegion(region);
+ this.flags = flags;
}
/**
}
return result.toString();
}
+
+ public boolean isEquivalentTo(LSR other) {  // same language, script and region; the flags are not compared
+ return language.equals(other.language)
+ && script.equals(other.script)
+ && region.equals(other.region);
+ }
+
@Override
public boolean equals(Object obj) {
LSR other;
&& obj.getClass() == this.getClass()
&& language.equals((other = (LSR) obj).language)
&& script.equals(other.script)
- && region.equals(other.region));
+ && region.equals(other.region)
+ && flags == other.flags);
}
+
@Override
public int hashCode() {
- return Objects.hash(language, script, region);
+ return Objects.hash(language, script, region, flags);  // flags are hashed too, matching the flags comparison in equals()
}
}
private static final int DISTANCE_IS_FINAL = 0x100;
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
+
+ // The distance is shifted left to gain some fraction bits.
+ private static final int DISTANCE_SHIFT = 3;
+ private static final int DISTANCE_FRACTION_MASK = 7;
+ // 7 bits for 0..100
+ private static final int DISTANCE_INT_SHIFT = 7;
+ private static final int INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
+ private static final int DISTANCE_MASK = 0x3ff;
+ // private static final int MAX_INDEX = 0x1fffff; // avoids sign bit
+ private static final int INDEX_NEG_1 = 0xfffffc00;
+
// Indexes into array of distances.
public static final int IX_DEF_LANG_DISTANCE = 0;
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
private final int minRegionDistance;
private final int defaultDemotionPerDesiredLocale;
+ public static final int shiftDistance(int distance) {  // moves a whole-number distance left by DISTANCE_SHIFT bits to make room for fraction bits
+ return distance << DISTANCE_SHIFT;
+ }
+
+ public static final int getShiftedDistance(int indexAndDistance) {  // extracts the shifted distance (the DISTANCE_MASK bits) from a packed index-and-distance value
+ return indexAndDistance & DISTANCE_MASK;
+ }
+
+ public static final double getDistanceDouble(int indexAndDistance) {  // packed distance converted back to distance units, keeping the fraction
+ double shiftedDistance = getShiftedDistance(indexAndDistance);
+ return shiftedDistance / (1 << DISTANCE_SHIFT);  // undo shiftDistance() without losing the fraction bits
+ }
+
+ private static final int getDistanceFloor(int indexAndDistance) {  // whole-number part of the packed distance; the fraction bits are dropped
+ return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
+ }
+
+ public static final int getIndex(int indexAndDistance) {  // extracts the supported-LSR index stored above the distance bits
+ assert indexAndDistance >= 0;  // a negative packed value carries no valid index
+ return indexAndDistance >> INDEX_SHIFT;
+ }
+
// VisibleForTesting
public static final class Data {
public byte[] trie;
String[] paradigms = value.getStringArray();
paradigmLSRs = new HashSet<>(paradigms.length / 3);
for (int i = 0; i < paradigms.length; i += 3) {
- paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
+ paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2],
+ LSR.DONT_CARE_FLAGS));
}
} else {
paradigmLSRs = Collections.emptySet();
// a mere region difference for one desired locale
// is as good as a perfect match for the next following desired locale.
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
- LSR en = new LSR("en", "Latn", "US");
- LSR enGB = new LSR("en", "Latn", "GB");
- defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
- 50, FavorSubtag.LANGUAGE) & 0xff;
+ LSR en = new LSR("en", "Latn", "US", LSR.EXPLICIT_LSR);
+ LSR enGB = new LSR("en", "Latn", "GB", LSR.EXPLICIT_LSR);
+ int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB },
+ shiftDistance(50), FavorSubtag.LANGUAGE);
+ defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
if (DEBUG_OUTPUT) {
System.out.println("*** locale distance");
int threshold, FavorSubtag favorSubtag) {
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
- return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
- threshold, favorSubtag) & 0xff;
+ int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
+ shiftDistance(threshold), favorSubtag);
+ return getDistanceFloor(indexAndDistance);
}
/**
* Finds the supported LSR with the smallest distance from the desired one.
* Equivalent LSR subtags must be normalized into a canonical form.
*
- * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+ * <p>Returns the index of the lowest-distance supported LSR in the high bits
* (negative if none has a distance below the threshold),
- * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+ * and its shifted distance (0..shiftDistance(ABOVE_THRESHOLD)) in the low bits.
*/
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
- int threshold, FavorSubtag favorSubtag) {
+ int shiftedThreshold, FavorSubtag favorSubtag) {
+ // Round up the shifted threshold (if fraction bits are not 0)
+ // for comparison with un-shifted distances until we need fraction bits.
+ // (If we simply shifted non-zero fraction bits away, then we might ignore a language
+ // when it's really still a micro distance below the threshold.)
+ int roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
- if (distance >= threshold) {
+ if (distance >= roundedThreshold) {
continue;
}
scriptDistance &= ~DISTANCE_IS_FINAL;
}
distance += scriptDistance;
- if (distance >= threshold) {
+ if (distance >= roundedThreshold) {
continue;
}
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
distance += defaultRegionDistance;
} else {
- int remainingThreshold = threshold - distance;
+ int remainingThreshold = roundedThreshold - distance;
if (minRegionDistance >= remainingThreshold) {
continue;
}
partitionsForRegion(supported),
remainingThreshold);
}
- if (distance < threshold) {
- if (distance == 0) {
- return slIndex << 8;
+ int shiftedDistance = shiftDistance(distance);
+ if (shiftedDistance == 0) {
+ // Distinguish between equivalent but originally unequal locales via an
+ // additional micro distance.
+ shiftedDistance |= (desired.flags ^ supported.flags);
+ }
+ if (shiftedDistance < shiftedThreshold) {
+ if (shiftedDistance == 0) {
+ return slIndex << INDEX_SHIFT;
}
bestIndex = slIndex;
- threshold = distance;
+ shiftedThreshold = shiftedDistance;
}
}
- return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+ return bestIndex >= 0 ?
+ (bestIndex << INDEX_SHIFT) | shiftedThreshold :
+ INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
}
private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
}
public boolean isParadigmLSR(LSR lsr) {
- return paradigmLSRs.contains(lsr);
+ // Reports whether lsr is equivalent (isEquivalentTo) to one of the paradigm LSRs, via a linear search over a very short list (length 6 as of 2019),
+ // because we look for equivalence not equality, and
+ // HashSet does not support customizing equality.
+ // If there are many paradigm LSRs we should revisit this.
+ assert paradigmLSRs.size() <= 15;  // guards the short-list assumption behind the linear search
+ for (LSR plsr : paradigmLSRs) {
+ if (lsr.isEquivalentTo(plsr)) {
+ return true;
+ }
+ }
+ return false;
}
// VisibleForTesting
return defaultDemotionPerDesiredLocale;
}
- // TODO: When we build data offline,
- // write test code to compare the loaded table with the builder output.
- // Fail if different, with instructions for how to update the data file.
// VisibleForTesting
public Map<String, Integer> testOnlyGetDistanceTable() {
Map<String, Integer> map = new TreeMap<>();
String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
LSR[] lsrs = new LSR[lsrSubtags.length / 3];
for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
- lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
+ lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2],
+ LSR.IMPLICIT_LSR);
}
return new Data(languageAliases, regionAliases, trie, lsrs);
String tag = locale.toLanguageTag();
assert tag.startsWith("x-");
// Private use language tag x-subtag-subtag...
- return new LSR(tag, "", "");
+ return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
String tag = locale.toLanguageTag();
if (tag.startsWith("x-")) {
// Private use language tag x-subtag-subtag...
- return new LSR(tag, "", "");
+ return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
switch (region.charAt(1)) {
case 'A':
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
- PSEUDO_ACCENTS_PREFIX + script, region);
+ PSEUDO_ACCENTS_PREFIX + script, region, LSR.EXPLICIT_LSR);
case 'B':
return new LSR(PSEUDO_BIDI_PREFIX + language,
- PSEUDO_BIDI_PREFIX + script, region);
+ PSEUDO_BIDI_PREFIX + script, region, LSR.EXPLICIT_LSR);
case 'C':
return new LSR(PSEUDO_CRACKED_PREFIX + language,
- PSEUDO_CRACKED_PREFIX + script, region);
+ PSEUDO_CRACKED_PREFIX + script, region, LSR.EXPLICIT_LSR);
default: // normal locale
break;
}
}
if (variant.startsWith("PS")) {
+ int lsrFlags = region.isEmpty() ?
+ LSR.EXPLICIT_LANGUAGE | LSR.EXPLICIT_SCRIPT : LSR.EXPLICIT_LSR;
switch (variant) {
case "PSACCENT":
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
- PSEUDO_ACCENTS_PREFIX + script, region.isEmpty() ? "XA" : region);
+ PSEUDO_ACCENTS_PREFIX + script,
+ region.isEmpty() ? "XA" : region, lsrFlags);
case "PSBIDI":
return new LSR(PSEUDO_BIDI_PREFIX + language,
- PSEUDO_BIDI_PREFIX + script, region.isEmpty() ? "XB" : region);
+ PSEUDO_BIDI_PREFIX + script,
+ region.isEmpty() ? "XB" : region, lsrFlags);
case "PSCRACK":
return new LSR(PSEUDO_CRACKED_PREFIX + language,
- PSEUDO_CRACKED_PREFIX + script, region.isEmpty() ? "XC" : region);
+ PSEUDO_CRACKED_PREFIX + script,
+ region.isEmpty() ? "XC" : region, lsrFlags);
default: // normal locale
break;
}
region = "";
}
if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
- return new LSR(language, script, region); // already maximized
+ return new LSR(language, script, region, LSR.EXPLICIT_LSR); // already maximized
}
int retainOldMask = 0;
}
if (retainOldMask == 0) {
+ assert result.flags == LSR.IMPLICIT_LSR;
return result;
}
if ((retainOldMask & 4) == 0) {
if ((retainOldMask & 1) == 0) {
region = result.region;
}
- return new LSR(language, script, region);
+ // retainOldMask flags = LSR explicit-subtag flags
+ return new LSR(language, script, region, retainOldMask);
}
private static final int trieNext(BytesTrie iter, String s, int i) {
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
if (result.region.equals(value00.region)) {
- return new LSR(result.language, "", "");
+ return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
- return new LSR(result.language, "", result.region);
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
} else {
favorRegionOk = true;
}
// Maybe do later, but for now use the straightforward code.
LSR result2 = maximize(languageIn, scriptIn, "");
if (result2.equals(result)) {
- return new LSR(result.language, result.script, "");
+ return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
} else if (favorRegionOk) {
- return new LSR(result.language, "", result.region);
+ return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
}
return result;
}
* @stable ICU 4.4
*/
public final class LocaleMatcher {
- private static final LSR UND_LSR = new LSR("und","","");
+ private static final LSR UND_LSR = new LSR("und","","", LSR.EXPLICIT_LSR);
// In ULocale, "und" and "" make the same object.
private static final ULocale UND_ULOCALE = new ULocale("und");
// In Locale, "und" and "" make different objects.
builder.demotion == Demotion.NONE ? 0 :
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
favorSubtag = builder.favor;
+ if (TRACE_MATCHER) {
+ System.err.printf("new LocaleMatcher: %s\n", toString());
+ }
}
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
int desiredIndex = 0;
int bestSupportedLsrIndex = -1;
- for (int bestDistance = thresholdDistance;;) {
+ StringBuilder sb = null;
+ if (TRACE_MATCHER) {
+ sb = new StringBuilder("LocaleMatcher desired:");
+ }
+ for (int bestShiftedDistance = LocaleDistance.shiftDistance(thresholdDistance);;) {
+ if (TRACE_MATCHER) {
+ sb.append(' ').append(desiredLSR);
+ }
// Quick check for exact maximized LSR.
Integer index = supportedLsrToIndex.get(desiredLSR);
if (index != null) {
int suppIndex = index;
if (TRACE_MATCHER) {
- System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
- supportedULocales[suppIndex]);
+ System.err.printf("%s --> best=%s: desiredLSR=supportedLSR\n",
+ sb, supportedULocales[suppIndex]);
}
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
return suppIndex;
}
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- desiredLSR, supportedLSRs, bestDistance, favorSubtag);
+ desiredLSR, supportedLSRs, bestShiftedDistance, favorSubtag);
if (bestIndexAndDistance >= 0) {
- bestDistance = bestIndexAndDistance & 0xff;
+ bestShiftedDistance = LocaleDistance.getShiftedDistance(bestIndexAndDistance);
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
- bestSupportedLsrIndex = bestIndexAndDistance >> 8;
+ bestSupportedLsrIndex = LocaleDistance.getIndex(bestIndexAndDistance);
}
- if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+ if ((bestShiftedDistance -= LocaleDistance.shiftDistance(demotionPerDesiredLocale))
+ <= 0) {
break;
}
if (remainingIter == null || !remainingIter.hasNext()) {
}
if (bestSupportedLsrIndex < 0) {
if (TRACE_MATCHER) {
- System.err.printf("Returning default %s: no good match\n", defaultULocale);
+ System.err.printf("%s --> best=default %s: no good match\n", sb, defaultULocale);
}
return -1;
}
int suppIndex = supportedIndexes[bestSupportedLsrIndex];
if (TRACE_MATCHER) {
- System.err.printf("Returning %s: best matching supported locale\n",
- supportedULocales[suppIndex]);
+ System.err.printf("%s --> best=%s: best matching supported locale\n",
+ sb, supportedULocales[suppIndex]);
}
return suppIndex;
}
@Deprecated
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
- int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
getMaximalLsrOrUnd(desired),
new LSR[] { getMaximalLsrOrUnd(supported) },
- thresholdDistance, favorSubtag) & 0xff;
- return (100 - distance) / 100.0;
+ LocaleDistance.shiftDistance(thresholdDistance), favorSubtag);
+ double distance = LocaleDistance.getDistanceDouble(indexAndDistance);
+ if (TRACE_MATCHER) {
+ System.err.printf("LocaleMatcher distance(desired=%s, supported=%s)=%g\n",
+ Objects.toString(desired), Objects.toString(supported), distance);
+ }
+ return (100.0 - distance) / 100.0;
}
/**
@Override
public String toString() {
StringBuilder s = new StringBuilder().append("{LocaleMatcher");
- if (supportedULocales.length > 0) {
- s.append(" supported={").append(supportedULocales[0].toString());
- for (int i = 1; i < supportedULocales.length; ++i) {
- s.append(", ").append(supportedULocales[i].toString());
+ // Supported languages in the order that we try to match them.
+ if (supportedLSRs.length > 0) {
+ s.append(" supportedLSRs={").append(supportedLSRs[0].toString());
+ for (int i = 1; i < supportedLSRs.length; ++i) {
+ s.append(", ").append(supportedLSRs[i].toString());
}
s.append('}');
}
s.append(" default=").append(Objects.toString(defaultULocale));
if (favorSubtag != null) {
- s.append(" distance=").append(favorSubtag.toString());
+ s.append(" favor=").append(favorSubtag.toString());
}
if (thresholdDistance >= 0) {
s.append(String.format(" threshold=%d", thresholdDistance));
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-XB # These are equivalent.
+ar-PSBIDI >> ar-PSBIDI
en-XA >> en-XA
-en-PSACCENT >> en-XA # These are equivalent.
+en-PSACCENT >> en-PSACCENT
ar-PSCRACK >> ar-PSCRACK
@favor=script
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
-ar-PSBIDI >> ar-XB # These are equivalent.
+ar-PSBIDI >> ar-PSBIDI
en-XA >> en-XA
-en-PSACCENT >> en-XA # These are equivalent.
+en-PSACCENT >> en-PSACCENT
ar-PSCRACK >> ar-PSCRACK
** test: BestMatchForTraditionalChinese
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
-** test: return first among likely-subtags equivalent locales
-# Was: more specific script should win in case regions are identical
-# with some different results.
+** test: return most originally similar among likely-subtags equivalent locales
@supported=af, af-Latn, af-Arab
af >> af
af-ZA >> af
-af-Latn-ZA >> af
-af-Latn >> af
+af-Latn-ZA >> af-Latn
+af-Latn >> af-Latn
@favor=script
af >> af
af-ZA >> af
-af-Latn-ZA >> af
-af-Latn >> af
+af-Latn-ZA >> af-Latn
+af-Latn >> af-Latn
-# Was: more specific region should win
-# with some different results.
@supported=nl, nl-NL, nl-BE
@favor=
nl >> nl
nl-Latn >> nl
-nl-Latn-NL >> nl
-nl-NL >> nl
+nl-Latn-NL >> nl-NL
+nl-NL >> nl-NL
@favor=script
nl >> nl
nl-Latn >> nl
-nl-Latn-NL >> nl
-nl-NL >> nl
+nl-Latn-NL >> nl-NL
+nl-NL >> nl-NL
-# Was: more specific region wins over more specific script
-# with some different results.
@supported=nl, nl-Latn, nl-NL, nl-BE
@favor=
nl >> nl
-nl-Latn >> nl
-nl-NL >> nl
-nl-Latn-NL >> nl
+nl-Latn >> nl-Latn
+nl-NL >> nl-NL
+nl-Latn-NL >> nl-Latn
@favor=script
nl >> nl
-nl-Latn >> nl
-nl-NL >> nl
-nl-Latn-NL >> nl
+nl-Latn >> nl-Latn
+nl-NL >> nl-NL
+nl-Latn-NL >> nl-Latn
** test: region may replace matched if matched is enclosing
@supported=es-419, es
** test: pick best maximized tag
@supported=ja, ja-Jpan-US, ja-JP, en, ru
ja-Jpan, ru >> ja
-ja-JP, ru >> ja
+ja-JP, ru >> ja-JP
ja-US, ru >> ja-Jpan-US
@favor=script
ja-Jpan, ru >> ja
-ja-JP, ru >> ja
+ja-JP, ru >> ja-JP
ja-US, ru >> ja-Jpan-US
** test: termination: pick best maximized match
@supported=ja, ja-Jpan, ja-JP, en, ru
-ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja
+ja-Jpan-JP, ru >> ja-Jpan
+ja-Jpan, ru >> ja-Jpan
@favor=script
-ja-Jpan-JP, ru >> ja
-ja-Jpan, ru >> ja
+ja-Jpan-JP, ru >> ja-Jpan
+ja-Jpan, ru >> ja-Jpan
** test: same language over exact, but distinguish when user is explicit
@supported=fr, en-GB, ja, es-ES, es-MX
** test: testGetBestMatchWithMinMatchScore
@supported=fr-FR, fr, fr-CA, en
@default=und
-fr >> fr-FR # First likely-subtags equivalent match is chosen.
+fr >> fr
@supported=en, fr, fr-CA
fr-FR >> fr # Parent match is chosen.
@supported=en, fr-CA
@favor=script
@supported=fr-FR, fr, fr-CA, en
-fr >> fr-FR
+fr >> fr
@supported=en, fr, fr-CA
fr-FR >> fr
@supported=en, fr-CA
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
// Reserve index 0 as "no value":
// The runtime lookup returns 0 for an intermediate match with no value.
- lsrIndexes.put(new LSR("", "", ""), 0); // arbitrary LSR
+ lsrIndexes.put(new LSR("", "", "", LSR.DONT_CARE_FLAGS), 0); // arbitrary LSR
// Reserve index 1 for SKIP_SCRIPT:
// The runtime lookup returns 1 for an intermediate match with a value.
- lsrIndexes.put(new LSR("skip", "script", ""), 1); // looks good when printing the data
+ // This LSR looks good when printing the data.
+ lsrIndexes.put(new LSR("skip", "script", "", LSR.DONT_CARE_FLAGS), 1);
// We could prefill the lsrList with common locales to give them small indexes,
// and see if that improves performance a little.
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
}
}
// hack
- set(result, "und", "Latn", "", new LSR("en", "Latn", "US"));
+ set(result, "und", "Latn", "", new LSR("en", "Latn", "US", LSR.DONT_CARE_FLAGS));
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
String lang = parts[0];
String p2 = parts.length < 2 ? "" : parts[1];
String p3 = parts.length < 3 ? "" : parts[2];
- return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
+ return p2.length() < 4 ?
+ new LSR(lang, "", p2, LSR.DONT_CARE_FLAGS) :
+ new LSR(lang, p2, p3, LSR.DONT_CARE_FLAGS);
}
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
for (String paradigm : paradigms) {
ULocale pl = new ULocale(paradigm);
- paradigmLSRs.add(XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl));
+ LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
+ // Clear the LSR flags to make the data equality test in
+ // LocaleDistanceTest happy.
+ paradigmLSRs.add(new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS));
}
TerritoryContainment tc = new TerritoryContainment(supplementalData);