* This value may change in subsequent releases of ICU.
* @stable ICU 2.4
*/
-#define UCOL_RUNTIME_VERSION 8
+#define UCOL_RUNTIME_VERSION 9
/**
* Collation builder code version.
# SPECIAL MAX/MIN COLLATION ELEMENTS
-FFFE; [02, 02, 02] # Special LOWEST primary, for merge/interleaving
+FFFE; [02, 05, 05] # Special LOWEST primary, for merge/interleaving
FFFF; [EF FF, 05, 05] # Special HIGHEST primary, for ranges
// Special sort key bytes for all levels.
static const uint8_t TERMINATOR_BYTE = 0;
static const uint8_t LEVEL_SEPARATOR_BYTE = 1;
+
+ /** The secondary/tertiary lower limit for tailoring before any root elements. */
+ static const uint32_t BEFORE_WEIGHT16 = 0x0100;
+
/**
* Merge-sort-key separator.
- * Must not be used as the lead byte of any CE weight,
- * nor as primary compression low terminator.
+ * Same as the unique primary and identical-level weights of U+FFFE.
+ * Must not be used as primary compression low terminator.
* Otherwise usable.
*/
static const uint8_t MERGE_SEPARATOR_BYTE = 2;
static const uint32_t MERGE_SEPARATOR_PRIMARY = 0x02000000; // U+FFFE
- static const uint32_t MERGE_SEPARATOR_WEIGHT16 = 0x0200; // U+FFFE
- static const uint32_t MERGE_SEPARATOR_LOWER32 = 0x02000200; // U+FFFE
- static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000202; // U+FFFE
+ static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000505; // U+FFFE
/**
* Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
}
nodes.setElementAt(node, index);
int32_t nextIndex = nextIndexFromNode(node);
- // Insert default nodes with weights 02 and 05, reset to the 02 node.
- node = nodeFromWeight16(BEFORE_WEIGHT16) | nodeFromStrength(strength);
+ // Insert default nodes with weights 01 and 05, reset to the 01 node.
+ node = nodeFromWeight16(Collation::BEFORE_WEIGHT16) | nodeFromStrength(strength);
index = insertNodeBetween(index, nextIndex, node, errorCode);
node = nodeFromWeight16(Collation::COMMON_WEIGHT16) | hasBefore3 |
nodeFromStrength(strength);
index = nextIndexFromNode(node);
node = nodes.elementAti(index);
U_ASSERT(!isTailoredNode(node) && strengthFromNode(node) == strength &&
- weight16FromNode(node) == BEFORE_WEIGHT16);
+ weight16FromNode(node) == Collation::BEFORE_WEIGHT16);
// Skip to the explicit common node.
do {
index = nextIndexFromNode(node);
// Gap at the beginning of the tertiary CE range.
t = rootElements.getTertiaryBoundary() - 0x100;
tLimit = rootElements.getFirstTertiaryCE() & Collation::ONLY_TERTIARY_MASK;
- } else if(t == BEFORE_WEIGHT16) {
+ } else if(t == Collation::BEFORE_WEIGHT16) {
tLimit = Collation::COMMON_WEIGHT16;
} else if(!pIsTailored && !sIsTailored) {
// p and s are root weights.
// Gap at the beginning of the secondary CE range.
s = rootElements.getSecondaryBoundary() - 0x100;
sLimit = rootElements.getFirstSecondaryCE() >> 16;
- } else if(s == BEFORE_WEIGHT16) {
+ } else if(s == Collation::BEFORE_WEIGHT16) {
sLimit = Collation::COMMON_WEIGHT16;
} else if(!pIsTailored) {
// p is a root primary.
static int32_t ceStrength(int64_t ce);
- /** The secondary/tertiary lower limit for tailoring before the common weight. */
- static const uint32_t BEFORE_WEIGHT16 = Collation::MERGE_SEPARATOR_WEIGHT16;
-
/** At most 1M nodes, limited by the 20 bits in node bit fields. */
static const int32_t MAX_INDEX = 0xfffff;
/**
int32_t rightStart = 0;
for(;;) {
// Find the merge separator or the NO_CE terminator.
+ uint32_t p;
int32_t leftLimit = leftStart;
- uint32_t leftLower32;
- while((leftLower32 = (uint32_t)left.getCE(leftLimit)) >
- Collation::MERGE_SEPARATOR_LOWER32 ||
- leftLower32 == 0) {
+ while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) >
+ Collation::MERGE_SEPARATOR_PRIMARY ||
+ p == 0) {
++leftLimit;
}
int32_t rightLimit = rightStart;
- uint32_t rightLower32;
- while((rightLower32 = (uint32_t)right.getCE(rightLimit)) >
- Collation::MERGE_SEPARATOR_LOWER32 ||
- rightLower32 == 0) {
+ while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) >
+ Collation::MERGE_SEPARATOR_PRIMARY ||
+ p == 0) {
++rightLimit;
}
// Both strings have the same number of merge separators,
// or else there would have been a primary-level difference.
U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
- if(left.getCE(leftLimit) == Collation::NO_CE) { break; }
+ if(p == Collation::NO_CE_PRIMARY) { break; }
// Skip both merge separators and continue.
leftStart = leftLimit + 1;
rightStart = rightLimit + 1;
if(leftTertiary != rightTertiary) {
if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
- // Pass through NO_CE and MERGE_SEPARATOR
- // and keep real tertiary weights larger than the MERGE_SEPARATOR.
+ // Pass through NO_CE and keep real tertiary weights larger than that.
// Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
// to keep tertiary CEs well-formed.
// Their case+tertiary weights must be greater than those of
// primary and secondary CEs.
- if(leftTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if(leftTertiary > Collation::NO_CE_WEIGHT16) {
if(leftLower32 > 0xffff) {
leftTertiary ^= 0xc000;
} else {
leftTertiary += 0x4000;
}
}
- if(rightTertiary > Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if(rightTertiary > Collation::NO_CE_WEIGHT16) {
if(rightLower32 > 0xffff) {
rightTertiary ^= 0xc000;
} else {
do {
int64_t ce = left.getCE(leftIndex++);
leftQuaternary = (uint32_t)ce & 0xffff;
- if(leftQuaternary == 0) {
- // Variable primary or completely ignorable.
+ if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
+ // Variable primary or completely ignorable or NO_CE.
leftQuaternary = (uint32_t)(ce >> 32);
- } else if(leftQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
- // Leave NO_CE or MERGE_SEPARATOR as is.
} else {
// Regular CE, not tertiary ignorable.
// Preserve the quaternary weight in bits 7..6.
do {
int64_t ce = right.getCE(rightIndex++);
rightQuaternary = (uint32_t)ce & 0xffff;
- if(rightQuaternary == 0) {
- // Variable primary or completely ignorable.
+ if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
+ // Variable primary or completely ignorable or NO_CE.
rightQuaternary = (uint32_t)(ce >> 32);
- } else if(rightQuaternary <= Collation::MERGE_SEPARATOR_WEIGHT16) {
- // Leave NO_CE or MERGE_SEPARATOR as is.
} else {
// Regular CE, not tertiary ignorable.
// Preserve the quaternary weight in bits 7..6.
}
return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
}
- if(leftQuaternary == Collation::NO_CE_WEIGHT16) { break; }
+ if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
}
return UCOL_EQUAL;
}
int32_t commonQuaternaries = 0;
uint32_t prevSecondary = 0;
- UBool anyMergeSeparators = FALSE;
+ int32_t secSegmentStart = 0;
for(;;) {
// No need to keep all CEs in the buffer when we write a sort key.
uint32_t s = lower32 >> 16;
if(s == 0) {
// secondary ignorable
- } else if(s == Collation::COMMON_WEIGHT16) {
+ } else if(s == Collation::COMMON_WEIGHT16 &&
+ ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
+ p != Collation::MERGE_SEPARATOR_PRIMARY)) {
+ // s is a common secondary weight, and
+ // backwards-secondary is off or the ce is not the merge separator.
++commonSecondaries;
} else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
if(commonSecondaries != 0) {
}
// commonSecondaries == 0
}
- // Reduce separators so that we can look for byte<=1 later.
- if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
- if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
- anyMergeSeparators = TRUE;
+ if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
+ // The backwards secondary level compares secondary weights backwards
+ // within segments separated by the merge separator (U+FFFE).
+ uint8_t *secs = secondaries.data();
+ int32_t last = secondaries.length() - 1;
+ if(secSegmentStart < last) {
+ uint8_t *p = secs + secSegmentStart;
+ uint8_t *q = secs + last;
+ do {
+ uint8_t b = *p;
+ *p++ = *q;
+ *q-- = b;
+ } while(p < q);
}
- secondaries.appendByte((s >> 8) - 1);
+ secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
+ Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
+ prevSecondary = 0;
+ secSegmentStart = secondaries.length();
} else {
secondaries.appendReverseWeight16(s);
+ prevSecondary = s;
}
- prevSecondary = s;
}
}
} else {
uint32_t c = (lower32 >> 8) & 0xff; // case bits & tertiary lead byte
U_ASSERT((c & 0xc0) != 0xc0);
- if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
+ if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
++commonCases;
} else {
if((options & CollationSettings::UPPER_FIRST) == 0) {
// lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
- if(commonCases != 0) {
+ // If there are only common (=lowest) weights in the whole level,
+ // then we need not write anything.
+ // Level length differences are handled already on the next-higher level.
+ if(commonCases != 0 &&
+ (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
--commonCases;
while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
}
uint32_t b;
- if(c <= Collation::MERGE_SEPARATOR_BYTE) {
+ if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
} else {
b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
cases.appendByte(b << 4);
commonCases = 0;
}
- if(c > Collation::MERGE_SEPARATOR_BYTE) {
+ if(c > Collation::LEVEL_SEPARATOR_BYTE) {
c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4; // 14 or 15
}
} else {
cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
commonCases = 0;
}
- if(c > Collation::MERGE_SEPARATOR_BYTE) {
+ if(c > Collation::LEVEL_SEPARATOR_BYTE) {
c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4; // 2 or 1
}
}
- // c is a separator byte 01 or 02,
+ // c is a separator byte 01,
// or a left-shifted nibble 0x10, 0x20, ... 0xf0.
cases.appendByte(c);
}
// Their case+tertiary weights must be greater than those of
// primary and secondary CEs.
//
- // Separators 01..02 -> 01..02 (unchanged)
- // Lowercase 03..04 -> 83..84 (includes uncased)
+ // Separator 01 -> 01 (unchanged)
+ // Lowercase 02..04 -> 82..84 (includes uncased)
// Common weight 05 -> 85..C5 (common-weight compression range)
// Lowercase 06..3F -> C6..FF
- // Mixed case 43..7F -> 43..7F
- // Uppercase 83..BF -> 03..3F
+ // Mixed case 42..7F -> 42..7F
+ // Uppercase 82..BF -> 02..3F
// Tertiary CE 86..BF -> C6..FF
- if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if(t <= Collation::NO_CE_WEIGHT16) {
// Keep separators unchanged.
} else if(lower32 > 0xffff) {
// Invert case bits of primary & secondary CEs.
if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
uint32_t q = lower32 & 0xffff;
- if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
+ if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
++commonQuaternaries;
- } else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
+ } else if(q == Collation::NO_CE_WEIGHT16 &&
(options & CollationSettings::ALTERNATE_MASK) == 0 &&
- (quaternaries.isEmpty() ||
- quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
- // If alternate=non-ignorable and there are only
- // common quaternary weights between two separators,
- // then we need not write anything between these separators.
+ quaternaries.isEmpty()) {
+ // If alternate=non-ignorable and there are only common quaternary weights,
+ // then we need not write anything.
// The only weights greater than the merge separator and less than the common weight
// are shifted primary weights, which are not generated for alternate=non-ignorable.
// There are also exactly as many quaternary weights as tertiary weights,
// so level length differences are handled already on tertiary level.
// Any above-common quaternary weight will compare greater regardless.
- quaternaries.appendByte(q >> 8);
+ quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
} else {
- if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
- q >>= 8;
+ if(q == Collation::NO_CE_WEIGHT16) {
+ q = Collation::LEVEL_SEPARATOR_BYTE;
} else {
q = 0xfc + ((q >> 6) & 3);
}
if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
ok &= secondaries.isOk();
sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
- uint8_t *secs = secondaries.data();
- int32_t length = secondaries.length() - 1; // Ignore the trailing NO_CE.
- if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
- // The backwards secondary level compares secondary weights backwards
- // within segments separated by the merge separator (U+FFFE, weight 02).
- // The separator weights 01 & 02 were reduced to 00 & 01 so that
- // we do not accidentally separate at a _second_ weight byte of 02.
- int32_t start = 0;
- for(;;) {
- // Find the merge separator or the NO_CE terminator.
- int32_t limit;
- if(anyMergeSeparators) {
- limit = start;
- while(secs[limit] > 1) { ++limit; }
- } else {
- limit = length;
- }
- // Reverse this segment.
- if(start < limit) {
- uint8_t *p = secs + start;
- uint8_t *q = secs + limit - 1;
- while(p < q) {
- uint8_t s = *p;
- *p++ = *q;
- *q-- = s;
- }
- }
- // Did we reach the end of the string?
- if(secs[limit] == 0) { break; }
- // Restore the merge separator.
- secs[limit] = 2;
- // Skip the merge separator and continue.
- start = limit + 1;
- }
- }
- sink.Append(reinterpret_cast<char *>(secs), length);
+ secondaries.appendTo(sink);
}
if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
uint8_t b = 0;
for(int32_t i = 0; i < length; ++i) {
uint8_t c = (uint8_t)cases[i];
- if(c <= Collation::MERGE_SEPARATOR_BYTE) {
- U_ASSERT(c != 0);
- if(b != 0) {
- sink.Append(b);
- b = 0;
- }
- sink.Append(c);
+ U_ASSERT((c & 0xf) == 0 && c != 0);
+ if(b == 0) {
+ b = c;
} else {
- U_ASSERT((c & 0xf) == 0);
- if(b == 0) {
- b = c;
- } else {
- sink.Append(b | (c >> 4));
- b = 0;
- }
+ sink.Append(b | (c >> 4));
+ b = 0;
}
}
if(b != 0) {
sec = elements[index] >> 16;
} else {
index = findPrimary(p) + 1;
- previousSec = Collation::MERGE_SEPARATOR_WEIGHT16;
+ previousSec = Collation::BEFORE_WEIGHT16;
sec = Collation::COMMON_WEIGHT16;
}
U_ASSERT(s >= sec);
previousTer = 0;
} else {
index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
- previousTer = Collation::MERGE_SEPARATOR_WEIGHT16;
+ previousTer = Collation::BEFORE_WEIGHT16;
}
secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
} else {
index = findPrimary(p) + 1;
- previousTer = Collation::MERGE_SEPARATOR_WEIGHT16;
+ previousTer = Collation::BEFORE_WEIGHT16;
secTer = Collation::COMMON_SEC_AND_TER_CE;
}
uint32_t st = (s << 16) | t;
maxBytes[1] = 0;
minBytes[2] = 0;
maxBytes[2] = 0;
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
maxBytes[3] = 0xff;
minBytes[4] = 2;
maxBytes[4] = 0xff;
maxBytes[2] = 0;
// We use only 6 bits per byte.
// The other bits are used for case & quaternary weights.
- minBytes[3] = Collation::MERGE_SEPARATOR_BYTE + 1;
+ minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1;
maxBytes[3] = 0x3f;
minBytes[4] = 2;
maxBytes[4] = 0x3f;
UBool getCollationKey(const char *norm, const UnicodeString &line,
const UChar *s, int32_t length,
CollationKey &key, IcuTestErrorCode &errorCode);
+ UBool getMergedCollationKey(const UChar *s, int32_t length,
+ CollationKey &key, IcuTestErrorCode &errorCode);
UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
const UnicodeString &prevString, const UnicodeString &s,
UCollationResult expectedOrder, Collation::Level expectedLevel,
return;
}
int64_t ce = ces.elementAti(0);
- int64_t expected =
- ((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) |
- Collation::MERGE_SEPARATOR_LOWER32;
+ int64_t expected = Collation::makeCE(Collation::MERGE_SEPARATOR_PRIMARY);
if(ce != expected) {
- errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce);
+ errln("CE(U+fffe)=%04lx != 02..", (long)ce);
}
ce = ces.elementAti(1);
}
// Minimum & maximum lead bytes.
if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) ||
- (s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) ||
- (t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) {
- return FALSE;
- }
- if(t1 != 0 && t1 > 0x3f) {
+ s1 == Collation::LEVEL_SEPARATOR_BYTE ||
+ t1 == Collation::LEVEL_SEPARATOR_BYTE || t1 > 0x3f) {
return FALSE;
}
if(c > 2) {
return FALSE;
}
- // If s contains U+FFFE, check that merged segments make the same key.
+ // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
+ static const int32_t partSizes[] = { 32, 3, 1 };
+ for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
+     int32_t partSize = partSizes[psi];
+     CharString parts;
+     // Pass the part size under test (not a fixed 32), so that each iteration
+     // really builds the key with a different part size, as the messages below report.
+     if(!getSortKeyParts(s, length, parts, partSize, errorCode)) {
+         infoln(fileTestName);
+         errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
+               norm, (int)partSize, errorCode.errorName());
+         infoln(line);
+         return FALSE;
+     }
+     // The concatenated parts must be byte-identical to the full collation key.
+     if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
+         infoln(fileTestName);
+         errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
+               norm, (int)partSize);
+         infoln(line);
+         infoln(printCollationKey(key));
+         infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
+         return FALSE;
+     }
+ }
+ return TRUE;
+}
+
+/**
+ * Changes the key to the merged segments of the U+FFFE-separated substrings of s.
+ * Leaves key unchanged if s does not contain U+FFFE.
+ * @return TRUE if the key was successfully changed
+ */
+UBool CollationTest::getMergedCollationKey(const UChar *s, int32_t length,
+ CollationKey &key, IcuTestErrorCode &errorCode) {
+ if(errorCode.isFailure()) { return FALSE; }
LocalMemory<uint8_t> mergedKey;
int32_t mergedKeyLength = 0;
int32_t mergedKeyCapacity = 0;
if(i == sLength) {
if(segmentStart == 0) {
// s does not contain any U+FFFE.
- break;
+ return FALSE;
}
} else if(s[i] != 0xfffe) {
++i;
if(i == sLength) { break; }
segmentStart = ++i;
}
- if(segmentStart != 0 &&
- (mergedKeyLength != keyLength ||
- uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) {
- infoln(fileTestName);
- errln("Collator(%s).getCollationKey(with U+FFFE) != "
- "ucol_mergeSortkeys(segments)",
- norm);
- infoln(line);
- infoln(printCollationKey(key));
- infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength));
- return FALSE;
- }
-
- // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
- static const int32_t partSizes[] = { 32, 3, 1 };
- for(int32_t psi = 0; psi < UPRV_LENGTHOF(partSizes); ++psi) {
- int32_t partSize = partSizes[psi];
- CharString parts;
- if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
- infoln(fileTestName);
- errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
- norm, (int)partSize, errorCode.errorName());
- infoln(line);
- return FALSE;
- }
- if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
- infoln(fileTestName);
- errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
- norm, (int)partSize);
- infoln(line);
- infoln(printCollationKey(key));
- infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
- return FALSE;
- }
- }
+ key = CollationKey(mergedKey.getAlias(), mergedKeyLength);
return TRUE;
}
return buffer;
}
+/**
+ * Determines the collation level at which two sort keys first differ.
+ *
+ * Walks both keys byte by byte. Every Collation::LEVEL_SEPARATOR_BYTE that the
+ * two keys share advances the level by one; when collHasCaseLevel is false,
+ * Collation::CASE_LEVEL is skipped while counting.
+ *
+ * @param prevKey the first sort key
+ * @param key the second sort key
+ * @param order the comparison result for the two keys;
+ *              UCOL_EQUAL yields Collation::NO_LEVEL without inspecting the keys
+ * @param collHasCaseLevel if false, CASE_LEVEL is skipped when counting levels
+ * @return Collation::NO_LEVEL if order is UCOL_EQUAL, otherwise the level reached
+ *         at the first differing byte or at the end of the shorter key
+ */
+int32_t getDifferenceLevel(const CollationKey &prevKey, const CollationKey &key,
+                           UCollationResult order, UBool collHasCaseLevel) {
+    if(order == UCOL_EQUAL) {
+        return Collation::NO_LEVEL;
+    }
+    int32_t prevKeyLength;
+    const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
+    int32_t keyLength;
+    const uint8_t *bytes = key.getByteArray(keyLength);
+    // Never read past the end of either key, even if the keys turn out to be
+    // identical up to the shorter length (for example an empty or bogus key).
+    int32_t limit = prevKeyLength < keyLength ? prevKeyLength : keyLength;
+    int32_t level = Collation::PRIMARY_LEVEL;
+    for(int32_t i = 0; i < limit; ++i) {
+        uint8_t b = prevBytes[i];
+        if(b != bytes[i]) { break; }
+        if(b == Collation::LEVEL_SEPARATOR_BYTE) {
+            ++level;
+            if(level == Collation::CASE_LEVEL && !collHasCaseLevel) {
+                // No case level in these keys: the next separator starts the following level.
+                ++level;
+            }
+        }
+    }
+    return level;
+}
+
}
UBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
infoln(printCollationKey(key));
return FALSE;
}
+ UBool collHasCaseLevel = coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON;
+ int32_t level = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
- int32_t prevKeyLength;
- const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
- int32_t keyLength;
- const uint8_t *bytes = key.getByteArray(keyLength);
- int32_t level = Collation::PRIMARY_LEVEL;
- for(int32_t i = 0;; ++i) {
- uint8_t b = prevBytes[i];
- if(b != bytes[i]) { break; }
- if(b == Collation::LEVEL_SEPARATOR_BYTE) {
- ++level;
- if(level == Collation::CASE_LEVEL &&
- coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_OFF) {
- ++level;
- }
- }
- }
if(level != expectedLevel) {
infoln(fileTestName);
errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
return FALSE;
}
}
+
+ // If either string contains U+FFFE, then their sort keys must compare the same as
+ // the merged sort keys of each string's between-FFFE segments.
+ //
+ // It is not required that
+ // sortkey(str1 + "\uFFFE" + str2) == mergeSortkeys(sortkey(str1), sortkey(str2))
+ // only that those two methods yield the same order.
+ //
+ // Use bit-wise OR so that getMergedCollationKey() is always called for both strings.
+ if((getMergedCollationKey(prevString.getBuffer(), prevString.length(), prevKey, errorCode) |
+ getMergedCollationKey(s.getBuffer(), s.length(), key, errorCode)) ||
+ errorCode.isFailure()) {
+ order = prevKey.compareTo(key, errorCode);
+ if(order != expectedOrder || errorCode.isFailure()) {
+ infoln(fileTestName);
+ errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
+ "(previous, current segments between U+FFFE)).compareTo() wrong order: %d != %d (%s)",
+ (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
+ infoln(prevFileLine);
+ infoln(fileLine);
+ infoln(printCollationKey(prevKey));
+ infoln(printCollationKey(key));
+ return FALSE;
+ }
+ int32_t mergedLevel = getDifferenceLevel(prevKey, key, order, collHasCaseLevel);
+ if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
+ if(mergedLevel != level) {
+ infoln(fileTestName);
+ errln("line %d ucol_mergeSortkeys(Collator(%s).getCollationKey"
+ "(previous, current segments between U+FFFE)).compareTo()=%d wrong level: %d != %d",
+ (int)fileLineNumber, norm, order, mergedLevel, level);
+ infoln(prevFileLine);
+ infoln(fileLine);
+ infoln(printCollationKey(prevKey));
+ infoln(printCollationKey(key));
+ return FALSE;
+ }
+ }
+ }
return TRUE;
}