From 8e59fa7be46074cbe5e84c43a461c7482624f21d Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 13 Sep 2013 17:12:11 +0000 Subject: [PATCH] ICU-9915 do not mask off the continuation bits and then later test this masked CE value for isContinuation() X-SVN-Rev: 34310 --- .../com/ibm/icu/text/RuleBasedCollator.java | 35 ++++++++++--------- .../dev/test/collator/CollationAPITest.java | 18 +++++++++- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java index 158a8eb46ca..073d2b1e119 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java @@ -3429,29 +3429,26 @@ public final class RuleBasedCollator extends Collator { int hiraganaresult = 0; while (true) { int sorder = 0; + int sPrimary; // We fetch CEs until we hit a non ignorable primary or end. do { sorder = buffer.m_srcUtilColEIter_.next(); buffer.m_srcUtilCEBuffer_ = append(buffer.m_srcUtilCEBuffer_, buffer.m_srcUtilCEBufferSize_, sorder); buffer.m_srcUtilCEBufferSize_++; - sorder &= CE_PRIMARY_MASK_; - } while (sorder == CollationElementIterator.IGNORABLE); + sPrimary = sorder & CE_PRIMARY_MASK_; + } while (sPrimary == CollationElementIterator.IGNORABLE); int torder = 0; + int tPrimary; do { torder = buffer.m_tgtUtilColEIter_.next(); buffer.m_tgtUtilCEBuffer_ = append(buffer.m_tgtUtilCEBuffer_, buffer.m_tgtUtilCEBufferSize_, torder); buffer.m_tgtUtilCEBufferSize_++; - torder &= CE_PRIMARY_MASK_; - } while (torder == CollationElementIterator.IGNORABLE); - - if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) { - sorder = (m_leadBytePermutationTable_[((sorder >> 24) + 256) % 256] << 24) | (sorder & 0x00FFFFFF); - torder = (m_leadBytePermutationTable_[((torder >> 24) + 256) % 256] << 24) | (torder & 0x00FFFFFF); - } + tPrimary = torder & CE_PRIMARY_MASK_; + } while (tPrimary == CollationElementIterator.IGNORABLE); // if both primaries are the same - if (sorder == torder) { + if (sPrimary == tPrimary) { // and there are no more CEs, we advance to the next level // see if we are at the end of either string if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) { @@ -3471,8 +3468,12 @@ public final class RuleBasedCollator extends Collator { } } } else { + if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) { + sPrimary = (m_leadBytePermutationTable_[sPrimary >>> 24] << 24) | (sPrimary & 0x00FFFFFF); + tPrimary = (m_leadBytePermutationTable_[tPrimary >>> 24] << 24) | (tPrimary & 0x00FFFFFF); + } // if two primaries are different, we are done - return endPrimaryCompare(sorder, torder, buffer); + return endPrimaryCompare(sPrimary, tPrimary, buffer); } } // no primary difference... do the rest from the buffers @@ -3815,20 +3816,20 @@ public final class RuleBasedCollator extends Collator { int sorder = CollationElementIterator.IGNORABLE; int torder = CollationElementIterator.IGNORABLE; while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) { - sorder = buffer.m_srcUtilCEBuffer_[soffset++] & m_mask3_; + sorder = buffer.m_srcUtilCEBuffer_[soffset++]; if (!isContinuation(sorder)) { - sorder ^= m_caseSwitch_; + sorder = (sorder & m_mask3_) ^ m_caseSwitch_; } else { - sorder &= CE_REMOVE_CASE_; + sorder = (sorder & m_mask3_) & CE_REMOVE_CASE_; } } while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) { - torder = buffer.m_tgtUtilCEBuffer_[toffset++] & m_mask3_; + torder = buffer.m_tgtUtilCEBuffer_[toffset++]; if (!isContinuation(torder)) { - torder ^= m_caseSwitch_; + torder = (torder & m_mask3_) ^ m_caseSwitch_; } else { - torder &= CE_REMOVE_CASE_; + torder = (torder & m_mask3_) & CE_REMOVE_CASE_; } } diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationAPITest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationAPITest.java index 4ff02590fa1..28b18f244ee 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationAPITest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationAPITest.java @@ -1149,7 +1149,23 @@ public class CollationAPITest extends TestFmwk { " s: " + c.getStrength() + " u: " + c.isUpperCaseFirst()); } - + + public void TestIterNumeric() throws Exception { // misnomer for Java, but parallel with C++ test + // Regression test for ticket #9915. + // The collation code sometimes masked the continuation marker away + // but later tested the result for isContinuation(). + // This test case failed because the third bytes of the computed numeric-collation primaries + // were permutated with the script reordering table. + // It should have been possible to reproduce this with the root collator + // and characters with appropriate 3-byte primary weights. + // The effectiveness of this test depends completely on the collation elements + // and on the implementation code. + RuleBasedCollator coll = new RuleBasedCollator("[reorder Hang Hani]"); + coll.setNumericCollation(true); + int result = coll.compare("40", "72"); + assertTrue("40<72", result < 0); + } + /* * Tests the method public void setStrength(int newStrength) */ -- 2.40.0