ICU-8639 Merging #8484 endless loop in RuleBasedCollator.getSortKeyBytes (r30184...

author Yoshito Umaoka <y.umaoka@gmail.com>

Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)

committer Yoshito Umaoka <y.umaoka@gmail.com>

Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)
author Yoshito Umaoka <y.umaoka@gmail.com>
Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)
committer Yoshito Umaoka <y.umaoka@gmail.com>
Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java

index 4cde4dc02ea571c3f2712ac9757ec02e8b58fc81..2ef8cb77fdba2afa9ccb9f5ebc23f65f9e7125f7 100644 (file)
--- a/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
@@ -1690,10 +1690,19 @@ public final class CollationElementIterator
                  // Source string char was not in contraction table.
                  // Unless it is a discontiguous contraction, we are done
                  int miss = ch;
-                if(UTF16.isLeadSurrogate(ch)) { // in order to do the proper detection, we
-                    // need to see if we're dealing with a supplementary
-                    miss = UCharacterProperty.getRawSupplementary(ch, (char) nextChar());
-                  }
+                // ticket 8484 - porting changes from C for 6101
+                // We test whether ch and the next char form a surrogate pair.
+                // This test is done only if the iterator is not at the end.
+                // If they do not form a surrogate pair, the iterator
+                // goes back one char to undo the lookahead.
+                if(UTF16.isLeadSurrogate(ch) && !isEnd()) {
+                    char surrNextChar = (char)nextChar();
+                    if (UTF16.isTrailSurrogate(surrNextChar)) {
+                        miss = UCharacterProperty.getRawSupplementary(ch, surrNextChar);
+                    } else {
+                        previousChar();
+                    }
+                }
                  int sCC;
                  if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0
                      || sCC > maxCC || (allSame != 0 && sCC == maxCC) ||
diff --git a/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java b/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java

index c9e32cb3ffd83a81c90c5bb4d1cea45a6ae37718..f7c637516e0b27937ba222fec4a244d39f951fe8 100644 (file)
--- a/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java
+++ b/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java
@@ -1135,6 +1135,15 @@ public class CollationRegressionTest extends TestFmwk {
          }
      }
      
+    // Fixing the infinite loop for surrogates
+    public void Test8484()
+    {
+        String s = "\u9FE1\uCEF3\u2798\uAAB6\uDA7C";
+        Collator coll = Collator.getInstance();
+        CollationKey collKey = coll.getCollationKey(s); 
+        logln("Pass: " + collKey.toString() + " generated OK.");
+    }
+    
      public  void TestBengaliSortKey() throws Exception {
          char rules[] = { 0x26, 0x9fa, 0x3c, 0x98c, 0x3c, 0x9e1, 0x3c, 0x98f, 0x3c, 0x990, 0x3c, 0x993, 
                  0x3c, 0x994, 0x3c, 0x9bc, 0x3c, 0x982, 0x3c, 0x983, 0x3c, 0x981, 0x3c, 0x9b0, 0x3c,
author	Yoshito Umaoka <y.umaoka@gmail.com>
	Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)
committer	Yoshito Umaoka <y.umaoka@gmail.com>
	Wed, 6 Jul 2011 20:52:07 +0000 (20:52 +0000)
main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java		patch \| blob \| history
main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java		patch \| blob \| history