ICU-8654 Spoof check, incorrect length used on NFD text.

author Andy Heninger <andy.heninger@gmail.com>

Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)
diff --git a/icu4c/source/i18n/uspoof.cpp b/icu4c/source/i18n/uspoof.cpp

index 48e3e45dc49b024d50927a61c00bd1bb7e002da4..58a706f0799ddc13beded1ce8c5b15f114dda9fa 100644 (file)
--- a/icu4c/source/i18n/uspoof.cpp
+++ b/icu4c/source/i18n/uspoof.cpp
@@ -24,9 +24,6 @@
  
  #if !UCONFIG_NO_NORMALIZATION
  
-
-#include <stdio.h>      // debug
-
  U_NAMESPACE_USE
  
  
@@ -255,7 +252,7 @@ uspoof_check(const USpoofChecker *sc,
              UBool       haveMultipleMarks = FALSE;  
              UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
              
-            for (i=0; i<length ;) {
+            for (i=0; i<nfdLength ;) {
                  U16_NEXT(nfdText, i, nfdLength, c);
                  if (u_charType(c) != U_NON_SPACING_MARK) {
                      firstNonspacingMark = 0;
@@ -278,6 +275,11 @@ uspoof_check(const USpoofChecker *sc,
                      // No need to find more than the first failure.
                      result |= USPOOF_INVISIBLE;
                      failPos = i;
+                    // TODO: Bug 8655: failPos is the position in the NFD buffer, but what we want
+                    //       to give back to our caller is a position in the original input string.
+                    if (failPos > length) {
+                        failPos = length;
+                    }
                      break;
                  }
                  marksSeenSoFar.add(c);
diff --git a/icu4c/source/test/intltest/itspoof.cpp b/icu4c/source/test/intltest/itspoof.cpp

index dd2c2763999fd8e4b015e222c485ae16ef6f80e1..08c651a66f2cd536d420d2f7c51d73aebd8fb355 100644 (file)
--- a/icu4c/source/test/intltest/itspoof.cpp
+++ b/icu4c/source/test/intltest/itspoof.cpp
@@ -91,7 +91,13 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
                  testConfData();
              }
              break;
-        default: name=""; break;
+          case 5:
+            name = "testBug8654";
+            if (exec) {
+                testBug8654();
+            }
+            break;
+         default: name=""; break;
      }
  }
  
@@ -251,7 +257,7 @@ void IntlTestSpoof::testInvisible() {
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_EQ(7, position);
  
-        // Tow acute accents, one from the composed a with acute accent, \u00e1,
+        // Two acute accents, one from the composed a with acute accent, \u00e1,
          // and one separate.
          position = -42;
          UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
@@ -261,6 +267,15 @@ void IntlTestSpoof::testInvisible() {
      TEST_TEARDOWN;
  }
  
+// Regression test for ICU ticket 8654: checks a string whose last two characters are
+// a precomposed letter with acute accent (U+00C1) followed by a separate combining
+// acute accent (U+0301), and expects the USPOOF_INVISIBLE check to flag it.
+void IntlTestSpoof::testBug8654() {
+    TEST_SETUP
+        // The \u escapes are written with doubled backslashes so that the char literal
+        // contains only plain ASCII and unescape() performs the conversion, as in
+        // testInvisible(). A single-backslash \u would be expanded by the compiler
+        // into the execution character set, leaving unescape() with nothing to do.
+        UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();
+        int32_t position = -42;   // Sentinel; the check is expected to overwrite it.
+        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE );
+        TEST_ASSERT_SUCCESS(status);
+        // The input is three UTF-16 code units long; the reported failure position
+        // is expected to be 3, i.e. no larger than the input length.
+        TEST_ASSERT_EQ(3, position);
+    TEST_TEARDOWN;
+}
  
  static UnicodeString parseHex(const UnicodeString &in) {
      // Convert a series of hex numbers in a Unicode String to a string with the
diff --git a/icu4c/source/test/intltest/itspoof.h b/icu4c/source/test/intltest/itspoof.h

index 6a77459ca10ce8d646064df3b455534a5d892f53..abd54f5a930471d4691f712406407b8b3daa675d 100644 (file)
--- a/icu4c/source/test/intltest/itspoof.h
+++ b/icu4c/source/test/intltest/itspoof.h
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-* Copyright (C) 2009, International Business Machines Corporation 
+* Copyright (C) 2011, International Business Machines Corporation 
  * and others.  All Rights Reserved.
  **********************************************************************
  */
@@ -34,6 +34,8 @@ public:
  
      void testConfData();
  
+    void testBug8654();
+
      // Internal function to run a single skeleton test case.
      void  checkSkeleton(const USpoofChecker *sc, uint32_t flags, 
                          const char *input, const char *expected, int32_t lineNum);
author	Andy Heninger <andy.heninger@gmail.com>
	Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Fri, 17 Jun 2011 20:47:37 +0000 (20:47 +0000)
icu4c/source/i18n/uspoof.cpp		patch \| blob \| history
icu4c/source/test/intltest/itspoof.cpp		patch \| blob \| history
icu4c/source/test/intltest/itspoof.h		patch \| blob \| history