ICU-11406 RBBITest/TestUnicodeFiles, improve known issue detection.

author Andy Heninger <andy.heninger@gmail.com>

Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp

index 278176dc333130ba0576e54b66b26b7ba0a94374..04b30904598006a5e13967b962d0853d66e0f2b8 100644 (file)
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -38,6 +38,7 @@
  #include <stdlib.h>
  #include "unicode/numfmt.h"
  #include "unicode/uscript.h"
+#include "cmemory.h"
  
  #define TEST_ASSERT(x) {if (!(x)) { \
      errln("Failure in file %s, line %d", __FILE__, __LINE__);}}
@@ -1724,6 +1725,32 @@ void RBBITest::TestUnicodeFiles() {
  }
  
  
+// Check for test cases from the Unicode test data files that are known to fail
+// and should be skipped because ICU is not yet able to fully implement the spec.
+// Only cases from LineBreakTest.txt are filtered; a match defers to logKnownIssue().
+// See ticket #7270.
+UBool RBBITest::testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName) {
+    static const UChar badTestCases[][4] = {                   // Line Numbers from Unicode 7.0.0 file.
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x007D, (UChar)0x0000},   // Line 5198
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x0029, (UChar)0x0000},   // Line 5202
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x0021, (UChar)0x0000},   // Line 5214
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x002c, (UChar)0x0000},   // Line 5246
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x002f, (UChar)0x0000},   // Line 5298
+        {(UChar)0x200B, (UChar)0x0020, (UChar)0x2060, (UChar)0x0000}    // Line 5302
+    };
+    if (strcmp(fileName, "LineBreakTest.txt") != 0) {           // Other data files have no known issues.
+        return FALSE;
+    }
+
+    for (int i=0; i<UPRV_LENGTHOF(badTestCases); i++) {
+        if (testCase == UnicodeString(badTestCases[i])) {       // Each row is a NUL-terminated string.
+            return logKnownIssue("7270");
+        }
+    }
+    return FALSE;
+}
+
+
  //--------------------------------------------------------------------------------------------
  //
  //   Run tests from one of the boundary test data files distributed by the Unicode Consortium
@@ -1731,9 +1758,6 @@ void RBBITest::TestUnicodeFiles() {
  //-------------------------------------------------------------------------------------------
  void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {
  #if !UCONFIG_NO_REGULAR_EXPRESSIONS
-    // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270
-    UBool isTicket7270Fixed = !logKnownIssue("7270");
-    UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");
      UErrorCode  status = U_ZERO_ERROR;
  
      //
@@ -1825,20 +1849,8 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *
          else if (tokenMatcher.start(4, status) >= 0) {
              // Scanned to end of a line, possibly skipping over a comment in the process.
              //   If the line from the file contained test data, run the test now.
-            //
-            if (testString.length() > 0) {
-// TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data.
-//             Rule 8 
-//                ZW SP* <break>
-//             is not yet implemented.
-if (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber || 
-                                            5202 == lineNumber ||
-                                            5214 == lineNumber ||
-                                            5246 == lineNumber ||
-                                            5298 == lineNumber ||
-                                            5302 == lineNumber ))) {
+            if (testString.length() > 0 && !testCaseIsKnownIssue(testString, fileName)) {  
                  checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);
-}
              }
  
              // Clear out this test case.
diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h

index 3d21037eb1bfe4235796c73b3de55437697f2829..a05bef377a2ce9facbda98f046809e94ad7d4011 100644 (file)
--- a/icu4c/source/test/intltest/rbbitst.h
+++ b/icu4c/source/test/intltest/rbbitst.h
@@ -131,6 +131,15 @@ private:
      // Run the actual tests for TestTailoredBreaks()
      void TBTest(BreakIterator* brkitr, int type, const char *locale, const char* escapedText,
                  const int32_t *expectOffsets, int32_t expectOffsetsCount);
+
+    /** Filter for test cases from the Unicode test data files.
+     *  Some LineBreakTest.txt cases need to be skipped because ICU is not yet
+     *  able to fully implement the Unicode boundary specifications (ticket #7270).
+     *  @param testCase the test data string.
+     *  @param fileName the Unicode test data file name.
+     *  @return FALSE if the test case should be run, TRUE if it should be skipped.
+     */
+    UBool testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName);
  };
  
  #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
author	Andy Heninger <andy.heninger@gmail.com>
	Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Wed, 3 Dec 2014 23:43:47 +0000 (23:43 +0000)
icu4c/source/test/intltest/rbbitst.cpp		patch \| blob \| history
icu4c/source/test/intltest/rbbitst.h		patch \| blob \| history