ICU-13274 Break Iterator test additions.

author Andy Heninger <andy.heninger@gmail.com>

Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)
diff --cc icu4c/source/test/intltest/rbbitst.cpp

index 7ffcae7222e06235f57fc622aa002cab32c88290,7cbbd6d2a48d5935520924f6d983d278c2863c5a..173295224302db866db199e93d9e54997929b4b5
--- 1/icu4c/source/test/intltest/rbbitst.cpp
--- 2/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@@ -1131,10 -1085,9 +1085,9 @@@ void RBBITest::TestExtended() 
       UErrorCode      status  = U_ZERO_ERROR;
       Locale          locale("");
   
-     UnicodeString       rules;
       TestParams          tp(status);
   
--    RegexMatcher      localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status);
++    RegexMatcher      localeMatcher(UnicodeString(u"<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status);
       if (U_FAILURE(status)) {
           dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));
       }
@@@ -1144,21 -1096,21 +1096,16 @@@
       //  Open and read the test data file.
       //
       const char *testDataDirectory = IntlTest::getSourceTestData(status);
--    char testFileName[1000];
--    if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
--        errln("Can't open test data.  Path too long.");
--        return;
--    }
--    strcpy(testFileName, testDataDirectory);
--    strcat(testFileName, "rbbitst.txt");
++    CharString testFileName(testDataDirectory, -1, status);
++    testFileName.append("rbbitst.txt", -1, status);
   
       int    len;
--    UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
++    UChar *testFile = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status);
       if (U_FAILURE(status)) {
--        return; /* something went wrong, error already output */
++        errln("%s:%d Error %s opening file rbbitst.txt", __FILE__, __LINE__, u_errorName(status));
++        return;
       }
   
--
       bool skipTest = false; // Skip this test?
   
       //
@@@ -1215,41 -1171,51 +1166,50 @@@
               if (u_isUWhiteSpace(c)) {
                   break;
               }
--            if (testString.compare(charIdx-1, 6, "<word>") == 0) {
++            if (testString.compare(charIdx-1, 6, u"<word>") == 0) {
                   delete tp.bi;
                   tp.bi = BreakIterator::createWordInstance(locale,  status);
                   skipTest = false;
                   charIdx += 5;
                   break;
               }
--            if (testString.compare(charIdx-1, 6, "<char>") == 0) {
++            if (testString.compare(charIdx-1, 6, u"<char>") == 0) {
                   delete tp.bi;
                   tp.bi = BreakIterator::createCharacterInstance(locale,  status);
                   skipTest = false;
                   charIdx += 5;
                   break;
               }
--            if (testString.compare(charIdx-1, 6, "<line>") == 0) {
++            if (testString.compare(charIdx-1, 6, u"<line>") == 0) {
                   delete tp.bi;
                   tp.bi = BreakIterator::createLineInstance(locale,  status);
                   skipTest = false;
                   charIdx += 5;
                   break;
               }
--            if (testString.compare(charIdx-1, 6, "<sent>") == 0) {
++            if (testString.compare(charIdx-1, 6, u"<sent>") == 0) {
                   delete tp.bi;
                   tp.bi = BreakIterator::createSentenceInstance(locale,  status);
                   skipTest = false;
                   charIdx += 5;
                   break;
               }
--            if (testString.compare(charIdx-1, 7, "<title>") == 0) {
++            if (testString.compare(charIdx-1, 7, u"<title>") == 0) {
                   delete tp.bi;
                   tp.bi = BreakIterator::createTitleInstance(locale,  status);
                   charIdx += 6;
                   break;
               }
   
- -            if (testString.compare(charIdx-1, 7, "<rules>") == 0 ||
- -                testString.compare(charIdx-1, 10, "<badrules>") == 0) {
- -                /// charIdx += 6;
- -                charIdx = testString.indexOf(0x3e, charIdx) + 1;  // 0x3e == '>'
++            if (testString.compare(charIdx-1, 7, u"<rules>") == 0 ||
++                testString.compare(charIdx-1, 10, u"<badrules>") == 0) {
++                charIdx = testString.indexOf(u'>', charIdx) + 1;
+                 parseState = PARSE_RULES;
+                 rules.remove();
+                 rulesFirstLine = lineNum;
+                 break;
+             }
+ 
               // <locale  loc_name>
               localeMatcher.reset(testString);
               if (localeMatcher.lookingAt(charIdx-1, status)) {
@@@ -1261,7 -1227,7 +1221,7 @@@
                   TEST_ASSERT_SUCCESS(status);
                   break;
               }
--            if (testString.compare(charIdx-1, 6, "<data>") == 0) {
++            if (testString.compare(charIdx-1, 6, u"<data>") == 0) {
                   parseState = PARSE_DATA;
                   charIdx += 5;
                   tp.dataToBreak = "";
@@@ -1278,6 -1244,35 +1238,33 @@@
               }
               break;
   
- -            if (testString.compare(charIdx-1, 8, "</rules>") == 0) {
+         case PARSE_RULES:
- -                break;
- -            }
- -            if (testString.compare(charIdx-1, 11, "</badrules>") == 0) {
++            if (testString.compare(charIdx-1, 8, u"</rules>") == 0) {
+                 charIdx += 7;
+                 parseState = PARSE_TAG;
+                 delete tp.bi;
+                 UParseError pe;
+                 tp.bi = new RuleBasedBreakIterator(rules, pe, status);
+                 skipTest = U_FAILURE(status);
+                 if (U_FAILURE(status)) {
+                     errln("file rbbitst.txt: %d - Error %s creating break iterator from rules.",
+                         rulesFirstLine + pe.line - 1, u_errorName(status));
+                 }
- -                break;
++            } else if (testString.compare(charIdx-1, 11, u"</badrules>") == 0) {
+                 charIdx += 10;
+                 parseState = PARSE_TAG;
+                 UErrorCode ec = U_ZERO_ERROR;
+                 UParseError pe;
+                 RuleBasedBreakIterator bi(rules, pe, ec);
+                 if (U_SUCCESS(ec)) {
+                     errln("file rbbitst.txt: %d - Expected, but did not get, a failure creating break iterator from rules.",
+                         rulesFirstLine + pe.line - 1);
+                 }
- -            rules.append(c);
++            } else {
++                rules.append(c);
+             }
+             break;
+ 
           case PARSE_DATA:
               if (c == u'•') {
                   int32_t  breakIdx = tp.dataToBreak.length();
@@@ -1290,7 -1285,7 +1277,7 @@@
                   break;
               }
   
--            if (testString.compare(charIdx-1, 7, "</data>") == 0) {
++            if (testString.compare(charIdx-1, 7, u"</data>") == 0) {
                   // Add final entry to mappings from break location to source file position.
                   //  Need one extra because last break position returned is after the
                   //    last char in the data, not at the last char.
@@@ -1316,7 -1311,7 +1303,7 @@@
                   break;
               }
   
--            if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
++            if (testString.compare(charIdx-1, 3, u"\\N{") == 0) {
                   // Named character, e.g. \N{COMBINING GRAVE ACCENT}
                   // Get the code point from the name and insert it into the test data.
                   //   (Damn, no API takes names in Unicode  !!!
@@@ -1355,8 -1350,7 +1342,7 @@@
   
   
   
- 
--            if (testString.compare(charIdx-1, 2, "<>") == 0) {
++            if (testString.compare(charIdx-1, 2, u"<>") == 0) {
                   charIdx++;
                   int32_t  breakIdx = tp.dataToBreak.length();
                   tp.expectedBreaks->setSize(breakIdx+1);
diff --cc icu4c/source/test/intltest/rbbitst.h
Simple merge
author	Andy Heninger <andy.heninger@gmail.com>
	Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Wed, 16 Aug 2017 23:19:21 +0000 (23:19 +0000)
		1	2
icu4c/source/test/intltest/rbbitst.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
icu4c/source/test/intltest/rbbitst.h	patch \|	diff1 \|	diff2 \|	blob \| history