From: Andy Heninger <andy.heninger@gmail.com> Date: Wed, 16 Aug 2017 23:19:21 +0000 (+0000) Subject: ICU-13274 Break Iterator test additions. X-Git-Tag: release-60-rc~187 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f08eb087ba2f24af38fb3a50e379f22933f84c49;p=icu ICU-13274 Break Iterator test additions. X-SVN-Rev: 40334 --- f08eb087ba2f24af38fb3a50e379f22933f84c49 diff --cc icu4c/source/test/intltest/rbbitst.cpp index 7ffcae7222e,7cbbd6d2a48..17329522430 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@@ -1131,10 -1085,9 +1085,9 @@@ void RBBITest::TestExtended() UErrorCode status = U_ZERO_ERROR; Locale locale(""); - UnicodeString rules; TestParams tp(status); -- RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status); ++ RegexMatcher localeMatcher(UnicodeString(u"<locale *([\\p{L}\\p{Nd}_@&=-]*) *>"), 0, status); if (U_FAILURE(status)) { dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status)); } @@@ -1144,21 -1096,21 +1096,16 @@@ // Open and read the test data file. // const char *testDataDirectory = IntlTest::getSourceTestData(status); -- char testFileName[1000]; -- if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { -- errln("Can't open test data. Path too long."); -- return; -- } -- strcpy(testFileName, testDataDirectory); -- strcat(testFileName, "rbbitst.txt"); ++ CharString testFileName(testDataDirectory, -1, status); ++ testFileName.append("rbbitst.txt", -1, status); int len; -- UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); ++ UChar *testFile = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status); if (U_FAILURE(status)) { -- return; /* something went wrong, error already output */ ++ errln("%s:%d Error %s opening file rbbitst.txt", __FILE__, __LINE__, u_errorName(status)); ++ return; } -- bool skipTest = false; // Skip this test? 
// @@@ -1215,41 -1171,51 +1166,50 @@@ if (u_isUWhiteSpace(c)) { break; } -- if (testString.compare(charIdx-1, 6, "<word>") == 0) { ++ if (testString.compare(charIdx-1, 6, u"<word>") == 0) { delete tp.bi; tp.bi = BreakIterator::createWordInstance(locale, status); skipTest = false; charIdx += 5; break; } -- if (testString.compare(charIdx-1, 6, "<char>") == 0) { ++ if (testString.compare(charIdx-1, 6, u"<char>") == 0) { delete tp.bi; tp.bi = BreakIterator::createCharacterInstance(locale, status); skipTest = false; charIdx += 5; break; } -- if (testString.compare(charIdx-1, 6, "<line>") == 0) { ++ if (testString.compare(charIdx-1, 6, u"<line>") == 0) { delete tp.bi; tp.bi = BreakIterator::createLineInstance(locale, status); skipTest = false; charIdx += 5; break; } -- if (testString.compare(charIdx-1, 6, "<sent>") == 0) { ++ if (testString.compare(charIdx-1, 6, u"<sent>") == 0) { delete tp.bi; tp.bi = BreakIterator::createSentenceInstance(locale, status); skipTest = false; charIdx += 5; break; } -- if (testString.compare(charIdx-1, 7, "<title>") == 0) { ++ if (testString.compare(charIdx-1, 7, u"<title>") == 0) { delete tp.bi; tp.bi = BreakIterator::createTitleInstance(locale, status); charIdx += 6; break; } - if (testString.compare(charIdx-1, 7, "<rules>") == 0 || - testString.compare(charIdx-1, 10, "<badrules>") == 0) { - /// charIdx += 6; - charIdx = testString.indexOf(0x3e, charIdx) + 1; // 0x3e == '>' ++ if (testString.compare(charIdx-1, 7, u"<rules>") == 0 || ++ testString.compare(charIdx-1, 10, u"<badrules>") == 0) { ++ charIdx = testString.indexOf(u'>', charIdx) + 1; + parseState = PARSE_RULES; + rules.remove(); + rulesFirstLine = lineNum; + break; + } + // <locale loc_name> localeMatcher.reset(testString); if (localeMatcher.lookingAt(charIdx-1, status)) { @@@ -1261,7 -1227,7 +1221,7 @@@ TEST_ASSERT_SUCCESS(status); break; } -- if (testString.compare(charIdx-1, 6, "<data>") == 0) { ++ if (testString.compare(charIdx-1, 6, u"<data>") == 0) { parseState = 
PARSE_DATA; charIdx += 5; tp.dataToBreak = ""; @@@ -1278,6 -1244,35 +1238,33 @@@ } break; + case PARSE_RULES: - if (testString.compare(charIdx-1, 8, "</rules>") == 0) { ++ if (testString.compare(charIdx-1, 8, u"</rules>") == 0) { + charIdx += 7; + parseState = PARSE_TAG; + delete tp.bi; + UParseError pe; + tp.bi = new RuleBasedBreakIterator(rules, pe, status); + skipTest = U_FAILURE(status); + if (U_FAILURE(status)) { + errln("file rbbitst.txt: %d - Error %s creating break iterator from rules.", + rulesFirstLine + pe.line - 1, u_errorName(status)); + } - break; - } - if (testString.compare(charIdx-1, 11, "</badrules>") == 0) { ++ } else if (testString.compare(charIdx-1, 11, u"</badrules>") == 0) { + charIdx += 10; + parseState = PARSE_TAG; + UErrorCode ec = U_ZERO_ERROR; + UParseError pe; + RuleBasedBreakIterator bi(rules, pe, ec); + if (U_SUCCESS(ec)) { + errln("file rbbitst.txt: %d - Expected, but did not get, a failure creating break iterator from rules.", + rulesFirstLine + pe.line - 1); + } - break; ++ } else { ++ rules.append(c); + } - rules.append(c); + break; + case PARSE_DATA: if (c == u'•') { int32_t breakIdx = tp.dataToBreak.length(); @@@ -1290,7 -1285,7 +1277,7 @@@ break; } -- if (testString.compare(charIdx-1, 7, "</data>") == 0) { ++ if (testString.compare(charIdx-1, 7, u"</data>") == 0) { // Add final entry to mappings from break location to source file position. // Need one extra because last break position returned is after the // last char in the data, not at the last char. @@@ -1316,7 -1311,7 +1303,7 @@@ break; } -- if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { ++ if (testString.compare(charIdx-1, 3, u"\\N{") == 0) { // Named character, e.g. \N{COMBINING GRAVE ACCENT} // Get the code point from the name and insert it into the test data. // (Damn, no API takes names in Unicode !!! 
@@@ -1355,8 -1350,7 +1342,7 @@@ - -- if (testString.compare(charIdx-1, 2, "<>") == 0) { ++ if (testString.compare(charIdx-1, 2, u"<>") == 0) { charIdx++; int32_t breakIdx = tp.dataToBreak.length(); tp.expectedBreaks->setSize(breakIdx+1);