From e6987fbfd845abcd438a36d2cdba55d144150151 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Thu, 5 Oct 2017 21:41:46 +0000 Subject: [PATCH] ICU-13391 Change ICU4C parsing to count digits instead of UTF-16 code units for grouping sizes. X-SVN-Rev: 40573 --- icu4c/source/i18n/decimfmt.cpp | 20 ++++++------ icu4c/source/test/intltest/nmfmtrt.cpp | 3 -- icu4c/source/test/intltest/numfmtst.cpp | 31 +++++++++++++++++++ icu4c/source/test/intltest/numfmtst.h | 1 + icu4c/source/test/intltest/tsnmfmt.cpp | 3 -- .../numberformattestspecification.txt | 2 +- .../data/numberformattestspecification.txt | 2 +- .../icu/dev/test/format/NumberFormatTest.java | 19 ++++++++++++ 8 files changed, 64 insertions(+), 17 deletions(-) diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index 2a8a226c77e..3861db3df68 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -1423,8 +1423,8 @@ UBool DecimalFormat::subparse(const UnicodeString& text, UBool strictFail = FALSE; // did we exit with a strict parse failure? - int32_t lastGroup = -1; // where did we last see a grouping separator? - int32_t digitStart = position; + int32_t lastGroup = -1; // after which digit index did we last see a grouping separator? + int32_t currGroup = -1; // for temporary storage the digit index of the current grouping separator int32_t gs2 = fImpl->fEffGrouping.fGrouping2 == 0 ? fImpl->fEffGrouping.fGrouping : fImpl->fEffGrouping.fGrouping2; const UnicodeString *decimalString; @@ -1513,16 +1513,17 @@ UBool DecimalFormat::subparse(const UnicodeString& text, // before that, the group must == the secondary group // length, else it can be <= the the secondary group // length. - if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) || - (lastGroup == -1 && position - digitStart - 1 > gs2)) { + if ((lastGroup != -1 && currGroup - lastGroup != gs2) || + (lastGroup == -1 && digitCount - 1 > gs2)) { strictFail = TRUE; break; } - lastGroup = backup; + lastGroup = currGroup; } // Cancel out backup setting (see grouping handler below) + currGroup = -1; backup = -1; sawDigit = TRUE; @@ -1561,6 +1562,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, // Ignore grouping characters, if we are using them, but require // that they be followed by a digit. Otherwise we backup and // reprocess them. + currGroup = digitCount; backup = position; position += groupingStringLength; sawGrouping=TRUE; @@ -1571,7 +1573,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, { if (strictParse) { if (backup != -1 || - (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1)) { + (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping)) { strictFail = TRUE; break; } @@ -1622,7 +1624,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, UBool sawExponentDigit = FALSE; while (pos < textLength) { - ch = text[(int32_t)pos]; + ch = text.char32At(pos); digit = ch - zero; if (digit < 0 || digit > 9) { @@ -1634,7 +1636,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, parsedNum.append(exponentSign, err); sawExponentDigit = TRUE; } - ++pos; + pos += U16_LENGTH(ch); parsedNum.append((char)(digit + '0'), err); } else { break; @@ -1673,7 +1675,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, } if (strictParse && !sawDecimal) { - if (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1) { + if (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping) { strictFail = TRUE; } } diff --git a/icu4c/source/test/intltest/nmfmtrt.cpp b/icu4c/source/test/intltest/nmfmtrt.cpp index a4d1e78e57c..2379277aebb 100644 --- a/icu4c/source/test/intltest/nmfmtrt.cpp +++ b/icu4c/source/test/intltest/nmfmtrt.cpp @@ -123,9 +123,6 @@ NumberFormatRoundTripTest::start() logln("Quick mode: only testing first 5 Locales"); } for(int i = 0; i < locCount; ++i) { - if (uprv_strcmp(loc[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) { - continue; - } UnicodeString name; logln(loc[i].getDisplayName(name)); diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index ce1432df2eb..fabc1d0b005 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -622,6 +622,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n TESTCASE_AUTO(Test11640_getAffixes); TESTCASE_AUTO(Test11649_toPatternWithMultiCurrency); TESTCASE_AUTO(Test13327_numberingSystemBufferOverflow); + TESTCASE_AUTO(Test13391_chakmaParsing); TESTCASE_AUTO_END; } @@ -8807,6 +8808,36 @@ void NumberFormatTest::Test13327_numberingSystemBufferOverflow() { } } +void NumberFormatTest::Test13391_chakmaParsing() { + UErrorCode status = U_ZERO_ERROR; + LocalPointer df(static_cast( + NumberFormat::createInstance(Locale("ccp"), status))); + const UChar* expected = u"\U00011137\U00011138,\U00011139\U0001113A\U0001113B"; + UnicodeString actual; + df->format(12345, actual, status); + assertSuccess("Should not fail when formatting in ccp", status); + assertEquals("Should produce expected output in ccp", expected, actual); + + Formattable result; + df->parse(expected, result, status); + assertSuccess("Should not fail when parsing in ccp", status); + assertEquals("Should parse to 12345 in ccp", 12345, result); + + const UChar* expectedScientific = u"\U00011137.\U00011139E\U00011138"; + UnicodeString actualScientific; + df.adoptInstead(static_cast( + NumberFormat::createScientificInstance(Locale("ccp"), status))); + df->format(130, actualScientific, status); + assertSuccess("Should not fail when formatting scientific in ccp", status); + assertEquals("Should produce expected scientific output in ccp", + expectedScientific, actualScientific); + + Formattable resultScientific; + df->parse(expectedScientific, resultScientific, status); + assertSuccess("Should not fail when parsing scientific in ccp", status); + assertEquals("Should parse scientific to 130 in ccp", 130, resultScientific); +} + void NumberFormatTest::verifyFieldPositionIterator( NumberFormatTest_Attributes *expected, FieldPositionIterator &iter) { diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h index 545b4961c0f..8477fcbcdb2 100644 --- a/icu4c/source/test/intltest/numfmtst.h +++ b/icu4c/source/test/intltest/numfmtst.h @@ -216,6 +216,7 @@ class NumberFormatTest: public CalendarTimeZoneTest { void Test11640_getAffixes(); void Test11649_toPatternWithMultiCurrency(); void Test13327_numberingSystemBufferOverflow(); + void Test13391_chakmaParsing(); void checkExceptionIssue11735(); diff --git a/icu4c/source/test/intltest/tsnmfmt.cpp b/icu4c/source/test/intltest/tsnmfmt.cpp index 924ae2d2988..845206f266e 100644 --- a/icu4c/source/test/intltest/tsnmfmt.cpp +++ b/icu4c/source/test/intltest/tsnmfmt.cpp @@ -442,9 +442,6 @@ void IntlTestNumberFormat::monsterTest(/* char* par */) } for (int32_t i=0; i