]> granicus.if.org Git - icu/commitdiff
ICU-13391 Change ICU4C parsing to count digits instead of UTF-16 code units for group...
authorShane Carr <shane@unicode.org>
Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
committerShane Carr <shane@unicode.org>
Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
X-SVN-Rev: 40573

icu4c/source/i18n/decimfmt.cpp
icu4c/source/test/intltest/nmfmtrt.cpp
icu4c/source/test/intltest/numfmtst.cpp
icu4c/source/test/intltest/numfmtst.h
icu4c/source/test/intltest/tsnmfmt.cpp
icu4c/source/test/testdata/numberformattestspecification.txt
icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java

index 2a8a226c77ea0f238701ed0dc48c72b1e7d05e2f..3861db3df68e8e61227095e4cacea441641c5b36 100644 (file)
@@ -1423,8 +1423,8 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
 
 
         UBool strictFail = FALSE; // did we exit with a strict parse failure?
-        int32_t lastGroup = -1; // where did we last see a grouping separator?
-        int32_t digitStart = position;
+        int32_t lastGroup = -1; // after which digit index did we last see a grouping separator?
+        int32_t currGroup = -1; // temporary storage for the digit index of the current grouping separator
         int32_t gs2 = fImpl->fEffGrouping.fGrouping2 == 0 ? fImpl->fEffGrouping.fGrouping : fImpl->fEffGrouping.fGrouping2;
 
         const UnicodeString *decimalString;
@@ -1513,16 +1513,17 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                     // before that, the group must == the secondary group
                     // length, else it can be <= the the secondary group
                     // length.
-                    if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
-                        (lastGroup == -1 && position - digitStart - 1 > gs2)) {
+                    if ((lastGroup != -1 && currGroup - lastGroup != gs2) ||
+                        (lastGroup == -1 && digitCount - 1 > gs2)) {
                         strictFail = TRUE;
                         break;
                     }
                     
-                    lastGroup = backup;
+                    lastGroup = currGroup;
                 }
                 
                 // Cancel out backup setting (see grouping handler below)
+                currGroup = -1;
                 backup = -1;
                 sawDigit = TRUE;
                 
@@ -1561,6 +1562,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                 // Ignore grouping characters, if we are using them, but require
                 // that they be followed by a digit.  Otherwise we backup and
                 // reprocess them.
+                currGroup = digitCount;
                 backup = position;
                 position += groupingStringLength;
                 sawGrouping=TRUE;
@@ -1571,7 +1573,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
             {
                 if (strictParse) {
                     if (backup != -1 ||
-                        (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1)) {
+                        (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping)) {
                         strictFail = TRUE;
                         break;
                     }
@@ -1622,7 +1624,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
 
                         UBool sawExponentDigit = FALSE;
                         while (pos < textLength) {
-                            ch = text[(int32_t)pos];
+                            ch = text.char32At(pos);
                             digit = ch - zero;
 
                             if (digit < 0 || digit > 9) {
@@ -1634,7 +1636,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                                     parsedNum.append(exponentSign, err);
                                     sawExponentDigit = TRUE;
                                 }
-                                ++pos;
+                                pos += U16_LENGTH(ch);
                                 parsedNum.append((char)(digit + '0'), err);
                             } else {
                                 break;
@@ -1673,7 +1675,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
         }
 
         if (strictParse && !sawDecimal) {
-            if (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1) {
+            if (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping) {
                 strictFail = TRUE;
             }
         }
index a4d1e78e57c8d37a50cf4799c93da24de058d59f..2379277aebb899de64b269d910bf47930aaa3c08 100644 (file)
@@ -123,9 +123,6 @@ NumberFormatRoundTripTest::start()
         logln("Quick mode: only testing first 5 Locales");
     }
     for(int i = 0; i < locCount; ++i) {
-        if (uprv_strcmp(loc[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
-            continue;
-        }
         UnicodeString name;
         logln(loc[i].getDisplayName(name));
 
index ce1432df2ebcda60f09d9c81dc7e3a95f29e666d..fabc1d0b005239e10eeb7ac0564fb44206b22f8a 100644 (file)
@@ -622,6 +622,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
   TESTCASE_AUTO(Test11640_getAffixes);
   TESTCASE_AUTO(Test11649_toPatternWithMultiCurrency);
   TESTCASE_AUTO(Test13327_numberingSystemBufferOverflow);
+  TESTCASE_AUTO(Test13391_chakmaParsing);
   TESTCASE_AUTO_END;
 }
 
@@ -8807,6 +8808,36 @@ void NumberFormatTest::Test13327_numberingSystemBufferOverflow() {
     }
 }
 
+void NumberFormatTest::Test13391_chakmaParsing() { // ICU-13391 regression test: format and parse round trip in the "ccp" locale
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<DecimalFormat> df(static_cast<DecimalFormat*>(
+        NumberFormat::createInstance(Locale("ccp"), status))); // NOTE(review): df is dereferenced below before status or a null result is checked -- confirm this cannot fail when ccp data is unavailable
+    const UChar* expected = u"\U00011137\U00011138,\U00011139\U0001113A\U0001113B"; // expected text for 12345: two digits, ',', three digits; every digit is a code point above U+FFFF
+    UnicodeString actual;
+    df->format(12345, actual, status);
+    assertSuccess("Should not fail when formatting in ccp", status);
+    assertEquals("Should produce expected output in ccp", expected, actual);
+
+    Formattable result;
+    df->parse(expected, result, status); // parse the expected text back to a number; NOTE(review): parse mode is left at its default here, whereas the Java Test13391 calls setParseStrict(true) -- confirm which mode is intended
+    assertSuccess("Should not fail when parsing in ccp", status);
+    assertEquals("Should parse to 12345 in ccp", 12345, result);
+
+    const UChar* expectedScientific = u"\U00011137.\U00011139E\U00011138"; // expected scientific text for 130: digit, '.', digit, 'E', exponent digit
+    UnicodeString actualScientific;
+    df.adoptInstead(static_cast<DecimalFormat*>( // replace df with a scientific formatter for the same locale
+        NumberFormat::createScientificInstance(Locale("ccp"), status)));
+    df->format(130, actualScientific, status);
+    assertSuccess("Should not fail when formatting scientific in ccp", status);
+    assertEquals("Should produce expected scientific output in ccp",
+        expectedScientific, actualScientific);
+
+    Formattable resultScientific;
+    df->parse(expectedScientific, resultScientific, status); // parse the scientific text back, including the exponent digit after 'E'
+    assertSuccess("Should not fail when parsing scientific in ccp", status);
+    assertEquals("Should parse scientific to 130 in ccp", 130, resultScientific);
+}
+
 
 void NumberFormatTest::verifyFieldPositionIterator(
         NumberFormatTest_Attributes *expected, FieldPositionIterator &iter) {
index 545b4961c0f6e837f64284147b80367cd358fca2..8477fcbcdb2851f5e8c93be68063c6100223e686 100644 (file)
@@ -216,6 +216,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
     void Test11640_getAffixes();
     void Test11649_toPatternWithMultiCurrency();
     void Test13327_numberingSystemBufferOverflow();
+    void Test13391_chakmaParsing();
 
     void checkExceptionIssue11735();
 
index 924ae2d2988c930fff175a16366fb46b95685a7c..845206f266e07fa0510f9ecdccdfa05deb51b1c0 100644 (file)
@@ -442,9 +442,6 @@ void IntlTestNumberFormat::monsterTest(/* char* par */)
         }
         for (int32_t i=0; i<count; ++i)
         {
-            if (uprv_strcmp(locales[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
-                continue;
-            }
             UnicodeString name(locales[i].getName(), "");
             logln(SEP);
             testLocale(/* par, */locales[i], name);
index 0eef9e3db49b92676648dc911bd8092fdd4d6fd8..113473a2a57f849223ac41cd03261489f605482e 100644 (file)
@@ -839,7 +839,7 @@ parse       output  breaks
 (63,425)       -63425
 // JDK and S allow separators in sci notation and parses as -342.5
 // C passes
-(63,425E-1)    fail    KS
+(63,425E-1)    fail    CKS
 // Both prefix and suffix needed for strict.
 // JDK accepts this and parses as -342.5
 (3425E-1       fail    K
index 0eef9e3db49b92676648dc911bd8092fdd4d6fd8..113473a2a57f849223ac41cd03261489f605482e 100644 (file)
@@ -839,7 +839,7 @@ parse       output  breaks
 (63,425)       -63425
 // JDK and S allow separators in sci notation and parses as -342.5
 // C passes
-(63,425E-1)    fail    KS
+(63,425E-1)    fail    CKS
 // Both prefix and suffix needed for strict.
 // JDK accepts this and parses as -342.5
 (3425E-1       fail    K
index 3354f9b776061fe0a100563441dbbec195d19048..66722432681848744863b8148f665ea5aa756511 100644 (file)
@@ -5266,6 +5266,25 @@ public class NumberFormatTest extends TestFmwk {
                 new DecimalFormat("000000000.0#E0").format(10000000.76d));
     }
 
+    @Test
+    public void Test13391() throws ParseException { // ICU-13391 regression test: format and parse round trip in the "ccp" locale
+        DecimalFormat df = (DecimalFormat) NumberFormat.getInstance(new ULocale("ccp"));
+        df.setParseStrict(true); // parse in strict mode
+        String expected = "\uD804\uDD37\uD804\uDD38,\uD804\uDD39\uD804\uDD3A\uD804\uDD3B"; // expected text for 12345; each digit is written as a surrogate pair (two UTF-16 code units)
+        assertEquals("Should produce expected output in ccp", expected, df.format(12345));
+        Number result = df.parse(expected); // parse the expected text back to a number
+        assertEquals("Should parse to 12345 in ccp", 12345, result.longValue());
+
+        df = (DecimalFormat) NumberFormat.getScientificInstance(new ULocale("ccp")); // repeat the round trip with the scientific formatter
+        df.setParseStrict(true);
+        String expectedScientific = "\uD804\uDD37.\uD804\uDD39E\uD804\uDD38"; // expected scientific text for 130: digit, '.', digit, 'E', exponent digit
+        assertEquals("Should produce expected scientific output in ccp",
+                expectedScientific, df.format(130));
+        Number resultScientific = df.parse(expectedScientific);
+        assertEquals("Should parse scientific to 130 in ccp",
+                130, resultScientific.longValue());
+    }
+
     @Test
     public void testPercentZero() {
         DecimalFormat df = (DecimalFormat) NumberFormat.getPercentInstance();