ICU-13391 Change ICU4C parsing to count digits instead of UTF-16 code units for group...

author Shane Carr <shane@unicode.org>

Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)

committer Shane Carr <shane@unicode.org>

Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
author Shane Carr <shane@unicode.org>
Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
committer Shane Carr <shane@unicode.org>
Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp

index 2a8a226c77ea0f238701ed0dc48c72b1e7d05e2f..3861db3df68e8e61227095e4cacea441641c5b36 100644 (file)
--- a/icu4c/source/i18n/decimfmt.cpp
+++ b/icu4c/source/i18n/decimfmt.cpp
@@ -1423,8 +1423,8 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
  
  
          UBool strictFail = FALSE; // did we exit with a strict parse failure?
-        int32_t lastGroup = -1; // where did we last see a grouping separator?
-        int32_t digitStart = position;
+        int32_t lastGroup = -1; // after which digit index did we last see a grouping separator?
+        int32_t currGroup = -1; // digit index of the pending grouping separator (-1 if none)
          int32_t gs2 = fImpl->fEffGrouping.fGrouping2 == 0 ? fImpl->fEffGrouping.fGrouping : fImpl->fEffGrouping.fGrouping2;
  
          const UnicodeString *decimalString;
@@ -1513,16 +1513,17 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                      // before that, the group must == the secondary group
                      // length, else it can be <= the the secondary group
                      // length.
-                    if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
-                        (lastGroup == -1 && position - digitStart - 1 > gs2)) {
+                    if ((lastGroup != -1 && currGroup - lastGroup != gs2) ||
+                        (lastGroup == -1 && digitCount - 1 > gs2)) {
                          strictFail = TRUE;
                          break;
                      }
                      
-                    lastGroup = backup;
+                    lastGroup = currGroup;
                  }
                  
                  // Cancel out backup setting (see grouping handler below)
+                currGroup = -1;
                  backup = -1;
                  sawDigit = TRUE;
                  
@@ -1561,6 +1562,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                  // Ignore grouping characters, if we are using them, but require
                  // that they be followed by a digit.  Otherwise we backup and
                  // reprocess them.
+                currGroup = digitCount;
                  backup = position;
                  position += groupingStringLength;
                  sawGrouping=TRUE;
@@ -1571,7 +1573,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
              {
                  if (strictParse) {
                      if (backup != -1 ||
-                        (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1)) {
+                        (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping)) {
                          strictFail = TRUE;
                          break;
                      }
@@ -1622,7 +1624,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
  
                          UBool sawExponentDigit = FALSE;
                          while (pos < textLength) {
-                            ch = text[(int32_t)pos];
+                            ch = text.char32At(pos);
                              digit = ch - zero;
  
                              if (digit < 0 || digit > 9) {
@@ -1634,7 +1636,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
                                      parsedNum.append(exponentSign, err);
                                      sawExponentDigit = TRUE;
                                  }
-                                ++pos;
+                                pos += U16_LENGTH(ch);
                                  parsedNum.append((char)(digit + '0'), err);
                              } else {
                                  break;
@@ -1673,7 +1675,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
          }
  
          if (strictParse && !sawDecimal) {
-            if (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1) {
+            if (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping) {
                  strictFail = TRUE;
              }
          }
diff --git a/icu4c/source/test/intltest/nmfmtrt.cpp b/icu4c/source/test/intltest/nmfmtrt.cpp

index a4d1e78e57c8d37a50cf4799c93da24de058d59f..2379277aebb899de64b269d910bf47930aaa3c08 100644 (file)
--- a/icu4c/source/test/intltest/nmfmtrt.cpp
+++ b/icu4c/source/test/intltest/nmfmtrt.cpp
@@ -123,9 +123,6 @@ NumberFormatRoundTripTest::start()
          logln("Quick mode: only testing first 5 Locales");
      }
      for(int i = 0; i < locCount; ++i) {
-        if (uprv_strcmp(loc[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
-            continue;
-        }
          UnicodeString name;
          logln(loc[i].getDisplayName(name));
  
diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp

index ce1432df2ebcda60f09d9c81dc7e3a95f29e666d..fabc1d0b005239e10eeb7ac0564fb44206b22f8a 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@@ -622,6 +622,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
    TESTCASE_AUTO(Test11640_getAffixes);
    TESTCASE_AUTO(Test11649_toPatternWithMultiCurrency);
    TESTCASE_AUTO(Test13327_numberingSystemBufferOverflow);
+  TESTCASE_AUTO(Test13391_chakmaParsing);
    TESTCASE_AUTO_END;
  }
  
@@ -8807,6 +8808,36 @@ void NumberFormatTest::Test13327_numberingSystemBufferOverflow() {
      }
  }
  
+void NumberFormatTest::Test13391_chakmaParsing() {
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<DecimalFormat> df(static_cast<DecimalFormat*>(
+        NumberFormat::createInstance(Locale("ccp"), status)));
+    const UChar* expected = u"\U00011137\U00011138,\U00011139\U0001113A\U0001113B";
+    UnicodeString actual;
+    df->format(12345, actual, status);
+    assertSuccess("Should not fail when formatting in ccp", status);
+    assertEquals("Should produce expected output in ccp", expected, actual);
+
+    Formattable result;
+    df->parse(expected, result, status);
+    assertSuccess("Should not fail when parsing in ccp", status);
+    assertEquals("Should parse to 12345 in ccp", 12345, result);
+
+    const UChar* expectedScientific = u"\U00011137.\U00011139E\U00011138";
+    UnicodeString actualScientific;
+    df.adoptInstead(static_cast<DecimalFormat*>(
+        NumberFormat::createScientificInstance(Locale("ccp"), status)));
+    df->format(130, actualScientific, status);
+    assertSuccess("Should not fail when formatting scientific in ccp", status);
+    assertEquals("Should produce expected scientific output in ccp",
+        expectedScientific, actualScientific);
+
+    Formattable resultScientific;
+    df->parse(expectedScientific, resultScientific, status);
+    assertSuccess("Should not fail when parsing scientific in ccp", status);
+    assertEquals("Should parse scientific to 130 in ccp", 130, resultScientific);
+}
+
  
  void NumberFormatTest::verifyFieldPositionIterator(
          NumberFormatTest_Attributes *expected, FieldPositionIterator &iter) {
diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h

index 545b4961c0f6e837f64284147b80367cd358fca2..8477fcbcdb2851f5e8c93be68063c6100223e686 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.h
+++ b/icu4c/source/test/intltest/numfmtst.h
@@ -216,6 +216,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
      void Test11640_getAffixes();
      void Test11649_toPatternWithMultiCurrency();
      void Test13327_numberingSystemBufferOverflow();
+    void Test13391_chakmaParsing();
  
      void checkExceptionIssue11735();
  
diff --git a/icu4c/source/test/intltest/tsnmfmt.cpp b/icu4c/source/test/intltest/tsnmfmt.cpp

index 924ae2d2988c930fff175a16366fb46b95685a7c..845206f266e07fa0510f9ecdccdfa05deb51b1c0 100644 (file)
--- a/icu4c/source/test/intltest/tsnmfmt.cpp
+++ b/icu4c/source/test/intltest/tsnmfmt.cpp
@@ -442,9 +442,6 @@ void IntlTestNumberFormat::monsterTest(/* char* par */)
          }
          for (int32_t i=0; i<count; ++i)
          {
-            if (uprv_strcmp(locales[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
-                continue;
-            }
              UnicodeString name(locales[i].getName(), "");
              logln(SEP);
              testLocale(/* par, */locales[i], name);
diff --git a/icu4c/source/test/testdata/numberformattestspecification.txt b/icu4c/source/test/testdata/numberformattestspecification.txt

index 0eef9e3db49b92676648dc911bd8092fdd4d6fd8..113473a2a57f849223ac41cd03261489f605482e 100644 (file)
--- a/icu4c/source/test/testdata/numberformattestspecification.txt
+++ b/icu4c/source/test/testdata/numberformattestspecification.txt
@@ -839,7 +839,7 @@ parse       output  breaks
  (63,425)       -63425
  // JDK and S allow separators in sci notation and parses as -342.5
  // C passes
-(63,425E-1)    fail    KS
+(63,425E-1)    fail    CKS
  // Both prefix and suffix needed for strict.
  // JDK accepts this and parses as -342.5
  (3425E-1       fail    K
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt

index 0eef9e3db49b92676648dc911bd8092fdd4d6fd8..113473a2a57f849223ac41cd03261489f605482e 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
@@ -839,7 +839,7 @@ parse       output  breaks
  (63,425)       -63425
  // JDK and S allow separators in sci notation and parses as -342.5
  // C passes
-(63,425E-1)    fail    KS
+(63,425E-1)    fail    CKS
  // Both prefix and suffix needed for strict.
  // JDK accepts this and parses as -342.5
  (3425E-1       fail    K
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java

index 3354f9b776061fe0a100563441dbbec195d19048..66722432681848744863b8148f665ea5aa756511 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@@ -5266,6 +5266,25 @@ public class NumberFormatTest extends TestFmwk {
                  new DecimalFormat("000000000.0#E0").format(10000000.76d));
      }
  
+    @Test
+    public void Test13391() throws ParseException { // ICU-13391: strict ccp format/parse round trip
+        DecimalFormat df = (DecimalFormat) NumberFormat.getInstance(new ULocale("ccp"));
+        df.setParseStrict(true);
+        String expected = "\uD804\uDD37\uD804\uDD38,\uD804\uDD39\uD804\uDD3A\uD804\uDD3B";
+        assertEquals("Should produce expected output in ccp", expected, df.format(12345));
+        Number result = df.parse(expected); // every digit in the input is a surrogate pair
+        assertEquals("Should parse to 12345 in ccp", 12345, result.longValue());
+
+        df = (DecimalFormat) NumberFormat.getScientificInstance(new ULocale("ccp"));
+        df.setParseStrict(true);
+        String expectedScientific = "\uD804\uDD37.\uD804\uDD39E\uD804\uDD38";
+        assertEquals("Should produce expected scientific output in ccp",
+                expectedScientific, df.format(130));
+        Number resultScientific = df.parse(expectedScientific); // exponent digit is a surrogate pair too
+        assertEquals("Should parse scientific to 130 in ccp",
+                130, resultScientific.longValue());
+    }
+
      @Test
      public void testPercentZero() {
          DecimalFormat df = (DecimalFormat) NumberFormat.getPercentInstance();
author	Shane Carr <shane@unicode.org>
	Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
committer	Shane Carr <shane@unicode.org>
	Thu, 5 Oct 2017 21:41:46 +0000 (21:41 +0000)
icu4c/source/i18n/decimfmt.cpp		patch \| blob \| history
icu4c/source/test/intltest/nmfmtrt.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.h		patch \| blob \| history
icu4c/source/test/intltest/tsnmfmt.cpp		patch \| blob \| history
icu4c/source/test/testdata/numberformattestspecification.txt		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java		patch \| blob \| history