ICU-10161 Now compareSimpleAffix ignores LRM/RLM/ALM in affix or text when comparing...

author Peter Edberg <pedberg@unicode.org>

Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)

committer Peter Edberg <pedberg@unicode.org>

Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)
author Peter Edberg <pedberg@unicode.org>
Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)
committer Peter Edberg <pedberg@unicode.org>
Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)
diff --git a/icu4c/source/data/locales/ar.txt b/icu4c/source/data/locales/ar.txt

index e0bd59777aa30145f489f45aca3cbbaf4e717baf..4b88cbd349cbd70454229d874a65a32abc012ba0 100644 (file)
--- a/icu4c/source/data/locales/ar.txt
+++ b/icu4c/source/data/locales/ar.txt
@@ -24,7 +24,7 @@ ar{
          arab{
              patterns{
                  currencyFormat{"¤ #0.00;¤ #0.00-"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
                  percentFormat{"#,##0%"}
                  scientificFormat{"#E0"}
              }
@@ -34,18 +34,18 @@ ar{
                  group{"٬"}
                  infinity{"∞"}
                  list{"؛"}
-                minusSign{"-"}
+                minusSign{"‏-"}
                  nan{"ليس رقم"}
                  perMille{"؉"}
                  percentSign{"٪"}
-                plusSign{"+"}
+                plusSign{"‏+"}
              }
          }
          default{"arab"}
          latn{
              patterns{
                  currencyFormat{"¤ #,##0.00;¤ #,##0.00-"}
-                decimalFormat{"#,##0.###;#,##0.###-"}
+                decimalFormat{"#,##0.###"}
                  percentFormat{"#,##0%"}
                  scientificFormat{"#E0"}
              }
@@ -255,11 +255,11 @@ ar{
                  group{","}
                  infinity{"∞"}
                  list{";"}
-                minusSign{"-"}
+                minusSign{"‎-"}
                  nan{"NaN"}
                  perMille{"‰"}
                  percentSign{"%"}
-                plusSign{"+"}
+                plusSign{"‎+"}
              }
          }
          native{"arab"}
diff --git a/icu4c/source/data/locales/ar_QA.txt b/icu4c/source/data/locales/ar_QA.txt

index f01e8fd04fc4a11a1ba14835db0460f8d9f4d5f8..32847a21a368d2635e638616b45d66d2c3ed0800 100644 (file)
--- a/icu4c/source/data/locales/ar_QA.txt
+++ b/icu4c/source/data/locales/ar_QA.txt
@@ -11,7 +11,7 @@ ar_QA{
          latn{
              patterns{
                  currencyFormat{"¤#0.00"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
              }
          }
      }
diff --git a/icu4c/source/data/locales/ar_SA.txt b/icu4c/source/data/locales/ar_SA.txt

index c5d8acc95e25bf4a0dd30fa8c7f46fc77eb273b7..b4cfd40e98c05c26aeac2d09b0be2568b7180fc7 100644 (file)
--- a/icu4c/source/data/locales/ar_SA.txt
+++ b/icu4c/source/data/locales/ar_SA.txt
@@ -11,7 +11,7 @@ ar_SA{
          latn{
              patterns{
                  currencyFormat{"¤#0.00"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
              }
          }
      }
diff --git a/icu4c/source/data/locales/ar_SY.txt b/icu4c/source/data/locales/ar_SY.txt

index 30d79b3ce98ab659c0f944810e5c512e9792458a..94d85fc95e8271cf16ce3229613a13ee24476103 100644 (file)
--- a/icu4c/source/data/locales/ar_SY.txt
+++ b/icu4c/source/data/locales/ar_SY.txt
@@ -11,7 +11,7 @@ ar_SY{
          latn{
              patterns{
                  currencyFormat{"¤#0.00"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
              }
          }
      }
diff --git a/icu4c/source/data/locales/ar_TN.txt b/icu4c/source/data/locales/ar_TN.txt

index e7173ceb1792a8401ba9f5afbd7c7d46e967baea..01cea61116fff1034faad01b43d88806f3e7162f 100644 (file)
--- a/icu4c/source/data/locales/ar_TN.txt
+++ b/icu4c/source/data/locales/ar_TN.txt
@@ -12,7 +12,7 @@ ar_TN{
          latn{
              patterns{
                  currencyFormat{"¤#0.00"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
              }
              symbols{
                  decimal{","}
diff --git a/icu4c/source/data/locales/ar_YE.txt b/icu4c/source/data/locales/ar_YE.txt

index 84dd1a207c3e57392feb47e8b160fe223cecc65b..cba401613f65923f579b9100fdad2624ae319fab 100644 (file)
--- a/icu4c/source/data/locales/ar_YE.txt
+++ b/icu4c/source/data/locales/ar_YE.txt
@@ -11,7 +11,7 @@ ar_YE{
          latn{
              patterns{
                  currencyFormat{"¤#0.00"}
-                decimalFormat{"#0.###;#0.###-"}
+                decimalFormat{"#0.###"}
              }
          }
      }
diff --git a/icu4c/source/data/locales/fa.txt b/icu4c/source/data/locales/fa.txt

index 84b405d1824891b059a93dda4ad8e6d54cb06710..620b5117199b17715481b8066801a41b9ad7aac0 100644 (file)
--- a/icu4c/source/data/locales/fa.txt
+++ b/icu4c/source/data/locales/fa.txt
@@ -28,11 +28,11 @@ fa{
                  group{"٬"}
                  infinity{"∞"}
                  list{"؛"}
-                minusSign{"−"}
+                minusSign{"‎−"}
                  nan{"ناعدد"}
                  perMille{"؉"}
                  percentSign{"٪"}
-                plusSign{"+"}
+                plusSign{"‎+‎"}
              }
          }
          default{"arabext"}
@@ -129,11 +129,11 @@ fa{
                  group{","}
                  infinity{"∞"}
                  list{";"}
-                minusSign{"−"}
+                minusSign{"‎−"}
                  nan{"NaN"}
                  perMille{"‰"}
                  percentSign{"%"}
-                plusSign{"+"}
+                plusSign{"‎+"}
              }
          }
          native{"arabext"}
diff --git a/icu4c/source/data/locales/he.txt b/icu4c/source/data/locales/he.txt

index 403193b2ede50e7da48e2dcb3f2bff8f1771512b..1dc2b5996b485b27fac7cea8104f0e71872aebae 100644 (file)
--- a/icu4c/source/data/locales/he.txt
+++ b/icu4c/source/data/locales/he.txt
@@ -187,11 +187,11 @@ he{
                  group{","}
                  infinity{"∞"}
                  list{";"}
-                minusSign{"-"}
+                minusSign{"‎-"}
                  nan{"NaN"}
                  perMille{"‰"}
                  percentSign{"%"}
-                plusSign{"+"}
+                plusSign{"‎+"}
              }
          }
          native{"latn"}
diff --git a/icu4c/source/data/locales/ps.txt b/icu4c/source/data/locales/ps.txt

index 63c183216c9b70a0b6250d35fcb137a96f626c8e..9c47c1cb6b236bde8b10431194bb8467b96c8b00 100644 (file)
--- a/icu4c/source/data/locales/ps.txt
+++ b/icu4c/source/data/locales/ps.txt
@@ -10,7 +10,7 @@
   * ICU <specials> source: <path>/common/main/ps.xml
   */
  ps{
-    AuxExemplarCharacters{"[\u200C \u200D]"}
+    AuxExemplarCharacters{"[\u200C \u200D \u200E \u200F]"}
      ExemplarCharacters{
          "[\u064E \u0650 \u064F \u064B \u064D \u064C \u0651 \u0652 \u0654 \u0670 آ ا أ"
          " ء ب پ ت ټ ث ج ځ چ څ ح خ د ډ ذ ر ړ ز ژ ږ س ش ښ ص ض ط ظ ع غ ف ق ک ګ ل م ن ڼ ه"
@@ -37,8 +37,9 @@ ps{
                  decimal{","}
                  exponential{"E"}
                  group{"."}
-                minusSign{"−"}
+                minusSign{"‎−"}
                  percentSign{"%"}
+                plusSign{"‎+"}
              }
          }
          native{"arabext"}
diff --git a/icu4c/source/data/locales/root.txt b/icu4c/source/data/locales/root.txt

index 96e3daf0001e14049963b38372829b128eb6c6c9..ae64ab88c7c896e85c5f7e5b24d9213f23eb2d74 100644 (file)
--- a/icu4c/source/data/locales/root.txt
+++ b/icu4c/source/data/locales/root.txt
@@ -25,11 +25,11 @@ root{
                  group{"٬"}
                  infinity{"∞"}
                  list{"؛"}
-                minusSign{"-"}
+                minusSign{"‏-"}
                  nan{"NaN"}
                  perMille{"؉"}
                  percentSign{"٪"}
-                plusSign{"+"}
+                plusSign{"‏+"}
                  superscriptingExponent{"×"}
              }
          }
@@ -40,11 +40,11 @@ root{
                  group{"٬"}
                  infinity{"∞"}
                  list{"؛"}
-                minusSign{"-"}
+                minusSign{"‎-‎"}
                  nan{"NaN"}
                  perMille{"؉"}
                  percentSign{"٪"}
-                plusSign{"+"}
+                plusSign{"‎+‎"}
                  superscriptingExponent{"×"}
              }
          }
diff --git a/icu4c/source/data/locales/uz_Arab.txt b/icu4c/source/data/locales/uz_Arab.txt

index b7dfdebead1b20dd26de6b4913ef38a6cd87b1ae..9a5e9c534ab5c6cce1d8912774d3cbed35128cf8 100644 (file)
--- a/icu4c/source/data/locales/uz_Arab.txt
+++ b/icu4c/source/data/locales/uz_Arab.txt
@@ -11,7 +11,7 @@
   */
  uz_Arab{
      %%Parent{"root"}
-    AuxExemplarCharacters{"[\u200C\u200D ټ ځ څ ډ ړ ږ ښ ګ ڼ ي ۍ ې]"}
+    AuxExemplarCharacters{"[\u200C\u200D\u200E\u200F ټ ځ څ ډ ړ ږ ښ ګ ڼ ي ۍ ې]"}
      ExemplarCharacters{
          "[\u064B \u064C \u064D \u064E \u064F \u0650 \u0651 \u0652 \u0654 \u0670 ء آ أ"
          " ؤ ئ ا ب پ ة ت ث ج چ ح خ د ذ ر ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن ه و ۇ ۉ ی]"
@@ -33,7 +33,8 @@ uz_Arab{
              symbols{
                  decimal{","}
                  group{"."}
-                minusSign{"−"}
+                minusSign{"‎−"}
+                plusSign{"‎+"}
              }
          }
          native{"arabext"}
diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp

index f740ba65aea55cc87be98857193855eaf99bcd8d..13e3073f9edfcaddb871d7557e4d84eed5c1e49c 100644 (file)
--- a/icu4c/source/i18n/decimfmt.cpp
+++ b/icu4c/source/i18n/decimfmt.cpp
@@ -2840,7 +2840,7 @@ int32_t DecimalFormat::skipPadding(const UnicodeString& text, int32_t position)
   * @param isNegative
   * @param isPrefix
   * @param affixPat affix pattern used for currency affix comparison.
- * @param copmplexCurrencyParsing whether it is currency parsing or not
+ * @param complexCurrencyParsing whether it is currency parsing or not
   * @param type the currency type to parse against, LONG_NAME only or not.
   * @param currency return value for parsed currency, for generic
   * currency parsing mode, or null for normal parsing. In generic
@@ -2853,13 +2853,13 @@ int32_t DecimalFormat::compareAffix(const UnicodeString& text,
                                      UBool isNegative,
                                      UBool isPrefix,
                                      const UnicodeString* affixPat,
-                                    UBool copmplexCurrencyParsing,
+                                    UBool complexCurrencyParsing,
                                      int8_t type,
                                      UChar* currency) const
  {
      const UnicodeString *patternToCompare;
      if (fCurrencyChoice != NULL || currency != NULL ||
-        (fCurrencySignCount != fgCurrencySignCountZero && copmplexCurrencyParsing)) {
+        (fCurrencySignCount != fgCurrencySignCountZero && complexCurrencyParsing)) {
  
          if (affixPat != NULL) {
              return compareComplexAffix(*affixPat, text, pos, type, currency);
@@ -2901,6 +2901,28 @@ static UBool equalWithSignCompatibility(UChar32 lhs, UChar32 rhs) {
          (plusSigns->contains(lhs) && plusSigns->contains(rhs));
  }
  
+// check for LRM 0x200E, RLM 0x200F, ALM 0x061C (note: the argument is evaluated up to three times)
+#define IS_BIDI_MARK(c) ((c)==0x200E || (c)==0x200F || (c)==0x061C)
+
+// Copy affix into trimmedAffix, dropping at most one leading and one trailing bidi mark.
+// Assumes any marks sit at the very beginning or end of the affix; an affix of length
+// 0 or 1 is copied unchanged, so a lone mark is kept. Returns trimmedAffix.
+UnicodeString& DecimalFormat::trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix) {
+    int32_t begin = 0;
+    int32_t limit = affix.length();   // exclusive end of the range that is kept
+    if (limit > 1) {
+        if (IS_BIDI_MARK(affix.charAt(0))) {
+            begin = 1;
+        }
+        if (limit - 1 > begin) {   // only inspect the tail if a unit would remain after dropping it
+            if (IS_BIDI_MARK(affix.charAt(limit - 1))) {
+                --limit;
+            }
+        }
+    }
+    return trimmedAffix.setTo(affix, begin, limit - begin);
+}
+
  /**
   * Return the length matched by the given affix, or -1 if none.
   * Runs of white space in the affix, match runs of white space in
@@ -2916,8 +2938,10 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
                                            int32_t pos,
                                            UBool lenient) {
      int32_t start = pos;
-    UChar32 affixChar = affix.char32At(0);
-    int32_t affixLength = affix.length();
+    UnicodeString trimmedAffix;
+    trimMarksFromAffix(affix, trimmedAffix);
+    UChar32 affixChar = trimmedAffix.char32At(0);
+    int32_t affixLength = trimmedAffix.length();
      int32_t inputLength = input.length();
      int32_t affixCharLength = U16_LENGTH(affixChar);
      UnicodeSet *affixSet;
@@ -2930,17 +2954,20 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
      if (!lenient) {
          affixSet = staticSets->fStrictDashEquivalents;
          
-        // If the affix is exactly one character long and that character
+        // If the trimmedAffix is exactly one character long and that character
          // is in the dash set and the very next input character is also
          // in the dash set, return a match.
          if (affixCharLength == affixLength && affixSet->contains(affixChar))  {
-            if (affixSet->contains(input.char32At(pos))) {
-                return 1;
+            UChar32 ic = input.char32At(pos);
+            if (affixSet->contains(ic)) {
+                pos += U16_LENGTH(ic);
+                pos = skipBidiMarks(input, pos); // skip any trailing bidi marks
+                return pos - start;
              }
          }
  
          for (int32_t i = 0; i < affixLength; ) {
-            UChar32 c = affix.char32At(i);
+            UChar32 c = trimmedAffix.char32At(i);
              int32_t len = U16_LENGTH(c);
              if (PatternProps::isWhiteSpace(c)) {
                  // We may have a pattern like: \u200F \u0020
@@ -2950,23 +2977,29 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
                  // match of the run of Pattern_White_Space in the pattern,
                  // then match any extra characters.
                  UBool literalMatch = FALSE;
-                while (pos < inputLength &&
-                       input.char32At(pos) == c) {
-                    literalMatch = TRUE;
-                    i += len;
-                    pos += len;
-                    if (i == affixLength) {
-                        break;
-                    }
-                    c = affix.char32At(i);
-                    len = U16_LENGTH(c);
-                    if (!PatternProps::isWhiteSpace(c)) {
+                while (pos < inputLength) {
+                    UChar32 ic = input.char32At(pos);
+                    if (ic == c) {
+                        literalMatch = TRUE;
+                        i += len;
+                        pos += len;
+                        if (i == affixLength) {
+                            break;
+                        }
+                        c = trimmedAffix.char32At(i);
+                        len = U16_LENGTH(c);
+                        if (!PatternProps::isWhiteSpace(c)) {
+                            break;
+                        }
+                    } else if (IS_BIDI_MARK(ic)) {
+                        pos ++; // just skip over this input text
+                    } else {
                          break;
                      }
                  }
  
                  // Advance over run in pattern
-                i = skipPatternWhiteSpace(affix, i);
+                i = skipPatternWhiteSpace(trimmedAffix, i);
  
                  // Advance over run in input text
                  // Must see at least one white space char in input,
@@ -2979,14 +3012,23 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
  
                  // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
                  // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
-                // is also in the affix.
-                i = skipUWhiteSpace(affix, i);
+                // is also in the trimmedAffix.
+                i = skipUWhiteSpace(trimmedAffix, i);
              } else {
-                if (pos < inputLength &&
-                    input.char32At(pos) == c) {
-                    i += len;
-                    pos += len;
-                } else {
+                UBool match = FALSE;
+                while (pos < inputLength) {
+                    UChar32 ic = input.char32At(pos);
+                    if (!match && ic == c) {
+                        i += len;
+                        pos += len;
+                        match = TRUE;
+                    } else if (IS_BIDI_MARK(ic)) {
+                        pos++; // just skip over this input text
+                    } else {
+                        break;
+                    }
+                }
+                if (!match) {
                      return -1;
                  }
              }
@@ -2997,33 +3039,37 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
          affixSet = staticSets->fDashEquivalents;
  
          if (affixCharLength == affixLength && affixSet->contains(affixChar))  {
-            pos = skipUWhiteSpace(input, pos);
+            pos = skipUWhiteSpaceAndMarks(input, pos);
+            UChar32 ic = input.char32At(pos);
              
-            if (affixSet->contains(input.char32At(pos))) {
-                return pos - start + 1;
+            if (affixSet->contains(ic)) {
+                pos += U16_LENGTH(ic);
+                pos = skipBidiMarks(input, pos);
+                return pos - start;
              }
          }
  
          for (int32_t i = 0; i < affixLength; )
          {
-            //i = skipRuleWhiteSpace(affix, i);
-            i = skipUWhiteSpace(affix, i);
-            pos = skipUWhiteSpace(input, pos);
+            //i = skipRuleWhiteSpace(trimmedAffix, i);
+            i = skipUWhiteSpace(trimmedAffix, i);
+            pos = skipUWhiteSpaceAndMarks(input, pos);
  
              if (i >= affixLength || pos >= inputLength) {
                  break;
              }
  
-            UChar32 c = affix.char32At(i);
-            int32_t len = U16_LENGTH(c);
+            UChar32 c = trimmedAffix.char32At(i);
+            UChar32 ic = input.char32At(pos);
  
-            if (!equalWithSignCompatibility(input.char32At(pos), c)) {
+            if (!equalWithSignCompatibility(ic, c)) {
                  return -1;
              }
  
              match = TRUE;
-            i += len;
-            pos += len;
+            i += U16_LENGTH(c);
+            pos += U16_LENGTH(ic);
+            pos = skipBidiMarks(input, pos);
          }
  
          if (affixLength > 0 && ! match) {
@@ -3057,6 +3103,35 @@ int32_t DecimalFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) {
      return pos;
  }
  
+/**
+ * Return the index of the first code point at or after pos in text that is neither u_isUWhiteSpace() nor a bidi mark.
+ */
+int32_t DecimalFormat::skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos) {
+    const int32_t limit = text.length();
+    while (pos < limit) {
+        const UChar32 cp = text.char32At(pos);
+        if (!(u_isUWhiteSpace(cp) || IS_BIDI_MARK(cp))) { // u_isUWhiteSpace doesn't include LRM,RLM,ALM
+            return pos;
+        }
+        pos += U16_LENGTH(cp);
+    }
+    return pos;
+}
+
+/**
+ * Return the index of the first code unit at or after pos in text that is not a bidi mark.
+ */
+int32_t DecimalFormat::skipBidiMarks(const UnicodeString& text, int32_t pos) {
+    const int32_t limit = text.length();
+    for (; pos < limit; ++pos) {
+        const UChar unit = text.charAt(pos);
+        if (!IS_BIDI_MARK(unit)) {
+            break;
+        }
+    }
+    return pos;
+}
+
  /**
   * Return the length matched by the given affix, or -1 if none.
   * @param affixPat pattern string
diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h

index 544e7511c2a2996d5da20c3bba28340eb6fd802c..8f98eafe3b13fcf17c6f0a6133cf024a776ee4ef 100644 (file)
--- a/icu4c/source/i18n/unicode/decimfmt.h
+++ b/icu4c/source/i18n/unicode/decimfmt.h
@@ -2019,6 +2019,8 @@ private:
                           int8_t type,
                           UChar* currency) const;
  
+    static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix);
+
      static int32_t compareSimpleAffix(const UnicodeString& affix,
                                        const UnicodeString& input,
                                        int32_t pos,
@@ -2028,6 +2030,10 @@ private:
  
      static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos);
  
+    static int32_t skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos);
+
+    static int32_t skipBidiMarks(const UnicodeString& text, int32_t pos);
+
      int32_t compareComplexAffix(const UnicodeString& affixPat,
                                  const UnicodeString& input,
                                  int32_t pos,
diff --git a/icu4c/source/test/intltest/compactdecimalformattest.cpp b/icu4c/source/test/intltest/compactdecimalformattest.cpp

index 94cef3f455d2e88e515e10b55e2528a0359278e7..f9b9002dc08fb60700dfc0fb7700c60f18bf424b 100644 (file)
--- a/icu4c/source/test/intltest/compactdecimalformattest.cpp
+++ b/icu4c/source/test/intltest/compactdecimalformattest.cpp
@@ -164,7 +164,7 @@ static ExpectedResult kSwahiliShortNegative[] = {
    {-1.23456789E15, "T-1200"}};
  
  static ExpectedResult kArabicLong[] = {
-  {-5300.0, "\\u0665\\u066B\\u0663- \\u0623\\u0644\\u0641"}};
+  {-5300.0, "\\u200F-\\u0665\\u066B\\u0663 \\u0623\\u0644\\u0641"}};
  
  
  class CompactDecimalFormatTest : public IntlTest {
diff --git a/icu4c/source/test/intltest/dtfmtrtts.cpp b/icu4c/source/test/intltest/dtfmtrtts.cpp

index b0ee96a6405331a42e39147f255825d649249f79..1f1115a449b25befde67cb1ec291ce3aaaea4b9c 100644 (file)
--- a/icu4c/source/test/intltest/dtfmtrtts.cpp
+++ b/icu4c/source/test/intltest/dtfmtrtts.cpp
@@ -1,6 +1,6 @@
  /***********************************************************************
   * COPYRIGHT: 
- * Copyright (c) 1997-2012, International Business Machines Corporation
+ * Copyright (c) 1997-2013, International Business Machines Corporation
   * and others. All Rights Reserved.
   ***********************************************************************/
   
@@ -335,7 +335,7 @@ void DateFormatRoundTripTest::test(DateFormat *fmt, const Locale &origLocale, UB
              for(loop = 0; loop < DEPTH; ++loop) {
                  if (loop > 0)  {
                      d[loop] = fmt->parse(s[loop-1], status);
-                    failure(status, "fmt->parse", s[loop-1]+" in locale: " + origLocale.getName());
+                    failure(status, "fmt->parse", s[loop-1]+" in locale: " + origLocale.getName() + " with pattern: " + pat);
                      status = U_ZERO_ERROR; /* any error would have been reported */
                  }
  
diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp

index 624ac31bfd5cad369b739ce3f30f17547d418211..9dbced328fe972a528c0f49ba33790beccae004f 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@@ -128,7 +128,8 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
    TESTCASE_AUTO(TestBug9936);
    TESTCASE_AUTO(TestParseNegativeWithFaLocale);
    TESTCASE_AUTO(TestParseNegativeWithAlternateMinusSign);
-  TESTCASE_AUTO(TestCustomCurrecySignAndSeparator);
+  TESTCASE_AUTO(TestCustomCurrencySignAndSeparator);
+  TESTCASE_AUTO(TestParseSignsAndMarks);
    TESTCASE_AUTO_END;
  }
  
@@ -7149,7 +7150,7 @@ void NumberFormatTest::TestParseNegativeWithAlternateMinusSign() {
      delete test;
  }
  
-void NumberFormatTest::TestCustomCurrecySignAndSeparator() {
+void NumberFormatTest::TestCustomCurrencySignAndSeparator() {
      UErrorCode status = U_ZERO_ERROR;
      DecimalFormatSymbols custom(Locale::getUS(), status);
      CHECK(status, "DecimalFormatSymbols constructor");
@@ -7168,4 +7169,116 @@ void NumberFormatTest::TestCustomCurrecySignAndSeparator() {
      expect2(fmt, (Formattable)((double)1234.56), numstr);
  }
  
+typedef struct {
+    const char *   locale;     // locale ID passed to NumberFormat::createInstance
+    UBool          lenient;    // value passed to setLenient() before parsing
+    UnicodeString  numString;  // text to parse; must be consumed completely
+    double         value;      // expected parse result
+} SignsAndMarksItem;
+
+// Parses each table entry and reports a failure unless the whole string is consumed and yields the expected value.
+void NumberFormatTest::TestParseSignsAndMarks() {
+    const SignsAndMarksItem items[] = {
+        // locale               lenient numString                                                       value
+        { "en",                 FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "en",                 TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "en",                 FALSE,  CharsToUnicodeString("-23"),                                    -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("-23"),                                    -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("- 23"),                                   -23 },
+        { "en",                 FALSE,  CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("\\u200E- 23"),                            -23 },
+
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("-\\u0664\\u0665"),                        -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("-\\u0664\\u0665"),                        -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("- \\u0664\\u0665"),                       -45 },
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                 -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                 -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u200F- \\u0664\\u0665"),                -45 },
+
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("-\\u06F6\\u06F7"),                        -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("-\\u06F6\\u06F7"),                        -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("- \\u06F6\\u06F7"),                       -67 },
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),          -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),          -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"),         -67 },
+
+        { "he",                 FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "he",                 TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "he",                 FALSE,  CharsToUnicodeString("-23"),                                    -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("-23"),                                    -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("- 23"),                                   -23 },
+        { "he",                 FALSE,  CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("\\u200E- 23"),                            -23 },
+
+        { "ar",                 FALSE,  CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "ar",                 FALSE,  CharsToUnicodeString("-\\u0664\\u0665"),                        -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("-\\u0664\\u0665"),                        -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("- \\u0664\\u0665"),                       -45 },
+        { "ar",                 FALSE,  CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                 -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                 -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u200F- \\u0664\\u0665"),                -45 },
+
+        { "ar_MA",              FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "ar_MA",              FALSE,  CharsToUnicodeString("-23"),                                    -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("-23"),                                    -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("- 23"),                                   -23 },
+        { "ar_MA",              FALSE,  CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("\\u200E-23"),                             -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("\\u200E- 23"),                            -23 },
+
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u2212\\u06F6\\u06F7"),                  -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u2212\\u06F6\\u06F7"),                  -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u2212 \\u06F6\\u06F7"),                 -67 },
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"),    -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"),    -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u200E\\u2212\\u200E \\u06F6\\u06F7"),   -67 },
+
+        { "ps",                 FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "ps",                 FALSE,  CharsToUnicodeString("-\\u06F6\\u06F7"),                        -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u06F6\\u06F7"),                        -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("- \\u06F6\\u06F7"),                       -67 },
+        { "ps",                 FALSE,  CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),          -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),          -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"),         -67 },
+        { "ps",                 FALSE,  CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"),                 -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"),                 -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u200E \\u06F6\\u06F7"),                -67 },
+        // terminator
+        { NULL,                 0,      UnicodeString(""),                                                0 },
+    };
+
+    for (const SignsAndMarksItem * itemPtr = items; itemPtr->locale != NULL; itemPtr++ ) {
+        UErrorCode status = U_ZERO_ERROR;
+        NumberFormat *numfmt = NumberFormat::createInstance(Locale(itemPtr->locale), status);
+        if (U_SUCCESS(status)) {
+            numfmt->setLenient(itemPtr->lenient);
+            Formattable fmtobj;
+            ParsePosition ppos;
+            numfmt->parse(itemPtr->numString, fmtobj, ppos);
+            if (ppos.getIndex() == itemPtr->numString.length()) {
+                double parsedValue = fmtobj.getDouble(status);
+                if (U_FAILURE(status) || parsedValue != itemPtr->value) {
+                    errln((UnicodeString)"FAIL: locale " + itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives value " + parsedValue);
+                }
+            } else {
+                errln((UnicodeString)"FAIL: locale " + itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives position " + ppos.getIndex());
+            }
+        } else {
+            dataerrln("FAIL: NumberFormat::createInstance for locale %s gives error %s", itemPtr->locale, u_errorName(status));
+        }
+        delete numfmt; // release the formatter each iteration (deleting NULL is a no-op)
+    }
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h

index 93395800f8501b7d5c9bb5ffffd77406432bb812..fad385974dca6e2b922b741ab3abe44ecebe7bfb 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.h
+++ b/icu4c/source/test/intltest/numfmtst.h
@@ -172,7 +172,9 @@ class NumberFormatTest: public CalendarTimeZoneTest {
      void TestParseNegativeWithFaLocale();
      void TestParseNegativeWithAlternateMinusSign();
  
-    void TestCustomCurrecySignAndSeparator();
+    void TestCustomCurrencySignAndSeparator();
+
+    void TestParseSignsAndMarks();
  
   private:
      UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);
author	Peter Edberg <pedberg@unicode.org>
	Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)
committer	Peter Edberg <pedberg@unicode.org>
	Wed, 4 Sep 2013 10:46:03 +0000 (10:46 +0000)
icu4c/source/data/locales/ar.txt		patch \| blob \| history
icu4c/source/data/locales/ar_QA.txt		patch \| blob \| history
icu4c/source/data/locales/ar_SA.txt		patch \| blob \| history
icu4c/source/data/locales/ar_SY.txt		patch \| blob \| history
icu4c/source/data/locales/ar_TN.txt		patch \| blob \| history
icu4c/source/data/locales/ar_YE.txt		patch \| blob \| history
icu4c/source/data/locales/fa.txt		patch \| blob \| history
icu4c/source/data/locales/he.txt		patch \| blob \| history
icu4c/source/data/locales/ps.txt		patch \| blob \| history
icu4c/source/data/locales/root.txt		patch \| blob \| history
icu4c/source/data/locales/uz_Arab.txt		patch \| blob \| history
icu4c/source/i18n/decimfmt.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/decimfmt.h		patch \| blob \| history
icu4c/source/test/intltest/compactdecimalformattest.cpp		patch \| blob \| history
icu4c/source/test/intltest/dtfmtrtts.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.h		patch \| blob \| history