From: Peter Edberg Date: Wed, 4 Sep 2013 10:46:03 +0000 (+0000) Subject: ICU-10161 Now compareSimpleAffix ignores LRM/RLM/ALM in affix or text when comparing... X-Git-Tag: milestone-59-0-1~2621 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8997c77684a4de6d33e647f66aa4e07cc12b256c;p=icu ICU-10161 Now compareSimpleAffix ignores LRM/RLM/ALM in affix or text when comparing them; and pre-integrate some CLDR 24 number symbol & pattern changes so tests will match the new data. X-SVN-Rev: 34186 --- diff --git a/icu4c/source/data/locales/ar.txt b/icu4c/source/data/locales/ar.txt index e0bd59777aa..4b88cbd349c 100644 --- a/icu4c/source/data/locales/ar.txt +++ b/icu4c/source/data/locales/ar.txt @@ -24,7 +24,7 @@ ar{ arab{ patterns{ currencyFormat{"¤ #0.00;¤ #0.00-"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} percentFormat{"#,##0%"} scientificFormat{"#E0"} } @@ -34,18 +34,18 @@ ar{ group{"٬"} infinity{"∞"} list{"؛"} - minusSign{"-"} + minusSign{"‏-"} nan{"ليس رقم"} perMille{"؉"} percentSign{"٪"} - plusSign{"+"} + plusSign{"‏+"} } } default{"arab"} latn{ patterns{ currencyFormat{"¤ #,##0.00;¤ #,##0.00-"} - decimalFormat{"#,##0.###;#,##0.###-"} + decimalFormat{"#,##0.###"} percentFormat{"#,##0%"} scientificFormat{"#E0"} } @@ -255,11 +255,11 @@ ar{ group{","} infinity{"∞"} list{";"} - minusSign{"-"} + minusSign{"‎-"} nan{"NaN"} perMille{"‰"} percentSign{"%"} - plusSign{"+"} + plusSign{"‎+"} } } native{"arab"} diff --git a/icu4c/source/data/locales/ar_QA.txt b/icu4c/source/data/locales/ar_QA.txt index f01e8fd04fc..32847a21a36 100644 --- a/icu4c/source/data/locales/ar_QA.txt +++ b/icu4c/source/data/locales/ar_QA.txt @@ -11,7 +11,7 @@ ar_QA{ latn{ patterns{ currencyFormat{"¤#0.00"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} } } } diff --git a/icu4c/source/data/locales/ar_SA.txt b/icu4c/source/data/locales/ar_SA.txt index c5d8acc95e2..b4cfd40e98c 100644 --- a/icu4c/source/data/locales/ar_SA.txt +++ 
b/icu4c/source/data/locales/ar_SA.txt @@ -11,7 +11,7 @@ ar_SA{ latn{ patterns{ currencyFormat{"¤#0.00"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} } } } diff --git a/icu4c/source/data/locales/ar_SY.txt b/icu4c/source/data/locales/ar_SY.txt index 30d79b3ce98..94d85fc95e8 100644 --- a/icu4c/source/data/locales/ar_SY.txt +++ b/icu4c/source/data/locales/ar_SY.txt @@ -11,7 +11,7 @@ ar_SY{ latn{ patterns{ currencyFormat{"¤#0.00"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} } } } diff --git a/icu4c/source/data/locales/ar_TN.txt b/icu4c/source/data/locales/ar_TN.txt index e7173ceb179..01cea61116f 100644 --- a/icu4c/source/data/locales/ar_TN.txt +++ b/icu4c/source/data/locales/ar_TN.txt @@ -12,7 +12,7 @@ ar_TN{ latn{ patterns{ currencyFormat{"¤#0.00"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} } symbols{ decimal{","} diff --git a/icu4c/source/data/locales/ar_YE.txt b/icu4c/source/data/locales/ar_YE.txt index 84dd1a207c3..cba401613f6 100644 --- a/icu4c/source/data/locales/ar_YE.txt +++ b/icu4c/source/data/locales/ar_YE.txt @@ -11,7 +11,7 @@ ar_YE{ latn{ patterns{ currencyFormat{"¤#0.00"} - decimalFormat{"#0.###;#0.###-"} + decimalFormat{"#0.###"} } } } diff --git a/icu4c/source/data/locales/fa.txt b/icu4c/source/data/locales/fa.txt index 84b405d1824..620b5117199 100644 --- a/icu4c/source/data/locales/fa.txt +++ b/icu4c/source/data/locales/fa.txt @@ -28,11 +28,11 @@ fa{ group{"٬"} infinity{"∞"} list{"؛"} - minusSign{"−"} + minusSign{"‎−"} nan{"ناعدد"} perMille{"؉"} percentSign{"٪"} - plusSign{"+"} + plusSign{"‎+‎"} } } default{"arabext"} @@ -129,11 +129,11 @@ fa{ group{","} infinity{"∞"} list{";"} - minusSign{"−"} + minusSign{"‎−"} nan{"NaN"} perMille{"‰"} percentSign{"%"} - plusSign{"+"} + plusSign{"‎+"} } } native{"arabext"} diff --git a/icu4c/source/data/locales/he.txt b/icu4c/source/data/locales/he.txt index 403193b2ede..1dc2b5996b4 100644 --- a/icu4c/source/data/locales/he.txt +++ b/icu4c/source/data/locales/he.txt @@ 
-187,11 +187,11 @@ he{ group{","} infinity{"∞"} list{";"} - minusSign{"-"} + minusSign{"‎-"} nan{"NaN"} perMille{"‰"} percentSign{"%"} - plusSign{"+"} + plusSign{"‎+"} } } native{"latn"} diff --git a/icu4c/source/data/locales/ps.txt b/icu4c/source/data/locales/ps.txt index 63c183216c9..9c47c1cb6b2 100644 --- a/icu4c/source/data/locales/ps.txt +++ b/icu4c/source/data/locales/ps.txt @@ -10,7 +10,7 @@ * ICU source: /common/main/ps.xml */ ps{ - AuxExemplarCharacters{"[\u200C \u200D]"} + AuxExemplarCharacters{"[\u200C \u200D \u200E \u200F]"} ExemplarCharacters{ "[\u064E \u0650 \u064F \u064B \u064D \u064C \u0651 \u0652 \u0654 \u0670 آ ا أ" " ء ب پ ت ټ ث ج ځ چ څ ح خ د ډ ذ ر ړ ز ژ ږ س ش ښ ص ض ط ظ ع غ ف ق ک ګ ل م ن ڼ ه" @@ -37,8 +37,9 @@ ps{ decimal{","} exponential{"E"} group{"."} - minusSign{"−"} + minusSign{"‎−"} percentSign{"%"} + plusSign{"‎+"} } } native{"arabext"} diff --git a/icu4c/source/data/locales/root.txt b/icu4c/source/data/locales/root.txt index 96e3daf0001..ae64ab88c7c 100644 --- a/icu4c/source/data/locales/root.txt +++ b/icu4c/source/data/locales/root.txt @@ -25,11 +25,11 @@ root{ group{"٬"} infinity{"∞"} list{"؛"} - minusSign{"-"} + minusSign{"‏-"} nan{"NaN"} perMille{"؉"} percentSign{"٪"} - plusSign{"+"} + plusSign{"‏+"} superscriptingExponent{"×"} } } @@ -40,11 +40,11 @@ root{ group{"٬"} infinity{"∞"} list{"؛"} - minusSign{"-"} + minusSign{"‎-‎"} nan{"NaN"} perMille{"؉"} percentSign{"٪"} - plusSign{"+"} + plusSign{"‎+‎"} superscriptingExponent{"×"} } } diff --git a/icu4c/source/data/locales/uz_Arab.txt b/icu4c/source/data/locales/uz_Arab.txt index b7dfdebead1..9a5e9c534ab 100644 --- a/icu4c/source/data/locales/uz_Arab.txt +++ b/icu4c/source/data/locales/uz_Arab.txt @@ -11,7 +11,7 @@ */ uz_Arab{ %%Parent{"root"} - AuxExemplarCharacters{"[\u200C\u200D ټ ځ څ ډ ړ ږ ښ ګ ڼ ي ۍ ې]"} + AuxExemplarCharacters{"[\u200C\u200D\u200E\u200F ټ ځ څ ډ ړ ږ ښ ګ ڼ ي ۍ ې]"} ExemplarCharacters{ "[\u064B \u064C \u064D \u064E \u064F \u0650 \u0651 \u0652 \u0654 \u0670 ء آ أ" " ؤ 
ئ ا ب پ ة ت ث ج چ ح خ د ذ ر ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن ه و ۇ ۉ ی]" @@ -33,7 +33,8 @@ uz_Arab{ symbols{ decimal{","} group{"."} - minusSign{"−"} + minusSign{"‎−"} + plusSign{"‎+"} } } native{"arabext"} diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index f740ba65aea..13e3073f9ed 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -2840,7 +2840,7 @@ int32_t DecimalFormat::skipPadding(const UnicodeString& text, int32_t position) * @param isNegative * @param isPrefix * @param affixPat affix pattern used for currency affix comparison. - * @param copmplexCurrencyParsing whether it is currency parsing or not + * @param complexCurrencyParsing whether it is currency parsing or not * @param type the currency type to parse against, LONG_NAME only or not. * @param currency return value for parsed currency, for generic * currency parsing mode, or null for normal parsing. In generic @@ -2853,13 +2853,13 @@ int32_t DecimalFormat::compareAffix(const UnicodeString& text, UBool isNegative, UBool isPrefix, const UnicodeString* affixPat, - UBool copmplexCurrencyParsing, + UBool complexCurrencyParsing, int8_t type, UChar* currency) const { const UnicodeString *patternToCompare; if (fCurrencyChoice != NULL || currency != NULL || - (fCurrencySignCount != fgCurrencySignCountZero && copmplexCurrencyParsing)) { + (fCurrencySignCount != fgCurrencySignCountZero && complexCurrencyParsing)) { if (affixPat != NULL) { return compareComplexAffix(*affixPat, text, pos, type, currency); @@ -2901,6 +2901,28 @@ static UBool equalWithSignCompatibility(UChar32 lhs, UChar32 rhs) { (plusSigns->contains(lhs) && plusSigns->contains(rhs)); } +// check for LRM 0x200E, RLM 0x200F, ALM 0x061C +#define IS_BIDI_MARK(c) (c==0x200E || c==0x200F || c==0x061C) + +// The following assumes any marks are at the beginning or end of the affix +UnicodeString& DecimalFormat::trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix) { + 
int32_t first = 0; + int32_t last = affix.length() - 1; + if (last > 0) { + UChar c = affix.charAt(0); + if (IS_BIDI_MARK(c)) { + first++; + } + if (last > first) { + c = affix.charAt(last); + if (IS_BIDI_MARK(c)) { + last--; + } + } + } + return trimmedAffix.setTo(affix, first, last + 1 - first); +} + /** * Return the length matched by the given affix, or -1 if none. * Runs of white space in the affix, match runs of white space in @@ -2916,8 +2938,10 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, int32_t pos, UBool lenient) { int32_t start = pos; - UChar32 affixChar = affix.char32At(0); - int32_t affixLength = affix.length(); + UnicodeString trimmedAffix; + trimMarksFromAffix(affix, trimmedAffix); + UChar32 affixChar = trimmedAffix.char32At(0); + int32_t affixLength = trimmedAffix.length(); int32_t inputLength = input.length(); int32_t affixCharLength = U16_LENGTH(affixChar); UnicodeSet *affixSet; @@ -2930,17 +2954,20 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, if (!lenient) { affixSet = staticSets->fStrictDashEquivalents; - // If the affix is exactly one character long and that character + // If the trimmedAffix is exactly one character long and that character // is in the dash set and the very next input character is also // in the dash set, return a match. 
if (affixCharLength == affixLength && affixSet->contains(affixChar)) { - if (affixSet->contains(input.char32At(pos))) { - return 1; + UChar32 ic = input.char32At(pos); + if (affixSet->contains(ic)) { + pos += U16_LENGTH(ic); + pos = skipBidiMarks(input, pos); // skip any trailing bidi marks + return pos - start; } } for (int32_t i = 0; i < affixLength; ) { - UChar32 c = affix.char32At(i); + UChar32 c = trimmedAffix.char32At(i); int32_t len = U16_LENGTH(c); if (PatternProps::isWhiteSpace(c)) { // We may have a pattern like: \u200F \u0020 @@ -2950,23 +2977,29 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, // match of the run of Pattern_White_Space in the pattern, // then match any extra characters. UBool literalMatch = FALSE; - while (pos < inputLength && - input.char32At(pos) == c) { - literalMatch = TRUE; - i += len; - pos += len; - if (i == affixLength) { - break; - } - c = affix.char32At(i); - len = U16_LENGTH(c); - if (!PatternProps::isWhiteSpace(c)) { + while (pos < inputLength) { + UChar32 ic = input.char32At(pos); + if (ic == c) { + literalMatch = TRUE; + i += len; + pos += len; + if (i == affixLength) { + break; + } + c = trimmedAffix.char32At(i); + len = U16_LENGTH(c); + if (!PatternProps::isWhiteSpace(c)) { + break; + } + } else if (IS_BIDI_MARK(ic)) { + pos ++; // just skip over this input text + } else { break; } } // Advance over run in pattern - i = skipPatternWhiteSpace(affix, i); + i = skipPatternWhiteSpace(trimmedAffix, i); // Advance over run in input text // Must see at least one white space char in input, @@ -2979,14 +3012,23 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, // If we skip UWhiteSpace in the input text, we need to skip it in the pattern. // Otherwise, the previous lines may have skipped over text (such as U+00A0) that - // is also in the affix. - i = skipUWhiteSpace(affix, i); + // is also in the trimmedAffix. 
+ i = skipUWhiteSpace(trimmedAffix, i); } else { - if (pos < inputLength && - input.char32At(pos) == c) { - i += len; - pos += len; - } else { + UBool match = FALSE; + while (pos < inputLength) { + UChar32 ic = input.char32At(pos); + if (!match && ic == c) { + i += len; + pos += len; + match = TRUE; + } else if (IS_BIDI_MARK(ic)) { + pos++; // just skip over this input text + } else { + break; + } + } + if (!match) { return -1; } } @@ -2997,33 +3039,37 @@ int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix, affixSet = staticSets->fDashEquivalents; if (affixCharLength == affixLength && affixSet->contains(affixChar)) { - pos = skipUWhiteSpace(input, pos); + pos = skipUWhiteSpaceAndMarks(input, pos); + UChar32 ic = input.char32At(pos); - if (affixSet->contains(input.char32At(pos))) { - return pos - start + 1; + if (affixSet->contains(ic)) { + pos += U16_LENGTH(ic); + pos = skipBidiMarks(input, pos); + return pos - start; } } for (int32_t i = 0; i < affixLength; ) { - //i = skipRuleWhiteSpace(affix, i); - i = skipUWhiteSpace(affix, i); - pos = skipUWhiteSpace(input, pos); + //i = skipRuleWhiteSpace(trimmedAffix, i); + i = skipUWhiteSpace(trimmedAffix, i); + pos = skipUWhiteSpaceAndMarks(input, pos); if (i >= affixLength || pos >= inputLength) { break; } - UChar32 c = affix.char32At(i); - int32_t len = U16_LENGTH(c); + UChar32 c = trimmedAffix.char32At(i); + UChar32 ic = input.char32At(pos); - if (!equalWithSignCompatibility(input.char32At(pos), c)) { + if (!equalWithSignCompatibility(ic, c)) { return -1; } match = TRUE; - i += len; - pos += len; + i += U16_LENGTH(c); + pos += U16_LENGTH(ic); + pos = skipBidiMarks(input, pos); } if (affixLength > 0 && ! match) { @@ -3057,6 +3103,35 @@ int32_t DecimalFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) { return pos; } +/** + * Skip over a run of zero or more isUWhiteSpace() characters or bidi marks at pos + * in text. 
+ */ +int32_t DecimalFormat::skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos) { + while (pos < text.length()) { + UChar32 c = text.char32At(pos); + if (!u_isUWhiteSpace(c) && !IS_BIDI_MARK(c)) { // u_isUWhiteSpace doesn't include LRM,RLM,ALM + break; + } + pos += U16_LENGTH(c); + } + return pos; +} + +/** + * Skip over a run of zero or more bidi marks at pos in text. + */ +int32_t DecimalFormat::skipBidiMarks(const UnicodeString& text, int32_t pos) { + while (pos < text.length()) { + UChar c = text.charAt(pos); + if (!IS_BIDI_MARK(c)) { + break; + } + pos++; + } + return pos; +} + /** * Return the length matched by the given affix, or -1 if none. * @param affixPat pattern string diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h index 544e7511c2a..8f98eafe3b1 100644 --- a/icu4c/source/i18n/unicode/decimfmt.h +++ b/icu4c/source/i18n/unicode/decimfmt.h @@ -2019,6 +2019,8 @@ private: int8_t type, UChar* currency) const; + static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix); + static int32_t compareSimpleAffix(const UnicodeString& affix, const UnicodeString& input, int32_t pos, @@ -2028,6 +2030,10 @@ private: static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos); + static int32_t skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos); + + static int32_t skipBidiMarks(const UnicodeString& text, int32_t pos); + int32_t compareComplexAffix(const UnicodeString& affixPat, const UnicodeString& input, int32_t pos, diff --git a/icu4c/source/test/intltest/compactdecimalformattest.cpp b/icu4c/source/test/intltest/compactdecimalformattest.cpp index 94cef3f455d..f9b9002dc08 100644 --- a/icu4c/source/test/intltest/compactdecimalformattest.cpp +++ b/icu4c/source/test/intltest/compactdecimalformattest.cpp @@ -164,7 +164,7 @@ static ExpectedResult kSwahiliShortNegative[] = { {-1.23456789E15, "T-1200"}}; static ExpectedResult kArabicLong[] = { - {-5300.0, 
"\\u0665\\u066B\\u0663- \\u0623\\u0644\\u0641"}}; + {-5300.0, "\\u200F-\\u0665\\u066B\\u0663 \\u0623\\u0644\\u0641"}}; class CompactDecimalFormatTest : public IntlTest { diff --git a/icu4c/source/test/intltest/dtfmtrtts.cpp b/icu4c/source/test/intltest/dtfmtrtts.cpp index b0ee96a6405..1f1115a449b 100644 --- a/icu4c/source/test/intltest/dtfmtrtts.cpp +++ b/icu4c/source/test/intltest/dtfmtrtts.cpp @@ -1,6 +1,6 @@ /*********************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2012, International Business Machines Corporation + * Copyright (c) 1997-2013, International Business Machines Corporation * and others. All Rights Reserved. ***********************************************************************/ @@ -335,7 +335,7 @@ void DateFormatRoundTripTest::test(DateFormat *fmt, const Locale &origLocale, UB for(loop = 0; loop < DEPTH; ++loop) { if (loop > 0) { d[loop] = fmt->parse(s[loop-1], status); - failure(status, "fmt->parse", s[loop-1]+" in locale: " + origLocale.getName()); + failure(status, "fmt->parse", s[loop-1]+" in locale: " + origLocale.getName() + " with pattern: " + pat); status = U_ZERO_ERROR; /* any error would have been reported */ } diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 624ac31bfd5..9dbced328fe 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -128,7 +128,8 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n TESTCASE_AUTO(TestBug9936); TESTCASE_AUTO(TestParseNegativeWithFaLocale); TESTCASE_AUTO(TestParseNegativeWithAlternateMinusSign); - TESTCASE_AUTO(TestCustomCurrecySignAndSeparator); + TESTCASE_AUTO(TestCustomCurrencySignAndSeparator); + TESTCASE_AUTO(TestParseSignsAndMarks); TESTCASE_AUTO_END; } @@ -7149,7 +7150,7 @@ void NumberFormatTest::TestParseNegativeWithAlternateMinusSign() { delete test; } -void NumberFormatTest::TestCustomCurrecySignAndSeparator() { 
+void NumberFormatTest::TestCustomCurrencySignAndSeparator() { UErrorCode status = U_ZERO_ERROR; DecimalFormatSymbols custom(Locale::getUS(), status); CHECK(status, "DecimalFormatSymbols constructor"); @@ -7168,4 +7169,116 @@ void NumberFormatTest::TestCustomCurrecySignAndSeparator() { expect2(fmt, (Formattable)((double)1234.56), numstr); } +typedef struct { + const char * locale; + UBool lenient; + UnicodeString numString; + double value; +} SignsAndMarksItem; + + +void NumberFormatTest::TestParseSignsAndMarks() { + const SignsAndMarksItem items[] = { + // locale lenient numString value + { "en", FALSE, CharsToUnicodeString("12"), 12 }, + { "en", TRUE, CharsToUnicodeString("12"), 12 }, + { "en", FALSE, CharsToUnicodeString("-23"), -23 }, + { "en", TRUE, CharsToUnicodeString("-23"), -23 }, + { "en", TRUE, CharsToUnicodeString("- 23"), -23 }, + { "en", FALSE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "en", TRUE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "en", TRUE, CharsToUnicodeString("\\u200E- 23"), -23 }, + + { "en@numbers=arab", FALSE, CharsToUnicodeString("\\u0663\\u0664"), 34 }, + { "en@numbers=arab", TRUE, CharsToUnicodeString("\\u0663\\u0664"), 34 }, + { "en@numbers=arab", FALSE, CharsToUnicodeString("-\\u0664\\u0665"), -45 }, + { "en@numbers=arab", TRUE, CharsToUnicodeString("-\\u0664\\u0665"), -45 }, + { "en@numbers=arab", TRUE, CharsToUnicodeString("- \\u0664\\u0665"), -45 }, + { "en@numbers=arab", FALSE, CharsToUnicodeString("\\u200F-\\u0664\\u0665"), -45 }, + { "en@numbers=arab", TRUE, CharsToUnicodeString("\\u200F-\\u0664\\u0665"), -45 }, + { "en@numbers=arab", TRUE, CharsToUnicodeString("\\u200F- \\u0664\\u0665"), -45 }, + + { "en@numbers=arabext", FALSE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "en@numbers=arabext", TRUE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "en@numbers=arabext", FALSE, CharsToUnicodeString("-\\u06F6\\u06F7"), -67 }, + { "en@numbers=arabext", TRUE, CharsToUnicodeString("-\\u06F6\\u06F7"), -67 }, + { 
"en@numbers=arabext", TRUE, CharsToUnicodeString("- \\u06F6\\u06F7"), -67 }, + { "en@numbers=arabext", FALSE, CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"), -67 }, + { "en@numbers=arabext", TRUE, CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"), -67 }, + { "en@numbers=arabext", TRUE, CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"), -67 }, + + { "he", FALSE, CharsToUnicodeString("12"), 12 }, + { "he", TRUE, CharsToUnicodeString("12"), 12 }, + { "he", FALSE, CharsToUnicodeString("-23"), -23 }, + { "he", TRUE, CharsToUnicodeString("-23"), -23 }, + { "he", TRUE, CharsToUnicodeString("- 23"), -23 }, + { "he", FALSE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "he", TRUE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "he", TRUE, CharsToUnicodeString("\\u200E- 23"), -23 }, + + { "ar", FALSE, CharsToUnicodeString("\\u0663\\u0664"), 34 }, + { "ar", TRUE, CharsToUnicodeString("\\u0663\\u0664"), 34 }, + { "ar", FALSE, CharsToUnicodeString("-\\u0664\\u0665"), -45 }, + { "ar", TRUE, CharsToUnicodeString("-\\u0664\\u0665"), -45 }, + { "ar", TRUE, CharsToUnicodeString("- \\u0664\\u0665"), -45 }, + { "ar", FALSE, CharsToUnicodeString("\\u200F-\\u0664\\u0665"), -45 }, + { "ar", TRUE, CharsToUnicodeString("\\u200F-\\u0664\\u0665"), -45 }, + { "ar", TRUE, CharsToUnicodeString("\\u200F- \\u0664\\u0665"), -45 }, + + { "ar_MA", FALSE, CharsToUnicodeString("12"), 12 }, + { "ar_MA", TRUE, CharsToUnicodeString("12"), 12 }, + { "ar_MA", FALSE, CharsToUnicodeString("-23"), -23 }, + { "ar_MA", TRUE, CharsToUnicodeString("-23"), -23 }, + { "ar_MA", TRUE, CharsToUnicodeString("- 23"), -23 }, + { "ar_MA", FALSE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "ar_MA", TRUE, CharsToUnicodeString("\\u200E-23"), -23 }, + { "ar_MA", TRUE, CharsToUnicodeString("\\u200E- 23"), -23 }, + + { "fa", FALSE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "fa", TRUE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "fa", FALSE, CharsToUnicodeString("\\u2212\\u06F6\\u06F7"), -67 }, 
+ { "fa", TRUE, CharsToUnicodeString("\\u2212\\u06F6\\u06F7"), -67 }, + { "fa", TRUE, CharsToUnicodeString("\\u2212 \\u06F6\\u06F7"), -67 }, + { "fa", FALSE, CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"), -67 }, + { "fa", TRUE, CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"), -67 }, + { "fa", TRUE, CharsToUnicodeString("\\u200E\\u2212\\u200E \\u06F6\\u06F7"), -67 }, + + { "ps", FALSE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "ps", TRUE, CharsToUnicodeString("\\u06F5\\u06F6"), 56 }, + { "ps", FALSE, CharsToUnicodeString("-\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("-\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("- \\u06F6\\u06F7"), -67 }, + { "ps", FALSE, CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"), -67 }, + { "ps", FALSE, CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"), -67 }, + { "ps", TRUE, CharsToUnicodeString("-\\u200E \\u06F6\\u06F7"), -67 }, + // terminator + { NULL, 0, UnicodeString(""), 0 }, + }; + + const SignsAndMarksItem * itemPtr; + for (itemPtr = items; itemPtr->locale != NULL; itemPtr++ ) { + UErrorCode status = U_ZERO_ERROR; + NumberFormat *numfmt = NumberFormat::createInstance(Locale(itemPtr->locale), status); + if (U_SUCCESS(status)) { + numfmt->setLenient(itemPtr->lenient); + Formattable fmtobj; + ParsePosition ppos; + numfmt->parse(itemPtr->numString, fmtobj, ppos); + if (ppos.getIndex() == itemPtr->numString.length()) { + double parsedValue = fmtobj.getDouble(status); + if (U_FAILURE(status) || parsedValue != itemPtr->value) { + errln((UnicodeString)"FAIL: locale " + itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives value " + parsedValue); + } + } else { + errln((UnicodeString)"FAIL: locale " + 
itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives position " + ppos.getIndex()); + } + } else { + dataerrln("FAIL: NumberFormat::createInstance for locale %s gives error %s", itemPtr->locale, u_errorName(status)); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h index 93395800f85..fad385974dc 100644 --- a/icu4c/source/test/intltest/numfmtst.h +++ b/icu4c/source/test/intltest/numfmtst.h @@ -172,7 +172,9 @@ class NumberFormatTest: public CalendarTimeZoneTest { void TestParseNegativeWithFaLocale(); void TestParseNegativeWithAlternateMinusSign(); - void TestCustomCurrecySignAndSeparator(); + void TestCustomCurrencySignAndSeparator(); + + void TestParseSignsAndMarks(); private: UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);