* @param isNegative
* @param isPrefix
* @param affixPat affix pattern used for currency affix comparison.
- * @param copmplexCurrencyParsing whether it is currency parsing or not
+ * @param complexCurrencyParsing whether it is currency parsing or not
* @param type the currency type to parse against, LONG_NAME only or not.
* @param currency return value for parsed currency, for generic
* currency parsing mode, or null for normal parsing. In generic
UBool isNegative,
UBool isPrefix,
const UnicodeString* affixPat,
- UBool copmplexCurrencyParsing,
+ UBool complexCurrencyParsing,
int8_t type,
UChar* currency) const
{
const UnicodeString *patternToCompare;
if (fCurrencyChoice != NULL || currency != NULL ||
- (fCurrencySignCount != fgCurrencySignCountZero && copmplexCurrencyParsing)) {
+ (fCurrencySignCount != fgCurrencySignCountZero && complexCurrencyParsing)) {
if (affixPat != NULL) {
return compareComplexAffix(*affixPat, text, pos, type, currency);
(plusSigns->contains(lhs) && plusSigns->contains(rhs));
}
+// True when c is one of the bidi marks LRM (0x200E), RLM (0x200F) or ALM (0x061C).
+// Each use of the argument is parenthesized so that expression arguments expand
+// correctly. NOTE: c may be evaluated up to three times; pass a side-effect-free value.
+#define IS_BIDI_MARK(c) ((c)==0x200E || (c)==0x200F || (c)==0x061C)
+
+/**
+ * Copy affix into trimmedAffix, dropping at most one bidi mark from the start
+ * and at most one from the end. Marks elsewhere in the affix are kept, and an
+ * affix shorter than two code units is copied unchanged.
+ * @param affix         the affix to examine
+ * @param trimmedAffix  receives the (possibly shortened) copy of affix
+ * @return              a reference to trimmedAffix
+ */
+UnicodeString& DecimalFormat::trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix) {
+    const int32_t affixLen = affix.length();
+    int32_t begin = 0;         // index of the first code unit to keep
+    int32_t end = affixLen;    // one past the last code unit to keep
+    if (affixLen >= 2) {
+        const UChar head = affix.charAt(0);
+        if (IS_BIDI_MARK(head)) {
+            begin = 1;
+        }
+        // Only look at the tail if something other than the tail itself would remain.
+        if (end - 1 > begin) {
+            const UChar tail = affix.charAt(end - 1);
+            if (IS_BIDI_MARK(tail)) {
+                --end;
+            }
+        }
+    }
+    return trimmedAffix.setTo(affix, begin, end - begin);
+}
+
/**
* Return the length matched by the given affix, or -1 if none.
* Runs of white space in the affix, match runs of white space in
int32_t pos,
UBool lenient) {
int32_t start = pos;
- UChar32 affixChar = affix.char32At(0);
- int32_t affixLength = affix.length();
+ UnicodeString trimmedAffix;
+ trimMarksFromAffix(affix, trimmedAffix);
+ UChar32 affixChar = trimmedAffix.char32At(0);
+ int32_t affixLength = trimmedAffix.length();
int32_t inputLength = input.length();
int32_t affixCharLength = U16_LENGTH(affixChar);
UnicodeSet *affixSet;
if (!lenient) {
affixSet = staticSets->fStrictDashEquivalents;
- // If the affix is exactly one character long and that character
+ // If the trimmedAffix is exactly one character long and that character
// is in the dash set and the very next input character is also
// in the dash set, return a match.
if (affixCharLength == affixLength && affixSet->contains(affixChar)) {
- if (affixSet->contains(input.char32At(pos))) {
- return 1;
+ UChar32 ic = input.char32At(pos);
+ if (affixSet->contains(ic)) {
+ pos += U16_LENGTH(ic);
+ pos = skipBidiMarks(input, pos); // skip any trailing bidi marks
+ return pos - start;
}
}
for (int32_t i = 0; i < affixLength; ) {
- UChar32 c = affix.char32At(i);
+ UChar32 c = trimmedAffix.char32At(i);
int32_t len = U16_LENGTH(c);
if (PatternProps::isWhiteSpace(c)) {
// We may have a pattern like: \u200F \u0020
// match of the run of Pattern_White_Space in the pattern,
// then match any extra characters.
UBool literalMatch = FALSE;
- while (pos < inputLength &&
- input.char32At(pos) == c) {
- literalMatch = TRUE;
- i += len;
- pos += len;
- if (i == affixLength) {
- break;
- }
- c = affix.char32At(i);
- len = U16_LENGTH(c);
- if (!PatternProps::isWhiteSpace(c)) {
+ while (pos < inputLength) {
+ UChar32 ic = input.char32At(pos);
+ if (ic == c) {
+ literalMatch = TRUE;
+ i += len;
+ pos += len;
+ if (i == affixLength) {
+ break;
+ }
+ c = trimmedAffix.char32At(i);
+ len = U16_LENGTH(c);
+ if (!PatternProps::isWhiteSpace(c)) {
+ break;
+ }
+ } else if (IS_BIDI_MARK(ic)) {
+ pos ++; // just skip over this input text
+ } else {
break;
}
}
// Advance over run in pattern
- i = skipPatternWhiteSpace(affix, i);
+ i = skipPatternWhiteSpace(trimmedAffix, i);
// Advance over run in input text
// Must see at least one white space char in input,
// If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
// Otherwise, the previous lines may have skipped over text (such as U+00A0) that
- // is also in the affix.
- i = skipUWhiteSpace(affix, i);
+ // is also in the trimmedAffix.
+ i = skipUWhiteSpace(trimmedAffix, i);
} else {
- if (pos < inputLength &&
- input.char32At(pos) == c) {
- i += len;
- pos += len;
- } else {
+ UBool match = FALSE;
+ while (pos < inputLength) {
+ UChar32 ic = input.char32At(pos);
+ if (!match && ic == c) {
+ i += len;
+ pos += len;
+ match = TRUE;
+ } else if (IS_BIDI_MARK(ic)) {
+ pos++; // just skip over this input text
+ } else {
+ break;
+ }
+ }
+ if (!match) {
return -1;
}
}
affixSet = staticSets->fDashEquivalents;
if (affixCharLength == affixLength && affixSet->contains(affixChar)) {
- pos = skipUWhiteSpace(input, pos);
+ pos = skipUWhiteSpaceAndMarks(input, pos);
+ UChar32 ic = input.char32At(pos);
- if (affixSet->contains(input.char32At(pos))) {
- return pos - start + 1;
+ if (affixSet->contains(ic)) {
+ pos += U16_LENGTH(ic);
+ pos = skipBidiMarks(input, pos);
+ return pos - start;
}
}
for (int32_t i = 0; i < affixLength; )
{
- //i = skipRuleWhiteSpace(affix, i);
- i = skipUWhiteSpace(affix, i);
- pos = skipUWhiteSpace(input, pos);
+ //i = skipRuleWhiteSpace(trimmedAffix, i);
+ i = skipUWhiteSpace(trimmedAffix, i);
+ pos = skipUWhiteSpaceAndMarks(input, pos);
if (i >= affixLength || pos >= inputLength) {
break;
}
- UChar32 c = affix.char32At(i);
- int32_t len = U16_LENGTH(c);
+ UChar32 c = trimmedAffix.char32At(i);
+ UChar32 ic = input.char32At(pos);
- if (!equalWithSignCompatibility(input.char32At(pos), c)) {
+ if (!equalWithSignCompatibility(ic, c)) {
return -1;
}
match = TRUE;
- i += len;
- pos += len;
+ i += U16_LENGTH(c);
+ pos += U16_LENGTH(ic);
+ pos = skipBidiMarks(input, pos);
}
if (affixLength > 0 && ! match) {
return pos;
}
+/**
+ * Advance pos past any run of code points in text that are either
+ * u_isUWhiteSpace() characters or bidi marks (LRM, RLM, ALM).
+ * @param text  the text to scan
+ * @param pos   the index at which to start scanning
+ * @return      the index of the first code point that is neither, or
+ *              pos unchanged if it is already at or beyond the end of text
+ */
+int32_t DecimalFormat::skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos) {
+    const int32_t textLength = text.length();
+    while (pos < textLength) {
+        const UChar32 cp = text.char32At(pos);
+        // u_isUWhiteSpace does not cover LRM, RLM, ALM, so test for them separately.
+        if (u_isUWhiteSpace(cp) || IS_BIDI_MARK(cp)) {
+            pos += U16_LENGTH(cp);
+        } else {
+            break;
+        }
+    }
+    return pos;
+}
+
+
+/**
+ * Advance pos past any run of bidi marks (LRM, RLM, ALM) in text.
+ * @param text  the text to scan
+ * @param pos   the index at which to start scanning
+ * @return      the index of the first code unit that is not a bidi mark, or
+ *              pos unchanged if it is already at or beyond the end of text
+ */
+int32_t DecimalFormat::skipBidiMarks(const UnicodeString& text, int32_t pos) {
+    for (const int32_t limit = text.length(); pos < limit; ++pos) {
+        const UChar unit = text.charAt(pos);
+        if (!IS_BIDI_MARK(unit)) {
+            break;
+        }
+    }
+    return pos;
+}
+
/**
* Return the length matched by the given affix, or -1 if none.
* @param affixPat pattern string
TESTCASE_AUTO(TestBug9936);
TESTCASE_AUTO(TestParseNegativeWithFaLocale);
TESTCASE_AUTO(TestParseNegativeWithAlternateMinusSign);
- TESTCASE_AUTO(TestCustomCurrecySignAndSeparator);
+ TESTCASE_AUTO(TestCustomCurrencySignAndSeparator);
+ TESTCASE_AUTO(TestParseSignsAndMarks);
TESTCASE_AUTO_END;
}
delete test;
}
-void NumberFormatTest::TestCustomCurrecySignAndSeparator() {
+void NumberFormatTest::TestCustomCurrencySignAndSeparator() {
UErrorCode status = U_ZERO_ERROR;
DecimalFormatSymbols custom(Locale::getUS(), status);
CHECK(status, "DecimalFormatSymbols constructor");
expect2(fmt, (Formattable)((double)1234.56), numstr);
}
+// One row of the TestParseSignsAndMarks data table.
+struct SignsAndMarksItem {
+    const char *  locale;     // locale ID used to create the formatter; NULL ends the table
+    UBool         lenient;    // value passed to setLenient() before parsing
+    UnicodeString numString;  // text handed to parse()
+    double        value;      // value the parse is expected to produce
+};
+
+
+/**
+ * Table-driven check that numbers with signs and bidi marks parse completely.
+ * For each row a NumberFormat is created for the row's locale, switched to the
+ * row's lenient setting, and asked to parse the row's string. The test fails if
+ * the parse does not consume the whole string or if the parsed double differs
+ * from the expected value. A formatter that cannot be created is reported
+ * through dataerrln.
+ */
+void NumberFormatTest::TestParseSignsAndMarks() {
+    const SignsAndMarksItem items[] = {
+        // locale               lenient numString                                                       value
+        { "en",                 FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "en",                 TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "en",                 FALSE,  CharsToUnicodeString("-23"),                                     -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("-23"),                                     -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("- 23"),                                    -23 },
+        { "en",                 FALSE,  CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "en",                 TRUE,   CharsToUnicodeString("\\u200E- 23"),                             -23 },
+
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("-\\u0664\\u0665"),                         -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("-\\u0664\\u0665"),                         -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("- \\u0664\\u0665"),                        -45 },
+        { "en@numbers=arab",    FALSE,  CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                  -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                  -45 },
+        { "en@numbers=arab",    TRUE,   CharsToUnicodeString("\\u200F- \\u0664\\u0665"),                 -45 },
+
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("-\\u06F6\\u06F7"),                         -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("-\\u06F6\\u06F7"),                         -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("- \\u06F6\\u06F7"),                        -67 },
+        { "en@numbers=arabext", FALSE,  CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),           -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),           -67 },
+        { "en@numbers=arabext", TRUE,   CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"),          -67 },
+
+        { "he",                 FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "he",                 TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "he",                 FALSE,  CharsToUnicodeString("-23"),                                     -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("-23"),                                     -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("- 23"),                                    -23 },
+        { "he",                 FALSE,  CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "he",                 TRUE,   CharsToUnicodeString("\\u200E- 23"),                             -23 },
+
+        { "ar",                 FALSE,  CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u0663\\u0664"),                          34 },
+        { "ar",                 FALSE,  CharsToUnicodeString("-\\u0664\\u0665"),                         -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("-\\u0664\\u0665"),                         -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("- \\u0664\\u0665"),                        -45 },
+        { "ar",                 FALSE,  CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                  -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u200F-\\u0664\\u0665"),                  -45 },
+        { "ar",                 TRUE,   CharsToUnicodeString("\\u200F- \\u0664\\u0665"),                 -45 },
+
+        { "ar_MA",              FALSE,  CharsToUnicodeString("12"),                                      12 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("12"),                                      12 },
+        { "ar_MA",              FALSE,  CharsToUnicodeString("-23"),                                     -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("-23"),                                     -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("- 23"),                                    -23 },
+        { "ar_MA",              FALSE,  CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("\\u200E-23"),                              -23 },
+        { "ar_MA",              TRUE,   CharsToUnicodeString("\\u200E- 23"),                             -23 },
+
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u2212\\u06F6\\u06F7"),                   -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u2212\\u06F6\\u06F7"),                   -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u2212 \\u06F6\\u06F7"),                  -67 },
+        { "fa",                 FALSE,  CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"),     -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u200E\\u2212\\u200E\\u06F6\\u06F7"),     -67 },
+        { "fa",                 TRUE,   CharsToUnicodeString("\\u200E\\u2212\\u200E \\u06F6\\u06F7"),    -67 },
+
+        { "ps",                 FALSE,  CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u06F5\\u06F6"),                          56 },
+        { "ps",                 FALSE,  CharsToUnicodeString("-\\u06F6\\u06F7"),                         -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u06F6\\u06F7"),                         -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("- \\u06F6\\u06F7"),                        -67 },
+        { "ps",                 FALSE,  CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),           -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u200E-\\u200E\\u06F6\\u06F7"),           -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("\\u200E-\\u200E \\u06F6\\u06F7"),          -67 },
+        { "ps",                 FALSE,  CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"),                  -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u200E\\u06F6\\u06F7"),                  -67 },
+        { "ps",                 TRUE,   CharsToUnicodeString("-\\u200E \\u06F6\\u06F7"),                 -67 },
+        // terminator
+        { NULL,                 0,      UnicodeString(""),                                               0 },
+    };
+
+    const SignsAndMarksItem * itemPtr;
+    for (itemPtr = items; itemPtr->locale != NULL; itemPtr++ ) {
+        UErrorCode status = U_ZERO_ERROR;
+        NumberFormat *numfmt = NumberFormat::createInstance(Locale(itemPtr->locale), status);
+        if (U_SUCCESS(status)) {
+            numfmt->setLenient(itemPtr->lenient);
+            Formattable fmtobj;
+            ParsePosition ppos;
+            numfmt->parse(itemPtr->numString, fmtobj, ppos);
+            // A successful parse must consume the entire input string.
+            if (ppos.getIndex() == itemPtr->numString.length()) {
+                double parsedValue = fmtobj.getDouble(status);
+                if (U_FAILURE(status) || parsedValue != itemPtr->value) {
+                    errln((UnicodeString)"FAIL: locale " + itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives value " + parsedValue);
+                }
+            } else {
+                errln((UnicodeString)"FAIL: locale " + itemPtr->locale + ", lenient " + itemPtr->lenient + ", parse of \"" + itemPtr->numString + "\" gives position " + ppos.getIndex());
+            }
+        } else {
+            // Both arguments are C strings, so both conversions must be %s.
+            dataerrln("FAIL: NumberFormat::createInstance for locale %s gives error %s", itemPtr->locale, u_errorName(status));
+        }
+        // The formatter was allocated for this row only; release it on every path
+        // (deleting a null pointer is a no-op).
+        delete numfmt;
+    }
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */