granicus.if.org Git - icu/commitdiff
ICU-13634 Refactoring the two separate currency matchers into a single unified CombinedCurrencyMatcher...
author Shane Carr <shane@unicode.org>
Sat, 31 Mar 2018 05:18:51 +0000 (05:18 +0000)
committer Shane Carr <shane@unicode.org>
Sat, 31 Mar 2018 05:18:51 +0000 (05:18 +0000)
X-SVN-Rev: 41181

19 files changed:
icu4c/source/i18n/numparse_affixes.cpp
icu4c/source/i18n/numparse_affixes.h
icu4c/source/i18n/numparse_compositions.cpp
icu4c/source/i18n/numparse_compositions.h
icu4c/source/i18n/numparse_currency.cpp
icu4c/source/i18n/numparse_currency.h
icu4c/source/i18n/numparse_impl.cpp
icu4c/source/i18n/numparse_impl.h
icu4c/source/test/intltest/numbertest.h
icu4c/source/test/intltest/numbertest_parse.cpp
icu4c/source/test/testdata/NumberFormatTestCases.txt
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java [deleted file]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java [deleted file]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java [deleted file]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java

index 83ecdd144b8f633c41bca615e3699491508aad71..013cc01ff8709c2400fe58ce17dd577beb554ca9 100644 (file)
@@ -190,7 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
 }
 
 NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
-    return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}};
+    return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, status};
 }
 
 IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
index 3b79457b6d368534bf1c5de38fa2e963b910d7b3..08a7d912c6a2ed2e1c65515d12a816c5e813f230 100644 (file)
@@ -125,7 +125,7 @@ class AffixTokenMatcherWarehouse : public UMemory {
     PlusSignMatcher fPlusSign;
     PercentMatcher fPercent;
     PermilleMatcher fPermille;
-    CurrencyAnyMatcher fCurrency;
+    CombinedCurrencyMatcher fCurrency;
 
     // Use a child class for code point matchers, since it requires non-default operators.
     CodePointMatcherWarehouse fCodePoints;
index d254c07349d2a2c2e8df902a477959d5cd66ce8b..06aa476a29b914c5873c1fb5ecfa644336aeef9b 100644 (file)
@@ -18,44 +18,6 @@ using namespace icu::numparse;
 using namespace icu::numparse::impl;
 
 
-bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
-    int32_t initialOffset = segment.getOffset();
-    bool maybeMore = false;
-
-    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
-    for (auto& matcher : *this) {
-        maybeMore = maybeMore || matcher->match(segment, result, status);
-        if (segment.getOffset() != initialOffset) {
-            // Match succeeded.
-            // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
-            // accept any string starting with A. Therefore, there is no possibility that matchers
-            // later in the list may be evaluated on longer strings, and we can exit the loop here.
-            break;
-        }
-    }
-
-    // None of the matchers succeeded.
-    return maybeMore;
-}
-
-bool AnyMatcher::smokeTest(const StringSegment& segment) const {
-    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
-    for (auto& matcher : *this) {
-        if (matcher->smokeTest(segment)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-void AnyMatcher::postProcess(ParsedNumber& result) const {
-    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
-    for (auto& matcher : *this) {
-        matcher->postProcess(result);
-    }
-}
-
-
 bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
     ParsedNumber backup(result);
 
index 8d61ab46d320c664f442b14ee7e46463e3a5ab2c..a0b20c3433ca5d767263fbb0ce0470edfcb3d350 100644 (file)
@@ -29,27 +29,29 @@ class CompositionMatcher : public NumberParseMatcher {
 };
 
 
-/**
- * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
- * the first matcher in the list to succeed.
- *
- * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
- *
- * @author sffc
- * @see SeriesMatcher
- */
-class AnyMatcher : public CompositionMatcher {
-  public:
-    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
-    bool smokeTest(const StringSegment& segment) const override;
-
-    void postProcess(ParsedNumber& result) const override;
-
-  protected:
-    // No construction except by subclasses!
-    AnyMatcher() = default;
-};
+// NOTE: AnyMatcher is no longer being used. The previous definition is shown below.
+// The implementation can be found in SVN source control, deleted around March 30, 2018.
+///**
+// * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
+// * the first matcher in the list to succeed.
+// *
+// * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
+// *
+// * @author sffc
+// * @see SeriesMatcher
+// */
+//class AnyMatcher : public CompositionMatcher {
+//  public:
+//    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+//
+//    bool smokeTest(const StringSegment& segment) const override;
+//
+//    void postProcess(ParsedNumber& result) const override;
+//
+//  protected:
+//    // No construction except by subclasses!
+//    AnyMatcher() = default;
+//};
 
 
 /**
index 6dee3d28d4e75d3371e83f5fa84fc4035bb37b86..5c14fd7429e4dd3b0340f0694566925dd957a154 100644 (file)
@@ -20,19 +20,83 @@ using namespace icu::numparse;
 using namespace icu::numparse::impl;
 
 
-CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
-        : fLocaleName(locale.getName(), -1, status) {
+CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols,
+                                                 const DecimalFormatSymbols& dfs, UErrorCode& status)
+        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
+          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
+          afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
+          beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
+          fLocaleName(dfs.getLocale().getName(), -1, status) {
+    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
+
+    // Compute the full set of characters that could be the first in a currency to allow for
+    // efficient smoke test.
+    fLeadCodePoints.add(fCurrency1.char32At(0));
+    fLeadCodePoints.add(fCurrency2.char32At(0));
+    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
     uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
     // Always apply case mapping closure for currencies
     fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
     fLeadCodePoints.freeze();
 }
 
-bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+bool CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result,
+                                    UErrorCode& status) const {
     if (result.currencyCode[0] != 0) {
         return false;
     }
 
+    // Try to match a currency spacing separator.
+    int32_t initialOffset = segment.getOffset();
+    bool maybeMore = false;
+    if (result.seenNumber()) {
+        int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
+        if (overlap == beforeSuffixInsert.length()) {
+            segment.adjustOffset(overlap);
+            // Note: let currency spacing be a weak match. Don't update chars consumed.
+        }
+        maybeMore = maybeMore || overlap == segment.length();
+    }
+
+    // Match the currency string, and reset if we didn't find one.
+    maybeMore = maybeMore || matchCurrency(segment, result, status);
+    if (result.currencyCode[0] == 0) {
+        segment.setOffset(initialOffset);
+        return maybeMore;
+    }
+
+    // Try to match a currency spacing separator.
+    if (!result.seenNumber()) {
+        int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
+        if (overlap == afterPrefixInsert.length()) {
+            segment.adjustOffset(overlap);
+            // Note: let currency spacing be a weak match. Don't update chars consumed.
+        }
+        maybeMore = maybeMore || overlap == segment.length();
+    }
+
+    return maybeMore;
+}
+
+bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
+                                            UErrorCode& status) const {
+
+    int32_t overlap1 = segment.getCommonPrefixLength(fCurrency1);
+    if (overlap1 == fCurrency1.length()) {
+        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
+        segment.adjustOffset(overlap1);
+        result.setCharsConsumed(segment);
+        return segment.length() == 0;
+    }
+
+    int32_t overlap2 = segment.getCommonPrefixLength(fCurrency2);
+    if (overlap2 == fCurrency2.length()) {
+        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
+        segment.adjustOffset(overlap2);
+        result.setCharsConsumed(segment);
+        return segment.length() == 0;
+    }
+
     // NOTE: This call site should be improved with #13584.
     const UnicodeString segmentString = segment.toTempUnicodeString();
 
@@ -48,9 +112,6 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
             result.currencyCode,
             status);
 
-    // Possible partial match
-    bool partialMatch = partialMatchLen == segment.length();
-
     if (U_SUCCESS(status) && ppos.getIndex() != 0) {
         // Complete match.
         // NOTE: The currency code should already be saved in the ParsedNumber.
@@ -58,91 +119,16 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
         result.setCharsConsumed(segment);
     }
 
-    return partialMatch;
+    return overlap1 == segment.length() || overlap2 == segment.length() ||
+           partialMatchLen == segment.length();
 }
 
-bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
+bool CombinedCurrencyMatcher::smokeTest(const StringSegment& segment) const {
     return segment.startsWith(fLeadCodePoints);
 }
 
-UnicodeString CurrencyNamesMatcher::toString() const {
-    return u"<CurrencyNames>";
-}
-
-
-CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status)
-        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
-          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) {
-    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
-}
-
-bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
-    if (result.currencyCode[0] != 0) {
-        return false;
-    }
-
-    int overlap1 = segment.getCommonPrefixLength(fCurrency1);
-    if (overlap1 == fCurrency1.length()) {
-        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
-        segment.adjustOffset(overlap1);
-        result.setCharsConsumed(segment);
-    }
-
-    int overlap2 = segment.getCommonPrefixLength(fCurrency2);
-    if (overlap2 == fCurrency2.length()) {
-        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
-        segment.adjustOffset(overlap2);
-        result.setCharsConsumed(segment);
-    }
-
-    return overlap1 == segment.length() || overlap2 == segment.length();
-}
-
-bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
-    return segment.startsWith(fCurrency1)
-           || segment.startsWith(fCurrency2);
-}
-
-UnicodeString CurrencyCustomMatcher::toString() const {
-    return u"<CurrencyCustom>";
-}
-
-
-CurrencyAnyMatcher::CurrencyAnyMatcher() {
-    fMatcherArray[0] = &fNamesMatcher;
-    fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
-                                       CurrencyCustomMatcher customMatcher)
-        : fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) {
-    fMatcherArray[0] = &fNamesMatcher;
-    fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT
-        : fNamesMatcher(std::move(src.fNamesMatcher)), fCustomMatcher(std::move(src.fCustomMatcher)) {
-    fMatcherArray[0] = &fNamesMatcher;
-    fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT {
-    fNamesMatcher = std::move(src.fNamesMatcher);
-    fCustomMatcher = std::move(src.fCustomMatcher);
-    // Note: do NOT move fMatcherArray
-    return *this;
-}
-
-const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
-    return fMatcherArray;
-}
-
-const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
-    return fMatcherArray + 2;
-}
-
-UnicodeString CurrencyAnyMatcher::toString() const {
-    return u"<CurrencyAny>";
+UnicodeString CombinedCurrencyMatcher::toString() const {
+    return u"<CombinedCurrencyMatcher>";
 }
 
 
index 1c2a57d2c145e6c74d9a4f959b09c50d8da94ed1..fa7d67b9bde8ac89e4f89840cd852e7ee4076411 100644 (file)
@@ -19,38 +19,21 @@ namespace impl {
 using ::icu::number::impl::CurrencySymbols;
 
 /**
- * Matches currencies according to all available strings in locale data.
+ * Matches a currency, either a custom currency or one from the data bundle. The class is called
+ * "combined" to emphasize that the currency string may come from one of multiple sources.
  *
- * The implementation of this class is different between J and C. See #13584 for a follow-up.
+ * Will match currency spacing either before or after the number depending on whether we are currently in
+ * the prefix or suffix.
+ *
+ * The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
  *
  * @author sffc
  */
-class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
-  public:
-    CurrencyNamesMatcher() = default;  // WARNING: Leaves the object in an unusable state
-
-    CurrencyNamesMatcher(const Locale& locale, UErrorCode& status);
-
-    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
-    bool smokeTest(const StringSegment& segment) const override;
-
-    UnicodeString toString() const override;
-
-  private:
-    // We could use Locale instead of CharString here, but
-    // Locale has a non-trivial default constructor.
-    CharString fLocaleName;
-
-    UnicodeSet fLeadCodePoints;
-};
-
-
-class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
+class CombinedCurrencyMatcher : public NumberParseMatcher, public UMemory {
   public:
-    CurrencyCustomMatcher() = default;  // WARNING: Leaves the object in an unusable state
+    CombinedCurrencyMatcher() = default;  // WARNING: Leaves the object in an unusable state
 
-    CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status);
+    CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, UErrorCode& status);
 
     bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
 
@@ -62,36 +45,18 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
     UChar fCurrencyCode[4];
     UnicodeString fCurrency1;
     UnicodeString fCurrency2;
-};
-
-
-/**
- * An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
- */
-class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
-  public:
-    CurrencyAnyMatcher();  // WARNING: Leaves the object in an unusable state
-
-    CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
-
-    // Needs custom move constructor/operator since constructor is nontrivial
 
-    CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT;
+    UnicodeString afterPrefixInsert;
+    UnicodeString beforeSuffixInsert;
 
-    CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
-
-    UnicodeString toString() const override;
-
-  protected:
-    const NumberParseMatcher* const* begin() const override;
-
-    const NumberParseMatcher* const* end() const override;
+    // We could use Locale instead of CharString here, but
+    // Locale has a non-trivial default constructor.
+    CharString fLocaleName;
 
-  private:
-    CurrencyNamesMatcher fNamesMatcher;
-    CurrencyCustomMatcher fCustomMatcher;
+    UnicodeSet fLeadCodePoints;
 
-    const NumberParseMatcher* fMatcherArray[2];
+    /** Matches the currency string without concern for currency spacing. */
+    bool matchCurrency(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
 };
 
 
index 36ddc1f2f191f5da19a7c51e78ea8ba2ed1d134d..89db7001a34eaa921d1231340f1b30ef59555818 100644 (file)
@@ -69,7 +69,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
     parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
     parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
     parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
-    parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
+    parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
 //    parser.addMatcher(new RequireNumberMatcher());
 
     parser->freeze();
@@ -136,8 +136,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
     ////////////////////////
 
     if (parseCurrency || patternInfo.hasCurrencySign()) {
-        parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status});
-        parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
+        parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
     }
 
     ///////////////////////////////
index 96a259a55fb487ab9744f638e1ca992cd890044d..308a2ffcf81b34ba7bbdfe4ed4a0701a595e6afb 100644 (file)
@@ -68,8 +68,7 @@ class NumberParserImpl : public MutableMatcherCollection {
         PlusSignMatcher plusSign;
         DecimalMatcher decimal;
         ScientificMatcher scientific;
-        CurrencyNamesMatcher currencyNames;
-        CurrencyCustomMatcher currencyCustom;
+        CombinedCurrencyMatcher currency;
         AffixMatcherWarehouse affixMatcherWarehouse;
         AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
     } fLocalMatchers;
index f8a16e86539fbe0556de4a01d02207ba3394c16d..e1a84aab1e3331a546a7cfa6e9bcf2948f3e2685 100644 (file)
@@ -226,9 +226,10 @@ class NumberParserTest : public IntlTest {
     void testBasic();
     void testLocaleFi();
     void testSeriesMatcher();
-    void testCurrencyAnyMatcher();
+    void testCombinedCurrencyMatcher();
     void testAffixPatternMatcher();
     void testGroupingDisabled();
+    void testCaseFolding();
 
     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
 };
index 11d6d64cdf72ec2b2790abe11e9d6b679b288491..ec8fee01b839789902b2b8ff8beacfbc3346ae11 100644 (file)
@@ -23,7 +23,7 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
     TESTCASE_AUTO_BEGIN;
         TESTCASE_AUTO(testBasic);
         TESTCASE_AUTO(testSeriesMatcher);
-        TESTCASE_AUTO(testCurrencyAnyMatcher);
+        TESTCASE_AUTO(testCombinedCurrencyMatcher);
         TESTCASE_AUTO(testAffixPatternMatcher);
     TESTCASE_AUTO_END;
 }
@@ -211,8 +211,8 @@ void NumberParserTest::testSeriesMatcher() {
     }
 }
 
-void NumberParserTest::testCurrencyAnyMatcher() {
-    IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
+void NumberParserTest::testCombinedCurrencyMatcher() {
+    IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
 
     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
     Locale locale = Locale::getEnglish();
index b7c7beb86611ac4389be5f616c86a11b8b925459..f46810bf01cad47631a2c146a5a12c9dffe0b39e 100644 (file)
@@ -16,12 +16,12 @@ rt:  "0.###"  1.0         "1"
 # Basics
 fp:  "0.####" 0.10005     "0.1"        0.1
 fp:  -        0.10006     "0.1001"     0.1001
-pat: -        "#0.####"
+pat: -        "0.####"
 fp:  "#.####" 0.10005     "0.1"        0.1
-pat: -        "#0.####"
+pat: -        "0.####"
 
 rt:  "0"      1234        "1234"
-pat: -        "#0"
+pat: -        "0"
 
 # Significant digits                                                  
 fp:  "@@@"    1.234567    "1.23"       1.23
@@ -79,12 +79,12 @@ fpc: -              1234.56/JPY  "\u00A51,235"  1235/JPY
 # ISO codes that overlap display names (QQQ vs. Q)
 # recognize real ISO name in parsing, so, can not use fake name as QQQ
 #fpc: -              123/QQQ      "QQQ123.00"    123/QQQ   # QQQ is fake
-fpc: -              123/GTQ      "GTQ123.00"      123/GTQ
+fpc: -              123/GTQ      "GTQ 123.00"      123/GTQ
 # ChoiceFormat-based display names
 fpc: -              1/INR        "\u20b91.00"      1/INR
 fpc: -              2/INR        "\u20b92.00"      2/INR
 # Display names with shared prefix (YDD vs. Y)
-fpc: -              100/YDD      "YDD100.00"    100/YDD
+fpc: -              100/YDD      "YDD 100.00"    100/YDD
 fpc: -              100/CNY      "CN\u00a5100.00"      100/CNY
 
 # Regression Tests bug#7914
index 4c051572aa12523ebb1ded4c2252fde2704ac8dc..769749f7cb9b71c1ac537ea6ffdb3a3d705ba9cc 100644 (file)
@@ -33,12 +33,8 @@ public class AffixTokenMatcherFactory {
         return PermilleMatcher.getInstance(symbols);
     }
 
-    public AnyMatcher currency() {
-        AnyMatcher any = new AnyMatcher();
-        any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
-        any.addMatcher(CurrencyNamesMatcher.getInstance(locale));
-        any.freeze();
-        return any;
+    public CombinedCurrencyMatcher currency() {
+        return CombinedCurrencyMatcher.getInstance(currency, symbols);
     }
 
     public IgnorablesMatcher ignorables() {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java
deleted file mode 100644 (file)
index e5359ea..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import com.ibm.icu.impl.StringSegment;
-
-/**
- * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
- * the first matcher in the list to succeed.
- *
- * @author sffc
- * @see SeriesMatcher
- */
-public class AnyMatcher implements NumberParseMatcher {
-
-    protected List<NumberParseMatcher> matchers = null;
-    protected boolean frozen = false;
-
-    public void addMatcher(NumberParseMatcher matcher) {
-        assert !frozen;
-        if (matchers == null) {
-            matchers = new ArrayList<NumberParseMatcher>();
-        }
-        matchers.add(matcher);
-    }
-
-    public void freeze() {
-        frozen = true;
-    }
-
-    @Override
-    public boolean match(StringSegment segment, ParsedNumber result) {
-        assert frozen;
-        if (matchers == null) {
-            return false;
-        }
-
-        int initialOffset = segment.getOffset();
-        boolean maybeMore = false;
-        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            maybeMore = maybeMore || matcher.match(segment, result);
-            if (segment.getOffset() != initialOffset) {
-                // Match succeeded.
-                // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
-                // accept any string starting with A. Therefore, there is no possibility that matchers
-                // later in the list may be evaluated on longer strings, and we can exit the loop here.
-                break;
-            }
-        }
-
-        // None of the matchers succeeded.
-        return maybeMore;
-    }
-
-    @Override
-    public boolean smokeTest(StringSegment segment) {
-        assert frozen;
-        if (matchers == null) {
-            return false;
-        }
-
-        for (int i = 0; i < matchers.size(); i++) {
-            if (matchers.get(i).smokeTest(segment)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @Override
-    public void postProcess(ParsedNumber result) {
-        assert frozen;
-        if (matchers == null) {
-            return;
-        }
-
-        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            matcher.postProcess(result);
-        }
-    }
-
-    @Override
-    public String toString() {
-        return "<AnyMatcher " + matchers + ">";
-    }
-
-}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java
new file mode 100644 (file)
index 0000000..c1d1418
--- /dev/null
@@ -0,0 +1,157 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.Iterator;
+
+import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.TextTrieMap;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.Currency.CurrencyStringInfo;
+
+/**
+ * Matches a currency, either a custom currency or one from the data bundle. The class is called
+ * "combined" to emphasize that the currency string may come from one of multiple sources.
+ *
+ * Will match currency spacing either before or after the number depending on whether we are currently in
+ * the prefix or suffix.
+ *
+ * The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
+ *
+ * @author sffc
+ */
+public class CombinedCurrencyMatcher implements NumberParseMatcher {
+
+    private final String isoCode;
+    private final String currency1;
+    private final String currency2;
+
+    private final String afterPrefixInsert;
+    private final String beforeSuffixInsert;
+
+    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
+    private final TextTrieMap<CurrencyStringInfo> symbolTrie;
+
+    private final UnicodeSet leadCodePoints;
+
+    public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs) {
+        // TODO: Cache these instances. They are somewhat expensive.
+        return new CombinedCurrencyMatcher(currency, dfs);
+    }
+
+    private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs) {
+        this.isoCode = currency.getSubtype();
+        this.currency1 = currency.getSymbol(dfs.getULocale());
+        this.currency2 = currency.getCurrencyCode();
+
+        afterPrefixInsert = dfs
+                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false);
+        beforeSuffixInsert = dfs
+                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true);
+
+        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
+        // case folding on long-names but not symbols.
+        longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
+        symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);
+
+        // Compute the full set of characters that could be the first in a currency to allow for
+        // efficient smoke test.
+        leadCodePoints = new UnicodeSet();
+        leadCodePoints.add(currency1.codePointAt(0));
+        leadCodePoints.add(currency2.codePointAt(0));
+        leadCodePoints.add(beforeSuffixInsert.codePointAt(0));
+        longNameTrie.putLeadCodePoints(leadCodePoints);
+        symbolTrie.putLeadCodePoints(leadCodePoints);
+        // Always apply case mapping closure for currencies
+        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
+        leadCodePoints.freeze();
+    }
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (result.currencyCode != null) {
+            return false;
+        }
+
+        // Try to match a currency spacing separator.
+        int initialOffset = segment.getOffset();
+        boolean maybeMore = false;
+        if (result.seenNumber()) {
+            int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
+            if (overlap == beforeSuffixInsert.length()) {
+                segment.adjustOffset(overlap);
+                // Note: let currency spacing be a weak match. Don't update chars consumed.
+            }
+            maybeMore = maybeMore || overlap == segment.length();
+        }
+
+        // Match the currency string, and reset if we didn't find one.
+        maybeMore = maybeMore || matchCurrency(segment, result);
+        if (result.currencyCode == null) {
+            segment.setOffset(initialOffset);
+            return maybeMore;
+        }
+
+        // Try to match a currency spacing separator.
+        if (!result.seenNumber()) {
+            int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
+            if (overlap == afterPrefixInsert.length()) {
+                segment.adjustOffset(overlap);
+                // Note: let currency spacing be a weak match. Don't update chars consumed.
+            }
+            maybeMore = maybeMore || overlap == segment.length();
+        }
+
+        return maybeMore;
+    }
+
+    /** Matches the currency string without concern for currency spacing. */
+    private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
+        int overlap1 = segment.getCommonPrefixLength(currency1);
+        if (overlap1 == currency1.length()) {
+            result.currencyCode = isoCode;
+            segment.adjustOffset(overlap1);
+            result.setCharsConsumed(segment);
+            return segment.length() == 0;
+        }
+
+        int overlap2 = segment.getCommonPrefixLength(currency2);
+        if (overlap2 == currency2.length()) {
+            result.currencyCode = isoCode;
+            segment.adjustOffset(overlap2);
+            result.setCharsConsumed(segment);
+            return segment.length() == 0;
+        }
+
+        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
+        Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
+        if (values == null) {
+            values = symbolTrie.get(segment, 0, trieOutput);
+        }
+        if (values != null) {
+            result.currencyCode = values.next().getISOCode();
+            segment.adjustOffset(trieOutput.matchLength);
+            result.setCharsConsumed(segment);
+        }
+
+        return overlap1 == segment.length() || overlap2 == segment.length() || trieOutput.partialMatch;
+    }
+
+    @Override
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(leadCodePoints);
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<CombinedCurrencyMatcher " + isoCode + ">";
+    }
+
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyCustomMatcher.java
deleted file mode 100644 (file)
index 019af75..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.util.Currency;
-import com.ibm.icu.util.ULocale;
-
-/**
- * A matcher for a single currency instance (not the full trie).
- */
-public class CurrencyCustomMatcher implements NumberParseMatcher {
-
-    private final String isoCode;
-    private final String currency1;
-    private final String currency2;
-
-    public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) {
-        return new CurrencyCustomMatcher(currency.getSubtype(),
-                currency.getSymbol(loc),
-                currency.getCurrencyCode());
-    }
-
-    private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
-        this.isoCode = isoCode;
-        this.currency1 = currency1;
-        this.currency2 = currency2;
-    }
-
-    @Override
-    public boolean match(StringSegment segment, ParsedNumber result) {
-        if (result.currencyCode != null) {
-            return false;
-        }
-
-        int overlap1 = segment.getCommonPrefixLength(currency1);
-        if (overlap1 == currency1.length()) {
-            result.currencyCode = isoCode;
-            segment.adjustOffset(overlap1);
-            result.setCharsConsumed(segment);
-        }
-
-        int overlap2 = segment.getCommonPrefixLength(currency2);
-        if (overlap2 == currency2.length()) {
-            result.currencyCode = isoCode;
-            segment.adjustOffset(overlap2);
-            result.setCharsConsumed(segment);
-        }
-
-        return overlap1 == segment.length() || overlap2 == segment.length();
-    }
-
-    @Override
-    public boolean smokeTest(StringSegment segment) {
-        return segment.startsWith(currency1) || segment.startsWith(currency2);
-    }
-
-    @Override
-    public void postProcess(ParsedNumber result) {
-        // No-op
-    }
-
-    @Override
-    public String toString() {
-        return "<CurrencyMatcher " + isoCode + ">";
-    }
-}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyNamesMatcher.java
deleted file mode 100644 (file)
index be2acd4..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import java.util.Iterator;
-
-import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.impl.TextTrieMap;
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.util.Currency;
-import com.ibm.icu.util.Currency.CurrencyStringInfo;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Matches currencies according to all available strings in locale data.
- *
- * The implementation of this class is different between J and C. See #13584 for a follow-up.
- *
- * @author sffc
- */
-public class CurrencyNamesMatcher implements NumberParseMatcher {
-
-    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
-    private final TextTrieMap<CurrencyStringInfo> symbolTrie;
-
-    private final UnicodeSet leadCodePoints;
-
-    public static CurrencyNamesMatcher getInstance(ULocale locale) {
-        // TODO: Pre-compute some of the more popular locales?
-        return new CurrencyNamesMatcher(locale);
-    }
-
-    private CurrencyNamesMatcher(ULocale locale) {
-        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
-        // case folding on long-names but not symbols.
-        longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
-        symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
-
-        // Compute the full set of characters that could be the first in a currency to allow for
-        // efficient smoke test.
-        leadCodePoints = new UnicodeSet();
-        longNameTrie.putLeadCodePoints(leadCodePoints);
-        symbolTrie.putLeadCodePoints(leadCodePoints);
-        // Always apply case mapping closure for currencies
-        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
-        leadCodePoints.freeze();
-    }
-
-    @Override
-    public boolean match(StringSegment segment, ParsedNumber result) {
-        if (result.currencyCode != null) {
-            return false;
-        }
-
-        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
-        Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
-        if (values == null) {
-            values = symbolTrie.get(segment, 0, trieOutput);
-        }
-        if (values != null) {
-            result.currencyCode = values.next().getISOCode();
-            segment.adjustOffset(trieOutput.matchLength);
-            result.setCharsConsumed(segment);
-        }
-        return trieOutput.partialMatch;
-    }
-
-    @Override
-    public boolean smokeTest(StringSegment segment) {
-        return segment.startsWith(leadCodePoints);
-    }
-
-    @Override
-    public void postProcess(ParsedNumber result) {
-        // No-op
-    }
-
-    @Override
-    public String toString() {
-        return "<CurrencyTrieMatcher>";
-    }
-}
index f91a61ebd4f28aad8a67cbc84d7699a4c1a364da..9283523275bb53f004e51b380b1497158b6b8a4e 100644 (file)
@@ -37,11 +37,12 @@ public class NumberParserImpl {
     public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {
 
         NumberParserImpl parser = new NumberParserImpl(parseFlags);
+        Currency currency = Currency.getInstance("USD");
         DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
         IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
 
         AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
-        factory.currency = Currency.getInstance("USD");
+        factory.currency = currency;
         factory.symbols = symbols;
         factory.ignorables = ignorables;
         factory.locale = locale;
@@ -61,7 +62,7 @@ public class NumberParserImpl {
         parser.addMatcher(InfinityMatcher.getInstance(symbols));
         parser.addMatcher(PaddingMatcher.getInstance("@"));
         parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
-        parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
+        parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
         parser.addMatcher(new RequireNumberValidator());
 
         parser.freeze();
@@ -185,8 +186,7 @@ public class NumberParserImpl {
         ////////////////////////
 
         if (parseCurrency || patternInfo.hasCurrencySign()) {
-            parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
-            parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
+            parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
         }
 
         ///////////////////////////////
index 5f1ca8bfdf6d763ef54aa8379a1485d4bae21acc..06ece4721106b764da6d4699a89cefb81029bdd7 100644 (file)
@@ -5974,4 +5974,14 @@ public class NumberFormatTest extends TestFmwk {
         df.setParseStrict(true);
         expect2(df, 0.5, "50x%");
     }
+
+    /**
+     * Exercises a pattern whose positive prefix and negative suffix are both the double
+     * currency sign, with USD as the currency and strict parsing switched on.
+     * NOTE(review): expect2 is defined outside this hunk; presumably it checks both formatting
+     * and parsing of the given value/string pair -- confirm against TestFmwk/NumberFormatTest.
+     */
+    @Test
+    public void testParseIsoStrict() {
+        DecimalFormat strictIsoFormat = new DecimalFormat(
+                "¤¤0;-0¤¤",
+                DecimalFormatSymbols.getInstance(ULocale.ENGLISH));
+        strictIsoFormat.setCurrency(Currency.getInstance("USD"));
+        strictIsoFormat.setParseStrict(true);
+
+        expect2(strictIsoFormat, 45, "USD 45.00");
+        expect2(strictIsoFormat, -45, "-45.00 USD");
+    }
 }
index b35cd789c963ad2da302a11463d3132a1294d978..57aef1547cae49b9f1aaf793c93fd4989d7055d6 100644 (file)
@@ -13,7 +13,7 @@ import com.ibm.icu.impl.number.CustomSymbolCurrency;
 import com.ibm.icu.impl.number.DecimalFormatProperties;
 import com.ibm.icu.impl.number.parse.AffixPatternMatcher;
 import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory;
-import com.ibm.icu.impl.number.parse.AnyMatcher;
+import com.ibm.icu.impl.number.parse.CombinedCurrencyMatcher;
 import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
 import com.ibm.icu.impl.number.parse.MinusSignMatcher;
 import com.ibm.icu.impl.number.parse.NumberParserImpl;
@@ -229,12 +229,13 @@ public class NumberParserTest {
     }
 
     @Test
-    public void testCurrencyAnyMatcher() {
+    public void testCombinedCurrencyMatcher() {
         AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
         factory.locale = ULocale.ENGLISH;
         CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
         factory.currency = currency;
-        AnyMatcher matcher = factory.currency();
+        factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+        CombinedCurrencyMatcher matcher = factory.currency();
 
         Object[][] cases = new Object[][] {
                 { "", null },