]> granicus.if.org Git - icu/commitdiff
ICU-13644 Property mapper for parsing is building. Refactoring CurrencySymbols a...
authorShane Carr <shane@unicode.org>
Wed, 21 Mar 2018 05:17:28 +0000 (05:17 +0000)
committerShane Carr <shane@unicode.org>
Wed, 21 Mar 2018 05:17:28 +0000 (05:17 +0000)
X-SVN-Rev: 41130

19 files changed:
icu4c/source/i18n/Makefile.in
icu4c/source/i18n/number_currencysymbols.cpp
icu4c/source/i18n/number_currencysymbols.h
icu4c/source/i18n/number_decimfmtprops.cpp
icu4c/source/i18n/number_decimfmtprops.h
icu4c/source/i18n/number_formatimpl.cpp
icu4c/source/i18n/number_formatimpl.h
icu4c/source/i18n/number_mapper.cpp
icu4c/source/i18n/number_mapper.h
icu4c/source/i18n/numparse_affixes.cpp
icu4c/source/i18n/numparse_affixes.h
icu4c/source/i18n/numparse_currency.cpp
icu4c/source/i18n/numparse_currency.h
icu4c/source/i18n/numparse_impl.cpp
icu4c/source/i18n/numparse_impl.h
icu4c/source/i18n/numparse_validators.cpp [new file with mode: 0644]
icu4c/source/i18n/numparse_validators.h [new file with mode: 0644]
icu4c/source/test/intltest/numbertest_parse.cpp
icu4c/source/test/intltest/numbertest_patternmodifier.cpp

index e0fc77fee08a50c44cb50fbfc369e3d15056699c..e249995b47ace82949ea3e7f5bb78e5678123423 100644 (file)
@@ -107,7 +107,7 @@ double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \
 double-conversion-cached-powers.o double-conversion-diy-fp.o double-conversion-fast-dtoa.o \
 numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
 numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o \
-numparse_currency.o numparse_affixes.o numparse_compositions.o \
+numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \
 number_mapper.o number_multiplier.o number_currencysymbols.o
 
 
index 0c946f9fc1f93576f4a2dd8300bb24776b97acf4..051212fb6b9bd1c3de2b34d7561e9c9f4549d4bf 100644 (file)
@@ -18,12 +18,39 @@ using namespace icu::number::impl;
 
 
 CurrencySymbols::CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status)
-        : fCurrency(currency), fLocaleName(locale.getName(), status) {}
+        : fCurrency(currency), fLocaleName(locale.getName(), status) {
+    fCurrencySymbol.setToBogus();
+    fIntlCurrencySymbol.setToBogus();
+}
+
+CurrencySymbols::CurrencySymbols(CurrencyUnit currency, const Locale& locale,
+                                 const DecimalFormatSymbols& symbols, UErrorCode& status)
+        : CurrencySymbols(currency, locale, status) {
+    // The delegated constructor left both overrides bogus; store only the custom symbols, if any.
+    if (symbols.isCustomCurrencySymbol()) {
+        fCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kCurrencySymbol);
+    }
+    if (symbols.isCustomIntlCurrencySymbol()) {
+        fIntlCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kIntlCurrencySymbol);
+    }
+}
+
+const char16_t* CurrencySymbols::getIsoCode() const {
+    return fCurrency.getISOCurrency();  // forwarded straight from the stored CurrencyUnit
+}
 
 UnicodeString CurrencySymbols::getNarrowCurrencySymbol(UErrorCode& status) const {
+    // Note: currently no override is available for narrow currency symbol
     return loadSymbol(UCURR_NARROW_SYMBOL_NAME, status);
 }
 
+UnicodeString CurrencySymbols::getCurrencySymbol(UErrorCode& status) const {
+    if (fCurrencySymbol.isBogus()) {  // no override stored: load the symbol from data
+        return loadSymbol(UCURR_SYMBOL_NAME, status);
+    }
+    return fCurrencySymbol;  // override saved by the DecimalFormatSymbols constructor
+}
+
 UnicodeString CurrencySymbols::loadSymbol(UCurrNameStyle selector, UErrorCode& status) const {
     UBool ignoredIsChoiceFormatFillIn = FALSE;
     int32_t symbolLen = 0;
@@ -38,6 +65,14 @@ UnicodeString CurrencySymbols::loadSymbol(UCurrNameStyle selector, UErrorCode& s
     return UnicodeString(TRUE, symbol, symbolLen);
 }
 
+UnicodeString CurrencySymbols::getIntlCurrencySymbol(UErrorCode&) const {
+    if (!fIntlCurrencySymbol.isBogus()) {
+        return fIntlCurrencySymbol;
+    }
+    // Copy the ISO code: an aliasing string would dangle once this object (owner of fCurrency) dies.
+    return UnicodeString(fCurrency.getISOCurrency(), 3);
+}
+
 UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UErrorCode& status) const {
     UBool isChoiceFormat = FALSE;
     int32_t symbolLen = 0;
@@ -53,46 +88,6 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError
 }
 
 
-CurrencyDataSymbols::CurrencyDataSymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status)
-        : CurrencySymbols(currency, locale, status) {}
-
-UnicodeString CurrencyDataSymbols::getCurrencySymbol(UErrorCode& status) const {
-    return loadSymbol(UCURR_SYMBOL_NAME, status);
-}
-
-UnicodeString CurrencyDataSymbols::getIntlCurrencySymbol(UErrorCode&) const {
-    // Readonly-aliasing char16_t* constructor:
-    return UnicodeString(TRUE, fCurrency.getISOCurrency(), 3);
-}
-
-
-CurrencyCustomSymbols::CurrencyCustomSymbols(CurrencyUnit currency, const Locale& locale,
-                                             const DecimalFormatSymbols& symbols, UErrorCode& status)
-        : CurrencySymbols(currency, locale, status) {
-    // Hit the data bundle if the DecimalFormatSymbols version is not custom.
-    // Note: the CurrencyDataSymbols implementation waits to hit the data bundle until requested.
-    if (symbols.isCustomCurrencySymbol()) {
-        fCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kCurrencySymbol);
-    } else {
-        fCurrencySymbol = loadSymbol(UCURR_SYMBOL_NAME, status);
-    }
-    if (symbols.isCustomIntlCurrencySymbol()) {
-        fIntlCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kIntlCurrencySymbol);
-    } else {
-        // UnicodeString copy constructor since we don't know about the lifetime of the CurrencyUnit
-        fIntlCurrencySymbol = UnicodeString(currency.getISOCurrency(), 3);
-    }
-}
-
-UnicodeString CurrencyCustomSymbols::getCurrencySymbol(UErrorCode&) const {
-    return fCurrencySymbol;
-}
-
-UnicodeString CurrencyCustomSymbols::getIntlCurrencySymbol(UErrorCode&) const {
-    return fIntlCurrencySymbol;
-}
-
-
 CurrencyUnit
 icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale,
                                    UErrorCode& status) {
index 63810b004343811e8e9b316384265b216ffdc576..eab1a661db93c86218f7d7f9c83193c3dffea266 100644 (file)
@@ -15,55 +15,37 @@ U_NAMESPACE_BEGIN namespace number {
 namespace impl {
 
 
-class CurrencySymbols {
+class CurrencySymbols : public UMemory {
   public:
     CurrencySymbols() = default; // default constructor: leaves class in valid but undefined state
 
-    explicit CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status);
+    /** Creates an instance in which all symbols are loaded from data. */
+    CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status);
 
-    virtual UnicodeString getCurrencySymbol(UErrorCode& status) const = 0;
+    /** Creates an instance in which some symbols might be pre-populated. */
+    CurrencySymbols(CurrencyUnit currency, const Locale& locale, const DecimalFormatSymbols& symbols,
+                    UErrorCode& status);
 
-    virtual UnicodeString getIntlCurrencySymbol(UErrorCode& status) const = 0;
+    const char16_t* getIsoCode() const;
 
-    // Put narrow and plural symbols in the base class since there is no API for overriding them
     UnicodeString getNarrowCurrencySymbol(UErrorCode& status) const;
 
+    UnicodeString getCurrencySymbol(UErrorCode& status) const;
+
+    UnicodeString getIntlCurrencySymbol(UErrorCode& status) const;
+
     UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const;
 
   protected:
+    // Required fields:
     CurrencyUnit fCurrency;
     CharString fLocaleName;
 
-    UnicodeString loadSymbol(UCurrNameStyle selector, UErrorCode& status) const;
-};
-
-
-class CurrencyDataSymbols : public CurrencySymbols, public UMemory {
-  public:
-    CurrencyDataSymbols() = default; // default constructor: leaves class in valid but undefined state
-
-    CurrencyDataSymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status);
-
-    UnicodeString getCurrencySymbol(UErrorCode& status) const U_OVERRIDE;
-
-    UnicodeString getIntlCurrencySymbol(UErrorCode& status) const U_OVERRIDE;
-};
-
-
-class CurrencyCustomSymbols : public CurrencySymbols, public UMemory {
-  public:
-    CurrencyCustomSymbols() = default; // default constructor: leaves class in valid but undefined state
-
-    CurrencyCustomSymbols(CurrencyUnit currency, const Locale& locale, const DecimalFormatSymbols& symbols,
-                          UErrorCode& status);
-
-    UnicodeString getCurrencySymbol(UErrorCode& status) const U_OVERRIDE;
-
-    UnicodeString getIntlCurrencySymbol(UErrorCode& status) const U_OVERRIDE;
-
-  private:
+    // Optional fields:
     UnicodeString fCurrencySymbol;
     UnicodeString fIntlCurrencySymbol;
+
+    UnicodeString loadSymbol(UCurrNameStyle selector, UErrorCode& status) const;
 };
 
 
index 425f11490cb01b12f224d5a4aebba9696c3dba41..54fd6a1ef17e702e3667c21dce2129e99196a36c 100644 (file)
@@ -43,7 +43,7 @@ void DecimalFormatProperties::clear() {
     padString.setToBogus();
     parseCaseSensitive = false;
     parseIntegerOnly = false;
-    parseLenient = false;
+    parseMode.nullify();
     parseNoExponent = false;
     parseToBigDecimal = false;
     parseAllInput = UNUM_MAYBE;
@@ -86,7 +86,7 @@ bool DecimalFormatProperties::operator==(const DecimalFormatProperties &other) c
     eq = eq && padString == other.padString;
     eq = eq && parseCaseSensitive == other.parseCaseSensitive;
     eq = eq && parseIntegerOnly == other.parseIntegerOnly;
-    eq = eq && parseLenient == other.parseLenient;
+    eq = eq && parseMode == other.parseMode;
     eq = eq && parseNoExponent == other.parseNoExponent;
     eq = eq && parseToBigDecimal == other.parseToBigDecimal;
     eq = eq && parseAllInput == other.parseAllInput;
index 890aa1dee8f53eac0adfe60f12915bda00b0e91f..6632dfc5a2a6303387007f4360eea872c3797edf 100644 (file)
@@ -51,6 +51,40 @@ struct U_I18N_API CopyableLocalPointer {
     }
 };
 
+/** Selects the rule set used when parsing a string through the old DecimalFormat API. */
+enum ParseMode {
+    /**
+     * Lenient mode is intended for accepting malformed user input. It uses heuristics to
+     * attempt to parse through typographical errors in the string.
+     */
+    PARSE_MODE_LENIENT,
+
+    /**
+     * Strict mode is intended for requiring well-formed input. More specifically, it
+     * differs from lenient mode in the following ways:
+     *
+     * <ul>
+     * <li>Grouping widths must match the grouping settings. For example, "12,3,45" will fail if the
+     * grouping width is 3, as in the pattern "#,##0".
+     * <li>The string must contain a complete prefix and suffix. For example, if the pattern is
+     * "{#};(#)", then "{123}" or "(123)" would match, but "{123", "123}", and "123" would all fail.
+     * (The latter strings would be accepted in lenient mode.)
+     * <li>Whitespace may not appear at arbitrary places in the string. In lenient mode, whitespace
+     * is allowed to occur arbitrarily before and after prefixes and exponent separators.
+     * <li>Leading grouping separators are not allowed, as in ",123".
+     * <li>Minus and plus signs can only appear if specified in the pattern. In lenient mode, a plus
+     * or minus sign can always precede a number.
+     * <li>The set of characters that can be interpreted as a decimal or grouping separator is
+     * smaller.
+     * <li><strong>If currency parsing is enabled,</strong> currencies must only appear where
+     * specified in either the current pattern string or in a valid pattern string for the current
+     * locale. For example, if the pattern is "¤0.00", then "$1.23" would match, but "1.23$" would
+     * fail to match.
+     * </ul>
+     */
+    PARSE_MODE_STRICT,
+};
+
 // Exported as U_I18N_API because it is needed for the unit test PatternStringTest
 struct U_I18N_API DecimalFormatProperties {
 
@@ -82,7 +116,7 @@ struct U_I18N_API DecimalFormatProperties {
     UnicodeString padString;
     bool parseCaseSensitive;
     bool parseIntegerOnly;
-    bool parseLenient;
+    NullableValue<ParseMode> parseMode;
     bool parseNoExponent;
     bool parseToBigDecimal;
     UNumberFormatAttributeValue parseAllInput; // ICU4C-only
index 3edf73e197ad1c79226bb991ec37023539a882a3..102a786a2270698972bd32e9d5aa6bed557ef280 100644 (file)
@@ -210,8 +210,8 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
         // Used by the DecimalFormat code path
         currencySymbols = macros.currencySymbols;
     } else {
-        fWarehouse.fCurrencyDataSymbols = {currency, macros.locale, status};
-        currencySymbols = &fWarehouse.fCurrencyDataSymbols;
+        fWarehouse.fCurrencySymbols = {currency, macros.locale, status};
+        currencySymbols = &fWarehouse.fCurrencySymbols;
     }
     UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
     if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
index dff500ab944ebff09d017b8fcf2ea76ad2634729..13c22aec75c4ba846864c3ae7a3fb6f888067376 100644 (file)
@@ -62,7 +62,7 @@ class NumberFormatterImpl : public UMemory {
 
     // Value objects possibly used by the number formatting pipeline:
     struct Warehouse {
-        CurrencyDataSymbols fCurrencyDataSymbols;
+        CurrencySymbols fCurrencySymbols;
     } fWarehouse;
 
 
index aeaa647d018068a214d0329583697292d27d321d..84774011011f402e6f3580f612c458701397d558 100644 (file)
@@ -86,10 +86,8 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert
         // NOTE: Slicing is OK.
         macros.unit = currency; // NOLINT
     }
-    if (symbols.isCustomCurrencySymbol() || symbols.isCustomIntlCurrencySymbol()) {
-        warehouse.currencyCustomSymbols = {currency, locale, symbols, status};
-        macros.currencySymbols = &warehouse.currencyCustomSymbols;
-    }
+    warehouse.currencySymbols = {currency, locale, symbols, status};
+    macros.currencySymbols = &warehouse.currencySymbols;
 
     ///////////////////////
     // ROUNDING STRATEGY //
index 7c08eecfb6cee9cc5d9ff6360efdb08fa9cdf9a4..5183b1195f64e1fbd02ea34a8249e09e78c74f1d 100644 (file)
@@ -29,6 +29,12 @@ class PropertiesAffixPatternProvider : public AffixPatternProvider, public UMemo
 
     void setTo(const DecimalFormatProperties& properties, UErrorCode& status);
 
+    PropertiesAffixPatternProvider() = default; // puts instance in valid but undefined state
+
+    PropertiesAffixPatternProvider(const DecimalFormatProperties& properties, UErrorCode& status) {
+        setTo(properties, status);
+    }
+
     // AffixPatternProvider Methods:
 
     char16_t charAt(int32_t flags, int32_t i) const U_OVERRIDE;
@@ -106,7 +112,7 @@ class CurrencyPluralInfoAffixProvider : public AffixPatternProvider, public UMem
 struct DecimalFormatWarehouse {
     PropertiesAffixPatternProvider propertiesAPP;
     CurrencyPluralInfoAffixProvider currencyPluralInfoAPP;
-    CurrencyCustomSymbols currencyCustomSymbols;
+    CurrencySymbols currencySymbols;
 };
 
 
index 97ba4a1c6606a45bcc1d8d5e831907a9c68e601b..e4dd8b76626f95f232a78f96299b1fb5e4cff000 100644 (file)
@@ -190,8 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
 }
 
 NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
-    return fCurrency = {{fSetupData->locale, status},
-                        {fSetupData->currencyCode, fSetupData->currency1, fSetupData->currency2}};
+    return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}};
 }
 
 IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
index 59789e039500b254ea910a7fb6ac13b4a45937c3..8a09983c9e18a424ca1808cc1d3d2f1aa2da9172 100644 (file)
@@ -11,6 +11,7 @@
 #include "numparse_symbols.h"
 #include "numparse_currency.h"
 #include "number_affixutils.h"
+#include "number_currencysymbols.h"
 
 #include <array>
 
@@ -23,6 +24,7 @@ class AffixPatternMatcher;
 
 using ::icu::number::impl::AffixPatternProvider;
 using ::icu::number::impl::TokenConsumer;
+using ::icu::number::impl::CurrencySymbols;
 
 
 class CodePointMatcher : public NumberParseMatcher, public UMemory {
@@ -73,9 +75,7 @@ class CodePointMatcherWarehouse : public UMemory {
 
 
 struct AffixTokenMatcherSetupData {
-    const UChar* currencyCode;
-    const UnicodeString& currency1;
-    const UnicodeString& currency2;
+    const CurrencySymbols& currencySymbols;
     const DecimalFormatSymbols& dfs;
     IgnorablesMatcher& ignorables;
     const Locale& locale;
index b3a317ef716b24127f85ee31b9f346fe6df93614..064ba73fc002347ce9012f2c3ebf29c3b4197cf5 100644 (file)
@@ -75,10 +75,10 @@ UnicodeString CurrencyNamesMatcher::toString() const {
 }
 
 
-CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
-                                             const UnicodeString& currency2)
-        : fCurrency1(currency1), fCurrency2(currency2) {
-    utils::copyCurrencyCode(fCurrencyCode, currencyCode);
+CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status)
+        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
+          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) {
+    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
 }
 
 bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
index 3d8cb3a2bfbbf61411bec9f98cbc8302d67d76e2..dd0490a9af5b38aa7e860fe2a22e05229239658a 100644 (file)
 #include "numparse_types.h"
 #include "numparse_compositions.h"
 #include "charstr.h"
+#include "number_currencysymbols.h"
 
 U_NAMESPACE_BEGIN namespace numparse {
 namespace impl {
 
+using ::icu::number::impl::CurrencySymbols;
 
 /**
  * Matches currencies according to all available strings in locale data.
@@ -46,8 +48,7 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
   public:
     CurrencyCustomMatcher() = default;  // WARNING: Leaves the object in an unusable state
 
-    CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
-                          const UnicodeString& currency2);
+    CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status);
 
     bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
 
index 1d29aee758d3a5abc82fb3678ce91a9a6ed81dcc..32d904f99a7bf5276a9f37b8dce1350adb8343e2 100644 (file)
@@ -9,6 +9,9 @@
 // Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT
 
+#include <typeinfo>
+#include <array>
+#include <iostream>
 #include "number_types.h"
 #include "number_patternstring.h"
 #include "numparse_types.h"
 #include "numparse_symbols.h"
 #include "numparse_decimal.h"
 #include "unicode/numberformatter.h"
-
-#include <typeinfo>
-#include <array>
-#include <iostream>
 #include "cstr.h"
+#include "number_mapper.h"
+#include "numparse_unisets.h"
 
 using namespace icu;
 using namespace icu::number;
@@ -33,22 +34,23 @@ NumberParserImpl*
 NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
                                      parse_flags_t parseFlags, UErrorCode& status) {
 
-    auto* parser = new NumberParserImpl(parseFlags, true);
+    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, true));
     DecimalFormatSymbols symbols(locale, status);
 
     parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
     IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
 
-    const UChar currencyCode[] = u"USD";
-    UnicodeString currency1(u"IU$");
-    UnicodeString currency2(u"ICU");
+    DecimalFormatSymbols dfs(locale, status);
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
+    CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
 
     ParsedPatternInfo patternInfo;
     PatternParser::parseToPatternInfo(patternString, patternInfo, status);
 
     // The following statements set up the affix matchers.
     AffixTokenMatcherSetupData affixSetupData = {
-            currencyCode, currency1, currency2, symbols, ignorables, locale};
+            currencySymbols, symbols, ignorables, locale};
     parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
     parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
     parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
@@ -71,17 +73,129 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
 //    parser.addMatcher(new RequireNumberMatcher());
 
     parser->freeze();
-    return parser;
+    return parser.orphan();
 }
 
-//NumberParserImpl*
-//NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties,
-//                                             DecimalFormatSymbols symbols, bool parseCurrency,
-//                                             bool optimize, UErrorCode& status) {
-//    // TODO
-//    status = U_UNSUPPORTED_ERROR;
-//    return nullptr;
-//}
+// Builds a frozen NumberParserImpl from DecimalFormat properties; the caller owns the returned object.
+NumberParserImpl*
+NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties,
+                                             const DecimalFormatSymbols& symbols, bool parseCurrency,
+                                             bool computeLeads, UErrorCode& status) {
+    Locale locale = symbols.getLocale();
+    PropertiesAffixPatternProvider patternInfo(properties, status);
+    CurrencyUnit currency = resolveCurrency(properties, locale, status);
+    CurrencySymbols currencySymbols(currency, locale, symbols, status);
+    bool isStrict = properties.parseMode.getOrDefault(PARSE_MODE_STRICT) == PARSE_MODE_STRICT;
+    Grouper grouper = Grouper::forProperties(properties);
+    int parseFlags = 0;
+    // Fraction grouping is disabled by default because it has never been supported in DecimalFormat
+    parseFlags |= PARSE_FLAG_FRACTION_GROUPING_DISABLED;
+    if (!properties.parseCaseSensitive) {
+        parseFlags |= PARSE_FLAG_IGNORE_CASE;
+    }
+    if (properties.parseIntegerOnly) {
+        parseFlags |= PARSE_FLAG_INTEGER_ONLY;
+    }
+    if (properties.signAlwaysShown) {
+        parseFlags |= PARSE_FLAG_PLUS_SIGN_ALLOWED;
+    }
+    if (isStrict) {
+        parseFlags |= PARSE_FLAG_STRICT_GROUPING_SIZE;
+        parseFlags |= PARSE_FLAG_STRICT_SEPARATORS;
+        parseFlags |= PARSE_FLAG_USE_FULL_AFFIXES;
+        parseFlags |= PARSE_FLAG_EXACT_AFFIX;
+    } else {
+        parseFlags |= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
+    }
+    if (grouper.getPrimary() <= 0) {
+        parseFlags |= PARSE_FLAG_GROUPING_DISABLED;
+    }
+    if (parseCurrency || patternInfo.hasCurrencySign()) {
+        parseFlags |= PARSE_FLAG_MONETARY_SEPARATORS;
+    }
+    if (computeLeads) {
+        parseFlags |= PARSE_FLAG_OPTIMIZE;
+    }
+    // The parser must own the ignorables matcher: fLocalMatchers.ignorables is what addMatcher()
+    // registers below, and AffixTokenMatcherSetupData holds the matcher by reference.
+    LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, computeLeads));
+    parser->fLocalMatchers.ignorables = {
+            isStrict ? unisets::STRICT_IGNORABLES : unisets::DEFAULT_IGNORABLES};
+    IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
+
+    //////////////////////
+    /// AFFIX MATCHERS ///
+    //////////////////////
+
+    // The following statements set up the affix matchers.
+    AffixTokenMatcherSetupData affixSetupData = {currencySymbols, symbols, ignorables, locale};
+    parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
+    parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
+    parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
+            patternInfo, *parser, ignorables, parseFlags, status);
+
+    ////////////////////////
+    /// CURRENCY MATCHER ///
+    ////////////////////////
+
+    if (parseCurrency || patternInfo.hasCurrencySign()) {
+        parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status});
+        parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
+    }
+
+    ///////////////////////////////
+    /// OTHER STANDARD MATCHERS ///
+    ///////////////////////////////
+
+    if (!isStrict) {
+        parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
+        parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
+        parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
+        parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
+        parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
+    }
+    parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
+    UnicodeString padString = properties.padString;
+    if (!padString.isBogus() && !ignorables.getSet()->contains(padString)) {
+        parser->addMatcher(parser->fLocalMatchers.padding = {padString});
+    }
+    parser->addMatcher(parser->fLocalMatchers.ignorables);
+    parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
+    if (!properties.parseNoExponent) {
+        parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
+    }
+
+    //////////////////
+    /// VALIDATORS ///
+    //////////////////
+
+    parser->addMatcher(parser->fLocalValidators.number = {});
+    if (isStrict) {
+        parser->addMatcher(parser->fLocalValidators.affix = {});
+    }
+    if (isStrict && properties.minimumExponentDigits > 0) {
+        parser->addMatcher(parser->fLocalValidators.exponent = {});
+    }
+    if (parseCurrency) {
+        parser->addMatcher(parser->fLocalValidators.currency = {});
+    }
+    if (properties.decimalPatternMatchRequired) {
+        bool patternHasDecimalSeparator =
+                properties.decimalSeparatorAlwaysShown || properties.maximumFractionDigits != 0;
+        parser->addMatcher(parser->fLocalValidators.decimalSeparator = {patternHasDecimalSeparator});
+    }
+
+    // TODO: MULTIPLIER
+//    if (properties.getMultiplier() != null) {
+//        // We need to use a math context in order to prevent non-terminating decimal expansions.
+//        // This is only used when dividing by the multiplier.
+//        parser.addMatcher(new MultiplierHandler(properties.getMultiplier(),
+//                RoundingUtils.getMathContextOr34Digits(properties)));
+//    }
+
+    parser->freeze();
+    return parser.orphan();
+}
 
 NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads)
         : fParseFlags(parseFlags), fComputeLeads(computeLeads) {
index 901c226a13b6f68d4a56820127cee71b2531cfe1..d05d7a24ac1a928a3ffe4177f7a6ab0664d996b4 100644 (file)
@@ -15,6 +15,8 @@
 #include "numparse_currency.h"
 #include "numparse_affixes.h"
 #include "number_decimfmtprops.h"
+#include "unicode/localpointer.h"
+#include "numparse_validators.h"
 
 U_NAMESPACE_BEGIN namespace numparse {
 namespace impl {
@@ -27,7 +29,7 @@ class NumberParserImpl : public MutableMatcherCollection {
                                                 parse_flags_t parseFlags, UErrorCode& status);
 
     static NumberParserImpl* createParserFromProperties(
-            const number::impl::DecimalFormatProperties& properties, DecimalFormatSymbols symbols,
+            const number::impl::DecimalFormatProperties& properties, const DecimalFormatSymbols& symbols,
             bool parseCurrency, bool optimize, UErrorCode& status);
 
     void addMatcher(NumberParseMatcher& matcher) override;
@@ -64,9 +66,17 @@ class NumberParserImpl : public MutableMatcherCollection {
         DecimalMatcher decimal;
         ScientificMatcher scientific;
         CurrencyNamesMatcher currencyNames;
+        CurrencyCustomMatcher currencyCustom;
         AffixMatcherWarehouse affixMatcherWarehouse;
         AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
     } fLocalMatchers;
+    struct {
+        RequireAffixValidator affix;
+        RequireCurrencyValidator currency;
+        RequireDecimalSeparatorValidator decimalSeparator;
+        RequireExponentValidator exponent;
+        RequireNumberValidator number;
+    } fLocalValidators;
 
     NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
 
diff --git a/icu4c/source/i18n/numparse_validators.cpp b/icu4c/source/i18n/numparse_validators.cpp
new file mode 100644 (file)
index 0000000..724b0cf
--- /dev/null
@@ -0,0 +1,83 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_types.h"
+#include "numparse_validators.h"
+#include "numparse_unisets.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+void RequireAffixValidator::postProcess(ParsedNumber& result) const {
+    // Fail the parse unless both the prefix and the suffix are set (non-bogus).
+    const bool affixMissing = result.prefix.isBogus() || result.suffix.isBogus();
+    if (affixMissing) {
+        result.flags |= FLAG_FAIL;
+    }
+}
+
+UnicodeString RequireAffixValidator::toString() const {
+    return u"<ReqAffix>";
+}
+
+
+void RequireCurrencyValidator::postProcess(ParsedNumber& result) const {
+    // An empty currency code (leading NUL) fails the parse.
+    if (result.currencyCode[0] != 0) { return; }
+    result.flags |= FLAG_FAIL;
+}
+
+UnicodeString RequireCurrencyValidator::toString() const {
+    return u"<ReqCurrency>";
+}
+
+
+RequireDecimalSeparatorValidator::RequireDecimalSeparatorValidator(bool patternHasDecimalSeparator)
+        : fPatternHasDecimalSeparator(patternHasDecimalSeparator) {
+}
+
+void RequireDecimalSeparatorValidator::postProcess(ParsedNumber& result) const {
+    // The parse must carry the decimal-separator flag exactly when the pattern has a separator.
+    const bool sawSeparator = (result.flags & FLAG_HAS_DECIMAL_SEPARATOR) != 0;
+    if (sawSeparator == fPatternHasDecimalSeparator) { return; }
+    result.flags |= FLAG_FAIL;
+}
+
+UnicodeString RequireDecimalSeparatorValidator::toString() const {
+    return u"<ReqDecimal>";
+}
+
+
+void RequireExponentValidator::postProcess(ParsedNumber& result) const {
+    // Fail the parse when the exponent flag is not set on the result.
+    if ((result.flags & FLAG_HAS_EXPONENT) != 0) { return; }
+    result.flags |= FLAG_FAIL;
+}
+
+UnicodeString RequireExponentValidator::toString() const {
+    return u"<ReqExponent>";
+}
+
+
+void RequireNumberValidator::postProcess(ParsedNumber& result) const {
+    // A result in which no number was seen fails the parse.
+    const bool numberSeen = result.seenNumber();
+    if (numberSeen) { return; }
+    result.flags |= FLAG_FAIL;
+}
+
+UnicodeString RequireNumberValidator::toString() const {
+    return u"<ReqNumber>";
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT */
diff --git a/icu4c/source/i18n/numparse_validators.h b/icu4c/source/i18n/numparse_validators.h
new file mode 100644 (file)
index 0000000..d158b23
--- /dev/null
@@ -0,0 +1,85 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __SOURCE_NUMPARSE_VALIDATORS_H__
+#define __SOURCE_NUMPARSE_VALIDATORS_H__
+
+#include "numparse_types.h"
+#include "numparse_unisets.h"
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+
+
+/**
+ * Common base for validators: NumberParseMatcher subclasses whose match()
+ * never matches and whose only real work happens in postProcess().
+ * Concrete validators must implement postProcess().
+ */
+class ValidationMatcher : public NumberParseMatcher {
+  public:
+    /** Ignores all arguments and always returns false. */
+    bool match(StringSegment&, ParsedNumber&, UErrorCode&) const U_OVERRIDE {
+        // No-op
+        return false;
+    }
+
+    /** Returns the set obtained from unisets::get(unisets::EMPTY). */
+    const UnicodeSet& getLeadCodePoints() U_OVERRIDE {
+        // No-op
+        return *unisets::get(unisets::EMPTY);
+    }
+
+    /** Validation hook; pure virtual here so that every validator supplies its own check. */
+    virtual void postProcess(ParsedNumber& result) const U_OVERRIDE = 0;
+};
+
+
+/**
+ * Validator that fails the parse (sets FLAG_FAIL) when the parsed prefix or
+ * suffix is bogus.
+ */
+class RequireAffixValidator : public ValidationMatcher, public UMemory {
+  public:
+    /** Sets FLAG_FAIL on result if result.prefix or result.suffix is bogus. */
+    void postProcess(ParsedNumber& result) const U_OVERRIDE;
+
+    /** Returns the label "<ReqAffix>". */
+    UnicodeString toString() const U_OVERRIDE;
+};
+
+
+/**
+ * Validator that fails the parse (sets FLAG_FAIL) when the parsed currency
+ * code starts with a zero code unit.
+ */
+class RequireCurrencyValidator : public ValidationMatcher, public UMemory {
+  public:
+    /** Sets FLAG_FAIL on result if result.currencyCode[0] is zero. */
+    void postProcess(ParsedNumber& result) const U_OVERRIDE;
+
+    /** Returns the label "<ReqCurrency>". */
+    UnicodeString toString() const U_OVERRIDE;
+};
+
+
+/**
+ * Validator that fails the parse (sets FLAG_FAIL) when the presence of a
+ * decimal separator in the parse result disagrees with the expectation
+ * given at construction.
+ */
+class RequireDecimalSeparatorValidator : public ValidationMatcher, public UMemory {
+  public:
+    /**
+     * Default instance. The stored expectation is false, so such an instance
+     * fails any parse whose FLAG_HAS_DECIMAL_SEPARATOR bit is set.
+     */
+    RequireDecimalSeparatorValidator() = default;
+
+    /**
+     * @param patternHasDecimalSeparator expectation that postProcess() compares
+     *        against the FLAG_HAS_DECIMAL_SEPARATOR bit of the parse result.
+     */
+    RequireDecimalSeparatorValidator(bool patternHasDecimalSeparator);
+
+    /** Sets FLAG_FAIL on result if the parse and the stored expectation differ. */
+    void postProcess(ParsedNumber& result) const U_OVERRIDE;
+
+    /** Returns the label "<ReqDecimal>". */
+    UnicodeString toString() const U_OVERRIDE;
+
+  private:
+    // Initialized in-class so that a default-constructed validator never reads
+    // an indeterminate value in postProcess().
+    bool fPatternHasDecimalSeparator = false;
+};
+
+
+/**
+ * Validator that fails the parse (sets FLAG_FAIL) when FLAG_HAS_EXPONENT is
+ * not set on the parse result.
+ */
+class RequireExponentValidator : public ValidationMatcher, public UMemory {
+  public:
+    /** Sets FLAG_FAIL on result if FLAG_HAS_EXPONENT is absent from result.flags. */
+    void postProcess(ParsedNumber& result) const U_OVERRIDE;
+
+    /** Returns the label "<ReqExponent>". */
+    UnicodeString toString() const U_OVERRIDE;
+};
+
+
+/**
+ * Validator that fails the parse (sets FLAG_FAIL) when no number was matched,
+ * as reported by ParsedNumber::seenNumber().
+ */
+class RequireNumberValidator : public ValidationMatcher, public UMemory {
+  public:
+    /** Sets FLAG_FAIL on result if result.seenNumber() is false. */
+    void postProcess(ParsedNumber& result) const U_OVERRIDE;
+
+    /** Returns the label "<ReqNumber>". */
+    UnicodeString toString() const U_OVERRIDE;
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__SOURCE_NUMPARSE_VALIDATORS_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
index 16323b52db1461a475d25e40b39a03b5a4cc29cb..160bab4fbcfcf34aad5442e1ab2e5f1434425dc0 100644 (file)
@@ -216,13 +216,15 @@ void NumberParserTest::testCurrencyAnyMatcher() {
     IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
 
     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
+    Locale locale = Locale::getEnglish();
+
+    DecimalFormatSymbols dfs(locale, status);
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
+    CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
+
     AffixTokenMatcherSetupData affixSetupData = {
-            u"ICU",
-            u"IU$",
-            u"ICU",
-            {"en", status},
-            ignorables,
-            "en"};
+            currencySymbols, {"en", status}, ignorables, "en"};
     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
     NumberParseMatcher& matcher = warehouse.currency(status);
 
@@ -254,14 +256,16 @@ void NumberParserTest::testCurrencyAnyMatcher() {
 
 void NumberParserTest::testAffixPatternMatcher() {
     IcuTestErrorCode status(*this, "testAffixPatternMatcher");
+    Locale locale = Locale::getEnglish();
     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
+
+    DecimalFormatSymbols dfs(locale, status);
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
+    CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
+
     AffixTokenMatcherSetupData affixSetupData = {
-            u"USD",
-            u"foo",
-            u"bar",
-            {"en", status},
-            ignorables,
-            "en"};
+            currencySymbols, {"en", status}, ignorables, "en"};
     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
 
     static const struct TestCase {
index bd8acc9697873c1591bb1e903748ead14451826f..df599d5d0bdcf476b74aa4e894a9703f5478517e 100644 (file)
@@ -28,7 +28,7 @@ void PatternModifierTest::testBasic() {
     mod.setPatternInfo(&patternInfo);
     mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
     DecimalFormatSymbols symbols(Locale::getEnglish(), status);
-    CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status);
+    CurrencySymbols currencySymbols({u"USD", status}, "en", status);
     assertSuccess("Spot 2", status);
     mod.setSymbols(&symbols, &currencySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr);
 
@@ -88,7 +88,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() {
     mod.setPatternInfo(&patternInfo);
     mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
     DecimalFormatSymbols symbols(Locale::getEnglish(), status);
-    CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status);
+    CurrencySymbols currencySymbols({u"USD", status}, "en", status);
     assertSuccess("Spot 2", status);
     mod.setSymbols(&symbols, &currencySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr);
     mod.setNumberProperties(1, StandardPlural::Form::COUNT);
@@ -124,7 +124,7 @@ void PatternModifierTest::testMutableEqualsImmutable() {
     mod.setPatternInfo(&patternInfo);
     mod.setPatternAttributes(UNUM_SIGN_AUTO, false);
     DecimalFormatSymbols symbols(Locale::getEnglish(), status);
-    CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status);
+    CurrencySymbols currencySymbols({u"USD", status}, "en", status);
     assertSuccess("Spot 2", status);
     if (U_FAILURE(status)) { return; }
     mod.setSymbols(&symbols, &currencySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr);