From: Shane Carr Date: Wed, 21 Mar 2018 05:17:28 +0000 (+0000) Subject: ICU-13644 Property mapper for parsing is building. Refactoring CurrencySymbols a... X-Git-Tag: release-62-rc~200^2~87 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8b4c36746891fb68ac1547a58f5b54ed20ad59c6;p=icu ICU-13644 Property mapper for parsing is building. Refactoring CurrencySymbols a bit. X-SVN-Rev: 41130 --- diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index e0fc77fee08..e249995b47a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -107,7 +107,7 @@ double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \ double-conversion-cached-powers.o double-conversion-diy-fp.o double-conversion-fast-dtoa.o \ numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \ numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o \ -numparse_currency.o numparse_affixes.o numparse_compositions.o \ +numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ number_mapper.o number_multiplier.o number_currencysymbols.o diff --git a/icu4c/source/i18n/number_currencysymbols.cpp b/icu4c/source/i18n/number_currencysymbols.cpp index 0c946f9fc1f..051212fb6b9 100644 --- a/icu4c/source/i18n/number_currencysymbols.cpp +++ b/icu4c/source/i18n/number_currencysymbols.cpp @@ -18,12 +18,39 @@ using namespace icu::number::impl; CurrencySymbols::CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status) - : fCurrency(currency), fLocaleName(locale.getName(), status) {} + : fCurrency(currency), fLocaleName(locale.getName(), status) { + fCurrencySymbol.setToBogus(); + fIntlCurrencySymbol.setToBogus(); +} + +CurrencySymbols::CurrencySymbols(CurrencyUnit currency, const Locale& locale, + const DecimalFormatSymbols& symbols, UErrorCode& status) + : CurrencySymbols(currency, locale, status) { + // If either of the overrides is present, save it in the 
local UnicodeString. + if (symbols.isCustomCurrencySymbol()) { + fCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kCurrencySymbol); + } + if (symbols.isCustomIntlCurrencySymbol()) { + fIntlCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kIntlCurrencySymbol); + } +} + +const char16_t* CurrencySymbols::getIsoCode() const { + return fCurrency.getISOCurrency(); +} UnicodeString CurrencySymbols::getNarrowCurrencySymbol(UErrorCode& status) const { + // Note: currently no override is available for narrow currency symbol return loadSymbol(UCURR_NARROW_SYMBOL_NAME, status); } +UnicodeString CurrencySymbols::getCurrencySymbol(UErrorCode& status) const { + if (!fCurrencySymbol.isBogus()) { + return fCurrencySymbol; + } + return loadSymbol(UCURR_SYMBOL_NAME, status); +} + UnicodeString CurrencySymbols::loadSymbol(UCurrNameStyle selector, UErrorCode& status) const { UBool ignoredIsChoiceFormatFillIn = FALSE; int32_t symbolLen = 0; @@ -38,6 +65,14 @@ UnicodeString CurrencySymbols::loadSymbol(UCurrNameStyle selector, UErrorCode& s return UnicodeString(TRUE, symbol, symbolLen); } +UnicodeString CurrencySymbols::getIntlCurrencySymbol(UErrorCode&) const { + if (!fIntlCurrencySymbol.isBogus()) { + return fIntlCurrencySymbol; + } + // Readonly-aliasing char16_t* constructor: + return UnicodeString(TRUE, fCurrency.getISOCurrency(), 3); +} + UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UErrorCode& status) const { UBool isChoiceFormat = FALSE; int32_t symbolLen = 0; @@ -53,46 +88,6 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError } -CurrencyDataSymbols::CurrencyDataSymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status) - : CurrencySymbols(currency, locale, status) {} - -UnicodeString CurrencyDataSymbols::getCurrencySymbol(UErrorCode& status) const { - return loadSymbol(UCURR_SYMBOL_NAME, status); -} - -UnicodeString CurrencyDataSymbols::getIntlCurrencySymbol(UErrorCode&) const { 
- // Readonly-aliasing char16_t* constructor: - return UnicodeString(TRUE, fCurrency.getISOCurrency(), 3); -} - - -CurrencyCustomSymbols::CurrencyCustomSymbols(CurrencyUnit currency, const Locale& locale, - const DecimalFormatSymbols& symbols, UErrorCode& status) - : CurrencySymbols(currency, locale, status) { - // Hit the data bundle if the DecimalFormatSymbols version is not custom. - // Note: the CurrencyDataSymbols implementation waits to hit the data bundle until requested. - if (symbols.isCustomCurrencySymbol()) { - fCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kCurrencySymbol); - } else { - fCurrencySymbol = loadSymbol(UCURR_SYMBOL_NAME, status); - } - if (symbols.isCustomIntlCurrencySymbol()) { - fIntlCurrencySymbol = symbols.getConstSymbol(DecimalFormatSymbols::kIntlCurrencySymbol); - } else { - // UnicodeString copy constructor since we don't know about the lifetime of the CurrencyUnit - fIntlCurrencySymbol = UnicodeString(currency.getISOCurrency(), 3); - } -} - -UnicodeString CurrencyCustomSymbols::getCurrencySymbol(UErrorCode&) const { - return fCurrencySymbol; -} - -UnicodeString CurrencyCustomSymbols::getIntlCurrencySymbol(UErrorCode&) const { - return fIntlCurrencySymbol; -} - - CurrencyUnit icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale, UErrorCode& status) { diff --git a/icu4c/source/i18n/number_currencysymbols.h b/icu4c/source/i18n/number_currencysymbols.h index 63810b00434..eab1a661db9 100644 --- a/icu4c/source/i18n/number_currencysymbols.h +++ b/icu4c/source/i18n/number_currencysymbols.h @@ -15,55 +15,37 @@ U_NAMESPACE_BEGIN namespace number { namespace impl { -class CurrencySymbols { +class CurrencySymbols : public UMemory { public: CurrencySymbols() = default; // default constructor: leaves class in valid but undefined state - explicit CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status); + /** Creates an instance in which all symbols are loaded 
from data. */ + CurrencySymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status); - virtual UnicodeString getCurrencySymbol(UErrorCode& status) const = 0; + /** Creates an instance in which some symbols might be pre-populated. */ + CurrencySymbols(CurrencyUnit currency, const Locale& locale, const DecimalFormatSymbols& symbols, + UErrorCode& status); - virtual UnicodeString getIntlCurrencySymbol(UErrorCode& status) const = 0; + const char16_t* getIsoCode() const; - // Put narrow and plural symbols in the base class since there is no API for overriding them UnicodeString getNarrowCurrencySymbol(UErrorCode& status) const; + UnicodeString getCurrencySymbol(UErrorCode& status) const; + + UnicodeString getIntlCurrencySymbol(UErrorCode& status) const; + UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const; protected: + // Required fields: CurrencyUnit fCurrency; CharString fLocaleName; - UnicodeString loadSymbol(UCurrNameStyle selector, UErrorCode& status) const; -}; - - -class CurrencyDataSymbols : public CurrencySymbols, public UMemory { - public: - CurrencyDataSymbols() = default; // default constructor: leaves class in valid but undefined state - - CurrencyDataSymbols(CurrencyUnit currency, const Locale& locale, UErrorCode& status); - - UnicodeString getCurrencySymbol(UErrorCode& status) const U_OVERRIDE; - - UnicodeString getIntlCurrencySymbol(UErrorCode& status) const U_OVERRIDE; -}; - - -class CurrencyCustomSymbols : public CurrencySymbols, public UMemory { - public: - CurrencyCustomSymbols() = default; // default constructor: leaves class in valid but undefined state - - CurrencyCustomSymbols(CurrencyUnit currency, const Locale& locale, const DecimalFormatSymbols& symbols, - UErrorCode& status); - - UnicodeString getCurrencySymbol(UErrorCode& status) const U_OVERRIDE; - - UnicodeString getIntlCurrencySymbol(UErrorCode& status) const U_OVERRIDE; - - private: + // Optional fields: UnicodeString fCurrencySymbol; 
UnicodeString fIntlCurrencySymbol; + + UnicodeString loadSymbol(UCurrNameStyle selector, UErrorCode& status) const; }; diff --git a/icu4c/source/i18n/number_decimfmtprops.cpp b/icu4c/source/i18n/number_decimfmtprops.cpp index 425f11490cb..54fd6a1ef17 100644 --- a/icu4c/source/i18n/number_decimfmtprops.cpp +++ b/icu4c/source/i18n/number_decimfmtprops.cpp @@ -43,7 +43,7 @@ void DecimalFormatProperties::clear() { padString.setToBogus(); parseCaseSensitive = false; parseIntegerOnly = false; - parseLenient = false; + parseMode.nullify(); parseNoExponent = false; parseToBigDecimal = false; parseAllInput = UNUM_MAYBE; @@ -86,7 +86,7 @@ bool DecimalFormatProperties::operator==(const DecimalFormatProperties &other) c eq = eq && padString == other.padString; eq = eq && parseCaseSensitive == other.parseCaseSensitive; eq = eq && parseIntegerOnly == other.parseIntegerOnly; - eq = eq && parseLenient == other.parseLenient; + eq = eq && parseMode == other.parseMode; eq = eq && parseNoExponent == other.parseNoExponent; eq = eq && parseToBigDecimal == other.parseToBigDecimal; eq = eq && parseAllInput == other.parseAllInput; diff --git a/icu4c/source/i18n/number_decimfmtprops.h b/icu4c/source/i18n/number_decimfmtprops.h index 890aa1dee8f..6632dfc5a2a 100644 --- a/icu4c/source/i18n/number_decimfmtprops.h +++ b/icu4c/source/i18n/number_decimfmtprops.h @@ -51,6 +51,40 @@ struct U_I18N_API CopyableLocalPointer { } }; +/** Controls the set of rules for parsing a string from the old DecimalFormat API. */ +enum ParseMode { + /** + * Lenient mode should be used if you want to accept malformed user input. It will use heuristics + * to attempt to parse through typographical errors in the string. + */ + PARSE_MODE_LENIENT, + + /** + * Strict mode should be used if you want to require that the input is well-formed. 
More + * specifically, it differs from lenient mode in the following ways: + * + * + */ + PARSE_MODE_STRICT, +}; + // Exported as U_I18N_API because it is needed for the unit test PatternStringTest struct U_I18N_API DecimalFormatProperties { @@ -82,7 +116,7 @@ struct U_I18N_API DecimalFormatProperties { UnicodeString padString; bool parseCaseSensitive; bool parseIntegerOnly; - bool parseLenient; + NullableValue parseMode; bool parseNoExponent; bool parseToBigDecimal; UNumberFormatAttributeValue parseAllInput; // ICU4C-only diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp index 3edf73e197a..102a786a227 100644 --- a/icu4c/source/i18n/number_formatimpl.cpp +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -210,8 +210,8 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Used by the DecimalFormat code path currencySymbols = macros.currencySymbols; } else { - fWarehouse.fCurrencyDataSymbols = {currency, macros.locale, status}; - currencySymbols = &fWarehouse.fCurrencyDataSymbols; + fWarehouse.fCurrencySymbols = {currency, macros.locale, status}; + currencySymbols = &fWarehouse.fCurrencySymbols; } UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT; if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) { diff --git a/icu4c/source/i18n/number_formatimpl.h b/icu4c/source/i18n/number_formatimpl.h index dff500ab944..13c22aec75c 100644 --- a/icu4c/source/i18n/number_formatimpl.h +++ b/icu4c/source/i18n/number_formatimpl.h @@ -62,7 +62,7 @@ class NumberFormatterImpl : public UMemory { // Value objects possibly used by the number formatting pipeline: struct Warehouse { - CurrencyDataSymbols fCurrencyDataSymbols; + CurrencySymbols fCurrencySymbols; } fWarehouse; diff --git a/icu4c/source/i18n/number_mapper.cpp b/icu4c/source/i18n/number_mapper.cpp index aeaa647d018..84774011011 100644 --- a/icu4c/source/i18n/number_mapper.cpp +++ b/icu4c/source/i18n/number_mapper.cpp @@ -86,10 +86,8 @@ MacroProps 
NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert // NOTE: Slicing is OK. macros.unit = currency; // NOLINT } - if (symbols.isCustomCurrencySymbol() || symbols.isCustomIntlCurrencySymbol()) { - warehouse.currencyCustomSymbols = {currency, locale, symbols, status}; - macros.currencySymbols = &warehouse.currencyCustomSymbols; - } + warehouse.currencySymbols = {currency, locale, symbols, status}; + macros.currencySymbols = &warehouse.currencySymbols; /////////////////////// // ROUNDING STRATEGY // diff --git a/icu4c/source/i18n/number_mapper.h b/icu4c/source/i18n/number_mapper.h index 7c08eecfb6c..5183b1195f6 100644 --- a/icu4c/source/i18n/number_mapper.h +++ b/icu4c/source/i18n/number_mapper.h @@ -29,6 +29,12 @@ class PropertiesAffixPatternProvider : public AffixPatternProvider, public UMemo void setTo(const DecimalFormatProperties& properties, UErrorCode& status); + PropertiesAffixPatternProvider() = default; // puts instance in valid but undefined state + + PropertiesAffixPatternProvider(const DecimalFormatProperties& properties, UErrorCode& status) { + setTo(properties, status); + } + // AffixPatternProvider Methods: char16_t charAt(int32_t flags, int32_t i) const U_OVERRIDE; @@ -106,7 +112,7 @@ class CurrencyPluralInfoAffixProvider : public AffixPatternProvider, public UMem struct DecimalFormatWarehouse { PropertiesAffixPatternProvider propertiesAPP; CurrencyPluralInfoAffixProvider currencyPluralInfoAPP; - CurrencyCustomSymbols currencyCustomSymbols; + CurrencySymbols currencySymbols; }; diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 97ba4a1c660..e4dd8b76626 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -190,8 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() { } NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) { - return fCurrency = {{fSetupData->locale, status}, - {fSetupData->currencyCode, 
fSetupData->currency1, fSetupData->currency2}}; + return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}}; } IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() { diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h index 59789e03950..8a09983c9e1 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -11,6 +11,7 @@ #include "numparse_symbols.h" #include "numparse_currency.h" #include "number_affixutils.h" +#include "number_currencysymbols.h" #include @@ -23,6 +24,7 @@ class AffixPatternMatcher; using ::icu::number::impl::AffixPatternProvider; using ::icu::number::impl::TokenConsumer; +using ::icu::number::impl::CurrencySymbols; class CodePointMatcher : public NumberParseMatcher, public UMemory { @@ -73,9 +75,7 @@ class CodePointMatcherWarehouse : public UMemory { struct AffixTokenMatcherSetupData { - const UChar* currencyCode; - const UnicodeString& currency1; - const UnicodeString& currency2; + const CurrencySymbols& currencySymbols; const DecimalFormatSymbols& dfs; IgnorablesMatcher& ignorables; const Locale& locale; diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp index b3a317ef716..064ba73fc00 100644 --- a/icu4c/source/i18n/numparse_currency.cpp +++ b/icu4c/source/i18n/numparse_currency.cpp @@ -75,10 +75,10 @@ UnicodeString CurrencyNamesMatcher::toString() const { } -CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1, - const UnicodeString& currency2) - : fCurrency1(currency1), fCurrency2(currency2) { - utils::copyCurrencyCode(fCurrencyCode, currencyCode); +CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status) + : fCurrency1(currencySymbols.getCurrencySymbol(status)), + fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) { + utils::copyCurrencyCode(fCurrencyCode, 
currencySymbols.getIsoCode()); } bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { diff --git a/icu4c/source/i18n/numparse_currency.h b/icu4c/source/i18n/numparse_currency.h index 3d8cb3a2bfb..dd0490a9af5 100644 --- a/icu4c/source/i18n/numparse_currency.h +++ b/icu4c/source/i18n/numparse_currency.h @@ -10,10 +10,12 @@ #include "numparse_types.h" #include "numparse_compositions.h" #include "charstr.h" +#include "number_currencysymbols.h" U_NAMESPACE_BEGIN namespace numparse { namespace impl { +using ::icu::number::impl::CurrencySymbols; /** * Matches currencies according to all available strings in locale data. @@ -46,8 +48,7 @@ class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory { public: CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state - CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1, - const UnicodeString& currency2); + CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status); bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp index 1d29aee758d..32d904f99a7 100644 --- a/icu4c/source/i18n/numparse_impl.cpp +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -9,6 +9,9 @@ // Helpful in toString methods and elsewhere. 
#define UNISTR_FROM_STRING_EXPLICIT +#include +#include +#include #include "number_types.h" #include "number_patternstring.h" #include "numparse_types.h" @@ -16,11 +19,9 @@ #include "numparse_symbols.h" #include "numparse_decimal.h" #include "unicode/numberformatter.h" - -#include -#include -#include #include "cstr.h" +#include "number_mapper.h" +#include "numparse_unisets.h" using namespace icu; using namespace icu::number; @@ -33,22 +34,23 @@ NumberParserImpl* NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString, parse_flags_t parseFlags, UErrorCode& status) { - auto* parser = new NumberParserImpl(parseFlags, true); + LocalPointer parser(new NumberParserImpl(parseFlags, true)); DecimalFormatSymbols symbols(locale, status); parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES}; IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables; - const UChar currencyCode[] = u"USD"; - UnicodeString currency1(u"IU$"); - UnicodeString currency2(u"ICU"); + DecimalFormatSymbols dfs(locale, status); + dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); + dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); + CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); ParsedPatternInfo patternInfo; PatternParser::parseToPatternInfo(patternString, patternInfo, status); // The following statements set up the affix matchers. 
AffixTokenMatcherSetupData affixSetupData = { - currencyCode, currency1, currency2, symbols, ignorables, locale}; + currencySymbols, symbols, ignorables, locale}; parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData}; parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse}; parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers( @@ -71,17 +73,129 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& // parser.addMatcher(new RequireNumberMatcher()); parser->freeze(); - return parser; + return parser.orphan(); } -//NumberParserImpl* -//NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties, -// DecimalFormatSymbols symbols, bool parseCurrency, -// bool optimize, UErrorCode& status) { -// // TODO -// status = U_UNSUPPORTED_ERROR; -// return nullptr; -//} +NumberParserImpl* +NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties, + const DecimalFormatSymbols& symbols, bool parseCurrency, + bool computeLeads, UErrorCode& status) { + Locale locale = symbols.getLocale(); + PropertiesAffixPatternProvider patternInfo(properties, status); + CurrencyUnit currency = resolveCurrency(properties, locale, status); + CurrencySymbols currencySymbols(currency, locale, symbols, status); + bool isStrict = properties.parseMode.getOrDefault(PARSE_MODE_STRICT) == PARSE_MODE_STRICT; + Grouper grouper = Grouper::forProperties(properties); + int parseFlags = 0; + // Fraction grouping is disabled by default because it has never been supported in DecimalFormat + parseFlags |= PARSE_FLAG_FRACTION_GROUPING_DISABLED; + if (!properties.parseCaseSensitive) { + parseFlags |= PARSE_FLAG_IGNORE_CASE; + } + if (properties.parseIntegerOnly) { + parseFlags |= PARSE_FLAG_INTEGER_ONLY; + } + if (properties.signAlwaysShown) { + parseFlags |= PARSE_FLAG_PLUS_SIGN_ALLOWED; + } + if (isStrict) { + parseFlags |=
PARSE_FLAG_STRICT_GROUPING_SIZE; + parseFlags |= PARSE_FLAG_STRICT_SEPARATORS; + parseFlags |= PARSE_FLAG_USE_FULL_AFFIXES; + parseFlags |= PARSE_FLAG_EXACT_AFFIX; + } else { + parseFlags |= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; + } + if (grouper.getPrimary() <= 0) { + parseFlags |= PARSE_FLAG_GROUPING_DISABLED; + } + if (parseCurrency || patternInfo.hasCurrencySign()) { + parseFlags |= PARSE_FLAG_MONETARY_SEPARATORS; + } + if (computeLeads) { + parseFlags |= PARSE_FLAG_OPTIMIZE; + } + IgnorablesMatcher ignorables(isStrict ? unisets::STRICT_IGNORABLES : unisets::DEFAULT_IGNORABLES); // strict mode uses the strict ignorables set + + LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags, computeLeads)); // second ctor argument is the bool computeLeads, not the error code + + ////////////////////// + /// AFFIX MATCHERS /// + ////////////////////// + + // The following statements set up the affix matchers. + AffixTokenMatcherSetupData affixSetupData = { + currencySymbols, + symbols, + ignorables, + locale}; + parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData}; + parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse}; + parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers( + patternInfo, *parser, ignorables, parseFlags, status); + + //////////////////////// + /// CURRENCY MATCHER /// + //////////////////////// + + if (parseCurrency || patternInfo.hasCurrencySign()) { + parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status}); + parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status}); + } + + /////////////////////////////// + /// OTHER STANDARD MATCHERS /// + /////////////////////////////// + + if (!isStrict) { + parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false}); + parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false}); + parser->addMatcher(parser->fLocalMatchers.nan = {symbols}); + 
parser->addMatcher(parser->fLocalMatchers.percent = {symbols}); + parser->addMatcher(parser->fLocalMatchers.permille = {symbols}); + } +
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols}); + UnicodeString padString = properties.padString; + if (!padString.isBogus() && !ignorables.getSet()->contains(padString)) { + parser->addMatcher(parser->fLocalMatchers.padding = {padString}); + } + parser->addMatcher(parser->fLocalMatchers.ignorables); // NOTE(review): fLocalMatchers.ignorables is not assigned in this function (the local `ignorables` is used instead) - confirm + parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags}); + if (!properties.parseNoExponent) { + parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper}); + } + + ////////////////// + /// VALIDATORS /// + ////////////////// + + parser->addMatcher(parser->fLocalValidators.number = {}); + if (isStrict) { + parser->addMatcher(parser->fLocalValidators.affix = {}); + } + if (isStrict && properties.minimumExponentDigits > 0) { + parser->addMatcher(parser->fLocalValidators.exponent = {}); + } + if (parseCurrency) { + parser->addMatcher(parser->fLocalValidators.currency = {}); + } + if (properties.decimalPatternMatchRequired) { + bool patternHasDecimalSeparator = + properties.decimalSeparatorAlwaysShown || properties.maximumFractionDigits != 0; + parser->addMatcher(parser->fLocalValidators.decimalSeparator = {patternHasDecimalSeparator}); + } + + // TODO: MULTIPLIER +// if (properties.getMultiplier() != null) { +// // We need to use a math context in order to prevent non-terminating decimal expansions. +// // This is only used when dividing by the multiplier.
+// parser.addMatcher(new MultiplierHandler(properties.getMultiplier(), +// RoundingUtils.getMathContextOr34Digits(properties))); +// } + + parser->freeze(); + return parser.orphan(); +} NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads) : fParseFlags(parseFlags), fComputeLeads(computeLeads) { diff --git a/icu4c/source/i18n/numparse_impl.h b/icu4c/source/i18n/numparse_impl.h index 901c226a13b..d05d7a24ac1 100644 --- a/icu4c/source/i18n/numparse_impl.h +++ b/icu4c/source/i18n/numparse_impl.h @@ -15,6 +15,8 @@ #include "numparse_currency.h" #include "numparse_affixes.h" #include "number_decimfmtprops.h" +#include "unicode/localpointer.h" +#include "numparse_validators.h" U_NAMESPACE_BEGIN namespace numparse { namespace impl { @@ -27,7 +29,7 @@ class NumberParserImpl : public MutableMatcherCollection { parse_flags_t parseFlags, UErrorCode& status); static NumberParserImpl* createParserFromProperties( - const number::impl::DecimalFormatProperties& properties, DecimalFormatSymbols symbols, + const number::impl::DecimalFormatProperties& properties, const DecimalFormatSymbols& symbols, bool parseCurrency, bool optimize, UErrorCode& status); void addMatcher(NumberParseMatcher& matcher) override; @@ -64,9 +66,17 @@ class NumberParserImpl : public MutableMatcherCollection { DecimalMatcher decimal; ScientificMatcher scientific; CurrencyNamesMatcher currencyNames; + CurrencyCustomMatcher currencyCustom; AffixMatcherWarehouse affixMatcherWarehouse; AffixTokenMatcherWarehouse affixTokenMatcherWarehouse; } fLocalMatchers; + struct { + RequireAffixValidator affix; + RequireCurrencyValidator currency; + RequireDecimalSeparatorValidator decimalSeparator; + RequireExponentValidator exponent; + RequireNumberValidator number; + } fLocalValidators; NumberParserImpl(parse_flags_t parseFlags, bool computeLeads); diff --git a/icu4c/source/i18n/numparse_validators.cpp b/icu4c/source/i18n/numparse_validators.cpp new file mode 100644 index 00000000000..724b0cf0313
--- /dev/null +++ b/icu4c/source/i18n/numparse_validators.cpp @@ -0,0 +1,83 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "numparse_types.h" +#include "numparse_validators.h" +#include "numparse_unisets.h" + +using namespace icu; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +void RequireAffixValidator::postProcess(ParsedNumber& result) const { + if (result.prefix.isBogus() || result.suffix.isBogus()) { + // We saw a prefix or a suffix but not both. Fail the parse. + result.flags |= FLAG_FAIL; + } +} + +UnicodeString RequireAffixValidator::toString() const { + return u""; +} + + +void RequireCurrencyValidator::postProcess(ParsedNumber& result) const { + if (result.currencyCode[0] == 0) { + result.flags |= FLAG_FAIL; + } +} + +UnicodeString RequireCurrencyValidator::toString() const { + return u""; +} + + +RequireDecimalSeparatorValidator::RequireDecimalSeparatorValidator(bool patternHasDecimalSeparator) + : fPatternHasDecimalSeparator(patternHasDecimalSeparator) { +} + +void RequireDecimalSeparatorValidator::postProcess(ParsedNumber& result) const { + bool parseHasDecimalSeparator = 0 != (result.flags & FLAG_HAS_DECIMAL_SEPARATOR); + if (parseHasDecimalSeparator != fPatternHasDecimalSeparator) { + result.flags |= FLAG_FAIL; + } +} + +UnicodeString RequireDecimalSeparatorValidator::toString() const { + return u""; +} + + +void RequireExponentValidator::postProcess(ParsedNumber& result) const { + if (0 == (result.flags & FLAG_HAS_EXPONENT)) { + result.flags |= FLAG_FAIL; + } +} + +UnicodeString RequireExponentValidator::toString() const { + return u""; +} + + +void 
RequireNumberValidator::postProcess(ParsedNumber& result) const { + // Require that a number is matched. + if (!result.seenNumber()) { + result.flags |= FLAG_FAIL; + } +} + +UnicodeString RequireNumberValidator::toString() const { + return u""; +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_validators.h b/icu4c/source/i18n/numparse_validators.h new file mode 100644 index 00000000000..d158b234fd5 --- /dev/null +++ b/icu4c/source/i18n/numparse_validators.h @@ -0,0 +1,85 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#ifndef __SOURCE_NUMPARSE_VALIDATORS_H__ +#define __SOURCE_NUMPARSE_VALIDATORS_H__ + +#include "numparse_types.h" +#include "numparse_unisets.h" + +U_NAMESPACE_BEGIN namespace numparse { +namespace impl { + + +class ValidationMatcher : public NumberParseMatcher { + public: + bool match(StringSegment&, ParsedNumber&, UErrorCode&) const U_OVERRIDE { + // No-op + return false; + } + + const UnicodeSet& getLeadCodePoints() U_OVERRIDE { + // No-op + return *unisets::get(unisets::EMPTY); + } + + virtual void postProcess(ParsedNumber& result) const U_OVERRIDE = 0; +}; + + +class RequireAffixValidator : public ValidationMatcher, public UMemory { + public: + void postProcess(ParsedNumber& result) const U_OVERRIDE; + + UnicodeString toString() const U_OVERRIDE; +}; + + +class RequireCurrencyValidator : public ValidationMatcher, public UMemory { + public: + void postProcess(ParsedNumber& result) const U_OVERRIDE; + + UnicodeString toString() const U_OVERRIDE; +}; + + +class RequireDecimalSeparatorValidator : public ValidationMatcher, public UMemory { + public: + RequireDecimalSeparatorValidator() = default; // leaves instance in valid but undefined state + + RequireDecimalSeparatorValidator(bool patternHasDecimalSeparator); + + void postProcess(ParsedNumber& 
result) const U_OVERRIDE; + + UnicodeString toString() const U_OVERRIDE; + + private: + bool fPatternHasDecimalSeparator; +}; + + +class RequireExponentValidator : public ValidationMatcher, public UMemory { + public: + void postProcess(ParsedNumber& result) const U_OVERRIDE; + + UnicodeString toString() const U_OVERRIDE; +}; + + +class RequireNumberValidator : public ValidationMatcher, public UMemory { + public: + void postProcess(ParsedNumber& result) const U_OVERRIDE; + + UnicodeString toString() const U_OVERRIDE; +}; + + +} // namespace impl +} // namespace numparse +U_NAMESPACE_END + +#endif //__SOURCE_NUMPARSE_VALIDATORS_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index 16323b52db1..160bab4fbcf 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -216,13 +216,15 @@ void NumberParserTest::testCurrencyAnyMatcher() { IcuTestErrorCode status(*this, "testCurrencyAnyMatcher"); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); + Locale locale = Locale::getEnglish(); + + DecimalFormatSymbols dfs(locale, status); + dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); + dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); + CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); + AffixTokenMatcherSetupData affixSetupData = { - u"ICU", - u"IU$", - u"ICU", - {"en", status}, - ignorables, - "en"}; + currencySymbols, {"en", status}, ignorables, "en"}; AffixTokenMatcherWarehouse warehouse(&affixSetupData); NumberParseMatcher& matcher = warehouse.currency(status); @@ -254,14 +256,16 @@ void NumberParserTest::testCurrencyAnyMatcher() { void NumberParserTest::testAffixPatternMatcher() { IcuTestErrorCode status(*this, "testAffixPatternMatcher"); + Locale locale = Locale::getEnglish(); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); 
+ + DecimalFormatSymbols dfs(locale, status); + dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); + dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); + CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); + AffixTokenMatcherSetupData affixSetupData = { - u"USD", - u"foo", - u"bar", - {"en", status}, - ignorables, - "en"}; + currencySymbols, {"en", status}, ignorables, "en"}; AffixTokenMatcherWarehouse warehouse(&affixSetupData); static const struct TestCase { diff --git a/icu4c/source/test/intltest/numbertest_patternmodifier.cpp b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp index bd8acc96978..df599d5d0bd 100644 --- a/icu4c/source/test/intltest/numbertest_patternmodifier.cpp +++ b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp @@ -28,7 +28,7 @@ void PatternModifierTest::testBasic() { mod.setPatternInfo(&patternInfo); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), status); - CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status); + CurrencySymbols currencySymbols({u"USD", status}, "en", status); assertSuccess("Spot 2", status); mod.setSymbols(&symbols, ¤cySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr); @@ -88,7 +88,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() { mod.setPatternInfo(&patternInfo); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), status); - CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status); + CurrencySymbols currencySymbols({u"USD", status}, "en", status); assertSuccess("Spot 2", status); mod.setSymbols(&symbols, ¤cySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr); mod.setNumberProperties(1, StandardPlural::Form::COUNT); @@ -124,7 +124,7 @@ void PatternModifierTest::testMutableEqualsImmutable() { mod.setPatternInfo(&patternInfo); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), 
status); - CurrencyDataSymbols currencySymbols({u"USD", status}, "en", status); + CurrencySymbols currencySymbols({u"USD", status}, "en", status); assertSuccess("Spot 2", status); if (U_FAILURE(status)) { return; } mod.setSymbols(&symbols, ¤cySymbols, UNUM_UNIT_WIDTH_SHORT, nullptr);