}
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
- return fCurrency = {{fSetupData->locale, status}, {fSetupData->currencySymbols, status}};
+ // Builds a new matcher from the setup data's currency symbols and decimal format symbols,
+ // assigns it to the fCurrency member, and returns a reference to that member.
+ return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, status};
}
IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {
PlusSignMatcher fPlusSign;
PercentMatcher fPercent;
PermilleMatcher fPermille;
- CurrencyAnyMatcher fCurrency;
+ CombinedCurrencyMatcher fCurrency;
// Use a child class for code point matchers, since it requires non-default operators.
CodePointMatcherWarehouse fCodePoints;
using namespace icu::numparse::impl;
-bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
- int32_t initialOffset = segment.getOffset();
- bool maybeMore = false;
-
- // NOTE: The range-based for loop calls the virtual begin() and end() methods.
- for (auto& matcher : *this) {
- maybeMore = maybeMore || matcher->match(segment, result, status);
- if (segment.getOffset() != initialOffset) {
- // Match succeeded.
- // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
- // accept any string starting with A. Therefore, there is no possibility that matchers
- // later in the list may be evaluated on longer strings, and we can exit the loop here.
- break;
- }
- }
-
- // None of the matchers succeeded.
- return maybeMore;
-}
-
-bool AnyMatcher::smokeTest(const StringSegment& segment) const {
- // NOTE: The range-based for loop calls the virtual begin() and end() methods.
- for (auto& matcher : *this) {
- if (matcher->smokeTest(segment)) {
- return true;
- }
- }
- return false;
-}
-
-void AnyMatcher::postProcess(ParsedNumber& result) const {
- // NOTE: The range-based for loop calls the virtual begin() and end() methods.
- for (auto& matcher : *this) {
- matcher->postProcess(result);
- }
-}
-
-
bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
ParsedNumber backup(result);
};
-/**
- * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
- * the first matcher in the list to succeed.
- *
- * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
- *
- * @author sffc
- * @see SeriesMatcher
- */
-class AnyMatcher : public CompositionMatcher {
- public:
- bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
- bool smokeTest(const StringSegment& segment) const override;
-
- void postProcess(ParsedNumber& result) const override;
-
- protected:
- // No construction except by subclasses!
- AnyMatcher() = default;
-};
+// NOTE: AnyMatcher is no longer being used. The previous definition is shown below.
+// The implementation can be found in SVN source control, deleted around March 30, 2018.
+///**
+// * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
+// * the first matcher in the list to succeed.
+// *
+// * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
+// *
+// * @author sffc
+// * @see SeriesMatcher
+// */
+//class AnyMatcher : public CompositionMatcher {
+// public:
+// bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+//
+// bool smokeTest(const StringSegment& segment) const override;
+//
+// void postProcess(ParsedNumber& result) const override;
+//
+// protected:
+// // No construction except by subclasses!
+// AnyMatcher() = default;
+//};
/**
using namespace icu::numparse::impl;
-CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
- : fLocaleName(locale.getName(), -1, status) {
+// Constructs the matcher. currencySymbols supplies the currency symbol, the international
+// currency symbol and the ISO code; dfs supplies the two currency-spacing insert strings and the
+// locale whose name is stored in fLocaleName. status is forwarded to every fallible call.
+CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols,
+ const DecimalFormatSymbols& dfs, UErrorCode& status)
+ : fCurrency1(currencySymbols.getCurrencySymbol(status)),
+ fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
+ afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
+ beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
+ fLocaleName(dfs.getLocale().getName(), -1, status) {
+ utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
+
+ // Compute the full set of characters that could be the first in a currency to allow for
+ // efficient smoke test.
+ // NOTE(review): char32At(0) is called without checking that the string is non-empty; confirm
+ // what gets added to the set when a symbol or the spacing pattern is empty.
+ fLeadCodePoints.add(fCurrency1.char32At(0));
+ fLeadCodePoints.add(fCurrency2.char32At(0));
+ // Only beforeSuffixInsert is seeded here: match() tries it ahead of the currency, whereas
+ // afterPrefixInsert is only tried after a currency has already been matched.
+ fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
// uprv_currencyLeads receives the locale name and the set; presumably it adds the lead code
// points of the locale's currency names -- confirm against its definition.
uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
// Always apply case mapping closure for currencies
fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
fLeadCodePoints.freeze();
}
-bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+/**
+ * Tries to consume a currency at the current segment offset together with the currency spacing
+ * that separates it from the number: beforeSuffixInsert is tried ahead of the currency once a
+ * number has been seen, and afterPrefixInsert is tried behind the currency otherwise.
+ *
+ * If no currency code ends up in the result, the segment offset is restored so that spacing
+ * consumed speculatively is handed back.
+ *
+ * @param segment The remaining input; its offset advances past whatever is consumed.
+ * @param result Receives the currency code; nothing is attempted if it already holds one.
+ * @param status Error code forwarded to matchCurrency().
+ * @return The "maybe more" flag accumulated from the spacing checks and matchCurrency().
+ */
+bool CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result,
+                                    UErrorCode& status) const {
     if (result.currencyCode[0] != 0) {
         return false;
     }
+    // Try to match a currency spacing separator ahead of a suffix currency.
+    // An empty spacing pattern has nothing to match, so it is skipped outright.
+    int32_t initialOffset = segment.getOffset();
+    bool maybeMore = false;
+    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
+        int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
+        if (overlap == beforeSuffixInsert.length()) {
+            segment.adjustOffset(overlap);
+            // Note: let currency spacing be a weak match. Don't update chars consumed.
+        }
+        maybeMore = maybeMore || overlap == segment.length();
+    }
+
+    // Match the currency string, and reset if we didn't find one.
+    // The call is kept out of the || expression on purpose: short-circuit evaluation would
+    // otherwise skip matchCurrency() whenever the spacing check had already set maybeMore,
+    // and the currency following the spacing would never be recognized.
+    const bool currencyMaybeMore = matchCurrency(segment, result, status);
+    maybeMore = maybeMore || currencyMaybeMore;
+    if (result.currencyCode[0] == 0) {
+        segment.setOffset(initialOffset);
+        return maybeMore;
+    }
+
+    // Try to match a currency spacing separator behind a prefix currency.
+    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
+        int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
+        if (overlap == afterPrefixInsert.length()) {
+            segment.adjustOffset(overlap);
+            // Note: let currency spacing be a weak match. Don't update chars consumed.
+        }
+        maybeMore = maybeMore || overlap == segment.length();
+    }
+
+    return maybeMore;
+}
+
+bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
+ UErrorCode& status) const {
+
+ int32_t overlap1 = segment.getCommonPrefixLength(fCurrency1);
+ if (overlap1 == fCurrency1.length()) {
+ utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
+ segment.adjustOffset(overlap1);
+ result.setCharsConsumed(segment);
+ return segment.length() == 0;
+ }
+
+ int32_t overlap2 = segment.getCommonPrefixLength(fCurrency2);
+ if (overlap2 == fCurrency2.length()) {
+ utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
+ segment.adjustOffset(overlap2);
+ result.setCharsConsumed(segment);
+ return segment.length() == 0;
+ }
+
// NOTE: This call site should be improved with #13584.
const UnicodeString segmentString = segment.toTempUnicodeString();
result.currencyCode,
status);
- // Possible partial match
- bool partialMatch = partialMatchLen == segment.length();
-
if (U_SUCCESS(status) && ppos.getIndex() != 0) {
// Complete match.
// NOTE: The currency code should already be saved in the ParsedNumber.
result.setCharsConsumed(segment);
}
- return partialMatch;
+ return overlap1 == segment.length() || overlap2 == segment.length() ||
+ partialMatchLen == segment.length();
}
-bool CurrencyNamesMatcher::smokeTest(const StringSegment& segment) const {
+// Cheap pre-check: delegates to segment.startsWith() with the frozen fLeadCodePoints set that
+// the constructor fills with the possible first code points of a currency.
+bool CombinedCurrencyMatcher::smokeTest(const StringSegment& segment) const {
return segment.startsWith(fLeadCodePoints);
}
-UnicodeString CurrencyNamesMatcher::toString() const {
- return u"<CurrencyNames>";
-}
-
-
-CurrencyCustomMatcher::CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status)
- : fCurrency1(currencySymbols.getCurrencySymbol(status)),
- fCurrency2(currencySymbols.getIntlCurrencySymbol(status)) {
- utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
-}
-
-bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
- if (result.currencyCode[0] != 0) {
- return false;
- }
-
- int overlap1 = segment.getCommonPrefixLength(fCurrency1);
- if (overlap1 == fCurrency1.length()) {
- utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
- segment.adjustOffset(overlap1);
- result.setCharsConsumed(segment);
- }
-
- int overlap2 = segment.getCommonPrefixLength(fCurrency2);
- if (overlap2 == fCurrency2.length()) {
- utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
- segment.adjustOffset(overlap2);
- result.setCharsConsumed(segment);
- }
-
- return overlap1 == segment.length() || overlap2 == segment.length();
-}
-
-bool CurrencyCustomMatcher::smokeTest(const StringSegment& segment) const {
- return segment.startsWith(fCurrency1)
- || segment.startsWith(fCurrency2);
-}
-
-UnicodeString CurrencyCustomMatcher::toString() const {
- return u"<CurrencyCustom>";
-}
-
-
-CurrencyAnyMatcher::CurrencyAnyMatcher() {
- fMatcherArray[0] = &fNamesMatcher;
- fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
- CurrencyCustomMatcher customMatcher)
- : fNamesMatcher(std::move(namesMatcher)), fCustomMatcher(std::move(customMatcher)) {
- fMatcherArray[0] = &fNamesMatcher;
- fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT
- : fNamesMatcher(std::move(src.fNamesMatcher)), fCustomMatcher(std::move(src.fCustomMatcher)) {
- fMatcherArray[0] = &fNamesMatcher;
- fMatcherArray[1] = &fCustomMatcher;
-}
-
-CurrencyAnyMatcher& CurrencyAnyMatcher::operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT {
- fNamesMatcher = std::move(src.fNamesMatcher);
- fCustomMatcher = std::move(src.fCustomMatcher);
- // Note: do NOT move fMatcherArray
- return *this;
-}
-
-const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
- return fMatcherArray;
-}
-
-const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
- return fMatcherArray + 2;
-}
-
-UnicodeString CurrencyAnyMatcher::toString() const {
- return u"<CurrencyAny>";
+// Returns a fixed label naming this matcher type; the label does not include the currency.
+UnicodeString CombinedCurrencyMatcher::toString() const {
+ return u"<CombinedCurrencyMatcher>";
}
using ::icu::number::impl::CurrencySymbols;
/**
- * Matches currencies according to all available strings in locale data.
+ * Matches a currency, either a custom currency or one from the data bundle. The class is called
+ * "combined" to emphasize that the currency string may come from one of multiple sources.
*
- * The implementation of this class is different between J and C. See #13584 for a follow-up.
+ * Will match currency spacing either before or after the number depending on whether we are currently in
+ * the prefix or suffix.
+ *
+ * The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
*
* @author sffc
*/
-class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
- public:
- CurrencyNamesMatcher() = default; // WARNING: Leaves the object in an unusable state
-
- CurrencyNamesMatcher(const Locale& locale, UErrorCode& status);
-
- bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
- bool smokeTest(const StringSegment& segment) const override;
-
- UnicodeString toString() const override;
-
- private:
- // We could use Locale instead of CharString here, but
- // Locale has a non-trivial default constructor.
- CharString fLocaleName;
-
- UnicodeSet fLeadCodePoints;
-};
-
-
-class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory {
+class CombinedCurrencyMatcher : public NumberParseMatcher, public UMemory {
public:
- CurrencyCustomMatcher() = default; // WARNING: Leaves the object in an unusable state
+ CombinedCurrencyMatcher() = default; // WARNING: Leaves the object in an unusable state
- CurrencyCustomMatcher(const CurrencySymbols& currencySymbols, UErrorCode& status);
+ CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, UErrorCode& status);
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
UChar fCurrencyCode[4];
UnicodeString fCurrency1;
UnicodeString fCurrency2;
-};
-
-
-/**
- * An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
- */
-class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
- public:
- CurrencyAnyMatcher(); // WARNING: Leaves the object in an unusable state
-
- CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
-
- // Needs custom move constructor/operator since constructor is nontrivial
- CurrencyAnyMatcher(CurrencyAnyMatcher&& src) U_NOEXCEPT;
+ UnicodeString afterPrefixInsert;
+ UnicodeString beforeSuffixInsert;
- CurrencyAnyMatcher& operator=(CurrencyAnyMatcher&& src) U_NOEXCEPT;
-
- UnicodeString toString() const override;
-
- protected:
- const NumberParseMatcher* const* begin() const override;
-
- const NumberParseMatcher* const* end() const override;
+ // We could use Locale instead of CharString here, but
+ // Locale has a non-trivial default constructor.
+ CharString fLocaleName;
- private:
- CurrencyNamesMatcher fNamesMatcher;
- CurrencyCustomMatcher fCustomMatcher;
+ UnicodeSet fLeadCodePoints;
- const NumberParseMatcher* fMatcherArray[2];
+ /** Matches the currency string without concern for currency spacing. */
+ bool matchCurrency(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
};
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
- parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
+ parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
// parser.addMatcher(new RequireNumberMatcher());
parser->freeze();
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
- parser->addMatcher(parser->fLocalMatchers.currencyCustom = {currencySymbols, status});
- parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
+ parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
}
///////////////////////////////
PlusSignMatcher plusSign;
DecimalMatcher decimal;
ScientificMatcher scientific;
- CurrencyNamesMatcher currencyNames;
- CurrencyCustomMatcher currencyCustom;
+ CombinedCurrencyMatcher currency;
AffixMatcherWarehouse affixMatcherWarehouse;
AffixTokenMatcherWarehouse affixTokenMatcherWarehouse;
} fLocalMatchers;
void testBasic();
void testLocaleFi();
void testSeriesMatcher();
- void testCurrencyAnyMatcher();
+ void testCombinedCurrencyMatcher();
void testAffixPatternMatcher();
void testGroupingDisabled();
+ void testCaseFolding();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
TESTCASE_AUTO(testSeriesMatcher);
- TESTCASE_AUTO(testCurrencyAnyMatcher);
+ TESTCASE_AUTO(testCombinedCurrencyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
TESTCASE_AUTO_END;
}
}
}
-void NumberParserTest::testCurrencyAnyMatcher() {
- IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
+void NumberParserTest::testCombinedCurrencyMatcher() {
+ IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
Locale locale = Locale::getEnglish();
# Basics
fp: "0.####" 0.10005 "0.1" 0.1
fp: - 0.10006 "0.1001" 0.1001
-pat: - "#0.####"
+pat: - "0.####"
fp: "#.####" 0.10005 "0.1" 0.1
-pat: - "#0.####"
+pat: - "0.####"
rt: "0" 1234 "1234"
-pat: - "#0"
+pat: - "0"
# Significant digits
fp: "@@@" 1.234567 "1.23" 1.23
# ISO codes that overlap display names (QQQ vs. Q)
# recognize real ISO name in parsing, so, can not use fake name as QQQ
#fpc: - 123/QQQ "QQQ123.00" 123/QQQ # QQQ is fake
-fpc: - 123/GTQ "GTQ123.00" 123/GTQ
+fpc: - 123/GTQ "GTQ 123.00" 123/GTQ
# ChoiceFormat-based display names
fpc: - 1/INR "\u20b91.00" 1/INR
fpc: - 2/INR "\u20b92.00" 2/INR
# Display names with shared prefix (YDD vs. Y)
-fpc: - 100/YDD "YDD100.00" 100/YDD
+fpc: - 100/YDD "YDD 100.00" 100/YDD
fpc: - 100/CNY "CN\u00a5100.00" 100/CNY
# Regression Tests bug#7914
return PermilleMatcher.getInstance(symbols);
}
- public AnyMatcher currency() {
- AnyMatcher any = new AnyMatcher();
- any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
- any.addMatcher(CurrencyNamesMatcher.getInstance(locale));
- any.freeze();
- return any;
+ /** Returns a currency matcher obtained from this factory's currency and symbols fields. */
+ public CombinedCurrencyMatcher currency() {
+ return CombinedCurrencyMatcher.getInstance(currency, symbols);
}
public IgnorablesMatcher ignorables() {
+++ /dev/null
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import com.ibm.icu.impl.StringSegment;
-
-/**
- * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
- * the first matcher in the list to succeed.
- *
- * @author sffc
- * @see SeriesMatcher
- */
-public class AnyMatcher implements NumberParseMatcher {
-
- protected List<NumberParseMatcher> matchers = null;
- protected boolean frozen = false;
-
- public void addMatcher(NumberParseMatcher matcher) {
- assert !frozen;
- if (matchers == null) {
- matchers = new ArrayList<NumberParseMatcher>();
- }
- matchers.add(matcher);
- }
-
- public void freeze() {
- frozen = true;
- }
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- assert frozen;
- if (matchers == null) {
- return false;
- }
-
- int initialOffset = segment.getOffset();
- boolean maybeMore = false;
- for (int i = 0; i < matchers.size(); i++) {
- NumberParseMatcher matcher = matchers.get(i);
- maybeMore = maybeMore || matcher.match(segment, result);
- if (segment.getOffset() != initialOffset) {
- // Match succeeded.
- // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
- // accept any string starting with A. Therefore, there is no possibility that matchers
- // later in the list may be evaluated on longer strings, and we can exit the loop here.
- break;
- }
- }
-
- // None of the matchers succeeded.
- return maybeMore;
- }
-
- @Override
- public boolean smokeTest(StringSegment segment) {
- assert frozen;
- if (matchers == null) {
- return false;
- }
-
- for (int i = 0; i < matchers.size(); i++) {
- if (matchers.get(i).smokeTest(segment)) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public void postProcess(ParsedNumber result) {
- assert frozen;
- if (matchers == null) {
- return;
- }
-
- for (int i = 0; i < matchers.size(); i++) {
- NumberParseMatcher matcher = matchers.get(i);
- matcher.postProcess(result);
- }
- }
-
- @Override
- public String toString() {
- return "<AnyMatcher " + matchers + ">";
- }
-
-}
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.Iterator;
+
+import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.TextTrieMap;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.Currency.CurrencyStringInfo;
+
+/**
+ * Matches a currency, either a custom currency or one from the data bundle. The class is called
+ * "combined" to emphasize that the currency string may come from one of multiple sources.
+ *
+ * Will match currency spacing either before or after the number depending on whether we are currently in
+ * the prefix or suffix.
+ *
+ * The implementation of this class is slightly different between J and C. See #13584 for a follow-up.
+ *
+ * @author sffc
+ */
+public class CombinedCurrencyMatcher implements NumberParseMatcher {
+
+    /** Code stored in the ParsedNumber when currency1 or currency2 matches. */
+    private final String isoCode;
+    /** Symbol of the custom currency in the locale of the symbols object; may be empty. */
+    private final String currency1;
+    /** Currency code string of the custom currency; may be empty. */
+    private final String currency2;
+
+    /** Currency spacing accepted after a prefix currency; may be empty. */
+    private final String afterPrefixInsert;
+    /** Currency spacing accepted before a suffix currency; may be empty. */
+    private final String beforeSuffixInsert;
+
+    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
+    private final TextTrieMap<CurrencyStringInfo> symbolTrie;
+
+    /** Frozen set of code points that may start a match; consulted by smokeTest. */
+    private final UnicodeSet leadCodePoints;
+
+    /**
+     * Creates a matcher for the given custom currency and decimal format symbols.
+     *
+     * @param currency The custom currency whose symbol and code are matched directly.
+     * @param dfs Supplies the locale and the currency spacing strings.
+     * @return A new matcher instance; instances are not cached.
+     */
+    public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs) {
+        // TODO: Cache these instances. They are somewhat expensive.
+        return new CombinedCurrencyMatcher(currency, dfs);
+    }
+
+    private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs) {
+        this.isoCode = currency.getSubtype();
+        this.currency1 = currency.getSymbol(dfs.getULocale());
+        this.currency2 = currency.getCurrencyCode();
+
+        afterPrefixInsert = dfs
+                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false);
+        beforeSuffixInsert = dfs
+                .getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true);
+
+        // TODO: Currency trie does not currently have an option for case folding. It defaults to use
+        // case folding on long-names but not symbols.
+        longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
+        symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);
+
+        // Compute the full set of characters that could be the first in a currency to allow for
+        // efficient smoke test. Empty strings contribute nothing; calling codePointAt(0) on them
+        // would throw StringIndexOutOfBoundsException.
+        leadCodePoints = new UnicodeSet();
+        addLeadCodePoint(leadCodePoints, currency1);
+        addLeadCodePoint(leadCodePoints, currency2);
+        addLeadCodePoint(leadCodePoints, beforeSuffixInsert);
+        longNameTrie.putLeadCodePoints(leadCodePoints);
+        symbolTrie.putLeadCodePoints(leadCodePoints);
+        // Always apply case mapping closure for currencies
+        leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
+        leadCodePoints.freeze();
+    }
+
+    /** Adds the first code point of str to set; does nothing when str is empty. */
+    private static void addLeadCodePoint(UnicodeSet set, String str) {
+        if (!str.isEmpty()) {
+            set.add(str.codePointAt(0));
+        }
+    }
+
+    /**
+     * Tries to consume a currency at the current segment offset together with the currency
+     * spacing that separates it from the number: beforeSuffixInsert is tried ahead of the
+     * currency once a number has been seen, and afterPrefixInsert behind the currency otherwise.
+     * If no currency code ends up in the result, the segment offset is restored.
+     *
+     * @param segment The remaining input; its offset advances past whatever is consumed.
+     * @param result Receives the currency code; nothing is attempted if it already holds one.
+     * @return The "maybe more" flag accumulated from the spacing checks and the currency match.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (result.currencyCode != null) {
+            return false;
+        }
+
+        // Try to match a currency spacing separator ahead of a suffix currency.
+        // An empty spacing pattern has nothing to match, so it is skipped outright.
+        int initialOffset = segment.getOffset();
+        boolean maybeMore = false;
+        if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
+            int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
+            if (overlap == beforeSuffixInsert.length()) {
+                segment.adjustOffset(overlap);
+                // Note: let currency spacing be a weak match. Don't update chars consumed.
+            }
+            maybeMore = maybeMore || overlap == segment.length();
+        }
+
+        // Match the currency string, and reset if we didn't find one.
+        // The call is kept out of the || expression on purpose: short-circuit evaluation would
+        // otherwise skip matchCurrency() whenever the spacing check had already set maybeMore,
+        // and the currency following the spacing would never be recognized.
+        boolean currencyMaybeMore = matchCurrency(segment, result);
+        maybeMore = maybeMore || currencyMaybeMore;
+        if (result.currencyCode == null) {
+            segment.setOffset(initialOffset);
+            return maybeMore;
+        }
+
+        // Try to match a currency spacing separator behind a prefix currency.
+        if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
+            int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
+            if (overlap == afterPrefixInsert.length()) {
+                segment.adjustOffset(overlap);
+                // Note: let currency spacing be a weak match. Don't update chars consumed.
+            }
+            maybeMore = maybeMore || overlap == segment.length();
+        }
+
+        return maybeMore;
+    }
+
+    /**
+     * Matches the currency string without concern for currency spacing. The custom symbol is
+     * tried first, then the custom code, then the long-name trie and finally the symbol trie.
+     * An empty custom symbol or code is never treated as a match.
+     */
+    private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
+        // -1 can never equal a string length or a segment length, so an empty string neither
+        // matches nor contributes to the partial-match result below.
+        int overlap1 = currency1.isEmpty() ? -1 : segment.getCommonPrefixLength(currency1);
+        if (overlap1 == currency1.length()) {
+            result.currencyCode = isoCode;
+            segment.adjustOffset(overlap1);
+            result.setCharsConsumed(segment);
+            return segment.length() == 0;
+        }
+
+        int overlap2 = currency2.isEmpty() ? -1 : segment.getCommonPrefixLength(currency2);
+        if (overlap2 == currency2.length()) {
+            result.currencyCode = isoCode;
+            segment.adjustOffset(overlap2);
+            result.setCharsConsumed(segment);
+            return segment.length() == 0;
+        }
+
+        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
+        Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
+        if (values == null) {
+            values = symbolTrie.get(segment, 0, trieOutput);
+        }
+        if (values != null) {
+            result.currencyCode = values.next().getISOCode();
+            segment.adjustOffset(trieOutput.matchLength);
+            result.setCharsConsumed(segment);
+        }
+
+        return overlap1 == segment.length() || overlap2 == segment.length() || trieOutput.partialMatch;
+    }
+
+    /** Cheap pre-check: delegates to segment.startsWith() with the lead code point set. */
+    @Override
+    public boolean smokeTest(StringSegment segment) {
+        return segment.startsWith(leadCodePoints);
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<CombinedCurrencyMatcher " + isoCode + ">";
+    }
+
+}
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.util.Currency;
-import com.ibm.icu.util.ULocale;
-
-/**
- * A matcher for a single currency instance (not the full trie).
- */
-public class CurrencyCustomMatcher implements NumberParseMatcher {
-
- private final String isoCode;
- private final String currency1;
- private final String currency2;
-
- public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) {
- return new CurrencyCustomMatcher(currency.getSubtype(),
- currency.getSymbol(loc),
- currency.getCurrencyCode());
- }
-
- private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
- this.isoCode = isoCode;
- this.currency1 = currency1;
- this.currency2 = currency2;
- }
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- if (result.currencyCode != null) {
- return false;
- }
-
- int overlap1 = segment.getCommonPrefixLength(currency1);
- if (overlap1 == currency1.length()) {
- result.currencyCode = isoCode;
- segment.adjustOffset(overlap1);
- result.setCharsConsumed(segment);
- }
-
- int overlap2 = segment.getCommonPrefixLength(currency2);
- if (overlap2 == currency2.length()) {
- result.currencyCode = isoCode;
- segment.adjustOffset(overlap2);
- result.setCharsConsumed(segment);
- }
-
- return overlap1 == segment.length() || overlap2 == segment.length();
- }
-
- @Override
- public boolean smokeTest(StringSegment segment) {
- return segment.startsWith(currency1) || segment.startsWith(currency2);
- }
-
- @Override
- public void postProcess(ParsedNumber result) {
- // No-op
- }
-
- @Override
- public String toString() {
- return "<CurrencyMatcher " + isoCode + ">";
- }
-}
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import java.util.Iterator;
-
-import com.ibm.icu.impl.StringSegment;
-import com.ibm.icu.impl.TextTrieMap;
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.util.Currency;
-import com.ibm.icu.util.Currency.CurrencyStringInfo;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Matches currencies according to all available strings in locale data.
- *
- * The implementation of this class is different between J and C. See #13584 for a follow-up.
- *
- * @author sffc
- */
-public class CurrencyNamesMatcher implements NumberParseMatcher {
-
- private final TextTrieMap<CurrencyStringInfo> longNameTrie;
- private final TextTrieMap<CurrencyStringInfo> symbolTrie;
-
- private final UnicodeSet leadCodePoints;
-
- public static CurrencyNamesMatcher getInstance(ULocale locale) {
- // TODO: Pre-compute some of the more popular locales?
- return new CurrencyNamesMatcher(locale);
- }
-
- private CurrencyNamesMatcher(ULocale locale) {
- // TODO: Currency trie does not currently have an option for case folding. It defaults to use
- // case folding on long-names but not symbols.
- longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
- symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
-
- // Compute the full set of characters that could be the first in a currency to allow for
- // efficient smoke test.
- leadCodePoints = new UnicodeSet();
- longNameTrie.putLeadCodePoints(leadCodePoints);
- symbolTrie.putLeadCodePoints(leadCodePoints);
- // Always apply case mapping closure for currencies
- leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
- leadCodePoints.freeze();
- }
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- if (result.currencyCode != null) {
- return false;
- }
-
- TextTrieMap.Output trieOutput = new TextTrieMap.Output();
- Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
- if (values == null) {
- values = symbolTrie.get(segment, 0, trieOutput);
- }
- if (values != null) {
- result.currencyCode = values.next().getISOCode();
- segment.adjustOffset(trieOutput.matchLength);
- result.setCharsConsumed(segment);
- }
- return trieOutput.partialMatch;
- }
-
- @Override
- public boolean smokeTest(StringSegment segment) {
- return segment.startsWith(leadCodePoints);
- }
-
- @Override
- public void postProcess(ParsedNumber result) {
- // No-op
- }
-
- @Override
- public String toString() {
- return "<CurrencyTrieMatcher>";
- }
-}
public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {
NumberParserImpl parser = new NumberParserImpl(parseFlags);
+ Currency currency = Currency.getInstance("USD");
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
- factory.currency = Currency.getInstance("USD");
+ factory.currency = currency;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
parser.addMatcher(InfinityMatcher.getInstance(symbols));
parser.addMatcher(PaddingMatcher.getInstance("@"));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
- parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
+ parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
parser.addMatcher(new RequireNumberValidator());
parser.freeze();
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
- parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
- parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
+ parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
}
///////////////////////////////
df.setParseStrict(true);
expect2(df, 0.5, "50x%");
}
+
+ // Round-trips positive and negative USD amounts through an ISO-currency-sign pattern with
+ // strict parsing enabled. The expected strings contain a space between the code and the
+ // number that the pattern itself does not spell out (presumably currency spacing inserted by
+ // the formatter -- confirm), so strict parsing has to accept that spacing.
+ @Test
+ public void testParseIsoStrict() {
+ DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+ DecimalFormat df = new DecimalFormat("¤¤0;-0¤¤", dfs);
+ df.setCurrency(Currency.getInstance("USD"));
+ df.setParseStrict(true);
+ expect2(df, 45, "USD 45.00");
+ expect2(df, -45, "-45.00 USD");
+ }
}
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.parse.AffixPatternMatcher;
import com.ibm.icu.impl.number.parse.AffixTokenMatcherFactory;
-import com.ibm.icu.impl.number.parse.AnyMatcher;
+import com.ibm.icu.impl.number.parse.CombinedCurrencyMatcher;
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
}
@Test
- public void testCurrencyAnyMatcher() {
+ public void testCombinedCurrencyMatcher() {
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.locale = ULocale.ENGLISH;
CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
factory.currency = currency;
- AnyMatcher matcher = factory.currency();
+ factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+ CombinedCurrencyMatcher matcher = factory.currency();
Object[][] cases = new Object[][] {
{ "", null },