#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
linearSearch(const CurrencyNameStruct* currencyNames,
int32_t begin, int32_t end,
const UChar* text, int32_t textLen,
+ int32_t *partialMatchLen,
int32_t *maxMatchLen, int32_t* maxMatchIndex) {
+ int32_t initialPartialMatchLen = *partialMatchLen;
for (int32_t index = begin; index <= end; ++index) {
int32_t len = currencyNames[index].currencyNameLen;
if (len > *maxMatchLen && len <= textLen &&
uprv_memcmp(currencyNames[index].currencyName, text, len * sizeof(UChar)) == 0) {
+ *partialMatchLen = MAX(*partialMatchLen, len);
*maxMatchIndex = index;
*maxMatchLen = len;
#ifdef UCURR_DEBUG
printf("maxMatchIndex = %d, maxMatchLen = %d\n",
*maxMatchIndex, *maxMatchLen);
#endif
+ } else {
+ // Check for partial matches.
+ for (int32_t i=initialPartialMatchLen; i<MIN(len, textLen); i++) {
+ if (currencyNames[index].currencyName[i] != text[i]) {
+ break;
+ }
+ *partialMatchLen = MAX(*partialMatchLen, i + 1);
+ }
}
}
}
static void
searchCurrencyName(const CurrencyNameStruct* currencyNames,
int32_t total_currency_count,
- const UChar* text, int32_t textLen,
+ const UChar* text, int32_t textLen,
+ int32_t *partialMatchLen,
int32_t* maxMatchLen, int32_t* maxMatchIndex) {
*maxMatchIndex = -1;
*maxMatchLen = 0;
if (binarySearchBegin == -1) { // did not find the range
break;
}
+ *partialMatchLen = MAX(*partialMatchLen, index + 1);
if (matchIndex != -1) {
// find an exact match for text from text[0] to text[index]
// in currencyNames array.
// linear search if within threshold.
linearSearch(currencyNames, binarySearchBegin, binarySearchEnd,
text, textLen,
+ partialMatchLen,
maxMatchLen, maxMatchIndex);
break;
}
}
-U_CAPI void
-uprv_parseCurrency(const char* locale,
- const icu::UnicodeString& text,
- icu::ParsePosition& pos,
- int8_t type,
- UChar* result,
- UErrorCode& ec)
-{
- U_NAMESPACE_USE
-
- if (U_FAILURE(ec)) {
- return;
- }
+/**
+ * Loads the currency name data from the cache, or from resource bundles if necessary.
+ * The refCount is automatically incremented. It is the caller's responsibility
+ * to decrement it when done!
+ */
+static CurrencyNameCacheEntry*
+getCacheEntry(const char* locale, UErrorCode& ec) {
int32_t total_currency_name_count = 0;
CurrencyNameStruct* currencyNames = NULL;
}
if (found != -1) {
cacheEntry = currCache[found];
- currencyNames = cacheEntry->currencyNames;
- total_currency_name_count = cacheEntry->totalCurrencyNameCount;
- currencySymbols = cacheEntry->currencySymbols;
- total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
++(cacheEntry->refCount);
}
umtx_unlock(&gCurrencyCacheMutex);
if (found == -1) {
collectCurrencyNames(locale, ¤cyNames, &total_currency_name_count, ¤cySymbols, &total_currency_symbol_count, ec);
if (U_FAILURE(ec)) {
- return;
+ return NULL;
}
umtx_lock(&gCurrencyCacheMutex);
// check again.
deleteCurrencyNames(currencyNames, total_currency_name_count);
deleteCurrencyNames(currencySymbols, total_currency_symbol_count);
cacheEntry = currCache[found];
- currencyNames = cacheEntry->currencyNames;
- total_currency_name_count = cacheEntry->totalCurrencyNameCount;
- currencySymbols = cacheEntry->currencySymbols;
- total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
++(cacheEntry->refCount);
}
umtx_unlock(&gCurrencyCacheMutex);
}
+ return cacheEntry;
+}
+
+/**
+ * Gives back one reference to a cache entry that was handed out by
+ * getCacheEntry(). The entry is deleted as soon as its reference count
+ * reaches zero. Both the decrement and the deletion are performed while
+ * gCurrencyCacheMutex is held.
+ */
+static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) {
+    umtx_lock(&gCurrencyCacheMutex);
+    if (--(cacheEntry->refCount) == 0) {
+        // No remaining users: remove the entry.
+        deleteCacheEntry(cacheEntry);
+    }
+    umtx_unlock(&gCurrencyCacheMutex);
+}
+
+U_CAPI void
+uprv_parseCurrency(const char* locale,
+ const icu::UnicodeString& text,
+ icu::ParsePosition& pos,
+ int8_t type,
+ int32_t* partialMatchLen,
+ UChar* result,
+ UErrorCode& ec) {
+ U_NAMESPACE_USE
+ if (U_FAILURE(ec)) {
+ return;
+ }
+ CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
+ if (U_FAILURE(ec)) {
+ return;
+ }
+
+ int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount;
+ CurrencyNameStruct* currencyNames = cacheEntry->currencyNames;
+ int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
+ CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols;
+
int32_t start = pos.getIndex();
UChar inputText[MAX_CURRENCY_NAME_LEN];
UErrorCode ec1 = U_ZERO_ERROR;
textLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, textLen, locale, &ec1);
+ // Make sure partialMatchLen is initialized
+ *partialMatchLen = 0;
+
int32_t max = 0;
int32_t matchIndex = -1;
// case in-sensitive comparision against currency names
searchCurrencyName(currencyNames, total_currency_name_count,
- upperText, textLen, &max, &matchIndex);
+ upperText, textLen, partialMatchLen, &max, &matchIndex);
#ifdef UCURR_DEBUG
printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex);
if (type != UCURR_LONG_NAME) { // not name only
// case sensitive comparison against currency symbols and ISO code.
searchCurrencyName(currencySymbols, total_currency_symbol_count,
- inputText, textLen,
+ inputText, textLen,
+ partialMatchLen,
&maxInSymbol, &matchIndexInSymbol);
}
} else if (maxInSymbol >= max && matchIndexInSymbol != -1) {
u_charsToUChars(currencySymbols[matchIndexInSymbol].IsoCode, result, 4);
pos.setIndex(start + maxInSymbol);
- }
+ }
// decrease reference count
- umtx_lock(&gCurrencyCacheMutex);
- --(cacheEntry->refCount);
- if (cacheEntry->refCount == 0) { // remove
- deleteCacheEntry(cacheEntry);
+ releaseCacheEntry(cacheEntry);
+}
+
+void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) { // Adds the code point at offset 0 of every cached currency symbol and currency name for `locale` to `result`.
+ U_NAMESPACE_USE
+ if (U_FAILURE(ec)) {
+ return;
}
- umtx_unlock(&gCurrencyCacheMutex);
+ CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec); // takes a reference on the entry; it is released at the end of this function
+ if (U_FAILURE(ec)) {
+ return;
+ }
+
+ for (int32_t i=0; i<cacheEntry->totalCurrencySymbolCount; i++) { // first pass: currency symbols
+ const CurrencyNameStruct& info = cacheEntry->currencySymbols[i];
+ UChar32 cp;
+ U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); // NOTE(review): reads index 0, so this assumes currencyNameLen > 0 - confirm against the data loader
+ result.add(cp);
+ }
+
+ for (int32_t i=0; i<cacheEntry->totalCurrencyNameCount; i++) { // second pass: currency names
+ const CurrencyNameStruct& info = cacheEntry->currencyNames[i];
+ UChar32 cp;
+ U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); // NOTE(review): same non-empty assumption as in the symbol loop
+ result.add(cp);
+ }
+
+ // decrease the reference count that getCacheEntry() incremented
+ releaseCacheEntry(cacheEntry);
}
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
/**
* Internal method. Given a currency ISO code and a locale, return
* match, then the display name is preferred, unless it's length
* is less than 3.
*
+ * The parameters must not be NULL.
+ *
* @param locale the locale of the display names to match
* @param text the text to parse
* @param pos input-output position; on input, the position within
* on output, the position after the last matched character. If
* the parse fails, the position in unchanged upon output.
* @param type currency type to parse against, LONG_NAME only or not
+ * @param partialMatchLen The length of the longest matching prefix;
+ * this may be nonzero even if no full currency was matched.
* @return the ISO 4217 code, as a string, of the best match, or
* null if there is no match
*
const icu::UnicodeString& text,
icu::ParsePosition& pos,
int8_t type,
+ int32_t* partialMatchLen,
UChar* result,
UErrorCode& ec);
+/**
+ * Puts all possible first code points of a currency symbol or currency
+ * name into the specified UnicodeSet. Code points are only added; the
+ * set is not cleared first.
+ *
+ * @param locale the locale of the display names of interest
+ * @param result the UnicodeSet to which to add the starting characters
+ * @param ec error code; if it already indicates a failure on entry, or if
+ *           loading the currency name data fails, the function returns
+ *           without adding anything to result
+ */
+void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec);
+
+
+
#endif /* #ifndef _UCURR_IMP_H_ */
//eof
number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o \
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
-numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o
+numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o \
+numparse_currency.o
## Header files to install
// determine our locale.
const char* loc = fCurrencyPluralInfo->getLocale().getName();
ParsePosition ppos(pos);
+ int32_t currMatchLen = 0;
UChar curr[4];
UErrorCode ec = U_ZERO_ERROR;
// Delegate parse of display name => ISO code to Currency
- uprv_parseCurrency(loc, text, ppos, type, curr, ec);
+ uprv_parseCurrency(loc, text, ppos, type, &currMatchLen, curr, ec);
// If parse succeeds, populate currency[0]
if (U_SUCCESS(ec) && ppos.getIndex() != pos) {
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_currency.h"
+#include "ucurrimp.h"
+#include "unicode/errorcode.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+// Captures the locale's name in fLocaleName; match() and getLeadCodePoints()
+// later hand that name to the uprv_ currency helpers. The status argument is
+// forwarded to the CharString constructor.
+CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
+        : fLocaleName(locale.getName(), -1, status) {
+}
+
+// Tries to consume a currency symbol, ISO code or currency name at the start
+// of the segment. On a full match the currency code is written into
+// result.currencyCode and the segment is advanced past the matched text.
+// Returns true when the entire segment (as it was on entry) is a prefix of
+// some currency string, i.e. more input might still extend the match.
+bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    // A currency code has already been recorded for this number; do not look for another.
+    if (result.currencyCode[0] != 0) {
+        return false;
+    }
+
+    // NOTE: This requires a new UnicodeString to be allocated, instead of using the StringSegment.
+    // This should be fixed with #13584.
+    const UnicodeString text = segment.toUnicodeString();
+
+    // Attempt the currency parse from the beginning of the copied text.
+    ParsePosition parsePos(0);
+    int32_t longestPrefixLen = 0;
+    uprv_parseCurrency(
+        fLocaleName.data(),
+        text,
+        parsePos,
+        UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
+        &longestPrefixLen,
+        result.currencyCode,
+        status);
+
+    // Must be evaluated before the segment offset is adjusted below,
+    // because adjusting the offset changes segment.length().
+    const bool mightMatchMore = (longestPrefixLen == segment.length());
+
+    const int32_t consumed = parsePos.getIndex();
+    if (U_SUCCESS(status) && consumed != 0) {
+        // Complete match.
+        // NOTE: The currency code should already be saved in the ParsedNumber.
+        segment.adjustOffset(consumed);
+        result.setCharsConsumed(segment);
+    }
+
+    return mightMatchMore;
+}
+
+// Builds a new, frozen UnicodeSet with every code point that can start a
+// currency symbol or name for this matcher's locale, closed over case mappings.
+// NOTE(review): the set is heap-allocated and returned as a raw pointer;
+// ownership appears to pass to the caller - confirm.
+const UnicodeSet* CurrencyNamesMatcher::getLeadCodePoints() const {
+    // The outcome recorded in this error code is not inspected here.
+    ErrorCode errorCode;
+    UnicodeSet* leads = new UnicodeSet();
+    uprv_currencyLeads(fLocaleName.data(), *leads, errorCode);
+
+    // Always apply case mapping closure for currencies
+    leads->closeOver(USET_ADD_CASE_MAPPINGS);
+    leads->freeze();
+    return leads;
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_CURRENCY_H__
+#define __NUMPARSE_CURRENCY_H__
+
+#include "numparse_types.h"
+#include "charstr.h"
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+
+
+/**
+ * Matches currencies according to all available strings in locale data.
+ *
+ * The matcher keeps only the locale name; the actual lookups are delegated
+ * to uprv_parseCurrency() and uprv_currencyLeads().
+ *
+ * The implementation of this class is different between J and C. See #13584 for a follow-up.
+ *
+ * @author sffc
+ */
+class CurrencyNamesMatcher : public NumberParseMatcher, public UMemory {
+ public:
+ CurrencyNamesMatcher() = default; // WARNING: Leaves the object in an unusable state; assign a locale-constructed matcher before use
+
+ CurrencyNamesMatcher(const Locale& locale, UErrorCode& status); // stores locale.getName() for later lookups
+
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; // returns true if the whole segment is a prefix of some currency string; false if a currency code is already set
+
+ const UnicodeSet* getLeadCodePoints() const override; // returns a newly allocated, frozen set of possible first code points
+
+ private:
+ // We could use Locale instead of CharString here, but
+ // Locale has a non-trivial default constructor.
+ CharString fLocaleName;
+
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_CURRENCY_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
-// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ parser->addMatcher(parser->fLocalMatchers.currencyNames = {locale, status});
// parser.addMatcher(new RequireNumberMatcher());
parser->freeze();
fMatchers[fNumMatchers] = &matcher;
if (fComputeLeads) {
- fLeads[fNumMatchers] = matcher.getLeadCodePoints();
+ addLeadCodePointsForMatcher(matcher);
}
fNumMatchers++;
}
+// Records the lead code point set for the matcher that is about to occupy
+// slot fNumMatchers. When the parser ignores case, the matcher's own set is
+// deleted and replaced by a frozen copy closed over case mappings.
+void NumberParserImpl::addLeadCodePointsForMatcher(const NumberParseMatcher& matcher) {
+    const UnicodeSet* matcherLeads = matcher.getLeadCodePoints();
+    const bool ignoreCase = (fParseFlags & PARSE_FLAG_IGNORE_CASE) != 0;
+    if (!ignoreCase) {
+        // Case-sensitive parsing: use the matcher's set as-is.
+        fLeads[fNumMatchers] = matcherLeads;
+        return;
+    }
+
+    // TODO: Avoid the clone operation here.
+    UnicodeSet* caseClosed = static_cast<UnicodeSet*>(matcherLeads->cloneAsThawed());
+    delete matcherLeads;
+    caseClosed->closeOver(USET_ADD_CASE_MAPPINGS);
+    caseClosed->freeze();
+    fLeads[fNumMatchers] = caseClosed;
+}
+
void NumberParserImpl::freeze() {
fFrozen = true;
}
#include "numparse_symbols.h"
#include "numparse_scientific.h"
#include "unicode/uniset.h"
+#include "numparse_currency.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
bool fComputeLeads;
bool fFrozen = false;
- // WARNING: All of these matchers start in an uninitialized state.
+ // WARNING: All of these matchers start in an undefined state (default-constructed).
// You must use an assignment operator on them before using.
struct {
IgnorablesMatcher ignorables;
PlusSignMatcher plusSign;
DecimalMatcher decimal;
ScientificMatcher scientific;
+ CurrencyNamesMatcher currencyNames;
} fLocalMatchers;
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
+ void addLeadCodePointsForMatcher(const NumberParseMatcher& matcher);
+
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
flags = 0;
prefix.setToBogus();
suffix.setToBogus();
- currencyCode.setToBogus();
+ currencyCode[0] = 0;
}
void ParsedNumber::setCharsConsumed(const StringSegment& segment) {
/**
* The currency that got consumed.
*/
- UnicodeString currencyCode;
+ UChar currencyCode[4];
ParsedNumber();
{3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
-// {3, u"514.23 USD", u"¤0", 10, 514.23},
-// {3, u"514.23 GBP", u"¤0", 10, 514.23},
+ {3, u"514.23 USD", u"¤0", 10, 514.23},
+ {3, u"514.23 GBP", u"¤0", 10, 514.23},
// {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
// {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
// {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
-// {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
+ {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
// {3, u"📺1.23", u"📺0;📻0", 6, 1.23},
// {3, u"📻1.23", u"📺0;📻0", 6, -1.23},