From: Travis Keep Date: Thu, 13 Jun 2013 18:38:53 +0000 (+0000) Subject: ICU-10069 Take into account compatible currency symbols, e.g half-width and full... X-Git-Tag: milestone-59-0-1~2836 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6676337f3eb1f9a92296480273775afc81ad896f;p=icu ICU-10069 Take into account compatible currency symbols, e.g half-width and full width yen symbol when parsing currencies in C++. Currency equivalence relation is hard-coded for now. X-SVN-Rev: 33825 --- diff --git a/icu4c/source/i18n/ucurr.cpp b/icu4c/source/i18n/ucurr.cpp index 5e5c4e892d8..d8031ac87ce 100644 --- a/icu4c/source/i18n/ucurr.cpp +++ b/icu4c/source/i18n/ucurr.cpp @@ -23,6 +23,7 @@ #include "ucln_in.h" #include "uenumimp.h" #include "uhash.h" +#include "hash.h" #include "uresimp.h" #include "ulist.h" #include "ureslocs.h" @@ -53,6 +54,14 @@ static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000, static const int32_t MAX_POW10 = (sizeof(POW10)/sizeof(POW10[0])) - 1; +// Defines equivalent currency symbols. Each row pairs two interchangeable symbols, spelled with backslash-u escapes that are unescaped when the table is loaded. +static const char *EQUIV_CURRENCY_SYMBOLS[][2] = { + {"\\u00a5", "\\uffe5"}, + {"$", "\\ufe69"}, + {"$", "\\uff04"}, + {"\\u20a8", "\\u20b9"}, + {"\\u00a3", "\\u20a4"}}; + #define ISO_CURRENCY_CODE_LENGTH 3 //------------------------------------------------------------ @@ -102,6 +111,120 @@ static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0}; static const UHashtable* gIsoCodes = NULL; static UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER; +// Currency symbol equivalences. NULL until getCurrSymbolsEquiv() has built the table successfully. +static const icu::Hashtable* gCurrSymbolsEquiv = NULL; +static UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER; + +// EquivIterator iterates over all strings that are equivalent to a given +// string, s. Note that EquivIterator will never yield s itself. +class EquivIterator : icu::UMemory { +public: + // Constructor. hash stores the equivalence relationships; s is the string + // for which we find equivalent strings. Only the address of s is stored (no copy is made), so s must outlive the iterator. 
+ inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s) + : _hash(hash) { + _start = _current = &s; /* keeps the address of s, not a copy */ + } + inline ~EquivIterator() { } + + // next returns the next equivalent string or NULL if there are no more. + // If s has no equivalent strings, next returns NULL on the first call. + const icu::UnicodeString *next(); +private: + const icu::Hashtable& _hash; + const icu::UnicodeString* _start; + const icu::UnicodeString* _current; +}; + +const icu::UnicodeString * +EquivIterator::next() { + const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current); + if (_next == NULL) { + U_ASSERT(_current == _start); + return NULL; + } + if (*_next == *_start) { /* walked all the way around the circle */ + return NULL; + } + _current = _next; + return _next; +} + +// makeEquivalent makes lhs and rhs equivalent by updating the equivalence +// relations in hash accordingly. hash maps each string to the next string in its equivalence circle; nothing is changed if status is already a failure or if lhs and rhs are already equivalent. +static void makeEquivalent( + const icu::UnicodeString &lhs, + const icu::UnicodeString &rhs, + icu::Hashtable* hash, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (lhs == rhs) { + // already equivalent + return; + } + EquivIterator leftIter(*hash, lhs); + EquivIterator rightIter(*hash, rhs); + const icu::UnicodeString *firstLeft = leftIter.next(); + const icu::UnicodeString *firstRight = rightIter.next(); + const icu::UnicodeString *nextLeft = firstLeft; + const icu::UnicodeString *nextRight = firstRight; + while (nextLeft != NULL && nextRight != NULL) { + if (*nextLeft == rhs || *nextRight == lhs) { + // Already equivalent: one string was found in the other's circle. + return; + } + nextLeft = leftIter.next(); + nextRight = rightIter.next(); + } + // Not equivalent. Must join. + icu::UnicodeString *newFirstLeft; + icu::UnicodeString *newFirstRight; + if (firstRight == NULL && firstLeft == NULL) { + // Neither lhs nor rhs belongs to an equivalence circle, so we form + // a new equivalence circle of just lhs and rhs. 
+ newFirstLeft = new icu::UnicodeString(rhs); + newFirstRight = new icu::UnicodeString(lhs); + } else if (firstRight == NULL) { + // lhs belongs to an equivalence circle, but rhs does not, so we link + // rhs into lhs' circle. + newFirstLeft = new icu::UnicodeString(rhs); + newFirstRight = new icu::UnicodeString(*firstLeft); + } else if (firstLeft == NULL) { + // rhs belongs to an equivalence circle, but lhs does not, so we link + // lhs into rhs' circle. + newFirstLeft = new icu::UnicodeString(*firstRight); + newFirstRight = new icu::UnicodeString(lhs); + } else { + // Both lhs and rhs belong to different equivalence circles. We link + // them together to form one single, larger equivalence circle. + newFirstLeft = new icu::UnicodeString(*firstRight); + newFirstRight = new icu::UnicodeString(*firstLeft); + } + if (newFirstLeft == NULL || newFirstRight == NULL) { + delete newFirstLeft; + delete newFirstRight; + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + hash->put(lhs, (void *) newFirstLeft, status); /* lhs takes over rhs' old successor, or rhs itself if rhs had none */ + hash->put(rhs, (void *) newFirstRight, status); /* rhs takes over lhs' old successor, or lhs itself if lhs had none */ +} + +// countEquivalent counts how many strings are equivalent to s. +// hash stores all the equivalence relations. +// countEquivalent does not include s itself in the count. 
+static int countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) { + int result = 0; + EquivIterator iter(hash, s); + while (iter.next() != NULL) { + ++result; + } + return result; +} + +static const icu::Hashtable* getCurrSymbolsEquiv(); + //------------------------------------------------------------ // Code @@ -119,6 +242,18 @@ isoCodes_cleanup(void) return TRUE; } +/** + * Cleanup callback: deletes the currency symbol equivalence table and resets its init-once flag. + */ +static UBool U_CALLCONV +currSymbolsEquiv_cleanup(void) +{ + delete const_cast<icu::Hashtable *>(gCurrSymbolsEquiv); + gCurrSymbolsEquiv = NULL; + gCurrSymbolsEquivInitOnce.reset(); + return TRUE; +} + /** * Deleter for OlsonToMetaMappingEntry */ @@ -128,6 +263,15 @@ deleteIsoCodeEntry(void *obj) { uprv_free(entry); } +/** + * Value deleter for gCurrSymbolsEquiv: deletes one heap-allocated UnicodeString. + */ +static void U_CALLCONV +deleteUnicode(void *obj) { + icu::UnicodeString *entry = (icu::UnicodeString*)obj; + delete entry; +} + /** * Unfortunately, we have to convert the UChar* currency code to char* * to use it as a resource key. @@ -376,6 +520,7 @@ static UBool U_CALLCONV currency_cleanup(void) { */ currency_cache_cleanup(); isoCodes_cleanup(); + currSymbolsEquiv_cleanup(); return TRUE; } @@ -742,6 +887,7 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_ const UChar* s = NULL; char locale[ULOC_FULLNAME_CAPACITY]; uprv_strcpy(locale, loc); + const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); for (;;) { UErrorCode ec2 = U_ZERO_ERROR; // TODO: ures_openDirect? 
@@ -767,6 +913,9 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_ *total_currency_symbol_count += fmt_count; } else { ++(*total_currency_symbol_count); // currency symbol + if (currencySymbolsEquiv != NULL) { + *total_currency_symbol_count += countEquivalent(*currencySymbolsEquiv, UnicodeString(TRUE, s, len)); /* the temporary lives until the end of this statement, which covers the whole count */ + } } ++(*total_currency_symbol_count); // iso code @@ -823,6 +972,7 @@ collectCurrencyNames(const char* locale, int32_t* total_currency_symbol_count, UErrorCode& ec) { U_NAMESPACE_USE + const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); // Look up the Currencies resource for the given locale. UErrorCode ec2 = U_ZERO_ERROR; @@ -902,6 +1052,17 @@ collectCurrencyNames(const char* locale, (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*)s; (*currencySymbols)[*total_currency_symbol_count].flag = 0; (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = len; + // Add equivalent symbols. EquivIterator keeps only a pointer to its string argument, so the key must be a named local that outlives the loop rather than a temporary. + if (currencySymbolsEquiv != NULL) { + UnicodeString baseSymbol(TRUE, s, len); + EquivIterator iter(*currencySymbolsEquiv, baseSymbol); + for (const UnicodeString *symbol = iter.next(); symbol != NULL; symbol = iter.next()) { + (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; + (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*) symbol->getBuffer(); + (*currencySymbols)[*total_currency_symbol_count].flag = 0; + (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); + } + } } // Add currency long name. @@ -1952,6 +2113,41 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) { // and read only access is safe without synchronization. 
} +static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) { /* Loads every EQUIV_CURRENCY_SYMBOLS pair into hash; stops at the first failure. */ + if (U_FAILURE(status)) { + return; + } + int32_t length = sizeof(EQUIV_CURRENCY_SYMBOLS) / sizeof(EQUIV_CURRENCY_SYMBOLS[0]); + for (int32_t i = 0; i < length; ++i) { + icu::UnicodeString lhs(EQUIV_CURRENCY_SYMBOLS[i][0], -1, US_INV); + icu::UnicodeString rhs(EQUIV_CURRENCY_SYMBOLS[i][1], -1, US_INV); + makeEquivalent(lhs.unescape(), rhs.unescape(), hash, status); /* table entries are stored escaped, so unescape them first */ + if (U_FAILURE(status)) { + return; + } + } +} + +static void U_CALLCONV initCurrSymbolsEquiv() { /* Init-once callback: builds the table; on any failure gCurrSymbolsEquiv stays NULL. */ + U_ASSERT(gCurrSymbolsEquiv == NULL); + UErrorCode status = U_ZERO_ERROR; + ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY, currency_cleanup); + icu::Hashtable *temp = new icu::Hashtable(status); + if (temp == NULL) { + return; + } + if (U_FAILURE(status)) { + delete temp; + return; + } + temp->setValueDeleter(deleteUnicode); /* values are UnicodeString objects allocated in makeEquivalent */ + populateCurrSymbolsEquiv(temp, status); + if (U_FAILURE(status)) { + delete temp; + return; + } + gCurrSymbolsEquiv = temp; +} U_CAPI UBool U_EXPORT2 ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) { @@ -1972,6 +2168,11 @@ ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eError return TRUE; } +static const icu::Hashtable* getCurrSymbolsEquiv() { /* Returns the equivalence table, or NULL if it could not be built. */ + umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv); + return gCurrSymbolsEquiv; +} + U_CAPI UEnumeration * U_EXPORT2 ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) { UEnumeration *myEnum = NULL; diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index f46a51f767c..915a021dab9 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -42,8 +42,10 @@ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) static const UChar EUR[] = {69,85,82,0}; // "EUR" +static const UChar JPY[] = {0x4A, 0x50, 0x59, 0}; /* "JPY" */ static const UChar ISO_CURRENCY_USD[] = {0x55, 0x53, 0x44, 0}; // 
"USD" + // ***************************************************************************** // class NumberFormatTest // ***************************************************************************** @@ -120,6 +122,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n TESTCASE_AUTO(TestFormattableSize); TESTCASE_AUTO(TestSignificantDigits); TESTCASE_AUTO(TestShowZero); + TESTCASE_AUTO(TestCompatibleCurrencies); TESTCASE_AUTO_END; } @@ -2613,6 +2616,31 @@ void NumberFormatTest::expectPad(DecimalFormat& fmt, const UnicodeString& pat, ", expected " + pos + " " + width + " " + pad); } } + +void NumberFormatTest::TestCompatibleCurrencies() { /* Both the U+00A5 and the U+FFE5 yen sign must parse as JPY. */ + UErrorCode status = U_ZERO_ERROR; + LocalPointer<NumberFormat> fmt( + NumberFormat::createCurrencyInstance(Locale::getUS(), status)); + if (U_FAILURE(status)) { + errln("Could not create number format instance."); + return; + } + expectParseCurrency(*fmt, JPY, "\\u00A51,235"); + expectParseCurrency(*fmt, JPY, "\\uFFE51,235"); +} + +void NumberFormatTest::expectParseCurrency(const NumberFormat &fmt, const UChar* currency, const char *text) { /* Parses text with fmt and checks the ISO code of the result; reports an error if nothing was consumed. */ + ParsePosition ppos; + UnicodeString utext = ctou(text); + LocalPointer<CurrencyAmount> currencyAmount(fmt.parseCurrency(utext, ppos)); + if (!ppos.getIndex()) { + errln(UnicodeString("Parse of ") + utext + " should have succeeded."); + return; + } + assertEquals("currency", currency, currencyAmount->getISOCurrency()); +} + + void NumberFormatTest::TestJB3832(){ const char* localeID = "pt_PT@currency=PTE"; Locale loc(localeID); diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h index 82dac27b160..06a7ceda64b 100644 --- a/icu4c/source/test/intltest/numfmtst.h +++ b/icu4c/source/test/intltest/numfmtst.h @@ -165,7 +165,9 @@ class NumberFormatTest: public CalendarTimeZoneTest { void TestSignificantDigits(); void TestShowZero(); + void TestCompatibleCurrencies(); private: + void expectParseCurrency(const NumberFormat &fmt, const UChar* currency, const char 
*text); static UBool equalValue(const Formattable& a, const Formattable& b);