]> granicus.if.org Git - icu/commitdiff
ICU-10069 Take into account compatible currency symbols, e.g half-width and full...
author Travis Keep <keep94@gmail.com>
Thu, 13 Jun 2013 18:38:53 +0000 (18:38 +0000)
committer Travis Keep <keep94@gmail.com>
Thu, 13 Jun 2013 18:38:53 +0000 (18:38 +0000)
X-SVN-Rev: 33825

icu4c/source/i18n/ucurr.cpp
icu4c/source/test/intltest/numfmtst.cpp
icu4c/source/test/intltest/numfmtst.h

index 5e5c4e892d8256edf8f07efb0438516036686314..d8031ac87ce8eda443c5a2c56cc19d7567a0da49 100644 (file)
@@ -23,6 +23,7 @@
 #include "ucln_in.h"
 #include "uenumimp.h"
 #include "uhash.h"
+#include "hash.h"
 #include "uresimp.h"
 #include "ulist.h"
 #include "ureslocs.h"
@@ -53,6 +54,14 @@ static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000,
 
 static const int32_t MAX_POW10 = (sizeof(POW10)/sizeof(POW10[0])) - 1;
 
+// Pairs of currency symbols treated as equivalent; written with \u escapes that populateCurrSymbolsEquiv() passes through unescape().
+static const char *EQUIV_CURRENCY_SYMBOLS[][2] = {
+    {"\\u00a5", "\\uffe5"},  // U+00A5 YEN SIGN, U+FFE5 FULLWIDTH YEN SIGN
+    {"$", "\\ufe69"},  // U+0024 DOLLAR SIGN, U+FE69 SMALL DOLLAR SIGN
+    {"$", "\\uff04"},  // U+0024 DOLLAR SIGN, U+FF04 FULLWIDTH DOLLAR SIGN
+    {"\\u20a8", "\\u20b9"},  // U+20A8 RUPEE SIGN, U+20B9 INDIAN RUPEE SIGN
+    {"\\u00a3", "\\u20a4"}};  // U+00A3 POUND SIGN, U+20A4 LIRA SIGN
+
 #define ISO_CURRENCY_CODE_LENGTH 3
 
 //------------------------------------------------------------
@@ -102,6 +111,120 @@ static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0};
 static const UHashtable* gIsoCodes = NULL;
 static UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER;
 
+// Currency symbol equivalences
+static const icu::Hashtable* gCurrSymbolsEquiv = NULL;  // assigned by initCurrSymbolsEquiv(); stays NULL if that fails
+static UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER;  // guards the one-time call made in getCurrSymbolsEquiv()
+
+// EquivIterator walks the circle of strings recorded as equivalent to a given
+// string, s, yielding each of them once. It never yields s itself.
+class EquivIterator : icu::UMemory {
+public:
+    // Constructor. hash maps each string to the next string in its equivalence
+    // circle; s is the string whose equivalents are enumerated.
+    inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s)
+        : _hash(hash) { 
+        _start = _current = &s;  // keeps the address of s, not a copy: s must outlive this iterator
+    }
+    inline ~EquivIterator() { }
+
+    // next returns the next equivalent string or NULL if there are no more.
+    // If s has no equivalent strings, next returns NULL on the first call.
+    const icu::UnicodeString *next();
+private:
+    const icu::Hashtable& _hash;         // equivalence table; held by reference, not owned
+    const icu::UnicodeString* _start;    // address of the string passed to the constructor
+    const icu::UnicodeString* _current;  // last string returned by next(); equals _start initially
+};
+
+const icu::UnicodeString *
+EquivIterator::next() {
+    const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current);  // table entry for the current string
+    if (_next == NULL) {  // the current string has no entry in the table
+        U_ASSERT(_current == _start);  // asserted to happen only before any string has been yielded
+        return NULL;
+    }
+    if (*_next == *_start) {  // back at the starting string: the walk is complete
+        return NULL;
+    }
+    _current = _next;  // advance, so the following call continues from here
+    return _next;
+}
+
+// makeEquivalent makes lhs and rhs equivalent by updating the equivalence relations in hash.
+// No-op if status is already a failure; sets U_MEMORY_ALLOCATION_ERROR if a copy cannot be allocated.
+static void makeEquivalent(
+    const icu::UnicodeString &lhs,
+    const icu::UnicodeString &rhs,
+    icu::Hashtable* hash, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (lhs == rhs) {
+        // already equivalent
+        return;
+    }
+    EquivIterator leftIter(*hash, lhs);
+    EquivIterator rightIter(*hash, rhs);
+    const icu::UnicodeString *firstLeft = leftIter.next();  // current successor of lhs, or NULL if lhs has none
+    const icu::UnicodeString *firstRight = rightIter.next();  // current successor of rhs, or NULL if rhs has none
+    const icu::UnicodeString *nextLeft = firstLeft;
+    const icu::UnicodeString *nextRight = firstRight;
+    while (nextLeft != NULL && nextRight != NULL) {  // step through both circles together until either one ends
+        if (*nextLeft == rhs || *nextRight == lhs) {
+            // Already equivalent
+            return;
+        }
+        nextLeft = leftIter.next();
+        nextRight = rightIter.next();
+    }
+    // Not equivalent. Must join.
+    icu::UnicodeString *newFirstLeft;  // copy that becomes the new successor of lhs
+    icu::UnicodeString *newFirstRight;  // copy that becomes the new successor of rhs
+    if (firstRight == NULL && firstLeft == NULL) {
+        // Neither lhs nor rhs belongs to an equivalence circle, so we form
+        // a new equivalence circle of just lhs and rhs.
+        newFirstLeft = new icu::UnicodeString(rhs);
+        newFirstRight = new icu::UnicodeString(lhs);
+    } else if (firstRight == NULL) {
+        // lhs belongs to an equivalence circle, but rhs does not, so we link
+        // rhs into lhs' circle.
+        newFirstLeft = new icu::UnicodeString(rhs);
+        newFirstRight = new icu::UnicodeString(*firstLeft);
+    } else if (firstLeft == NULL) {
+        // rhs belongs to an equivalence circle, but lhs does not, so we link
+        // lhs into rhs' circle.
+        newFirstLeft = new icu::UnicodeString(*firstRight);
+        newFirstRight = new icu::UnicodeString(lhs);
+    } else {
+        // Both lhs and rhs belong to different equivalence circles. We link
+        // them together to form one single, larger equivalence circle.
+        newFirstLeft = new icu::UnicodeString(*firstRight);
+        newFirstRight = new icu::UnicodeString(*firstLeft);
+    }
+    if (newFirstLeft == NULL || newFirstRight == NULL) {  // at least one allocation failed: free whichever succeeded
+        delete newFirstLeft;
+        delete newFirstRight;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    hash->put(lhs, (void *) newFirstLeft, status);  // lhs now maps to its new successor
+    hash->put(rhs, (void *) newFirstRight, status);  // rhs now maps to its new successor
+}
+
+// countEquivalent counts how many strings are equivalent to s.
+// hash stores all the equivalence relations.
+// countEquivalent does not include s itself in the count.
+static int countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) {
+    int result = 0;
+    EquivIterator iter(hash, s);
+    while (iter.next() != NULL) {  // one increment per string the iterator yields
+        ++result;
+    }
+    return result;
+}
+
+static const icu::Hashtable* getCurrSymbolsEquiv();
+
 //------------------------------------------------------------
 // Code
 
@@ -119,6 +242,18 @@ isoCodes_cleanup(void)
     return TRUE;
 }
 
+/**
+ * Cleanup callback: deletes gCurrSymbolsEquiv, sets it to NULL, resets gCurrSymbolsEquivInitOnce and returns TRUE.
+ */
+static UBool U_CALLCONV 
+currSymbolsEquiv_cleanup(void)
+{
+    delete const_cast<icu::Hashtable *>(gCurrSymbolsEquiv);  // the global is declared pointer-to-const, hence the cast
+    gCurrSymbolsEquiv = NULL;
+    gCurrSymbolsEquivInitOnce.reset();
+    return TRUE;
+}
+
 /**
  * Deleter for OlsonToMetaMappingEntry
  */
@@ -128,6 +263,15 @@ deleteIsoCodeEntry(void *obj) {
     uprv_free(entry);
 }
 
+/**
+ * Value deleter for gCurrSymbolsEquiv: deletes obj as an icu::UnicodeString.
+ */
+static void U_CALLCONV
+deleteUnicode(void *obj) {
+    icu::UnicodeString *entry = (icu::UnicodeString*)obj;
+    delete entry;
+}
+
 /**
  * Unfortunately, we have to convert the UChar* currency code to char*
  * to use it as a resource key.
@@ -376,6 +520,7 @@ static UBool U_CALLCONV currency_cleanup(void) {
      */
     currency_cache_cleanup();
     isoCodes_cleanup();
+    currSymbolsEquiv_cleanup();
 
     return TRUE;
 }
@@ -742,6 +887,7 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_
     const UChar* s = NULL;
     char locale[ULOC_FULLNAME_CAPACITY];
     uprv_strcpy(locale, loc);
+    const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
     for (;;) {
         UErrorCode ec2 = U_ZERO_ERROR;
         // TODO: ures_openDirect?
@@ -767,6 +913,9 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_
                 *total_currency_symbol_count += fmt_count;
             } else {
                 ++(*total_currency_symbol_count);  // currency symbol
+                if (currencySymbolsEquiv != NULL) {
+                    *total_currency_symbol_count += countEquivalent(*currencySymbolsEquiv, UnicodeString(TRUE, s, len));
+                }
             }
 
             ++(*total_currency_symbol_count); // iso code
@@ -823,6 +972,7 @@ collectCurrencyNames(const char* locale,
                      int32_t* total_currency_symbol_count, 
                      UErrorCode& ec) {
     U_NAMESPACE_USE
+    const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
     // Look up the Currencies resource for the given locale.
     UErrorCode ec2 = U_ZERO_ERROR;
 
@@ -902,6 +1052,17 @@ collectCurrencyNames(const char* locale,
                 (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*)s;
                 (*currencySymbols)[*total_currency_symbol_count].flag = 0;
                 (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = len;
+                // Add equivalent symbols
+                if (currencySymbolsEquiv != NULL) {
+                    EquivIterator iter(*currencySymbolsEquiv, UnicodeString(TRUE, s, len));
+                    const UnicodeString *symbol;
+                    while ((symbol = iter.next()) != NULL) {
+                        (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso;
+                        (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*) symbol->getBuffer();
+                        (*currencySymbols)[*total_currency_symbol_count].flag = 0;
+                        (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length();
+                    }
+                }
             }
 
             // Add currency long name.
@@ -1952,6 +2113,41 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
                            //       and read only access is safe without synchronization.
 }
 
+static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {  // feeds every EQUIV_CURRENCY_SYMBOLS pair to makeEquivalent()
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t length = sizeof(EQUIV_CURRENCY_SYMBOLS) / sizeof(EQUIV_CURRENCY_SYMBOLS[0]);  // number of pairs in the table
+    for (int32_t i = 0; i < length; ++i) {
+        icu::UnicodeString lhs(EQUIV_CURRENCY_SYMBOLS[i][0], -1, US_INV);
+        icu::UnicodeString rhs(EQUIV_CURRENCY_SYMBOLS[i][1], -1, US_INV);
+        makeEquivalent(lhs.unescape(), rhs.unescape(), hash, status);  // the unescape() results are linked, not the raw escaped text
+        if (U_FAILURE(status)) {  // stop at the first pair that fails
+            return;
+        }
+    }
+}
+
+static void U_CALLCONV initCurrSymbolsEquiv() {  // builds gCurrSymbolsEquiv; every failure path leaves it NULL
+    U_ASSERT(gCurrSymbolsEquiv == NULL);
+    UErrorCode status = U_ZERO_ERROR;
+    ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY, currency_cleanup);  // registered before the table is allocated
+    icu::Hashtable *temp = new icu::Hashtable(status);
+    if (temp == NULL) {
+        return;
+    }
+    if (U_FAILURE(status)) {
+        delete temp;
+        return;
+    }
+    temp->setValueDeleter(deleteUnicode);  // installs deleteUnicode as the table's value deleter
+    populateCurrSymbolsEquiv(temp, status);
+    if (U_FAILURE(status)) {
+        delete temp;
+        return;
+    }
+    gCurrSymbolsEquiv = temp;  // assigned only after population succeeded
+}
 
 U_CAPI UBool U_EXPORT2
 ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) {
@@ -1972,6 +2168,11 @@ ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eError
     return TRUE;
 }
 
+static const icu::Hashtable* getCurrSymbolsEquiv() {  // result is NULL when initCurrSymbolsEquiv() failed; the callers visible here check for that
+    umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv);
+    return gCurrSymbolsEquiv;
+}
+
 U_CAPI UEnumeration * U_EXPORT2
 ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) {
     UEnumeration *myEnum = NULL;
index f46a51f767c4f6ec9f6ce55cd62f17b5c0098bd8..915a021dab97511cf459a6c7f59e58e17e4f1ca6 100644 (file)
 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0]))
 
 static const UChar EUR[] = {69,85,82,0}; // "EUR"
+static const UChar JPY[] = {0x4A, 0x50, 0x59, 0};
 static const UChar ISO_CURRENCY_USD[] = {0x55, 0x53, 0x44, 0}; // "USD"
 
+
 // *****************************************************************************
 // class NumberFormatTest
 // *****************************************************************************
@@ -120,6 +122,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
   TESTCASE_AUTO(TestFormattableSize);
   TESTCASE_AUTO(TestSignificantDigits);
   TESTCASE_AUTO(TestShowZero);
+  TESTCASE_AUTO(TestCompatibleCurrencies);
   TESTCASE_AUTO_END;
 }
 
@@ -2613,6 +2616,31 @@ void NumberFormatTest::expectPad(DecimalFormat& fmt, const UnicodeString& pat,
               ", expected " + pos + " " + width + " " + pad);
     }
 }
+
+void NumberFormatTest::TestCompatibleCurrencies() {  // both yen-sign forms are expected to parse as JPY with a US-locale currency format
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<NumberFormat> fmt(
+        NumberFormat::createCurrencyInstance(Locale::getUS(), status));
+    if (U_FAILURE(status)) {
+        errln("Could not create number format instance.");
+        return;
+    }
+    expectParseCurrency(*fmt, JPY, "\\u00A51,235");  // U+00A5 YEN SIGN
+    expectParseCurrency(*fmt, JPY, "\\uFFE51,235");  // U+FFE5 FULLWIDTH YEN SIGN
+}
+
+void NumberFormatTest::expectParseCurrency(const NumberFormat &fmt, const UChar* currency, const char *text) {  // parses text with fmt and checks the resulting ISO currency code
+    ParsePosition ppos;
+    UnicodeString utext = ctou(text);  // text is converted with ctou() before parsing
+    LocalPointer<CurrencyAmount> currencyAmount(fmt.parseCurrency(utext, ppos));
+    if (!ppos.getIndex()) {  // an index of 0 after parsing is reported as a failed parse
+        errln(UnicodeString("Parse of ") + utext + " should have succeeded.");
+        return;
+    }
+    assertEquals("currency", currency, currencyAmount->getISOCurrency());  // parsed ISO code must equal the expected one
+}
+  
+
 void NumberFormatTest::TestJB3832(){
     const char* localeID = "pt_PT@currency=PTE";
     Locale loc(localeID);
index 82dac27b1605e0c783e19c70e481b482f3d49e6a..06a7ceda64be4c972585593b179384a5654f01b0 100644 (file)
@@ -165,7 +165,9 @@ class NumberFormatTest: public CalendarTimeZoneTest {
     void TestSignificantDigits();
     void TestShowZero();
 
+    void TestCompatibleCurrencies();
  private:
+    void expectParseCurrency(const NumberFormat &fmt, const UChar* currency, const char *text);
 
     static UBool equalValue(const Formattable& a, const Formattable& b);