From: Shane F. Carr <shane@unicode.org>
Date: Fri, 16 Sep 2022 07:08:41 +0000 (-0700)
Subject: ICU-20512 Add extra matchers to handle empty currency symbols
X-Git-Tag: release-72-rc~18
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9dc1c020a19a5b950af3e6304b97e7396b92aaaf;p=icu

ICU-20512 Add extra matchers to handle empty currency symbols
---

diff --git a/icu4c/source/i18n/number_currencysymbols.cpp b/icu4c/source/i18n/number_currencysymbols.cpp
index da1812f49f0..8d5127556be 100644
--- a/icu4c/source/i18n/number_currencysymbols.cpp
+++ b/icu4c/source/i18n/number_currencysymbols.cpp
@@ -108,6 +108,10 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError
     }
 }
 
+bool CurrencySymbols::hasEmptyCurrencySymbol() const {
+    return !fCurrencySymbol.isBogus() && fCurrencySymbol.isEmpty();
+}
+
 
 CurrencyUnit
 icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale,
diff --git a/icu4c/source/i18n/number_currencysymbols.h b/icu4c/source/i18n/number_currencysymbols.h
index 7e38fdf8287..c2223bd0f0b 100644
--- a/icu4c/source/i18n/number_currencysymbols.h
+++ b/icu4c/source/i18n/number_currencysymbols.h
@@ -41,6 +41,8 @@ class U_I18N_API CurrencySymbols : public UMemory {
 
     UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const;
 
+    bool hasEmptyCurrencySymbol() const;
+
   protected:
     // Required fields:
     CurrencyUnit fCurrency;
diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp
index b6543b262b4..088a30ecd7f 100644
--- a/icu4c/source/i18n/number_patternmodifier.cpp
+++ b/icu4c/source/i18n/number_patternmodifier.cpp
@@ -284,6 +284,7 @@ void MutablePatternModifier::prepareAffix(bool isPrefix) {
             fApproximately,
             fPlural,
             fPerMilleReplacesPercent,
+            false, // dropCurrencySymbols
             currentAffix);
 }
 
diff --git a/icu4c/source/i18n/number_patternstring.cpp b/icu4c/source/i18n/number_patternstring.cpp
index 2738895d8ad..557a7320856 100644
--- a/icu4c/source/i18n/number_patternstring.cpp
+++ b/icu4c/source/i18n/number_patternstring.cpp
@@ -1056,7 +1056,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
                                                     PatternSignType patternSignType,
                                                     bool approximately,
                                                     StandardPlural::Form plural,
-                                                    bool perMilleReplacesPercent, UnicodeString& output) {
+                                                    bool perMilleReplacesPercent,
+                                                    bool dropCurrencySymbols,
+                                                    UnicodeString& output) {
 
     // Should the output render '+' where '-' would normally appear in the pattern?
     bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
@@ -1130,6 +1132,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
         if (perMilleReplacesPercent && candidate == u'%') {
             candidate = u'‰';
         }
+        if (dropCurrencySymbols && candidate == u'\u00A4') {
+            continue;
+        }
         output.append(candidate);
     }
 }
diff --git a/icu4c/source/i18n/number_patternstring.h b/icu4c/source/i18n/number_patternstring.h
index 94afda37229..08696697847 100644
--- a/icu4c/source/i18n/number_patternstring.h
+++ b/icu4c/source/i18n/number_patternstring.h
@@ -317,7 +317,9 @@ class U_I18N_API PatternStringUtils {
     static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
                                            PatternSignType patternSignType,
                                            bool approximately,
-                                           StandardPlural::Form plural, bool perMilleReplacesPercent,
+                                           StandardPlural::Form plural,
+                                           bool perMilleReplacesPercent,
+                                           bool dropCurrencySymbols,
                                            UnicodeString& output);
 
     static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum);
diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp
index 14140065983..ad3d48b4731 100644
--- a/icu4c/source/i18n/numparse_affixes.cpp
+++ b/icu4c/source/i18n/numparse_affixes.cpp
@@ -169,6 +169,10 @@ NumberParseMatcher* AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp,
     return result;
 }
 
+bool AffixTokenMatcherWarehouse::hasEmptyCurrencySymbol() const {
+    return fSetupData->currencySymbols.hasEmptyCurrencySymbol();
+}
+
 
 CodePointMatcher::CodePointMatcher(UChar32 cp)
         : fCp(cp) {}
@@ -280,8 +284,16 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
     AffixPatternMatcher* posSuffix = nullptr;
 
     // Pre-process the affix strings to resolve LDML rules like sign display.
-    for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT; typeInt++) {
-        auto type = static_cast<PatternSignType>(typeInt);
+    for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT * 2; typeInt++) {
+        auto type = static_cast<PatternSignType>(typeInt / 2);
+        bool dropCurrencySymbols = (typeInt % 2) == 1;
+
+        if (dropCurrencySymbols && !patternInfo.hasCurrencySign()) {
+            continue;
+        }
+        if (dropCurrencySymbols && !fTokenWarehouse->hasEmptyCurrencySymbol()) {
+            continue;
+        }
 
         // Skip affixes in some cases
         if (type == PATTERN_SIGN_TYPE_POS
@@ -297,7 +309,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
         // TODO: Handle approximately sign?
         bool hasPrefix = false;
         PatternStringUtils::patternInfoToStringBuilder(
-                patternInfo, true, type, false, StandardPlural::OTHER, false, sb);
+                patternInfo, true, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
         fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                 sb, *fTokenWarehouse, parseFlags, &hasPrefix, status);
         AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
@@ -307,7 +319,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
         // TODO: Handle approximately sign?
         bool hasSuffix = false;
         PatternStringUtils::patternInfoToStringBuilder(
-                patternInfo, false, type, false, StandardPlural::OTHER, false, sb);
+                patternInfo, false, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
         fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                 sb, *fTokenWarehouse, parseFlags, &hasSuffix, status);
         AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h
index a82b731ab5b..ad731ed5d80 100644
--- a/icu4c/source/i18n/numparse_affixes.h
+++ b/icu4c/source/i18n/numparse_affixes.h
@@ -101,6 +101,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
 
     NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
 
+    bool hasEmptyCurrencySymbol() const;
+
   private:
     // NOTE: The following field may be unsafe to access after construction is done!
     const AffixTokenMatcherSetupData* fSetupData;
@@ -204,10 +206,12 @@ class AffixMatcherWarehouse {
                              UErrorCode& status);
 
   private:
-    // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
-    AffixMatcher fAffixMatchers[9];
-    // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
-    AffixPatternMatcher fAffixPatternMatchers[6];
+    // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
+    // and doubled since there may be an empty currency symbol
+    AffixMatcher fAffixMatchers[18];
+    // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each,
+    // and doubled since there may be an empty currency symbol
+    AffixPatternMatcher fAffixPatternMatchers[12];
     // Reference to the warehouse for tokens used by the AffixPatternMatchers
     AffixTokenMatcherWarehouse* fTokenWarehouse;
 
diff --git a/icu4c/source/test/cintltst/cnumtst.c b/icu4c/source/test/cintltst/cnumtst.c
index 615deacf31f..6448760f2c6 100644
--- a/icu4c/source/test/cintltst/cnumtst.c
+++ b/icu4c/source/test/cintltst/cnumtst.c
@@ -78,6 +78,7 @@ static void TestSciNotationMaxFracCap(void);
 static void TestMinIntMinFracZero(void);
 static void Test21479_ExactCurrency(void);
 static void Test22088_Ethiopic(void);
+static void TestParseWithEmptyCurr(void);
 
 #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x)
 
@@ -121,6 +122,7 @@ void addNumForTest(TestNode** root)
     TESTCASE(TestMinIntMinFracZero);
     TESTCASE(Test21479_ExactCurrency);
     TESTCASE(Test22088_Ethiopic);
+    TESTCASE(TestParseWithEmptyCurr);
 }
 
 /* test Parse int 64 */
@@ -3634,4 +3636,147 @@ static void Test22088_Ethiopic(void) {
     unum_close(nf3);
 }
 
+static void TestParseWithEmptyCurr(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UNumberFormat* unum = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status);
+    if (U_FAILURE(status)) {
+        log_data_err("unum_open UNUM_CURRENCY for \"en_US\" fails with %s\n", u_errorName(status));
+    } else {
+        unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
+        if (U_FAILURE(status)) {
+            log_err("unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s\n", u_errorName(status));
+        } else {
+            char bbuf[kBBufMax] = { 0 };
+            UChar curr[4] = { 0 };
+            int32_t ppos, blen;
+            double val;
+            const UChar* text = u"3";
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            blen = unum_parseDecimal(unum, text, -1, &ppos, bbuf, kBBufMax, &status);
+            if (U_FAILURE(status)) {
+                log_err("unum_parseDecimal u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
+            } else if (ppos != 1 || blen != 1 || bbuf[0] != '3') {
+                log_err("unum_parseDecimal expect ppos 1, blen 1, str 3; get %d, %d, %s\n", ppos, blen, bbuf);
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, text, -1, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("unum_parseDouble u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
+            } else if (ppos != 1 || val != 3.0) {
+                log_err("unum_parseDouble expect ppos 1, val 3.0; get %d, %.2f\n", ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDoubleCurrency(unum, text, -1, &ppos, curr, &status);
+            if (U_SUCCESS(status)) {
+                log_err("unum_parseDoubleCurrency u\"3\" with empty curr symbol succeeds, get ppos %d, val %.2f\n", ppos, val);
+            }
+        }
+        unum_close(unum);
+    }
+
+    //                              "¤#,##0.00" "¤ #,##0.00" "#,##0.00 ¤" "#,##,##0.00¤"
+    static const char* locales[] = {"en_US",    "nb_NO",     "cs_CZ",     "bn_BD",       NULL };
+    const char ** localesPtr = locales;
+    const char* locale;
+    while ((locale = *localesPtr++) != NULL) {
+        status = U_ZERO_ERROR;
+        unum = unum_open(UNUM_CURRENCY, NULL, 0, locale, NULL, &status);
+        if (U_FAILURE(status)) {
+            log_data_err("locale %s unum_open UNUM_CURRENCY fails with %s\n", locale, u_errorName(status));
+        } else {
+            UChar ubuf[kUBufMax];
+            int32_t ppos, ulen;
+            const double posValToUse = 37.0;
+            const double negValToUse = -3.0;
+            double val;
+
+            status = U_ZERO_ERROR;
+            unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s, skipping\n", locale, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
+            } else if (ppos != ulen || val != posValToUse) {
+                log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
+            } else if (ppos != ulen || val != negValToUse) {
+                log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            unum_applyPattern(unum, false, u"#,##0.00¤", -1, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_applyPattern \"#,##0.00¤\" fails with %s, skipping\n", locale, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
+            } else if (ppos != ulen || val != posValToUse) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
+                continue;
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
+            } else if (ppos != ulen || val != negValToUse) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
+            }
+
+            unum_close(unum);
+        }
+    }
+}
+
 #endif /* #if !UCONFIG_NO_FORMATTING */