ICU-20512 Add extra matchers to handle empty currency symbols

author Shane F. Carr <shane@unicode.org>

Fri, 16 Sep 2022 07:08:41 +0000 (00:08 -0700)

committer Peter Edberg <42151464+pedberg-icu@users.noreply.github.com>

Fri, 16 Sep 2022 16:00:50 +0000 (09:00 -0700)
author Shane F. Carr <shane@unicode.org>
Fri, 16 Sep 2022 07:08:41 +0000 (00:08 -0700)
committer Peter Edberg <42151464+pedberg-icu@users.noreply.github.com>
Fri, 16 Sep 2022 16:00:50 +0000 (09:00 -0700)
diff --git a/icu4c/source/i18n/number_currencysymbols.cpp b/icu4c/source/i18n/number_currencysymbols.cpp

index da1812f49f0b712514ff57ad02442cb331d79fab..8d5127556be91ab7d12fc1ed155b7993054049f4 100644 (file)
--- a/icu4c/source/i18n/number_currencysymbols.cpp
+++ b/icu4c/source/i18n/number_currencysymbols.cpp
@@ -108,6 +108,10 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError
      }
  }
  
+// True only when fCurrencySymbol is not bogus and is empty.
+bool CurrencySymbols::hasEmptyCurrencySymbol() const {
+    return fCurrencySymbol.isBogus() ? false : fCurrencySymbol.isEmpty();
+}
  
  CurrencyUnit
  icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale,
diff --git a/icu4c/source/i18n/number_currencysymbols.h b/icu4c/source/i18n/number_currencysymbols.h

index 7e38fdf8287e145910a852f8460dff08fbde4ee3..c2223bd0f0be5c59903d673123666efc4d76c42b 100644 (file)
--- a/icu4c/source/i18n/number_currencysymbols.h
+++ b/icu4c/source/i18n/number_currencysymbols.h
@@ -41,6 +41,8 @@ class U_I18N_API CurrencySymbols : public UMemory {
  
      UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const;
  
+    bool hasEmptyCurrencySymbol() const;
+
    protected:
      // Required fields:
      CurrencyUnit fCurrency;
diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp

index b6543b262b42db4227e96deaa07622f37344ac0e..088a30ecd7ff6f331e8b89785cb8f1474e62ab5a 100644 (file)
--- a/icu4c/source/i18n/number_patternmodifier.cpp
+++ b/icu4c/source/i18n/number_patternmodifier.cpp
@@ -284,6 +284,7 @@ void MutablePatternModifier::prepareAffix(bool isPrefix) {
              fApproximately,
              fPlural,
              fPerMilleReplacesPercent,
+            false, // dropCurrencySymbols
              currentAffix);
  }
  
diff --git a/icu4c/source/i18n/number_patternstring.cpp b/icu4c/source/i18n/number_patternstring.cpp

index 2738895d8ad03fcf65d2f5a6d47658012ba25184..557a7320856562c6dddfeeec5dba509ec34e6f3e 100644 (file)
--- a/icu4c/source/i18n/number_patternstring.cpp
+++ b/icu4c/source/i18n/number_patternstring.cpp
@@ -1056,7 +1056,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
                                                      PatternSignType patternSignType,
                                                      bool approximately,
                                                      StandardPlural::Form plural,
-                                                    bool perMilleReplacesPercent, UnicodeString& output) {
+                                                    bool perMilleReplacesPercent,
+                                                    bool dropCurrencySymbols,
+                                                    UnicodeString& output) {
  
      // Should the output render '+' where '-' would normally appear in the pattern?
      bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
@@ -1130,6 +1132,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
          if (perMilleReplacesPercent && candidate == u'%') {
              candidate = u'‰';
          }
+        if (dropCurrencySymbols && candidate == u'\u00A4') {
+            continue;
+        }
          output.append(candidate);
      }
  }
diff --git a/icu4c/source/i18n/number_patternstring.h b/icu4c/source/i18n/number_patternstring.h

index 94afda372296ae5daed57984d69e9b0954d69ce2..08696697847e77309c3bdba82c5ab8fc1e5ac9b5 100644 (file)
--- a/icu4c/source/i18n/number_patternstring.h
+++ b/icu4c/source/i18n/number_patternstring.h
@@ -317,7 +317,9 @@ class U_I18N_API PatternStringUtils {
      static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
                                             PatternSignType patternSignType,
                                             bool approximately,
-                                           StandardPlural::Form plural, bool perMilleReplacesPercent,
+                                           StandardPlural::Form plural,
+                                           bool perMilleReplacesPercent,
+                                           bool dropCurrencySymbols,
                                             UnicodeString& output);
  
      static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum);
diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp

index 1414006598386683b9794024770bd975b665b9f3..ad3d48b4731f481cc1e9934405159bfd83c7b667 100644 (file)
--- a/icu4c/source/i18n/numparse_affixes.cpp
+++ b/icu4c/source/i18n/numparse_affixes.cpp
@@ -169,6 +169,10 @@ NumberParseMatcher* AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp,
      return result;
  }
  
+// NOTE(review): dereferences fSetupData, which the header marks as possibly unsafe after construction.
+bool AffixTokenMatcherWarehouse::hasEmptyCurrencySymbol() const {
+    return (*fSetupData).currencySymbols.hasEmptyCurrencySymbol();
+}
  
  CodePointMatcher::CodePointMatcher(UChar32 cp)
          : fCp(cp) {}
@@ -280,8 +284,16 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
      AffixPatternMatcher* posSuffix = nullptr;
  
      // Pre-process the affix strings to resolve LDML rules like sign display.
-    for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT; typeInt++) {
-        auto type = static_cast<PatternSignType>(typeInt);
+    for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT * 2; typeInt++) {
+        auto type = static_cast<PatternSignType>(typeInt / 2);
+        bool dropCurrencySymbols = (typeInt % 2) == 1;
+
+        if (dropCurrencySymbols && !patternInfo.hasCurrencySign()) {
+            continue;
+        }
+        if (dropCurrencySymbols && !fTokenWarehouse->hasEmptyCurrencySymbol()) {
+            continue;
+        }
  
          // Skip affixes in some cases
          if (type == PATTERN_SIGN_TYPE_POS
@@ -297,7 +309,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
          // TODO: Handle approximately sign?
          bool hasPrefix = false;
          PatternStringUtils::patternInfoToStringBuilder(
-                patternInfo, true, type, false, StandardPlural::OTHER, false, sb);
+                patternInfo, true, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
          fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                  sb, *fTokenWarehouse, parseFlags, &hasPrefix, status);
          AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
@@ -307,7 +319,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
          // TODO: Handle approximately sign?
          bool hasSuffix = false;
          PatternStringUtils::patternInfoToStringBuilder(
-                patternInfo, false, type, false, StandardPlural::OTHER, false, sb);
+                patternInfo, false, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
          fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                  sb, *fTokenWarehouse, parseFlags, &hasSuffix, status);
          AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h

index a82b731ab5b7521aa0935fb2d8e034b952cea035..ad731ed5d80e94a85ddba9048c7ecae61315a200 100644 (file)
--- a/icu4c/source/i18n/numparse_affixes.h
+++ b/icu4c/source/i18n/numparse_affixes.h
@@ -101,6 +101,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
  
      NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
  
+    bool hasEmptyCurrencySymbol() const;
+
    private:
      // NOTE: The following field may be unsafe to access after construction is done!
      const AffixTokenMatcherSetupData* fSetupData;
@@ -204,10 +206,12 @@ class AffixMatcherWarehouse {
                               UErrorCode& status);
  
    private:
-    // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
-    AffixMatcher fAffixMatchers[9];
-    // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
-    AffixPatternMatcher fAffixPatternMatchers[6];
+    // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
+    // and doubled since there may be an empty currency symbol
+    AffixMatcher fAffixMatchers[18];
+    // 12 is the limit: positive, zero, and negative, a prefix and a suffix for each,
+    // and doubled since there may be an empty currency symbol
+    AffixPatternMatcher fAffixPatternMatchers[12];
      // Reference to the warehouse for tokens used by the AffixPatternMatchers
      AffixTokenMatcherWarehouse* fTokenWarehouse;
  
diff --git a/icu4c/source/test/cintltst/cnumtst.c b/icu4c/source/test/cintltst/cnumtst.c

index 615deacf31f4b3b5580c11ec4cbaf650e2c6a144..6448760f2c6542b32ef347dc37af39ea5b0280ec 100644 (file)
--- a/icu4c/source/test/cintltst/cnumtst.c
+++ b/icu4c/source/test/cintltst/cnumtst.c
@@ -78,6 +78,7 @@ static void TestSciNotationMaxFracCap(void);
  static void TestMinIntMinFracZero(void);
  static void Test21479_ExactCurrency(void);
  static void Test22088_Ethiopic(void);
+static void TestParseWithEmptyCurr(void);
  
  #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x)
  
@@ -121,6 +122,7 @@ void addNumForTest(TestNode** root)
      TESTCASE(TestMinIntMinFracZero);
      TESTCASE(Test21479_ExactCurrency);
      TESTCASE(Test22088_Ethiopic);
+    TESTCASE(TestParseWithEmptyCurr);
  }
  
  /* test Parse int 64 */
@@ -3634,4 +3636,147 @@ static void Test22088_Ethiopic(void) {
      unum_close(nf3);
  }
  
+/* ICU-20512: parsing must keep working after the currency symbol is set to an empty string. */
+static void TestParseWithEmptyCurr(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UNumberFormat* unum = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status);
+    if (U_FAILURE(status)) {
+        log_data_err("unum_open UNUM_CURRENCY for \"en_US\" fails with %s\n", u_errorName(status));
+    } else {
+        unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
+        if (U_FAILURE(status)) {
+            log_err("unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s\n", u_errorName(status));
+        } else {
+            char bbuf[kBBufMax] = { 0 };
+            UChar curr[4] = { 0 };
+            int32_t ppos, blen;
+            double val;
+            const UChar* text = u"3";
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            blen = unum_parseDecimal(unum, text, -1, &ppos, bbuf, kBBufMax, &status);
+            if (U_FAILURE(status)) {
+                log_err("unum_parseDecimal u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
+            } else if (ppos != 1 || blen != 1 || bbuf[0] != '3') {
+                log_err("unum_parseDecimal expect ppos 1, blen 1, str 3; get %d, %d, %s\n", ppos, blen, bbuf);
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, text, -1, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("unum_parseDouble u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
+            } else if (ppos != 1 || val != 3.0) {
+                log_err("unum_parseDouble expect ppos 1, val 3.0; get %d, %.2f\n", ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDoubleCurrency(unum, text, -1, &ppos, curr, &status);
+            if (U_SUCCESS(status)) { // success is the error case: this currency parse is expected to fail
+                log_err("unum_parseDoubleCurrency u\"3\" with empty curr symbol succeeds, get ppos %d, val %.2f\n", ppos, val);
+            }
+        }
+        unum_close(unum);
+    }
+
+    //                              "¤#,##0.00" "¤ #,##0.00" "#,##0.00 ¤" "#,##,##0.00¤"
+    static const char* locales[] = {"en_US",    "nb_NO",     "cs_CZ",     "bn_BD",       NULL };
+    const char ** localesPtr = locales;
+    const char* locale;
+    while ((locale = *localesPtr++) != NULL) {
+        status = U_ZERO_ERROR;
+        unum = unum_open(UNUM_CURRENCY, NULL, 0, locale, NULL, &status);
+        if (U_FAILURE(status)) {
+            log_data_err("locale %s unum_open UNUM_CURRENCY fails with %s\n", locale, u_errorName(status));
+        } else {
+            UChar ubuf[kUBufMax];
+            int32_t ppos, ulen;
+            const double posValToUse = 37.0;
+            const double negValToUse = -3.0;
+            double val;
+
+            status = U_ZERO_ERROR;
+            unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s, skipping\n", locale, u_errorName(status));
+                unum_close(unum); // every skip path must release the formatter opened for this locale
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
+                unum_close(unum); // release before skipping
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
+            } else if (ppos != ulen || val != posValToUse) {
+                log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
+                unum_close(unum); // release before skipping
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
+            } else if (ppos != ulen || val != negValToUse) {
+                log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            unum_applyPattern(unum, false, u"#,##0.00¤", -1, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s unum_applyPattern \"#,##0.00¤\" fails with %s, skipping\n", locale, u_errorName(status));
+                unum_close(unum); // release before skipping
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
+                unum_close(unum); // release before skipping
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
+            } else if (ppos != ulen || val != posValToUse) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
+            }
+
+            status = U_ZERO_ERROR;
+            ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
+                unum_close(unum); // release before skipping
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            ppos = 0;
+            val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
+            if (U_FAILURE(status)) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
+            } else if (ppos != ulen || val != negValToUse) {
+                log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
+            }
+
+            unum_close(unum);
+        }
+    }
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
author	Shane F. Carr <shane@unicode.org>
	Fri, 16 Sep 2022 07:08:41 +0000 (00:08 -0700)
committer	Peter Edberg <42151464+pedberg-icu@users.noreply.github.com>
	Fri, 16 Sep 2022 16:00:50 +0000 (09:00 -0700)
icu4c/source/i18n/number_currencysymbols.cpp		patch \| blob \| history
icu4c/source/i18n/number_currencysymbols.h		patch \| blob \| history
icu4c/source/i18n/number_patternmodifier.cpp		patch \| blob \| history
icu4c/source/i18n/number_patternstring.cpp		patch \| blob \| history
icu4c/source/i18n/number_patternstring.h		patch \| blob \| history
icu4c/source/i18n/numparse_affixes.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_affixes.h		patch \| blob \| history
icu4c/source/test/cintltst/cnumtst.c		patch \| blob \| history