granicus.if.org Git - icu/commitdiff
ICU-10633 Implement context-sensitive number formatting (currently just for RBNF)
author Peter Edberg <pedberg@unicode.org>
Thu, 6 Feb 2014 09:41:17 +0000 (09:41 +0000)
committer Peter Edberg <pedberg@unicode.org>
Thu, 6 Feb 2014 09:41:17 +0000 (09:41 +0000)
X-SVN-Rev: 35095

icu4c/source/i18n/numfmt.cpp
icu4c/source/i18n/rbnf.cpp
icu4c/source/i18n/unicode/rbnf.h
icu4c/source/test/cintltst/cnumtst.c

index 15a60e439d61530a6dedd505823e4413ba5ea7b0..52220978041b429147bbed769934f3dae907d731 100644 (file)
@@ -260,6 +260,7 @@ NumberFormat::operator=(const NumberFormat& rhs)
         fParseIntegerOnly = rhs.fParseIntegerOnly;
         u_strncpy(fCurrency, rhs.fCurrency, 4);
         fLenient = rhs.fLenient;
+        fCapitalizationContext = rhs.fCapitalizationContext;
     }
     return *this;
 }
@@ -306,6 +307,10 @@ NumberFormat::operator==(const Format& that) const
         if (first) { printf("[ "); first = FALSE; } else { printf(", "); }
         debug("fLenient != ");
     }
+    if (!(fCapitalizationContext == other->fCapitalizationContext)) {
+        if (first) { printf("[ "); first = FALSE; } else { printf(", "); }
+        debug("fCapitalizationContext != ");
+    }
     if (!first) { printf(" ]"); }
 #endif
 
@@ -318,7 +323,8 @@ NumberFormat::operator==(const Format& that) const
               fGroupingUsed == other->fGroupingUsed &&
               fParseIntegerOnly == other->fParseIntegerOnly &&
               u_strcmp(fCurrency, other->fCurrency) == 0 &&
-              fLenient == other->fLenient)));
+              fLenient == other->fLenient &&
+              fCapitalizationContext == other->fCapitalizationContext)));
 }
 
 // -------------------------------------
index 4a6742c661667b6fc8227a15671fcda331279bce..486332d60495a6ad722662d609b5f2ace2ea83a1 100644 (file)
@@ -5,6 +5,7 @@
 *******************************************************************************
 */
 
+#include "unicode/utypes.h"
 #include "utypeinfo.h"  // for 'typeid' to work
 
 #include "unicode/rbnf.h"
@@ -21,6 +22,8 @@
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
 #include "unicode/udata.h"
+#include "unicode/udisplaycontext.h"
+#include "unicode/brkiter.h"
 #include "nfrs.h"
 
 #include "cmemory.h"
@@ -660,6 +663,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
   init(description, locinfo, perror, status);
@@ -678,6 +685,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
   init(description, locinfo, perror, status);
@@ -696,6 +707,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
   init(description, info, perror, status);
 }
@@ -713,6 +728,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
     init(description, NULL, perror, status);
 }
@@ -731,6 +750,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
     init(description, NULL, perror, status);
 }
@@ -746,6 +769,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale&
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
     if (U_FAILURE(status)) {
         return;
@@ -806,6 +833,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
   , lenient(FALSE)
   , lenientParseRules(NULL)
   , localizations(NULL)
+  , capitalizationInfoSet(FALSE)
+  , capitalizationForUIListMenu(FALSE)
+  , capitalizationForStandAlone(FALSE)
+  , capitalizationBrkIter(NULL)
 {
     this->operator=(rhs);
 }
@@ -828,6 +859,12 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
     init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
     setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
     setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
+
+    capitalizationInfoSet = rhs.capitalizationInfoSet;
+    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
+    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
+    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
+
     return *this;
 }
 
@@ -851,6 +888,9 @@ RuleBasedNumberFormat::operator==(const Format& other) const
 
     if (typeid(*this) == typeid(other)) {
         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
+        // test for capitalization info equality is adequately handled
+        // by the NumberFormat test for fCapitalizationContext equality;
+        // the info here is just derived from that.
         if (locale == rhs.locale &&
             lenient == rhs.lenient &&
             (localizations == NULL 
@@ -1022,7 +1062,11 @@ RuleBasedNumberFormat::format(int32_t number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
-    if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
+    if (defaultRuleSet) {
+        int32_t startPos = toAppendTo.length();
+        defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
+        adjustForCapitalizationContext(startPos, toAppendTo);
+    }
     return toAppendTo;
 }
 
@@ -1032,7 +1076,11 @@ RuleBasedNumberFormat::format(int64_t number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
-    if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
+    if (defaultRuleSet) {
+        int32_t startPos = toAppendTo.length();
+        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
+        adjustForCapitalizationContext(startPos, toAppendTo);
+    }
     return toAppendTo;
 }
 
@@ -1042,6 +1090,7 @@ RuleBasedNumberFormat::format(double number,
                               UnicodeString& toAppendTo,
                               FieldPosition& /* pos */) const
 {
+    int32_t startPos = toAppendTo.length();
     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
     if (uprv_isNaN(number)) {
         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
@@ -1051,7 +1100,7 @@ RuleBasedNumberFormat::format(double number,
     } else if (defaultRuleSet) {
         defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
     }
-    return toAppendTo;
+    return adjustForCapitalizationContext(startPos, toAppendTo);
 }
 
 
@@ -1070,7 +1119,9 @@ RuleBasedNumberFormat::format(int32_t number,
         } else {
             NFRuleSet *rs = findRuleSet(ruleSetName, status);
             if (rs) {
+                int32_t startPos = toAppendTo.length();
                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
+                adjustForCapitalizationContext(startPos, toAppendTo);
             }
         }
     }
@@ -1092,7 +1143,9 @@ RuleBasedNumberFormat::format(int64_t number,
         } else {
             NFRuleSet *rs = findRuleSet(ruleSetName, status);
             if (rs) {
+                int32_t startPos = toAppendTo.length();
                 rs->format(number, toAppendTo, toAppendTo.length());
+                adjustForCapitalizationContext(startPos, toAppendTo);
             }
         }
     }
@@ -1114,13 +1167,39 @@ RuleBasedNumberFormat::format(double number,
         } else {
             NFRuleSet *rs = findRuleSet(ruleSetName, status);
             if (rs) {
+                int32_t startPos = toAppendTo.length();
                 rs->format(number, toAppendTo, toAppendTo.length());
+                adjustForCapitalizationContext(startPos, toAppendTo);
             }
         }
     }
     return toAppendTo;
 }
 
+UnicodeString&  // returns currentResult, titlecased in place when the capitalization context calls for it
+RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,  // buffer length before the formatted text was appended
+                                                      UnicodeString& currentResult) const  // buffer that now ends with the formatted text
+{
+#if !UCONFIG_NO_BREAK_ITERATION  // when break iteration is configured out, currentResult is returned unchanged
+    if (startPos==0 && currentResult.length() > 0) {  // adjust only if the formatted text starts the buffer and is non-empty
+        // capitalize currentResult according to the capitalization display context
+        UChar32 ch = currentResult.char32At(0);  // first code point; only a lowercase one triggers titlecasing
+        UErrorCode status = U_ZERO_ERROR;  // local status: a getContext failure just skips the adjustment
+        UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
+        if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
+              ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||  // this context needs no locale flag
+                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||  // only if the flag is set
+                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {  // only if the flag is set
+            // titlecase first word of currentResult; this uses a sentence iterator, unlike the current implementations
+            // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
+            currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
+        }
+    }
+#endif
+    return currentResult;
+}
+
+
 void
 RuleBasedNumberFormat::parse(const UnicodeString& text,
                              Formattable& result,
@@ -1422,6 +1501,52 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
     originalDescription = rules;
 }
 
+// override the NumberFormat implementation in order to
+// lazily initialize relevant items
+void
+RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
+{
+    NumberFormat::setContext(value, status);
+    if (U_SUCCESS(status)) {
+       if (!capitalizationInfoSet &&
+               (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
+           initCapitalizationContextInfo(locale);
+           capitalizationInfoSet = TRUE;
+        }
+#if !UCONFIG_NO_BREAK_ITERATION
+        if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
+                (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
+                (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
+            UErrorCode status = U_ZERO_ERROR;
+            capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
+        }
+#endif
+    }
+}
+
+void
+RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
+{
+#if !UCONFIG_NO_BREAK_ITERATION
+    const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
+    UErrorCode status = U_ZERO_ERROR;
+    UResourceBundle *rb = ures_open(NULL, localeID, &status);
+    rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
+    // Have't got a good contextTransforms type for RBNF number spellout,
+    // fix that with CLDR #6857. In the meantime use "symbol".
+    rb = ures_getByKeyWithFallback(rb, "symbol", rb, &status);
+    if (U_SUCCESS(status) && rb != NULL) {
+        int32_t len = 0;
+        const int32_t * intVector = ures_getIntVector(rb, &len, &status);
+        if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
+            capitalizationForUIListMenu = intVector[0];
+            capitalizationForStandAlone = intVector[1];
+        }
+    }
+    ures_close(rb);
+#endif
+}
+
 void
 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
 {
@@ -1489,6 +1614,9 @@ RuleBasedNumberFormat::dispose()
     delete lenientParseRules;
     lenientParseRules = NULL;
 
+   delete capitalizationBrkIter;
+   capitalizationBrkIter = NULL;
+
     if (localizations) localizations = localizations->unref();
 }
 
index 25b39e23cd8b9e231ef90e554d0cf2136b37cfe4..d3a4e7d02f68d8f9e5172ed89994f8c5b7643408 100644 (file)
@@ -34,6 +34,7 @@
 #include "unicode/numfmt.h"
 #include "unicode/unistr.h"
 #include "unicode/strenum.h"
+#include "unicode/brkiter.h"
 
 U_NAMESPACE_BEGIN
 
@@ -894,6 +895,19 @@ public:
    */
   virtual UnicodeString getDefaultRuleSetName() const;
 
+  /* Cannot use #ifndef U_HIDE_DRAFT_API for the following draft method since it is virtual */
+  /**
+   * Set a particular UDisplayContext value in the formatter, such as
+   * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
+   * NumberFormat.
+   * @param value The UDisplayContext value to set.
+   * @param status Input/output status. If at entry this indicates a failure
+   *               status, the function will do nothing; otherwise this will be
+   *               updated with any new status from the function. 
+   * @draft ICU 53
+   */
+  virtual void setContext(UDisplayContext value, UErrorCode& status);
+
 public:
     /**
      * ICU "poor man's RTTI", returns a UClassID for this class.
@@ -939,6 +953,7 @@ private:
               const Locale& locale, UParseError& perror, UErrorCode& status);
 
     void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
+    void initCapitalizationContextInfo(const Locale& thelocale);
     void dispose();
     void stripWhitespace(UnicodeString& src);
     void initDefaultRuleSet();
@@ -953,6 +968,7 @@ private:
     inline NFRuleSet * getDefaultRuleSet() const;
     Collator * getCollator() const;
     DecimalFormatSymbols * getDecimalFormatSymbols() const;
+    UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const;
 
 private:
     NFRuleSet **ruleSets;
@@ -966,6 +982,14 @@ private:
     UnicodeString* lenientParseRules;
     LocalizationInfo* localizations;
     UnicodeString originalDescription;
+    UBool capitalizationInfoSet;
+    UBool capitalizationForUIListMenu;
+    UBool capitalizationForStandAlone;
+#if !UCONFIG_NO_BREAK_ITERATION
+    BreakIterator* capitalizationBrkIter;
+#else
+    void* capitalizationBrkIter;
+#endif
 };
 
 // ---------------
index 111741d38d71fc794da20f5e3a4286ad40fbc905..876e93cc53b09e2e56db4598e15494611069cd45 100644 (file)
@@ -28,6 +28,7 @@
 #include "unicode/unum.h"
 #include "unicode/unumsys.h"
 #include "unicode/ustring.h"
+#include "unicode/udisplaycontext.h"
 
 #include "cintltst.h"
 #include "cnumtst.h"
@@ -2450,13 +2451,31 @@ static void TestCurrencyIsoPluralFormat(void) {
                         localeString, currencyISOCode, DATA[i][3 + sIndex]);
             }
         }
+        unum_close(unumFmt);
       }
     }  
 }
 
+typedef struct {  /* one TestContext case: format 123.45 under a given display context */
+    const char * locale;  /* locale ID passed to unum_open; NULL marks the end of the table */
+    UNumberFormatStyle style;  /* formatter style passed to unum_open */
+    UDisplayContext context;  /* value passed to unum_setContext before formatting */
+    const char * expectedResult;  /* expected text; run through u_unescape before comparison */
+} TestContextItem;
+
+/* Currently no locales have contextTransforms data for the "symbol" type, so only the BEGINNING_OF_SENTENCE row expects a capitalized result. */
+static const TestContextItem tcItems[] = { /* results for 123.45 */
+    { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,    "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
+    { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "Ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
+    { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU,       "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
+    { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_STANDALONE,            "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" },
+    { NULL, (UNumberFormatStyle)0, (UDisplayContext)0, NULL } /* terminator: TestContext stops at locale == NULL */
+};
+
 static void TestContext(void) {
-    /* just a minimal sanity check for now */
     UErrorCode status = U_ZERO_ERROR;
+    const TestContextItem* itemPtr;
+    
     UNumberFormat *unum = unum_open(UNUM_SPELLOUT, NULL, 0, "en", NULL, &status);
     if ( U_SUCCESS(status) ) {
         UDisplayContext context = unum_getContext(unum, UDISPCTX_TYPE_CAPITALIZATION, &status);
@@ -2473,6 +2492,36 @@ static void TestContext(void) {
     } else {
         log_data_err("unum_open UNUM_SPELLOUT for en fails with status %s\n", myErrorName(status));
     }
+    
+    for (itemPtr = tcItems; itemPtr->locale != NULL; itemPtr++) {
+        UChar ubufResult[kUBufMax];
+        int32_t ulenRes;
+        
+        status = U_ZERO_ERROR;
+        unum = unum_open(itemPtr->style, NULL, 0, itemPtr->locale, NULL, &status);
+        if (U_FAILURE(status)) {
+            log_data_err("FAIL: unum_open, locale %s, style %d - %s\n",
+                        itemPtr->locale, (int)itemPtr->style, myErrorName(status));
+            continue;
+        }
+        unum_setContext(unum, itemPtr->context, &status);
+        ulenRes = unum_formatDouble(unum, 123.45, ubufResult, kUBufMax, NULL, &status);
+        if (U_FAILURE(status)) {
+            log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d - %s\n",
+                    itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, myErrorName(status));
+        } else {
+            UChar ubufExpected[kUBufMax];
+            int32_t ulenExp = u_unescape(itemPtr->expectedResult, ubufExpected, kUBufMax);
+            if (ulenRes != ulenExp || u_strncmp(ubufResult, ubufExpected, ulenExp) != 0) {
+                char bbuf[kUBufMax*2];
+                u_austrncpy(bbuf, ubufResult, sizeof(bbuf)); 
+                log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d, expected %d:\"%s\", got %d:\"%s\"\n",
+                        itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, ulenExp,
+                        itemPtr->expectedResult, ulenRes, bbuf);
+            }
+        }
+        unum_close(unum);
+    }
 }
 
 #endif /* #if !UCONFIG_NO_FORMATTING */