From 401daae867db5b6ac6b1b66826d725f0c269ce9e Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Thu, 6 Feb 2014 09:41:17 +0000 Subject: [PATCH] ICU-10633 Implement context-sensitive number formatting (currently just for RBNF) X-SVN-Rev: 35095 --- icu4c/source/i18n/numfmt.cpp | 8 +- icu4c/source/i18n/rbnf.cpp | 134 ++++++++++++++++++++++++++- icu4c/source/i18n/unicode/rbnf.h | 24 +++++ icu4c/source/test/cintltst/cnumtst.c | 51 +++++++++- 4 files changed, 212 insertions(+), 5 deletions(-) diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp index 15a60e439d6..52220978041 100644 --- a/icu4c/source/i18n/numfmt.cpp +++ b/icu4c/source/i18n/numfmt.cpp @@ -260,6 +260,7 @@ NumberFormat::operator=(const NumberFormat& rhs) fParseIntegerOnly = rhs.fParseIntegerOnly; u_strncpy(fCurrency, rhs.fCurrency, 4); fLenient = rhs.fLenient; + fCapitalizationContext = rhs.fCapitalizationContext; } return *this; } @@ -306,6 +307,10 @@ NumberFormat::operator==(const Format& that) const if (first) { printf("[ "); first = FALSE; } else { printf(", "); } debug("fLenient != "); } + if (!(fCapitalizationContext == other->fCapitalizationContext)) { + if (first) { printf("[ "); first = FALSE; } else { printf(", "); } + debug("fCapitalizationContext != "); + } if (!first) { printf(" ]"); } #endif @@ -318,7 +323,8 @@ NumberFormat::operator==(const Format& that) const fGroupingUsed == other->fGroupingUsed && fParseIntegerOnly == other->fParseIntegerOnly && u_strcmp(fCurrency, other->fCurrency) == 0 && - fLenient == other->fLenient))); + fLenient == other->fLenient && + fCapitalizationContext == other->fCapitalizationContext))); } // ------------------------------------- diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index 4a6742c6616..486332d6049 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -5,6 +5,7 @@ ******************************************************************************* */ +#include "unicode/utypes.h" #include 
"utypeinfo.h" // for 'typeid' to work #include "unicode/rbnf.h" @@ -21,6 +22,8 @@ #include "unicode/ustring.h" #include "unicode/utf16.h" #include "unicode/udata.h" +#include "unicode/udisplaycontext.h" +#include "unicode/brkiter.h" #include "nfrs.h" #include "cmemory.h" @@ -660,6 +663,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); init(description, locinfo, perror, status); @@ -678,6 +685,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); init(description, locinfo, perror, status); @@ -696,6 +707,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { init(description, info, perror, status); } @@ -713,6 +728,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { init(description, NULL, perror, status); } @@ -731,6 +750,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , 
localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { init(description, NULL, perror, status); } @@ -746,6 +769,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { if (U_FAILURE(status)) { return; @@ -806,6 +833,10 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) + , capitalizationInfoSet(FALSE) + , capitalizationForUIListMenu(FALSE) + , capitalizationForStandAlone(FALSE) + , capitalizationBrkIter(NULL) { this->operator=(rhs); } @@ -828,6 +859,12 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); + + capitalizationInfoSet = rhs.capitalizationInfoSet; + capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; + capitalizationForStandAlone = rhs.capitalizationForStandAlone; + capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL; + return *this; } @@ -851,6 +888,9 @@ RuleBasedNumberFormat::operator==(const Format& other) const if (typeid(*this) == typeid(other)) { const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; + // test for capitalization info equality is adequately handled + // by the NumberFormat test for fCapitalizationContext equality; + // the info here is just derived from that. 
if (locale == rhs.locale && lenient == rhs.lenient && (localizations == NULL @@ -1022,7 +1062,11 @@ RuleBasedNumberFormat::format(int32_t number, UnicodeString& toAppendTo, FieldPosition& /* pos */) const { - if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); + if (defaultRuleSet) { + int32_t startPos = toAppendTo.length(); + defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); + adjustForCapitalizationContext(startPos, toAppendTo); + } return toAppendTo; } @@ -1032,7 +1076,11 @@ RuleBasedNumberFormat::format(int64_t number, UnicodeString& toAppendTo, FieldPosition& /* pos */) const { - if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); + if (defaultRuleSet) { + int32_t startPos = toAppendTo.length(); + defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); + adjustForCapitalizationContext(startPos, toAppendTo); + } return toAppendTo; } @@ -1042,6 +1090,7 @@ RuleBasedNumberFormat::format(double number, UnicodeString& toAppendTo, FieldPosition& /* pos */) const { + int32_t startPos = toAppendTo.length(); // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. 
if (uprv_isNaN(number)) { DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal @@ -1051,7 +1100,7 @@ RuleBasedNumberFormat::format(double number, } else if (defaultRuleSet) { defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); } - return toAppendTo; + return adjustForCapitalizationContext(startPos, toAppendTo); } @@ -1070,7 +1119,9 @@ RuleBasedNumberFormat::format(int32_t number, } else { NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { + int32_t startPos = toAppendTo.length(); rs->format((int64_t)number, toAppendTo, toAppendTo.length()); + adjustForCapitalizationContext(startPos, toAppendTo); } } } @@ -1092,7 +1143,9 @@ RuleBasedNumberFormat::format(int64_t number, } else { NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { + int32_t startPos = toAppendTo.length(); rs->format(number, toAppendTo, toAppendTo.length()); + adjustForCapitalizationContext(startPos, toAppendTo); } } } @@ -1114,13 +1167,39 @@ RuleBasedNumberFormat::format(double number, } else { NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { + int32_t startPos = toAppendTo.length(); rs->format(number, toAppendTo, toAppendTo.length()); + adjustForCapitalizationContext(startPos, toAppendTo); } } } return toAppendTo; } +UnicodeString& +RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, + UnicodeString& currentResult) const +{ +#if !UCONFIG_NO_BREAK_ITERATION + if (startPos==0 && currentResult.length() > 0) { + // capitalize currentResult according to context + UChar32 ch = currentResult.char32At(0); + UErrorCode status = U_ZERO_ERROR; + UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); + if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && + ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || + 
(capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { + // titlecase first word of currentResult, here use sentence iterator unlike current implementations + // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format + currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); + } + } +#endif + return currentResult; +} + + void RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, @@ -1422,6 +1501,52 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali originalDescription = rules; } +// override the NumberFormat implementation in order to +// lazily initialize relevant items +void +RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) +{ + NumberFormat::setContext(value, status); + if (U_SUCCESS(status)) { + if (!capitalizationInfoSet && + (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { + initCapitalizationContextInfo(locale); + capitalizationInfoSet = TRUE; + } +#if !UCONFIG_NO_BREAK_ITERATION + if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || + (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { + UErrorCode status = U_ZERO_ERROR; + capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); + } +#endif + } +} + +void +RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) +{ +#if !UCONFIG_NO_BREAK_ITERATION + const char * localeID = (thelocale != NULL)? 
thelocale.getBaseName(): NULL; + UErrorCode status = U_ZERO_ERROR; + UResourceBundle *rb = ures_open(NULL, localeID, &status); + rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); + // Haven't got a good contextTransforms type for RBNF number spellout, + // fix that with CLDR #6857. In the meantime use "symbol". + rb = ures_getByKeyWithFallback(rb, "symbol", rb, &status); + if (U_SUCCESS(status) && rb != NULL) { + int32_t len = 0; + const int32_t * intVector = ures_getIntVector(rb, &len, &status); + if (U_SUCCESS(status) && intVector != NULL && len >= 2) { + capitalizationForUIListMenu = intVector[0]; + capitalizationForStandAlone = intVector[1]; + } + } + ures_close(rb); +#endif +} + void RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) { @@ -1489,6 +1614,9 @@ RuleBasedNumberFormat::dispose() delete lenientParseRules; lenientParseRules = NULL; + delete capitalizationBrkIter; + capitalizationBrkIter = NULL; + if (localizations) localizations = localizations->unref(); } diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index 25b39e23cd8..d3a4e7d02f6 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -34,6 +34,7 @@ #include "unicode/numfmt.h" #include "unicode/unistr.h" #include "unicode/strenum.h" +#include "unicode/brkiter.h" U_NAMESPACE_BEGIN @@ -894,6 +895,19 @@ public: */ virtual UnicodeString getDefaultRuleSetName() const; + /* Cannot use #ifndef U_HIDE_DRAFT_API for the following draft method since it is virtual */ + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see + * NumberFormat. + * @param value The UDisplayContext value to set. + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. 
+ * @draft ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + public: /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -939,6 +953,7 @@ private: const Locale& locale, UParseError& perror, UErrorCode& status); void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status); + void initCapitalizationContextInfo(const Locale& thelocale); void dispose(); void stripWhitespace(UnicodeString& src); void initDefaultRuleSet(); @@ -953,6 +968,7 @@ private: inline NFRuleSet * getDefaultRuleSet() const; Collator * getCollator() const; DecimalFormatSymbols * getDecimalFormatSymbols() const; + UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const; private: NFRuleSet **ruleSets; @@ -966,6 +982,14 @@ private: UnicodeString* lenientParseRules; LocalizationInfo* localizations; UnicodeString originalDescription; + UBool capitalizationInfoSet; + UBool capitalizationForUIListMenu; + UBool capitalizationForStandAlone; +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator* capitalizationBrkIter; +#else + void* capitalizationBrkIter; +#endif }; // --------------- diff --git a/icu4c/source/test/cintltst/cnumtst.c b/icu4c/source/test/cintltst/cnumtst.c index 111741d38d7..876e93cc53b 100644 --- a/icu4c/source/test/cintltst/cnumtst.c +++ b/icu4c/source/test/cintltst/cnumtst.c @@ -28,6 +28,7 @@ #include "unicode/unum.h" #include "unicode/unumsys.h" #include "unicode/ustring.h" +#include "unicode/udisplaycontext.h" #include "cintltst.h" #include "cnumtst.h" @@ -2450,13 +2451,31 @@ static void TestCurrencyIsoPluralFormat(void) { localeString, currencyISOCode, DATA[i][3 + sIndex]); } } + unum_close(unumFmt); } } } +typedef struct { + const char * locale; + UNumberFormatStyle style; + UDisplayContext context; + const char * expectedResult; +} TestContextItem; + +/* currently no locales have contextTransforms data for "symbol" type */ +static const 
TestContextItem tcItems[] = { /* results for 123.45 */ + { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" }, + { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, "Ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" }, + { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" }, + { "sv", UNUM_SPELLOUT, UDISPCTX_CAPITALIZATION_FOR_STANDALONE, "ett\\u00ADhundra\\u00ADtjugo\\u00ADtre komma fyra fem" }, + { NULL, (UNumberFormatStyle)0, (UDisplayContext)0, NULL } +}; + static void TestContext(void) { - /* just a minimal sanity check for now */ UErrorCode status = U_ZERO_ERROR; + const TestContextItem* itemPtr; + UNumberFormat *unum = unum_open(UNUM_SPELLOUT, NULL, 0, "en", NULL, &status); if ( U_SUCCESS(status) ) { UDisplayContext context = unum_getContext(unum, UDISPCTX_TYPE_CAPITALIZATION, &status); @@ -2473,6 +2492,36 @@ static void TestContext(void) { } else { log_data_err("unum_open UNUM_SPELLOUT for en fails with status %s\n", myErrorName(status)); } + + for (itemPtr = tcItems; itemPtr->locale != NULL; itemPtr++) { + UChar ubufResult[kUBufMax]; + int32_t ulenRes; + + status = U_ZERO_ERROR; + unum = unum_open(itemPtr->style, NULL, 0, itemPtr->locale, NULL, &status); + if (U_FAILURE(status)) { + log_data_err("FAIL: unum_open, locale %s, style %d - %s\n", + itemPtr->locale, (int)itemPtr->style, myErrorName(status)); + continue; + } + unum_setContext(unum, itemPtr->context, &status); + ulenRes = unum_formatDouble(unum, 123.45, ubufResult, kUBufMax, NULL, &status); + if (U_FAILURE(status)) { + log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d - %s\n", + itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, myErrorName(status)); + } else { + UChar ubufExpected[kUBufMax]; + int32_t ulenExp = u_unescape(itemPtr->expectedResult, ubufExpected, kUBufMax); + if (ulenRes != ulenExp || 
u_strncmp(ubufResult, ubufExpected, ulenExp) != 0) { + char bbuf[kUBufMax*2]; + u_austrncpy(bbuf, ubufResult, sizeof(bbuf)); + log_err("FAIL: unum_formatDouble, locale %s, style %d, context %d, expected %d:\"%s\", got %d:\"%s\"\n", + itemPtr->locale, (int)itemPtr->style, (int)itemPtr->context, ulenExp, + itemPtr->expectedResult, ulenRes, bbuf); + } + } + unum_close(unum); + } } #endif /* #if !UCONFIG_NO_FORMATTING */ -- 2.40.0