From 72280fb1520a92d533082091980f748ab51b48da Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Sat, 14 Jan 2017 16:29:40 +0000 Subject: [PATCH] ICU-12713 Port fix for handling large magnitude numbers from ICU4J to ICU4C X-SVN-Rev: 39560 --- icu4c/source/i18n/nfrs.cpp | 25 +-- icu4c/source/i18n/nfrs.h | 2 +- icu4c/source/i18n/nfrule.cpp | 17 +- icu4c/source/i18n/nfrule.h | 5 +- icu4c/source/i18n/nfsubs.cpp | 83 +++++---- icu4c/source/i18n/nfsubs.h | 2 +- icu4c/source/i18n/numfmt.cpp | 4 +- icu4c/source/i18n/rbnf.cpp | 171 +++++++++++++----- icu4c/source/i18n/unicode/rbnf.h | 49 ++++- icu4c/source/test/intltest/itrbnf.cpp | 65 +++++++ icu4c/source/test/intltest/itrbnf.h | 2 + .../ibm/icu/text/RuleBasedNumberFormat.java | 6 +- 12 files changed, 326 insertions(+), 105 deletions(-) diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp index 94777b830cb..5460be9dd07 100644 --- a/icu4c/source/i18n/nfrs.cpp +++ b/icu4c/source/i18n/nfrs.cpp @@ -23,6 +23,7 @@ #include "nfrule.h" #include "nfrlist.h" #include "patternprops.h" +#include "putilimp.h" #ifdef RBNF_DEBUG #include "cmemory.h" @@ -544,7 +545,7 @@ NFRuleSet::findNormalRule(int64_t number) const // an explanation of the rollback rule). If we do, roll back // one rule and return that one instead of the one we'd normally // return - if (result->shouldRollBack((double)number)) { + if (result->shouldRollBack(number)) { if (hi == 1) { // bad rule set, no prior rule to rollback to from this base return NULL; } @@ -829,18 +830,20 @@ int64_t util64_fromDouble(double d) { return result; } -int64_t util64_pow(int32_t r, uint32_t e) { - if (r == 0) { +int64_t util64_pow(int32_t base, uint16_t exponent) { + if (base == 0) { return 0; - } else if (e == 0) { - return 1; - } else { - int64_t n = r; - while (--e > 0) { - n *= r; - } - return n; } + int64_t result = 1; + int64_t pow = base; + while (exponent > 0) { + if ((exponent & 1) == 1) { + result *= pow; + } + pow *= pow; + exponent >>= 1; + } + return result; } static const uint8_t asciiDigits[] = { diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h index b6aa579eb72..6cbee0f245f 100644 --- a/icu4c/source/i18n/nfrs.h +++ b/icu4c/source/i18n/nfrs.h @@ -88,7 +88,7 @@ private: int64_t util64_fromDouble(double d); // raise radix to the power exponent, only non-negative exponents -int64_t util64_pow(int32_t radix, uint32_t exponent); +int64_t util64_pow(int32_t radix, uint16_t exponent); // convert n to digit string in buffer, return length of string uint32_t util64_tou(int64_t n, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE); diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index a188fe11a9e..3297e4dcd60 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -30,6 +30,7 @@ #include "nfrlist.h" #include "nfsubs.h" #include "patternprops.h" +#include "putilimp.h" U_NAMESPACE_BEGIN @@ -715,6 +716,12 @@ NFRule::_appendRuleText(UnicodeString& result) const result.append(gSemicolon); } +int64_t NFRule::getDivisor() const +{ + return util64_pow(radix, exponent); +} + + //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -749,7 +756,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32 toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); } toInsertInto.insert(pos, - rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status)); + rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status)); if (pluralRuleStart > 0) { toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); } @@ -798,10 +805,10 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ if (0 <= pluralVal && pluralVal < 1) { // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior. // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors. - pluralVal = uprv_round(pluralVal * uprv_pow(radix, exponent)); + pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent)); } else { - pluralVal = pluralVal / uprv_pow(radix, exponent); + pluralVal = pluralVal / util64_pow(radix, exponent); } toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status)); if (pluralRuleStart > 0) { @@ -827,7 +834,7 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ * this one in its list; false if it should use this rule */ UBool -NFRule::shouldRollBack(double number) const +NFRule::shouldRollBack(int64_t number) const { // we roll back if the rule contains a modulus substitution, // the number being formatted is an even multiple of the rule's @@ -847,7 +854,7 @@ NFRule::shouldRollBack(double number) const // multiple of 100. This is called the "rollback rule." if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) { int64_t re = util64_pow(radix, exponent); - return uprv_fmod(number, (double)re) == 0 && (baseValue % re) != 0; + return (number % re) == 0 && (baseValue % re) != 0; } return FALSE; } diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h index fe00cbe11cb..6ef535dbe4c 100644 --- a/icu4c/source/i18n/nfrule.h +++ b/icu4c/source/i18n/nfrule.h @@ -17,7 +17,6 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/unistr.h" -#include "putilimp.h" U_NAMESPACE_BEGIN @@ -66,7 +65,7 @@ public: UChar getDecimalPoint() const { return decimalPoint; } - double getDivisor() const { return uprv_pow(radix, exponent); } + int64_t getDivisor() const; void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void doFormat(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; @@ -77,7 +76,7 @@ public: double upperBound, Formattable& result) const; - UBool shouldRollBack(double number) const; + UBool shouldRollBack(int64_t number) const; void _appendRuleText(UnicodeString& result) const; diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp index 16bbdc17dab..acbd3f5dec2 100644 --- a/icu4c/source/i18n/nfsubs.cpp +++ b/icu4c/source/i18n/nfsubs.cpp @@ -20,6 +20,7 @@ #include "nfsubs.h" #include "digitlst.h" +#include "fmtableimp.h" #if U_HAVE_RBNF @@ -68,27 +69,24 @@ public: SameValueSubstitution::~SameValueSubstitution() {} class MultiplierSubstitution : public NFSubstitution { - double divisor; - int64_t ldivisor; + int64_t divisor; public: MultiplierSubstitution(int32_t _pos, - double _divisor, + const NFRule *rule, const NFRuleSet* _ruleSet, const UnicodeString& description, UErrorCode& status) - : NFSubstitution(_pos, _ruleSet, description, status), divisor(_divisor) + : NFSubstitution(_pos, _ruleSet, description, status), divisor(rule->getDivisor()) { - ldivisor = util64_fromDouble(divisor); if (divisor == 0) { status = U_PARSE_ERROR; } } virtual ~MultiplierSubstitution(); - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { - divisor = uprv_pow(radix, exponent); - ldivisor = util64_fromDouble(divisor); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + divisor = util64_pow(radix, exponent); if(divisor == 0) { status = U_PARSE_ERROR; @@ -98,14 +96,14 @@ public: virtual UBool operator==(const NFSubstitution& rhs) const; virtual int64_t transformNumber(int64_t number) const { - return number / ldivisor; + return number / divisor; } virtual double transformNumber(double number) const { if (getRuleSet()) { return uprv_floor(number / divisor); } else { - return number/divisor; + return number / divisor; } } @@ -125,21 +123,19 @@ public: MultiplierSubstitution::~MultiplierSubstitution() {} class ModulusSubstitution : public NFSubstitution { - double divisor; - int64_t ldivisor; + int64_t divisor; const NFRule* ruleToUse; public: ModulusSubstitution(int32_t pos, - double _divisor, + const NFRule* rule, const NFRule* rulePredecessor, const NFRuleSet* ruleSet, const UnicodeString& description, UErrorCode& status); virtual ~ModulusSubstitution(); - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { - divisor = uprv_pow(radix, exponent); - ldivisor = util64_fromDouble(divisor); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + divisor = util64_pow(radix, exponent); if (divisor == 0) { status = U_PARSE_ERROR; @@ -151,7 +147,7 @@ public: virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - virtual int64_t transformNumber(int64_t number) const { return number % ldivisor; } + virtual int64_t transformNumber(int64_t number) const { return number % divisor; } virtual double transformNumber(double number) const { return uprv_fmod(number, divisor); } virtual UBool doParse(const UnicodeString& text, @@ -353,7 +349,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // otherwise, return a MultiplierSubstitution else { - return new MultiplierSubstitution(pos, rule->getDivisor(), ruleSet, + return new MultiplierSubstitution(pos, rule, ruleSet, description, status); } @@ -383,7 +379,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // otherwise, return a ModulusSubstitution else { - return new ModulusSubstitution(pos, rule->getDivisor(), predecessor, + return new ModulusSubstitution(pos, rule, predecessor, ruleSet, description, status); } @@ -491,7 +487,7 @@ NFSubstitution::~NFSubstitution() * @param exponent The exponent of the divisor */ void -NFSubstitution::setDivisor(int32_t /*radix*/, int32_t /*exponent*/, UErrorCode& /*status*/) { +NFSubstitution::setDivisor(int32_t /*radix*/, int16_t /*exponent*/, UErrorCode& /*status*/) { // a no-op for all substitutions except multiplier and modulus substitutions } @@ -572,23 +568,38 @@ void NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos, int32_t recursionCount, UErrorCode& status) const { if (ruleSet != NULL) { - // perform a transformation on the number that is dependent + // Perform a transformation on the number that is dependent // on the type of substitution this is, then just call its // rule set's format() method to format the result ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos, recursionCount, status); } else if (numberFormat != NULL) { - // or perform the transformation on the number (preserving - // the result's fractional part if the formatter it set - // to show it), then use that formatter's format() method - // to format the result - double numberToFormat = transformNumber((double)number); - if (numberFormat->getMaximumFractionDigits() == 0) { - numberToFormat = uprv_floor(numberToFormat); - } + if (number <= MAX_INT64_IN_DOUBLE) { + // or perform the transformation on the number (preserving + // the result's fractional part if the formatter it set + // to show it), then use that formatter's format() method + // to format the result + double numberToFormat = transformNumber((double)number); + if (numberFormat->getMaximumFractionDigits() == 0) { + numberToFormat = uprv_floor(numberToFormat); + } - UnicodeString temp; - numberFormat->format(numberToFormat, temp, status); - toInsertInto.insert(_pos + this->pos, temp); + UnicodeString temp; + numberFormat->format(numberToFormat, temp, status); + toInsertInto.insert(_pos + this->pos, temp); + } + else { + // We have gone beyond double precision. Something has to give. + // We're favoring accuracy of the large number over potential rules + // that round like a CompactDecimalFormat, which is not a common use case. + // + // Perform a transformation on the number that is dependent + // on the type of substitution this is, then just call its + // rule set's format() method to format the result + int64_t numberToFormat = transformNumber(number); + UnicodeString temp; + numberFormat->format(numberToFormat, temp, status); + toInsertInto.insert(_pos + this->pos, temp); + } } } @@ -809,22 +820,20 @@ UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const * regular rule. */ ModulusSubstitution::ModulusSubstitution(int32_t _pos, - double _divisor, + const NFRule* rule, const NFRule* predecessor, const NFRuleSet* _ruleSet, const UnicodeString& description, UErrorCode& status) : NFSubstitution(_pos, _ruleSet, description, status) - , divisor(_divisor) + , divisor(rule->getDivisor()) , ruleToUse(NULL) { - ldivisor = util64_fromDouble(_divisor); - // the owning rule's divisor controls the behavior of this // substitution: rather than keeping a backpointer to the rule, // we keep a copy of the divisor - if (ldivisor == 0) { + if (divisor == 0) { status = U_PARSE_ERROR; } diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h index 62d963dd48a..237f29f2c35 100644 --- a/icu4c/source/i18n/nfsubs.h +++ b/icu4c/source/i18n/nfsubs.h @@ -91,7 +91,7 @@ public: * @param radix The radix of the divisor * @param exponent The exponent of the divisor */ - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status); /** * Replaces result with the string describing the substitution. diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp index ef0851911f6..51e096c437d 100644 --- a/icu4c/source/i18n/numfmt.cpp +++ b/icu4c/source/i18n/numfmt.cpp @@ -529,7 +529,7 @@ UnicodeString& NumberFormat::format(const DigitList &number, FieldPositionIterator* posIter, UErrorCode& status) const { // DecimalFormat overrides this function, and handles DigitList based big decimals. - // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists, + // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists, // so this default implementation falls back to formatting decimal numbers as doubles. if (U_FAILURE(status)) { return appendTo; @@ -547,7 +547,7 @@ NumberFormat::format(const DigitList &number, FieldPosition& pos, UErrorCode &status) const { // DecimalFormat overrides this function, and handles DigitList based big decimals. - // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists, + // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists, // so this default implementation falls back to formatting decimal numbers as doubles. if (U_FAILURE(status)) { return appendTo; diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index fc4fd43a7bf..764ed12bd7c 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -27,12 +27,13 @@ #include "unicode/udata.h" #include "unicode/udisplaycontext.h" #include "unicode/brkiter.h" -#include "nfrs.h" #include "cmemory.h" #include "cstring.h" #include "patternprops.h" #include "uresimp.h" +#include "nfrs.h" +#include "digitlst.h" // debugging // #define RBNF_DEBUG @@ -1078,18 +1079,77 @@ RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status return NULL; } +UnicodeString& +RuleBasedNumberFormat::format(const DigitList &number, + UnicodeString &appendTo, + FieldPositionIterator *posIter, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + DigitList copy(number); + if (copy.fitsIntoInt64(false)) { + format(((DigitList &)number).getInt64(), appendTo, posIter, status); + } + else { + copy.roundAtExponent(0); + if (copy.fitsIntoInt64(false)) { + format(number.getDouble(), appendTo, posIter, status); + } + else { + // We're outside of our normal range that this framework can handle. + // The DecimalFormat will provide more accurate results. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. + NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + f.adoptDigitList(new DigitList(number)); + decimalFormat->format(f, appendTo, posIter, status); + delete decimalFormat; + } + } + return appendTo; +} + + +UnicodeString& +RuleBasedNumberFormat::format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + DigitList copy(number); + if (copy.fitsIntoInt64(false)) { + format(((DigitList &)number).getInt64(), appendTo, pos, status); + } + else { + copy.roundAtExponent(0); + if (copy.fitsIntoInt64(false)) { + format(number.getDouble(), appendTo, pos, status); + } + else { + // We're outside of our normal range that this framework can handle. + // The DecimalFormat will provide more accurate results. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. + NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + f.adoptDigitList(new DigitList(number)); + decimalFormat->format(f, appendTo, pos, status); + delete decimalFormat; + } + } + return appendTo; +} + UnicodeString& RuleBasedNumberFormat::format(int32_t number, UnicodeString& toAppendTo, - FieldPosition& /* pos */) const + FieldPosition& pos) const { - if (defaultRuleSet) { - UErrorCode status = U_ZERO_ERROR; - int32_t startPos = toAppendTo.length(); - defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); - } - return toAppendTo; + return format((int64_t)number, toAppendTo, pos); } @@ -1100,9 +1160,7 @@ RuleBasedNumberFormat::format(int64_t number, { if (defaultRuleSet) { UErrorCode status = U_ZERO_ERROR; - int32_t startPos = toAppendTo.length(); - defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + format(number, defaultRuleSet, toAppendTo, status); } return toAppendTo; } @@ -1114,11 +1172,11 @@ RuleBasedNumberFormat::format(double number, FieldPosition& /* pos */) const { int32_t startPos = toAppendTo.length(); + UErrorCode status = U_ZERO_ERROR; if (defaultRuleSet) { - UErrorCode status = U_ZERO_ERROR; defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); } - return adjustForCapitalizationContext(startPos, toAppendTo); + return adjustForCapitalizationContext(startPos, toAppendTo, status); } @@ -1126,24 +1184,10 @@ UnicodeString& RuleBasedNumberFormat::format(int32_t number, const UnicodeString& ruleSetName, UnicodeString& toAppendTo, - FieldPosition& /* pos */, + FieldPosition& pos, UErrorCode& status) const { - // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); - if (U_SUCCESS(status)) { - if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { - // throw new IllegalArgumentException("Can't use internal rule set"); - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - NFRuleSet *rs = findRuleSet(ruleSetName, status); - if (rs) { - int32_t startPos = toAppendTo.length(); - rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); - } - } - } - return toAppendTo; + return format((int64_t)number, ruleSetName, toAppendTo, pos, status); } @@ -1161,9 +1205,7 @@ RuleBasedNumberFormat::format(int64_t number, } else { NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { - int32_t startPos = toAppendTo.length(); - rs->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + format(number, rs, toAppendTo, status); } } } @@ -1187,27 +1229,72 @@ RuleBasedNumberFormat::format(double number, if (rs) { int32_t startPos = toAppendTo.length(); rs->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + adjustForCapitalizationContext(startPos, toAppendTo, status); } } } return toAppendTo; } +/** + * Bottleneck through which all the public format() methods + * that take a long pass. By the time we get here, we know + * which rule set we're using to do the formatting. + * @param number The number to format + * @param ruleSet The rule set to use to format the number + * @return The text that resulted from formatting the number + */ +UnicodeString& +RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const +{ + // all API format() routines that take a double vector through + // here. We have these two identical functions-- one taking a + // double and one taking a long-- the couple digits of precision + // that long has but double doesn't (both types are 8 bytes long, + // but double has to borrow some of the mantissa bits to hold + // the exponent). + // Create an empty string buffer where the result will + // be built, and pass it to the rule set (along with an insertion + // position of 0 and the number being formatted) to the rule set + // for formatting + + if (U_SUCCESS(status)) { + if (number == INT64_MIN) { + // We can't handle this value right now. Provide an accurate default value. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. + NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + FieldPosition pos(FieldPosition::DONT_CARE); + DigitList *digitList = new DigitList(); + digitList->set(number); + f.adoptDigitList(digitList); + decimalFormat->format(f, toAppendTo, pos, status); + delete decimalFormat; + } + else { + int32_t startPos = toAppendTo.length(); + ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); + adjustForCapitalizationContext(startPos, toAppendTo, status); + } + } + return toAppendTo; +} + UnicodeString& RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, - UnicodeString& currentResult) const + UnicodeString& currentResult, + UErrorCode& status) const { #if !UCONFIG_NO_BREAK_ITERATION - if (startPos==0 && currentResult.length() > 0) { + UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); + if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) { // capitalize currentResult according to context UChar32 ch = currentResult.char32At(0); - UErrorCode status = U_ZERO_ERROR; - UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); - if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && - ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || - (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || - (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { + if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL && + ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || + (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { // titlecase first word of currentResult, here use sentence iterator unlike current implementations // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index d41ffbe3816..1af61605fa2 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -866,6 +866,52 @@ public: FieldPosition& pos, UErrorCode& status) const; +protected: + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; +public: + using NumberFormat::parse; /** @@ -1031,7 +1077,8 @@ private: NFRule * initializeDefaultNaNRule(UErrorCode &status); const NFRule * getDefaultNaNRule() const; PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const; - UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const; + UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const; + UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const; private: NFRuleSet **ruleSets; diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index d9d7489241f..373e0778cb6 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -73,6 +73,8 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, TESTCASE(21, TestMultiplePluralRules); TESTCASE(22, TestInfinityNaN); TESTCASE(23, TestVariableDecimalPoint); + TESTCASE(24, TestLargeNumbers); + TESTCASE(25, TestCompactDecimalFormatStyle); #else TESTCASE(0, TestRBNFDisabled); #endif @@ -2218,6 +2220,69 @@ void IntlTestRBNF::TestVariableDecimalPoint() { doTest(&enFormatter, enTestCommaData, true); } +void IntlTestRBNF::TestLargeNumbers() { + UErrorCode status = U_ZERO_ERROR; + RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getEnglish(), status); + + const char * const enTestFullData[][2] = { + {"-9007199254740991", "minus nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-one"}, // Maximum precision in both a double and a long + {"9007199254740991", "nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-one"}, // Maximum precision in both a double and a long + {"-9007199254740992", "minus nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-two"}, // Only precisely contained in a long + {"9007199254740992", "nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-two"}, // Only precisely contained in a long + {"9999999999999998", "nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-eight"}, + {"9999999999999999", "nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-nine"}, + {"999999999999999999", "nine hundred ninety-nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-nine"}, + {"1000000000000000000", "1,000,000,000,000,000,000"}, // The rules don't go to 1 quintillion yet + {"-9223372036854775809", "-9,223,372,036,854,775,809"}, // We've gone beyond 64-bit precision + {"-9223372036854775808", "-9,223,372,036,854,775,808"}, // We've gone beyond +64-bit precision + {"-9223372036854775807", "minus 9,223,372,036,854,775,807"}, // Minimum 64-bit precision + {"-9223372036854775806", "minus 9,223,372,036,854,775,806"}, // Minimum 64-bit precision + 1 + {"9223372036854774111", "9,223,372,036,854,774,111"}, // Below 64-bit precision + {"9223372036854774999", "9,223,372,036,854,774,999"}, // Below 64-bit precision + {"9223372036854775000", "9,223,372,036,854,775,000"}, // Below 64-bit precision + {"9223372036854775806", "9,223,372,036,854,775,806"}, // Maximum 64-bit precision - 1 + {"9223372036854775807", "9,223,372,036,854,775,807"}, // Maximum 64-bit precision + {"9223372036854775808", "9,223,372,036,854,775,808"}, // We've gone beyond 64-bit precision. This can only be represented with BigDecimal. + { NULL, NULL } + }; + doTest(&rbnf, enTestFullData, false); +} + +void IntlTestRBNF::TestCompactDecimalFormatStyle() { + UErrorCode status = U_ZERO_ERROR; + UParseError parseError; + // This is not a common use case, but we're testing it anyway. + UnicodeString numberPattern("=###0.#####=;" + "1000: <###0.00< K;" + "1000000: <###0.00< M;" + "1000000000: <###0.00< B;" + "1000000000000: <###0.00< T;" + "1000000000000000: <###0.00< Q;"); + RuleBasedNumberFormat rbnf(numberPattern, UnicodeString(), Locale::getEnglish(), parseError, status); + + const char * const enTestFullData[][2] = { + {"1000", "1.00 K"}, + {"1234", "1.23 K"}, + {"999994", "999.99 K"}, + {"999995", "1000.00 K"}, + {"1000000", "1.00 M"}, + {"1200000", "1.20 M"}, + {"1200000000", "1.20 B"}, + {"1200000000000", "1.20 T"}, + {"1200000000000000", "1.20 Q"}, + {"4503599627370495", "4.50 Q"}, + {"4503599627370496", "4.50 Q"}, + {"8990000000000000", "8.99 Q"}, + {"9008000000000000", "9.00 Q"}, // Number doesn't precisely fit into a double + {"9456000000000000", "9.00 Q"}, // Number doesn't precisely fit into a double + {"10000000000000000", "10.00 Q"}, // Number doesn't precisely fit into a double + {"9223372036854775807", "9223.00 Q"}, // Maximum 64-bit precision + {"9223372036854775808", "9,223,372,036,854,775,808"}, // We've gone beyond 64-bit precision. This can only be represented with BigDecimal. + { NULL, NULL } + }; + doTest(&rbnf, enTestFullData, false); +} + void IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) { diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h index 8c23a7d5c64..2d3e7605ef0 100644 --- a/icu4c/source/test/intltest/itrbnf.h +++ b/icu4c/source/test/intltest/itrbnf.h @@ -145,6 +145,8 @@ class IntlTestRBNF : public IntlTest { void TestInfinityNaN(); void TestVariableDecimalPoint(); void TestRounding(); + void TestLargeNumbers(); + void TestCompactDecimalFormatStyle(); protected: virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java index beeb89c2d06..9ec0ea47de4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java @@ -2026,8 +2026,10 @@ public class RuleBasedNumberFormat extends NumberFormat { * Adjust capitalization of formatted result for display context */ private String adjustForContext(String result) { - if (result != null && result.length() > 0 && UCharacter.isLowerCase(result.codePointAt(0))) { - DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION); + DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION); + if (capitalization != DisplayContext.CAPITALIZATION_NONE && result != null && result.length() > 0 + && UCharacter.isLowerCase(result.codePointAt(0))) + { if ( capitalization==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || (capitalization == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) || (capitalization == DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) ) { -- 2.40.0