ICU-12713 Port fix for handling large magnitude numbers from ICU4J to ICU4C

author George Rhoten <grhoten@users.noreply.github.com>

Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)

committer George Rhoten <grhoten@users.noreply.github.com>

Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)
author George Rhoten <grhoten@users.noreply.github.com>
Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)
committer George Rhoten <grhoten@users.noreply.github.com>
Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)
diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp

index 94777b830cb467637cb461d1d0dd000364a52a79..5460be9dd07d83dd7eb13eeef6f2e2ff80d7b4cd 100644 (file)
--- a/icu4c/source/i18n/nfrs.cpp
+++ b/icu4c/source/i18n/nfrs.cpp
@@ -23,6 +23,7 @@
  #include "nfrule.h"
  #include "nfrlist.h"
  #include "patternprops.h"
+#include "putilimp.h"
  
  #ifdef RBNF_DEBUG
  #include "cmemory.h"
@@ -544,7 +545,7 @@ NFRuleSet::findNormalRule(int64_t number) const
          // an explanation of the rollback rule).  If we do, roll back
          // one rule and return that one instead of the one we'd normally
          // return
-        if (result->shouldRollBack((double)number)) {
+        if (result->shouldRollBack(number)) {
              if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
                  return NULL;
              }
@@ -829,18 +830,34 @@ int64_t util64_fromDouble(double d) {
      return result;
  }
  
-int64_t util64_pow(int32_t r, uint32_t e)  { 
-    if (r == 0) {
+/**
+ * Raises base to a non-negative power by binary (square-and-multiply)
+ * exponentiation.
+ *
+ * @param base     The value to raise. A base of 0 yields 0 for every exponent.
+ * @param exponent The power to raise base to.
+ * @return base raised to exponent. The product is not range-checked, so the
+ *         caller must choose arguments whose result fits in an int64_t.
+ */
+int64_t util64_pow(int32_t base, uint16_t exponent)  {
+    if (base == 0) {
          return 0;
-    } else if (e == 0) {
-        return 1;
-    } else {
-        int64_t n = r;
-        while (--e > 0) {
-            n *= r;
-        }
-        return n;
      }
+    int64_t result = 1;
+    int64_t pow = base;
+    for (;;) {
+        if ((exponent & 1) == 1) {
+            result *= pow;
+        }
+        exponent >>= 1;
+        if (exponent == 0) {
+            // Stop before squaring again: that square is never used, and it
+            // can overflow int64_t even when the result itself fits.
+            break;
+        }
+        pow *= pow;
+    }
+    return result;
  }
  
  static const uint8_t asciiDigits[] = { 
diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h

index b6aa579eb7233aab04911ae8cbc685a062aa4995..6cbee0f245f78bfd2ff10129d89f1bfc030dfd14 100644 (file)
--- a/icu4c/source/i18n/nfrs.h
+++ b/icu4c/source/i18n/nfrs.h
@@ -88,7 +88,7 @@ private:
  int64_t util64_fromDouble(double d);
  
  // raise radix to the power exponent, only non-negative exponents
-int64_t util64_pow(int32_t radix, uint32_t exponent);
+int64_t util64_pow(int32_t radix, uint16_t exponent);
  
  // convert n to digit string in buffer, return length of string
  uint32_t util64_tou(int64_t n, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE);
diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp

index a188fe11a9e645e6cbbf0bd4d12ca2509885c0dd..3297e4dcd60f3bb7d7833939e4abc785709d507f 100644 (file)
--- a/icu4c/source/i18n/nfrule.cpp
+++ b/icu4c/source/i18n/nfrule.cpp
@@ -30,6 +30,7 @@
  #include "nfrlist.h"
  #include "nfsubs.h"
  #include "patternprops.h"
+#include "putilimp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -715,6 +716,12 @@ NFRule::_appendRuleText(UnicodeString& result) const
      result.append(gSemicolon);
  }
  
+// Returns this rule's divisor: radix raised to the power exponent, as an int64_t.
+int64_t NFRule::getDivisor() const
+{
+    return util64_pow(radix, exponent);
+}
+
  //-----------------------------------------------------------------------
  // formatting
  //-----------------------------------------------------------------------
@@ -749,7 +756,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32
              toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2));
          }
          toInsertInto.insert(pos,
-            rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status));
+            rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status));
          if (pluralRuleStart > 0) {
              toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart));
          }
@@ -798,10 +805,10 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_
          if (0 <= pluralVal && pluralVal < 1) {
              // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior.
              // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors.
-            pluralVal = uprv_round(pluralVal * uprv_pow(radix, exponent));
+            pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent));
          }
          else {
-            pluralVal = pluralVal / uprv_pow(radix, exponent);
+            pluralVal = pluralVal / util64_pow(radix, exponent);
          }
          toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status));
          if (pluralRuleStart > 0) {
@@ -827,7 +834,7 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_
  * this one in its list; false if it should use this rule
  */
  UBool
-NFRule::shouldRollBack(double number) const
+NFRule::shouldRollBack(int64_t number) const
  {
      // we roll back if the rule contains a modulus substitution,
      // the number being formatted is an even multiple of the rule's
@@ -847,7 +854,7 @@ NFRule::shouldRollBack(double number) const
      // multiple of 100.  This is called the "rollback rule."
      if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) {
          int64_t re = util64_pow(radix, exponent);
-        return uprv_fmod(number, (double)re) == 0 && (baseValue % re) != 0;
+        return (number % re) == 0 && (baseValue % re) != 0;
      }
      return FALSE;
  }
diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h

index fe00cbe11cbb30f11f5aa2649bfe22831d265bfa..6ef535dbe4c2b584b023e519fe9ac8d324ff3ca3 100644 (file)
--- a/icu4c/source/i18n/nfrule.h
+++ b/icu4c/source/i18n/nfrule.h
@@ -17,7 +17,6 @@
  #include "unicode/utypes.h"
  #include "unicode/uobject.h"
  #include "unicode/unistr.h"
-#include "putilimp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -66,7 +65,7 @@ public:
  
      UChar getDecimalPoint() const { return decimalPoint; }
  
-    double getDivisor() const { return uprv_pow(radix, exponent); }
+    int64_t getDivisor() const;
  
      void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
      void doFormat(double  number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
@@ -77,7 +76,7 @@ public:
                    double upperBound,
                    Formattable& result) const;
  
-    UBool shouldRollBack(double number) const;
+    UBool shouldRollBack(int64_t number) const;
  
      void _appendRuleText(UnicodeString& result) const;
  
diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp

index 16bbdc17dab8963d7ac5ae039eb5ed210ac50517..acbd3f5dec2fd1120fc56177784db53a12127267 100644 (file)
--- a/icu4c/source/i18n/nfsubs.cpp
+++ b/icu4c/source/i18n/nfsubs.cpp
@@ -20,6 +20,7 @@
  
  #include "nfsubs.h"
  #include "digitlst.h"
+#include "fmtableimp.h"
  
  #if U_HAVE_RBNF
  
@@ -68,27 +69,24 @@ public:
  SameValueSubstitution::~SameValueSubstitution() {}
  
  class MultiplierSubstitution : public NFSubstitution {
-    double divisor;
-    int64_t ldivisor;
+    int64_t divisor;
  
  public:
      MultiplierSubstitution(int32_t _pos,
-        double _divisor,
+        const NFRule *rule,
          const NFRuleSet* _ruleSet,
          const UnicodeString& description,
          UErrorCode& status)
-        : NFSubstitution(_pos, _ruleSet, description, status), divisor(_divisor)
+        : NFSubstitution(_pos, _ruleSet, description, status), divisor(rule->getDivisor())
      {
-        ldivisor = util64_fromDouble(divisor);
          if (divisor == 0) {
              status = U_PARSE_ERROR;
          }
      }
      virtual ~MultiplierSubstitution();
  
-    virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { 
-        divisor = uprv_pow(radix, exponent);
-        ldivisor = util64_fromDouble(divisor);
+    virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { 
+        divisor = util64_pow(radix, exponent);
  
          if(divisor == 0) {
              status = U_PARSE_ERROR;
@@ -98,14 +96,14 @@ public:
      virtual UBool operator==(const NFSubstitution& rhs) const;
  
      virtual int64_t transformNumber(int64_t number) const {
-        return number / ldivisor;
+        return number / divisor;
      }
  
      virtual double transformNumber(double number) const {
          if (getRuleSet()) {
              return uprv_floor(number / divisor);
          } else {
-            return number/divisor;
+            return number / divisor;
          }
      }
  
@@ -125,21 +123,19 @@ public:
  MultiplierSubstitution::~MultiplierSubstitution() {}
  
  class ModulusSubstitution : public NFSubstitution {
-    double divisor;
-    int64_t  ldivisor;
+    int64_t  divisor;
      const NFRule* ruleToUse;
  public:
      ModulusSubstitution(int32_t pos,
-        double _divisor,
+        const NFRule* rule,
          const NFRule* rulePredecessor,
          const NFRuleSet* ruleSet,
          const UnicodeString& description,
          UErrorCode& status);
      virtual ~ModulusSubstitution();
  
-    virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { 
-        divisor = uprv_pow(radix, exponent);
-        ldivisor = util64_fromDouble(divisor);
+    virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { 
+        divisor = util64_pow(radix, exponent);
  
          if (divisor == 0) {
              status = U_PARSE_ERROR;
@@ -151,7 +147,7 @@ public:
      virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
      virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
  
-    virtual int64_t transformNumber(int64_t number) const { return number % ldivisor; }
+    virtual int64_t transformNumber(int64_t number) const { return number % divisor; }
      virtual double transformNumber(double number) const { return uprv_fmod(number, divisor); }
  
      virtual UBool doParse(const UnicodeString& text, 
@@ -353,7 +349,7 @@ NFSubstitution::makeSubstitution(int32_t pos,
  
          // otherwise, return a MultiplierSubstitution
          else {
-            return new MultiplierSubstitution(pos, rule->getDivisor(), ruleSet,
+            return new MultiplierSubstitution(pos, rule, ruleSet,
                  description, status);
          }
  
@@ -383,7 +379,7 @@ NFSubstitution::makeSubstitution(int32_t pos,
  
          // otherwise, return a ModulusSubstitution
          else {
-            return new ModulusSubstitution(pos, rule->getDivisor(), predecessor,
+            return new ModulusSubstitution(pos, rule, predecessor,
                  ruleSet, description, status);
          }
  
@@ -491,7 +487,7 @@ NFSubstitution::~NFSubstitution()
   * @param exponent The exponent of the divisor
   */
  void
-NFSubstitution::setDivisor(int32_t /*radix*/, int32_t /*exponent*/, UErrorCode& /*status*/) {
+NFSubstitution::setDivisor(int32_t /*radix*/, int16_t /*exponent*/, UErrorCode& /*status*/) {
    // a no-op for all substitutions except multiplier and modulus substitutions
  }
  
@@ -572,23 +568,38 @@ void
  NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos, int32_t recursionCount, UErrorCode& status) const
  {
      if (ruleSet != NULL) {
-        // perform a transformation on the number that is dependent
+        // Perform a transformation on the number that is dependent
          // on the type of substitution this is, then just call its
          // rule set's format() method to format the result
          ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos, recursionCount, status);
      } else if (numberFormat != NULL) {
-        // or perform the transformation on the number (preserving
-        // the result's fractional part if the formatter it set
-        // to show it), then use that formatter's format() method
-        // to format the result
-        double numberToFormat = transformNumber((double)number);
-        if (numberFormat->getMaximumFractionDigits() == 0) {
-            numberToFormat = uprv_floor(numberToFormat);
-        }
+        if (number <= MAX_INT64_IN_DOUBLE) {
+            // Perform the transformation on the number (preserving
+            // the result's fractional part if the formatter is set
+            // to show it), then use that formatter's format() method
+            // to format the result
+            double numberToFormat = transformNumber((double)number);
+            if (numberFormat->getMaximumFractionDigits() == 0) {
+                numberToFormat = uprv_floor(numberToFormat);
+            }
  
-        UnicodeString temp;
-        numberFormat->format(numberToFormat, temp, status);
-        toInsertInto.insert(_pos + this->pos, temp);
+            UnicodeString temp;
+            numberFormat->format(numberToFormat, temp, status);
+            toInsertInto.insert(_pos + this->pos, temp);
+        } 
+        else { 
+            // We have gone beyond double precision. Something has to give. 
+            // We're favoring accuracy of the large number over potential rules 
+            // that round like a CompactDecimalFormat, which is not a common use case. 
+            // 
+            // Perform a transformation on the number that is dependent
+            // on the type of substitution this is, then use the number
+            // formatter's format() method to format the integer result
+            int64_t numberToFormat = transformNumber(number); 
+            UnicodeString temp;
+            numberFormat->format(numberToFormat, temp, status);
+            toInsertInto.insert(_pos + this->pos, temp);
+        } 
      }
  }
  
@@ -809,22 +820,20 @@ UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const
   * regular rule.
   */
  ModulusSubstitution::ModulusSubstitution(int32_t _pos,
-                                         double _divisor,
+                                         const NFRule* rule,
                                           const NFRule* predecessor,
                                           const NFRuleSet* _ruleSet,
                                           const UnicodeString& description,
                                           UErrorCode& status)
   : NFSubstitution(_pos, _ruleSet, description, status)
- , divisor(_divisor)
+ , divisor(rule->getDivisor())
   , ruleToUse(NULL)
  {
-  ldivisor = util64_fromDouble(_divisor);
-
    // the owning rule's divisor controls the behavior of this
    // substitution: rather than keeping a backpointer to the rule,
    // we keep a copy of the divisor
  
-  if (ldivisor == 0) {
+  if (divisor == 0) {
        status = U_PARSE_ERROR;
    }
  
diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h

index 62d963dd48aed20394e76417e77a9924fb4da8ad..237f29f2c35b8260084e36a69c9942bc47fad5ce 100644 (file)
--- a/icu4c/source/i18n/nfsubs.h
+++ b/icu4c/source/i18n/nfsubs.h
@@ -91,7 +91,7 @@ public:
       * @param radix The radix of the divisor
       * @param exponent The exponent of the divisor
       */
-    virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status);
+    virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status);
      
      /**
       * Replaces result with the string describing the substitution.
diff --git a/icu4c/source/i18n/numfmt.cpp b/icu4c/source/i18n/numfmt.cpp

index ef0851911f6621ad36210794465af3bf721126e3..51e096c437de631690fe02fcee3f6ef3cf65f07c 100644 (file)
--- a/icu4c/source/i18n/numfmt.cpp
+++ b/icu4c/source/i18n/numfmt.cpp
@@ -529,7 +529,7 @@ UnicodeString& NumberFormat::format(const DigitList &number,
                        FieldPositionIterator* posIter,
                        UErrorCode& status) const {
      // DecimalFormat overrides this function, and handles DigitList based big decimals.
-    // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists,
+    // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists,
      // so this default implementation falls back to formatting decimal numbers as doubles.
      if (U_FAILURE(status)) {
          return appendTo;
@@ -547,7 +547,7 @@ NumberFormat::format(const DigitList &number,
                       FieldPosition& pos,
                       UErrorCode &status) const { 
      // DecimalFormat overrides this function, and handles DigitList based big decimals.
-    // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists,
+    // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists,
      // so this default implementation falls back to formatting decimal numbers as doubles.
      if (U_FAILURE(status)) {
          return appendTo;
diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp

index fc4fd43a7bf54be1ed81a8df6cb07507a825d576..764ed12bd7c3983d613a9f85796c8e4db5a6333a 100644 (file)
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@@ -27,12 +27,13 @@
  #include "unicode/udata.h"
  #include "unicode/udisplaycontext.h"
  #include "unicode/brkiter.h"
-#include "nfrs.h"
  
  #include "cmemory.h"
  #include "cstring.h"
  #include "patternprops.h"
  #include "uresimp.h"
+#include "nfrs.h"
+#include "digitlst.h"
  
  // debugging
  // #define RBNF_DEBUG
@@ -1078,18 +1079,77 @@ RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status
      return NULL;
  }
  
+UnicodeString&
+RuleBasedNumberFormat::format(const DigitList &number,
+                      UnicodeString &appendTo,
+                      FieldPositionIterator *posIter,
+                      UErrorCode &status) const {
+    // Formats via int64_t when number fits, via double when only its rounded value
+    // fits, else via a locale decimal NumberFormat. Appends to and returns appendTo.
+    if (U_FAILURE(status)) {
+        return appendTo;
+    }
+    DigitList copy(number);
+    if (copy.fitsIntoInt64(false)) {
+        format(((DigitList &)number).getInt64(), appendTo, posIter, status);
+        return appendTo;
+    }
+    copy.roundAtExponent(0);
+    if (copy.fitsIntoInt64(false)) {
+        format(number.getDouble(), appendTo, posIter, status);
+        return appendTo;
+    }
+    // Outside the range this framework can handle; DecimalFormat gives more accurate results.
+    // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
+    NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
+    if (decimalFormat != NULL) { // creation can fail; never dereference a NULL formatter
+        Formattable f;
+        f.adoptDigitList(new DigitList(number));
+        decimalFormat->format(f, appendTo, posIter, status);
+        delete decimalFormat;
+    }
+    return appendTo;
+}
+
+
+UnicodeString&
+RuleBasedNumberFormat::format(const DigitList &number,
+                     UnicodeString& appendTo,
+                     FieldPosition& pos,
+                     UErrorCode &status) const {
+    // Formats via int64_t when number fits, via double when only its rounded value
+    // fits, else via a locale decimal NumberFormat. Appends to and returns appendTo.
+    if (U_FAILURE(status)) {
+        return appendTo;
+    }
+    DigitList copy(number);
+    if (copy.fitsIntoInt64(false)) {
+        format(((DigitList &)number).getInt64(), appendTo, pos, status);
+        return appendTo;
+    }
+    copy.roundAtExponent(0);
+    if (copy.fitsIntoInt64(false)) {
+        format(number.getDouble(), appendTo, pos, status);
+        return appendTo;
+    }
+    // Outside the range this framework can handle; DecimalFormat gives more accurate results.
+    // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
+    NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
+    if (decimalFormat != NULL) { // creation can fail; never dereference a NULL formatter
+        Formattable f;
+        f.adoptDigitList(new DigitList(number));
+        decimalFormat->format(f, appendTo, pos, status);
+        delete decimalFormat;
+    }
+    return appendTo;
+}
+
  UnicodeString&
  RuleBasedNumberFormat::format(int32_t number,
                                UnicodeString& toAppendTo,
-                              FieldPosition& /* pos */) const
+                              FieldPosition& pos) const
  {
-    if (defaultRuleSet) {
-        UErrorCode status = U_ZERO_ERROR;
-        int32_t startPos = toAppendTo.length();
-        defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
-        adjustForCapitalizationContext(startPos, toAppendTo);
-    }
-    return toAppendTo;
+    return format((int64_t)number, toAppendTo, pos);
  }
  
  
@@ -1100,9 +1160,7 @@ RuleBasedNumberFormat::format(int64_t number,
  {
      if (defaultRuleSet) {
          UErrorCode status = U_ZERO_ERROR;
-        int32_t startPos = toAppendTo.length();
-        defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
-        adjustForCapitalizationContext(startPos, toAppendTo);
+        format(number, defaultRuleSet, toAppendTo, status);
      }
      return toAppendTo;
  }
@@ -1114,11 +1172,11 @@ RuleBasedNumberFormat::format(double number,
                                FieldPosition& /* pos */) const
  {
      int32_t startPos = toAppendTo.length();
+    UErrorCode status = U_ZERO_ERROR;
      if (defaultRuleSet) {
-        UErrorCode status = U_ZERO_ERROR;
          defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
      }
-    return adjustForCapitalizationContext(startPos, toAppendTo);
+    return adjustForCapitalizationContext(startPos, toAppendTo, status);
  }
  
  
@@ -1126,24 +1184,10 @@ UnicodeString&
  RuleBasedNumberFormat::format(int32_t number,
                                const UnicodeString& ruleSetName,
                                UnicodeString& toAppendTo,
-                              FieldPosition& /* pos */,
+                              FieldPosition& pos,
                                UErrorCode& status) const
  {
-    // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
-    if (U_SUCCESS(status)) {
-        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
-            // throw new IllegalArgumentException("Can't use internal rule set");
-            status = U_ILLEGAL_ARGUMENT_ERROR;
-        } else {
-            NFRuleSet *rs = findRuleSet(ruleSetName, status);
-            if (rs) {
-                int32_t startPos = toAppendTo.length();
-                rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
-                adjustForCapitalizationContext(startPos, toAppendTo);
-            }
-        }
-    }
-    return toAppendTo;
+    return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
  }
  
  
@@ -1161,9 +1205,7 @@ RuleBasedNumberFormat::format(int64_t number,
          } else {
              NFRuleSet *rs = findRuleSet(ruleSetName, status);
              if (rs) {
-                int32_t startPos = toAppendTo.length();
-                rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
-                adjustForCapitalizationContext(startPos, toAppendTo);
+                format(number, rs, toAppendTo, status);
              }
          }
      }
@@ -1187,27 +1229,72 @@ RuleBasedNumberFormat::format(double number,
              if (rs) {
                  int32_t startPos = toAppendTo.length();
                  rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
-                adjustForCapitalizationContext(startPos, toAppendTo);
+                adjustForCapitalizationContext(startPos, toAppendTo, status);
              }
          }
      }
      return toAppendTo;
  }
  
+/**
+ * Bottleneck through which all the public format() methods
+ * that take a long pass. By the time we get here, we know
+ * which rule set we're using to do the formatting.
+ * @param number The number to format
+ * @param ruleSet The rule set to use to format the number
+ * @param toAppendTo The string the formatted text is appended to
+ * @param status Error code; nothing is done if it already indicates failure
+ * @return A reference to toAppendTo
+ */
+UnicodeString&
+RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const
+{
+    // A separate int64_t path exists because int64_t keeps a couple of digits
+    // of precision that a double cannot (both are 8 bytes, but a double spends
+    // some bits on the exponent).
+    if (U_FAILURE(status)) {
+        return toAppendTo;
+    }
+    if (number == INT64_MIN) {
+        // We can't handle this value right now. Provide an accurate default value.
+        // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
+        NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
+        DigitList *digitList = new DigitList();
+        if (decimalFormat != NULL && digitList != NULL) {
+            Formattable f;
+            FieldPosition pos(FieldPosition::DONT_CARE);
+            digitList->set(number);
+            f.adoptDigitList(digitList);
+            decimalFormat->format(f, toAppendTo, pos, status);
+        }
+        else {
+            delete digitList; // never handed to a Formattable, so release it here
+            if (U_SUCCESS(status)) status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        delete decimalFormat;
+    }
+    else {
+        int32_t startPos = toAppendTo.length();
+        ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
+        adjustForCapitalizationContext(startPos, toAppendTo, status);
+    }
+    return toAppendTo;
+}
+
  UnicodeString&
  RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
-                                                      UnicodeString& currentResult) const
+                                                      UnicodeString& currentResult,
+                                                      UErrorCode& status) const
  {
  #if !UCONFIG_NO_BREAK_ITERATION
-    if (startPos==0 && currentResult.length() > 0) {
+    UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
+    if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) {
          // capitalize currentResult according to context
          UChar32 ch = currentResult.char32At(0);
-        UErrorCode status = U_ZERO_ERROR;
-        UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
-        if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
-              ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
-                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
-                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
+        if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
+              ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
+                (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
+                (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
              // titlecase first word of currentResult, here use sentence iterator unlike current implementations
              // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
              currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h

index d41ffbe3816a67b5c41fa79d82f372c02b326af3..1af61605fa2b1bae0b80013a869c0f19ae50d594 100644 (file)
--- a/icu4c/source/i18n/unicode/rbnf.h
+++ b/icu4c/source/i18n/unicode/rbnf.h
@@ -866,6 +866,52 @@ public:
                                  FieldPosition& pos,
                                  UErrorCode& status) const;
  
+protected:
+    /**
+     * Format a decimal number.
+     * The number is a DigitList wrapper onto a floating point decimal number.
+     * The default implementation in NumberFormat converts the decimal number
+     * to a double and formats that.  Subclasses of NumberFormat that want
+     * to specifically handle big decimal numbers must override this method.
+     * class DecimalFormat does so.
+     *
+     * @param number    The number, a DigitList format Decimal Floating Point.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param posIter   On return, can be used to iterate over positions
+     *                  of fields generated by this format call.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @internal
+     */
+    virtual UnicodeString& format(const DigitList &number,
+                                  UnicodeString& appendTo,
+                                  FieldPositionIterator* posIter,
+                                  UErrorCode& status) const;
+
+    /**
+     * Format a decimal number.
+     * The number is a DigitList wrapper onto a floating point decimal number.
+     * The default implementation in NumberFormat converts the decimal number
+     * to a double and formats that.  Subclasses of NumberFormat that want
+     * to specifically handle big decimal numbers must override this method.
+     * class DecimalFormat does so.
+     *
+     * @param number    The number, a DigitList format Decimal Floating Point.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @internal
+     */
+    virtual UnicodeString& format(const DigitList &number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+public:
+
    using NumberFormat::parse;
  
    /**
@@ -1031,7 +1077,8 @@ private:
      NFRule * initializeDefaultNaNRule(UErrorCode &status);
      const NFRule * getDefaultNaNRule() const;
      PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const;
-    UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const;
+    UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const;
+    UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const;
  
  private:
      NFRuleSet **ruleSets;
diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp

index d9d7489241fb24219244e3c09063b72459f4ac5d..373e0778cb6f2c916e5c41c9b74f83508c44c113 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.cpp
+++ b/icu4c/source/test/intltest/itrbnf.cpp
@@ -73,6 +73,8 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
          TESTCASE(21, TestMultiplePluralRules);
          TESTCASE(22, TestInfinityNaN);
          TESTCASE(23, TestVariableDecimalPoint);
+        TESTCASE(24, TestLargeNumbers);
+        TESTCASE(25, TestCompactDecimalFormatStyle);
  #else
          TESTCASE(0, TestRBNFDisabled);
  #endif
@@ -2218,6 +2220,69 @@ void IntlTestRBNF::TestVariableDecimalPoint() {
      doTest(&enFormatter, enTestCommaData, true);
  }
  
+void IntlTestRBNF::TestLargeNumbers() {
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getEnglish(), status);
+
+    const char * const enTestFullData[][2] = {
+            {"-9007199254740991", "minus nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-one"}, // Maximum precision in both a double and a long
+            {"9007199254740991", "nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-one"}, // Maximum precision in both a double and a long
+            {"-9007199254740992", "minus nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-two"}, // Only precisely contained in a long
+            {"9007199254740992", "nine quadrillion seven trillion one hundred ninety-nine billion two hundred fifty-four million seven hundred forty thousand nine hundred ninety-two"}, // Only precisely contained in a long
+            {"9999999999999998", "nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-eight"},
+            {"9999999999999999", "nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-nine"},
+            {"999999999999999999", "nine hundred ninety-nine quadrillion nine hundred ninety-nine trillion nine hundred ninety-nine billion nine hundred ninety-nine million nine hundred ninety-nine thousand nine hundred ninety-nine"},
+            {"1000000000000000000", "1,000,000,000,000,000,000"}, // The rules don't go to 1 quintillion yet
+            {"-9223372036854775809", "-9,223,372,036,854,775,809"}, // We've gone beyond 64-bit precision
+            {"-9223372036854775808", "-9,223,372,036,854,775,808"}, // INT64_MIN: its magnitude is beyond the positive 64-bit range
+            {"-9223372036854775807", "minus 9,223,372,036,854,775,807"}, // INT64_MIN + 1: most negative value whose magnitude fits in 64 bits
+            {"-9223372036854775806", "minus 9,223,372,036,854,775,806"}, // INT64_MIN + 2
+            {"9223372036854774111", "9,223,372,036,854,774,111"}, // Below 64-bit precision
+            {"9223372036854774999", "9,223,372,036,854,774,999"}, // Below 64-bit precision
+            {"9223372036854775000", "9,223,372,036,854,775,000"}, // Below 64-bit precision
+            {"9223372036854775806", "9,223,372,036,854,775,806"}, // Maximum 64-bit precision - 1
+            {"9223372036854775807", "9,223,372,036,854,775,807"}, // Maximum 64-bit precision
+            {"9223372036854775808", "9,223,372,036,854,775,808"}, // We've gone beyond 64-bit precision. This can only be represented with BigDecimal.
+            { NULL, NULL }
+    };
+    doTest(&rbnf, enTestFullData, false);
+}
+
+void IntlTestRBNF::TestCompactDecimalFormatStyle() {
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    // This is not a common use case, but we're testing it anyway.
+    UnicodeString numberPattern("=###0.#####=;"
+            "1000: <###0.00< K;"
+            "1000000: <###0.00< M;"
+            "1000000000: <###0.00< B;"
+            "1000000000000: <###0.00< T;"
+            "1000000000000000: <###0.00< Q;");
+    RuleBasedNumberFormat rbnf(numberPattern, UnicodeString(), Locale::getEnglish(), parseError, status);
+
+    const char * const enTestFullData[][2] = {
+            {"1000", "1.00 K"},
+            {"1234", "1.23 K"},
+            {"999994", "999.99 K"},
+            {"999995", "1000.00 K"},
+            {"1000000", "1.00 M"},
+            {"1200000", "1.20 M"},
+            {"1200000000", "1.20 B"},
+            {"1200000000000", "1.20 T"},
+            {"1200000000000000", "1.20 Q"},
+            {"4503599627370495", "4.50 Q"},
+            {"4503599627370496", "4.50 Q"},
+            {"8990000000000000", "8.99 Q"},
+            {"9008000000000000", "9.00 Q"}, // Above 2^53, where a double no longer represents every integer exactly
+            {"9456000000000000", "9.00 Q"},  // Above 2^53, where a double no longer represents every integer exactly
+            {"10000000000000000", "10.00 Q"},  // Above 2^53, where a double no longer represents every integer exactly
+            {"9223372036854775807", "9223.00 Q"}, // Maximum 64-bit precision
+            {"9223372036854775808", "9,223,372,036,854,775,808"}, // We've gone beyond 64-bit precision. This can only be represented with BigDecimal.
+            { NULL, NULL }
+    };
+    doTest(&rbnf, enTestFullData, false);
+}
+
  void 
  IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) 
  {
diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h

index 8c23a7d5c6467b4691a3ce2e57c1add1f44680ff..2d3e7605ef0a58cb230c3cc096872e27cc562f37 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.h
+++ b/icu4c/source/test/intltest/itrbnf.h
@@ -145,6 +145,8 @@ class IntlTestRBNF : public IntlTest {
      void TestInfinityNaN();
      void TestVariableDecimalPoint();
      void TestRounding();
+    void TestLargeNumbers();
+    void TestCompactDecimalFormatStyle();
  
  protected:
    virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java

index beeb89c2d067a34331baca5bb9e778b260b6ab0a..9ec0ea47de48404d190d30452836a243e4d6fb3b 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
@@ -2026,8 +2026,10 @@ public class RuleBasedNumberFormat extends NumberFormat {
       * Adjust capitalization of formatted result for display context
       */
      private String adjustForContext(String result) {
-        if (result != null && result.length() > 0 && UCharacter.isLowerCase(result.codePointAt(0))) {
-            DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION);
+        DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION);
+        if (capitalization != DisplayContext.CAPITALIZATION_NONE && result != null && result.length() > 0
+            && UCharacter.isLowerCase(result.codePointAt(0)))
+        {
              if (  capitalization==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
                    (capitalization == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) ||
                    (capitalization == DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) ) {
author	George Rhoten <grhoten@users.noreply.github.com>
	Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)
committer	George Rhoten <grhoten@users.noreply.github.com>
	Sat, 14 Jan 2017 16:29:40 +0000 (16:29 +0000)
icu4c/source/i18n/nfrs.cpp		patch \| blob \| history
icu4c/source/i18n/nfrs.h		patch \| blob \| history
icu4c/source/i18n/nfrule.cpp		patch \| blob \| history
icu4c/source/i18n/nfrule.h		patch \| blob \| history
icu4c/source/i18n/nfsubs.cpp		patch \| blob \| history
icu4c/source/i18n/nfsubs.h		patch \| blob \| history
icu4c/source/i18n/numfmt.cpp		patch \| blob \| history
icu4c/source/i18n/rbnf.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/rbnf.h		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.cpp		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.h		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java		patch \| blob \| history