ICU-13569 rbbi, refresh from trunk, plus table state compaction, work in progress.

author Andy Heninger <andy.heninger@gmail.com>

Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)
diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp

index a0f92da619f15271170763ef31be78070128ff5e..99c8e5dd5af1afa7acbae486ce11553c93724161 100644 (file)
--- a/icu4c/source/common/rbbirb.cpp
+++ b/icu4c/source/common/rbbirb.cpp
@@ -367,6 +367,9 @@ void RBBIRuleBuilder::optimizeTables() {
          fSafeRevTables->removeColumn(rightClass);
      }
  
+    fForwardTables->removeDuplicateStates();
+
+
  
  }
  
diff --git a/icu4c/source/common/rbbitblb.cpp b/icu4c/source/common/rbbitblb.cpp

index b71921a9dee16d44adf482323291d7501a42aaef..68e9ffb666d7b7bbde2479e225e54efbfe4528b4 100644 (file)
--- a/icu4c/source/common/rbbitblb.cpp
+++ b/icu4c/source/common/rbbitblb.cpp
@@ -1120,8 +1120,79 @@ void RBBITableBuilder::removeColumn(int32_t column) {
      }
  }
  
+/*
+ * findDuplicateState
+ */
+bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+    for (; firstState<numStates-1; ++firstState) {
+        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
+        for (duplState=firstState+1; duplState<numStates; ++duplState) {
+            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+            if (firstSD->fAccepting != duplSD->fAccepting ||
+                firstSD->fLookAhead != duplSD->fLookAhead ||
+                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
+                continue;
+            }
+            bool rowsMatch = true;
+            for (int32_t col=0; col < numCols; ++col) {
+                int32_t firstVal = firstSD->fDtran->elementAti(col);
+                int32_t duplVal = duplSD->fDtran->elementAti(col);
+                if (!((firstVal == duplVal) ||
+                        ((firstVal == firstState || firstVal == duplState) &&
+                        (duplVal  == firstState || duplVal  == duplState)))) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
  
+void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
+    U_ASSERT(keepState < duplState);
+    U_ASSERT(duplState < fDStates->size());
  
+    RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+    fDStates->removeElementAt(duplState);
+    delete duplSD;
+
+    int32_t numStates = fDStates->size();
+    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+    for (int32_t state=0; state<numStates; ++state) {
+        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+        for (int32_t col=0; col<numCols; col++) {
+            int32_t existingVal = sd->fDtran->elementAti(col);
+            int32_t newVal = existingVal;
+            if (existingVal == duplState) {
+                existingVal = keepState;
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;
+            }
+            sd->fDtran->setElementAt(newVal, col);
+        }
+    }
+}
+
+
+/*
+ * removeDuplicateStates - repeatedly find a duplicate pair of states and remove the second.
+ */
+void RBBITableBuilder::removeDuplicateStates() {
+    int32_t firstState = 0;
+    int32_t duplicateState = 0;
+    // firstState persists across iterations, so each search resumes where the last one stopped.
+    while (findDuplicateState(firstState, duplicateState)) {
+        // No diagnostic output here: this is library code and must not write to stdout.
+        removeState(firstState, duplicateState);
+    }
+}
  
  //-----------------------------------------------------------------------------
  //
diff --git a/icu4c/source/common/rbbitblb.h b/icu4c/source/common/rbbitblb.h

index 375ed6edd2701f23b57a51c42c32c8e6585d53b9..09b57b5cf0f4c3f44463c0ed0a26ec148b223aa5 100644 (file)
--- a/icu4c/source/common/rbbitblb.h
+++ b/icu4c/source/common/rbbitblb.h
@@ -59,7 +59,8 @@ public:
       */
      void     removeColumn(int32_t column);
  
-
+    /** Check for, and remove duplicate states (table rows). */
+    void     removeDuplicateStates();
  
  
  private:
@@ -83,6 +84,21 @@ private:
  
      void     addRuleRootNodes(UVector *dest, RBBINode *node);
  
+    /** Find the next duplicate state. An iterator function.
+     * @param firstState (in/out) begin looking at this state, return the first of the
+     *                   pair of duplicates.
+     * @param duplicateState returns the duplicate state of firstState
+     * @return true if a duplicate pair of states was found.
+     */
+    bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
+
+    /** Remove a duplicate state.
+     * @param keepState First of the duplicate pair. Keep it.
+     * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
+     *                  to refer to keepState instead.
+     */
+    void removeState(int32_t keepState, int32_t duplState);
+
      // Set functions for UVector.
      //   TODO:  make a USet subclass of UVector
  
diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h

index d60450b5a563d06cbf728497c31e5e437fa72c41..b7796cb7ddaf2a03ed5a49d5b9029c65b2abb49e 100644 (file)
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@@ -539,6 +539,7 @@ typedef enum UErrorCode {
      U_DEFAULT_KEYWORD_MISSING,        /**< Missing DEFAULT rule in plural rules */
      U_DECIMAL_NUMBER_SYNTAX_ERROR,    /**< Decimal number syntax error */
      U_FORMAT_INEXACT_ERROR,           /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+    U_NUMBER_ARG_OUTOFBOUNDS_ERROR,   /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
  #ifndef U_HIDE_DEPRECATED_API
      /**
       * One more than the highest normal formatting API error code.
diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp

index 8f5791be160e1727464af68b720ba88b7fcc99af..5d6a0504ba682a32461ff20626e82fc2a87f0dd1 100644 (file)
--- a/icu4c/source/common/utypes.cpp
+++ b/icu4c/source/common/utypes.cpp
@@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
      "U_UNDEFINED_KEYWORD",
      "U_DEFAULT_KEYWORD_MISSING",
      "U_DECIMAL_NUMBER_SYNTAX_ERROR",
-    "U_FORMAT_INEXACT_ERROR"
+    "U_FORMAT_INEXACT_ERROR",
+    "U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
  };
  
  static const char * const
diff --git a/icu4c/source/i18n/dcfmtsym.cpp b/icu4c/source/i18n/dcfmtsym.cpp

index d321a82f8a9550f49eff66b1dbd5a01dbbd2e915..02c0e077f43807d62854902627a16617950fb40a 100644 (file)
--- a/icu4c/source/i18n/dcfmtsym.cpp
+++ b/icu4c/source/i18n/dcfmtsym.cpp
@@ -38,6 +38,7 @@
  #include "uresimp.h"
  #include "ureslocs.h"
  #include "charstr.h"
+#include "uassert.h"
  
  // *****************************************************************************
  // class DecimalFormatSymbols
@@ -165,6 +166,7 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs)
          uprv_strcpy(actualLocale, rhs.actualLocale);
          fIsCustomCurrencySymbol = rhs.fIsCustomCurrencySymbol; 
          fIsCustomIntlCurrencySymbol = rhs.fIsCustomIntlCurrencySymbol; 
+        fCodePointZero = rhs.fCodePointZero;
      }
      return *this;
  }
@@ -196,6 +198,7 @@ DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const
              return FALSE;
          }
      }
+    // No need to check fCodePointZero since it is based on fSymbols
      return locale == that.locale &&
          uprv_strcmp(validLocale, that.validLocale) == 0 &&
          uprv_strcmp(actualLocale, that.actualLocale) == 0;
@@ -433,6 +436,20 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status,
      // Let the monetary number separators equal the default number separators if necessary.
      sink.resolveMissingMonetarySeparators(fSymbols);
  
+    // Resolve codePointZero
+    const UnicodeString& stringZero = getConstDigitSymbol(0);
+    UChar32 tempCodePointZero = stringZero.char32At(0);
+    if (u_isdigit(tempCodePointZero) && stringZero.countChar32() == 1) {
+        for (int32_t i=0; i<=9; i++) {
+            const UnicodeString& stringDigit = getConstDigitSymbol(i);
+            if (stringDigit.char32At(0) != tempCodePointZero + i || stringDigit.countChar32() != 1) {
+                tempCodePointZero = -1;
+                break;
+            }
+        }
+    }
+    fCodePointZero = tempCodePointZero;
+
      // Obtain currency data from the currency API.  This is strictly
      // for backward compatibility; we don't use DecimalFormatSymbols
      // for currency data anymore.
@@ -530,6 +547,8 @@ DecimalFormatSymbols::initialize() {
      fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents
      fIsCustomCurrencySymbol = FALSE; 
      fIsCustomIntlCurrencySymbol = FALSE;
+    fCodePointZero = 0x30;
+    U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0));
  
  }
  
diff --git a/icu4c/source/i18n/number_integerwidth.cpp b/icu4c/source/i18n/number_integerwidth.cpp

index 10dacfc4acb96f442b3ca3be5a6ef7e7d4d357c7..4a612273f5e530f25f7fd729325e32b8a06ec183 100644 (file)
--- a/icu4c/source/i18n/number_integerwidth.cpp
+++ b/icu4c/source/i18n/number_integerwidth.cpp
@@ -13,25 +13,28 @@ using namespace icu;
  using namespace icu::number;
  using namespace icu::number::impl;
  
-IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) {
+IntegerWidth::IntegerWidth(digits_t minInt, digits_t maxInt) {
      fUnion.minMaxInt.fMinInt = minInt;
      fUnion.minMaxInt.fMaxInt = maxInt;
  }
  
  IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) {
      if (minInt >= 0 && minInt <= kMaxIntFracSig) {
-        return {static_cast<int8_t>(minInt), -1};
+        return {static_cast<digits_t>(minInt), -1};
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
      if (fHasError) { return *this; }  // No-op on error
-    if (maxInt >= 0 && maxInt <= kMaxIntFracSig) {
-        return {fUnion.minMaxInt.fMinInt, static_cast<int8_t>(maxInt)};
+    digits_t minInt = fUnion.minMaxInt.fMinInt;
+    if (maxInt >= 0 && maxInt <= kMaxIntFracSig && minInt <= maxInt) {
+        return {minInt, static_cast<digits_t>(maxInt)};
+    } else if (maxInt == -1) {
+        return {minInt, -1};
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
diff --git a/icu4c/source/i18n/number_notation.cpp b/icu4c/source/i18n/number_notation.cpp

index ff0cd9505de299d8537b215564ff7bd9bd334a8a..f4ad333354d0c7ee48b10fa91c4357c871a6e390 100644 (file)
--- a/icu4c/source/i18n/number_notation.cpp
+++ b/icu4c/source/i18n/number_notation.cpp
@@ -54,13 +54,13 @@ Notation Notation::simple() {
  
  ScientificNotation
  ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const {
-    if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) {
+    if (minExponentDigits >= 1 && minExponentDigits <= kMaxIntFracSig) {
          ScientificSettings settings = fUnion.scientific;
-        settings.fMinExponentDigits = (int8_t) minExponentDigits;
+        settings.fMinExponentDigits = static_cast<digits_t>(minExponentDigits);
          NotationUnion union_ = {settings};
          return {NTN_SCIENTIFIC, union_};
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
diff --git a/icu4c/source/i18n/number_padding.cpp b/icu4c/source/i18n/number_padding.cpp

index a478af60541dde532bb135c80c81ec951c5fc56f..b1db3490cd4489f3f9371d08ef784006c879d698 100644 (file)
--- a/icu4c/source/i18n/number_padding.cpp
+++ b/icu4c/source/i18n/number_padding.cpp
@@ -43,7 +43,7 @@ Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosit
      if (targetWidth >= 0) {
          return {cp, targetWidth, position};
      } else {
-        return {U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
diff --git a/icu4c/source/i18n/number_rounding.cpp b/icu4c/source/i18n/number_rounding.cpp

index 2f31727e994f75526cfb5da161080a42766e8439..fd4dafdf983b61a289be460d8af4eef6fac78be5 100644 (file)
--- a/icu4c/source/i18n/number_rounding.cpp
+++ b/icu4c/source/i18n/number_rounding.cpp
@@ -58,7 +58,7 @@ FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) {
      if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) {
          return constructFraction(minMaxFractionPlaces, minMaxFractionPlaces);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -66,7 +66,7 @@ FractionRounder Rounder::minFraction(int32_t minFractionPlaces) {
      if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) {
          return constructFraction(minFractionPlaces, -1);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -74,7 +74,7 @@ FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) {
      if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) {
          return constructFraction(0, maxFractionPlaces);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -83,40 +83,40 @@ FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFr
          minFractionPlaces <= maxFractionPlaces) {
          return constructFraction(minFractionPlaces, maxFractionPlaces);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) {
-    if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) {
+    if (minMaxSignificantDigits >= 1 && minMaxSignificantDigits <= kMaxIntFracSig) {
          return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  Rounder Rounder::minDigits(int32_t minSignificantDigits) {
-    if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
+    if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
          return constructSignificant(minSignificantDigits, -1);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  Rounder Rounder::maxDigits(int32_t maxSignificantDigits) {
-    if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
-        return constructSignificant(0, maxSignificantDigits);
+    if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
+        return constructSignificant(1, maxSignificantDigits);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) {
-    if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig &&
+    if (minSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig &&
          minSignificantDigits <= maxSignificantDigits) {
          return constructSignificant(minSignificantDigits, maxSignificantDigits);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -124,7 +124,7 @@ IncrementRounder Rounder::increment(double roundingIncrement) {
      if (roundingIncrement > 0.0) {
          return constructIncrement(roundingIncrement, 0);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -139,19 +139,19 @@ Rounder Rounder::withMode(RoundingMode roundingMode) const {
  
  Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const {
      if (fType == RND_ERROR) { return *this; } // no-op in error state
-    if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) {
+    if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) {
          return constructFractionSignificant(*this, minSignificantDigits, -1);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const {
      if (fType == RND_ERROR) { return *this; } // no-op in error state
-    if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) {
+    if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) {
          return constructFractionSignificant(*this, -1, maxSignificantDigits);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
@@ -185,14 +185,14 @@ Rounder IncrementRounder::withMinFraction(int32_t minFrac) const {
      if (minFrac >= 0 && minFrac <= kMaxIntFracSig) {
          return constructIncrement(fUnion.increment.fIncrement, minFrac);
      } else {
-        return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR};
+        return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR};
      }
  }
  
  FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) {
      FractionSignificantSettings settings;
-    settings.fMinFrac = static_cast<int8_t> (minFrac);
-    settings.fMaxFrac = static_cast<int8_t> (maxFrac);
+    settings.fMinFrac = static_cast<digits_t>(minFrac);
+    settings.fMaxFrac = static_cast<digits_t>(maxFrac);
      settings.fMinSig = -1;
      settings.fMaxSig = -1;
      RounderUnion union_;
@@ -204,8 +204,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
      FractionSignificantSettings settings;
      settings.fMinFrac = -1;
      settings.fMaxFrac = -1;
-    settings.fMinSig = static_cast<int8_t>(minSig);
-    settings.fMaxSig = static_cast<int8_t>(maxSig);
+    settings.fMinSig = static_cast<digits_t>(minSig);
+    settings.fMaxSig = static_cast<digits_t>(maxSig);
      RounderUnion union_;
      union_.fracSig = settings;
      return {RND_SIGNIFICANT, union_, kDefaultMode};
@@ -214,8 +214,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) {
  Rounder
  Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) {
      FractionSignificantSettings settings = base.fUnion.fracSig;
-    settings.fMinSig = static_cast<int8_t>(minSig);
-    settings.fMaxSig = static_cast<int8_t>(maxSig);
+    settings.fMinSig = static_cast<digits_t>(minSig);
+    settings.fMaxSig = static_cast<digits_t>(maxSig);
      RounderUnion union_;
      union_.fracSig = settings;
      return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode};
@@ -224,7 +224,7 @@ Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSi
  IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) {
      IncrementSettings settings;
      settings.fIncrement = increment;
-    settings.fMinFrac = minFrac;
+    settings.fMinFrac = static_cast<digits_t>(minFrac);
      RounderUnion union_;
      union_.increment = settings;
      return {RND_INCREMENT, union_, kDefaultMode};
diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h

index e914ef71ac085c97e4071d98d9745a21a2a92e66..c01765e2cea6c6c0f3dacdd24407fc272cce18cf 100644 (file)
--- a/icu4c/source/i18n/number_types.h
+++ b/icu4c/source/i18n/number_types.h
@@ -31,7 +31,7 @@ typedef UNumberFormatPadPosition PadPosition;
  typedef UNumberCompactStyle CompactStyle;
  
  // ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG
-static constexpr int32_t kMaxIntFracSig = 100;
+static constexpr int32_t kMaxIntFracSig = 999;
  
  // ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE
  static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_FOUND_HALFEVEN;
@@ -42,10 +42,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" ";
  // ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY
  static constexpr char16_t kDefaultCurrency[] = u"XXX";
  
-// FIXME: New error codes:
-static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-
  // Forward declarations:
  
  class Modifier;
diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h

index 4dc6f950f294ca424a93e221720767280fa439ee..86f5c198269ecaad3443a200865364742c77cf2e 100644 (file)
--- a/icu4c/source/i18n/unicode/dcfmtsym.h
+++ b/icu4c/source/i18n/unicode/dcfmtsym.h
@@ -80,10 +80,6 @@ U_NAMESPACE_BEGIN
   * If you supply a pattern with multiple grouping characters, the interval
   * between the last one and the end of the integer is the one that is
   * used. So "#,##,###,####" == "######,####" == "##,####,####".
- * <P>
- * This class only handles localized digits where the 10 digits are
- * contiguous in Unicode, from 0 to 9. Other digits sets (such as
- * superscripts) would need a different subclass.
   */
  class U_I18N_API DecimalFormatSymbols : public UObject {
  public:
@@ -396,6 +392,13 @@ public:
      inline UBool isCustomIntlCurrencySymbol() const {
          return fIsCustomIntlCurrencySymbol;
      }
+
+    /**
+     * @internal For ICU use only
+     */
+    inline UChar32 getCodePointZero() const {
+        return fCodePointZero;
+    }
  #endif  /* U_HIDE_INTERNAL_API */
  
      /**
@@ -408,11 +411,24 @@ public:
       *
       * @param symbol Constant to indicate a number format symbol.
       * @return the format symbol by the param 'symbol'
-     * @internal
+     * @draft ICU 61
       */
-    inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const;
+    inline const UnicodeString& getConstSymbol(ENumberFormatSymbol symbol) const;
  
  #ifndef U_HIDE_INTERNAL_API
+    /**
+     * Returns the const UnicodeString reference, like getConstSymbol,
+     * corresponding to the digit with the given value.  This is equivalent
+     * to accessing the symbol from getConstSymbol with the corresponding
+     * key, such as kZeroDigitSymbol or kOneDigitSymbol.
+     *
+     * @param digit The digit, an integer between 0 and 9 inclusive.
+     *              If outside the range 0 to 9, the zero digit is returned.
+     * @return the format symbol for the given digit.
+     * @internal This API is currently for ICU use only.
+     */
+    inline const UnicodeString& getConstDigitSymbol(int32_t digit) const;
+
      /**
       * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API.
       * @internal
@@ -444,6 +460,22 @@ private:
       */
      UnicodeString fNoSymbol;
  
+    /**
+     * Dealing with code points is faster than dealing with strings when formatting. Because of
+     * this, we maintain a value containing the zero code point that is used whenever digitStrings
+     * represents a sequence of ten code points in order.
+     *
+     * <p>If the value stored here is positive, it means that the code point stored in this value
+     * corresponds to the digitStrings array, and codePointZero can be used instead of the
+     * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
+     * *not* contain a sequence of code points, and it must be used directly.
+     *
+     * <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
+     * should never be set directly; rather, it should be updated only when digitStrings mutates.
+     * That is, the flow of information is digitStrings -> codePointZero, not the other way.
+     */
+    UChar32 fCodePointZero;
+
      Locale locale;
  
      char actualLocale[ULOC_FULLNAME_CAPACITY];
@@ -481,6 +513,17 @@ DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const {
      return *strPtr;
  }
  
+inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t digit) const {
+    // Digits 1-9 occupy consecutive slots starting at kOneDigitSymbol; the zero digit has its
+    // own slot, which also serves every out-of-range request.
+    const bool isOneToNine = (digit >= 1 && digit <= 9);
+    if (!isOneToNine) {
+        return fSymbols[kZeroDigitSymbol];
+    }
+    const int32_t slot = kOneDigitSymbol + digit - 1;
+    return fSymbols[static_cast<ENumberFormatSymbol>(slot)];
+}
+
  // -------------------------------------
  
  inline void
@@ -497,14 +540,20 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString
  
      // If the zero digit is being set to a known zero digit according to Unicode,
      // then we automatically set the corresponding 1-9 digits
-    if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) {
+    // Also record updates to fCodePointZero. Be conservative if in doubt.
+    if (symbol == kZeroDigitSymbol) {
          UChar32 sym = value.char32At(0);
-        if ( u_charDigitValue(sym) == 0 ) {
+        if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) {
+            fCodePointZero = sym;
              for ( int8_t i = 1 ; i<= 9 ; i++ ) {
                  sym++;
                  fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym);
              }
+        } else {
+            fCodePointZero = -1;
          }
+    } else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) {
+        fCodePointZero = -1;
      }
  }
  
diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h

index 4c4f542b4ddd4d2d0d1e58551af28e4e2a2ed71a..ac852f27e8eda9cb1124a1a74fee2319c2a8e833 100644 (file)
--- a/icu4c/source/i18n/unicode/numberformatter.h
+++ b/icu4c/source/i18n/unicode/numberformatter.h
@@ -253,16 +253,17 @@ typedef enum UGroupingStrategy {
  } UGroupingStrategy;
  
  /**
- * An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in
- * <em>en-US</em>:
+ * An enum declaring how to denote positive and negative numbers. Example outputs when formatting
+ * 123, 0, and -123 in <em>en-US</em>:
   *
- * <p>
   * <ul>
- * <li>AUTO: "123", "-123"
- * <li>ALWAYS: "+123", "-123"
- * <li>NEVER: "123", "123"
- * <li>ACCOUNTING: "$123", "($123)"
- * <li>ACCOUNTING_ALWAYS: "+$123", "($123)"
+ * <li>AUTO: "123", "0", and "-123"
+ * <li>ALWAYS: "+123", "+0", and "-123"
+ * <li>NEVER: "123", "0", and "123"
+ * <li>ACCOUNTING: "$123", "$0", and "($123)"
+ * <li>ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)"
+ * <li>EXCEPT_ZERO: "+123", "0", and "-123"
+ * <li>ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)"
   * </ul>
   *
   * <p>
@@ -394,6 +395,21 @@ class IntegerWidth;
  
  namespace impl {
  
+/**
+ * Datatype for minimum/maximum fraction digits. Must be able to hold kMaxIntFracSig.
+ *
+ * @internal
+ */
+typedef int16_t digits_t;
+
+/**
+ * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built
+ * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path.
+ *
+ * @internal
+ */
+static constexpr int32_t DEFAULT_THRESHOLD = 3;
+
  // Forward declarations:
  class Padder;
  struct MacroProps;
@@ -577,7 +593,7 @@ class U_I18N_API Notation : public UMemory {
          struct ScientificSettings {
              int8_t fEngineeringInterval;
              bool fRequireMinInt;
-            int8_t fMinExponentDigits;
+            impl::digits_t fMinExponentDigits;
              UNumberSignDisplay fExponentSignDisplay;
          } scientific;
  
@@ -892,14 +908,14 @@ class U_I18N_API Rounder : public UMemory {
      union RounderUnion {
          struct FractionSignificantSettings {
              // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT
-            int8_t fMinFrac;
-            int8_t fMaxFrac;
-            int8_t fMinSig;
-            int8_t fMaxSig;
+            impl::digits_t fMinFrac;
+            impl::digits_t fMaxFrac;
+            impl::digits_t fMinSig;
+            impl::digits_t fMaxSig;
          } fracSig;
          struct IncrementSettings {
              double fIncrement;
-            int32_t fMinFrac;
+            impl::digits_t fMinFrac;
          } increment; // For RND_INCREMENT
          UCurrencyUsage currencyUsage; // For RND_CURRENCY
          UErrorCode errorCode; // For RND_ERROR
@@ -1153,7 +1169,8 @@ class U_I18N_API IntegerWidth : public UMemory {
       * For example, with maxInt=3, the number 1234 will get printed as "234".
       *
       * @param maxInt
-     *            The maximum number of places before the decimal separator.
+     *            The maximum number of places before the decimal separator. maxInt == -1 means no
+     *            truncation.
       * @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter.
       * @draft ICU 60
       * @see NumberFormatter
@@ -1163,14 +1180,14 @@ class U_I18N_API IntegerWidth : public UMemory {
    private:
      union {
          struct {
-            int8_t fMinInt;
-            int8_t fMaxInt;
+            impl::digits_t fMinInt;
+            impl::digits_t fMaxInt;
          } minMaxInt;
          UErrorCode errorCode;
      } fUnion;
      bool fHasError = false;
  
-    IntegerWidth(int8_t minInt, int8_t maxInt);
+    IntegerWidth(impl::digits_t minInt, impl::digits_t maxInt);
  
      IntegerWidth(UErrorCode errorCode) { // NOLINT
          fUnion.errorCode = errorCode;
@@ -1205,14 +1222,6 @@ class U_I18N_API IntegerWidth : public UMemory {
  
  namespace impl {
  
-/**
- * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built
- * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path.
- *
- * @internal
- */
-static constexpr int32_t DEFAULT_THRESHOLD = 3;
-
  /** @internal */
  class U_I18N_API SymbolsWrapper : public UMemory {
    public:
diff --git a/icu4c/source/i18n/unicode/utrans.h b/icu4c/source/i18n/unicode/utrans.h

index d0f05cf2b60c32e6239ef078776a4d68f6db7ad0..697681aef856e439bf4877eac0d987c02fdda422 100644 (file)
--- a/icu4c/source/i18n/unicode/utrans.h
+++ b/icu4c/source/i18n/unicode/utrans.h
@@ -382,7 +382,7 @@ utrans_openIDs(UErrorCode *pErrorCode);
  U_STABLE void U_EXPORT2 
  utrans_trans(const UTransliterator* trans,
               UReplaceable* rep,
-             UReplaceableCallbacks* repFunc,
+             const UReplaceableCallbacks* repFunc,
               int32_t start,
               int32_t* limit,
               UErrorCode* status);
@@ -433,7 +433,7 @@ utrans_trans(const UTransliterator* trans,
  U_STABLE void U_EXPORT2 
  utrans_transIncremental(const UTransliterator* trans,
                          UReplaceable* rep,
-                        UReplaceableCallbacks* repFunc,
+                        const UReplaceableCallbacks* repFunc,
                          UTransPosition* pos,
                          UErrorCode* status);
  
diff --git a/icu4c/source/i18n/utrans.cpp b/icu4c/source/i18n/utrans.cpp

index 5124833ac3360cbcc331d1dd365529dd8013b334..29013ead1257c239755aaa554a28a1158969100f 100644 (file)
--- a/icu4c/source/i18n/utrans.cpp
+++ b/icu4c/source/i18n/utrans.cpp
@@ -41,12 +41,12 @@ U_NAMESPACE_BEGIN
  class ReplaceableGlue : public Replaceable {
  
      UReplaceable *rep;
-    UReplaceableCallbacks *func;
+    const UReplaceableCallbacks *func;
  
  public:
  
      ReplaceableGlue(UReplaceable *replaceable,
-                    UReplaceableCallbacks *funcCallback);
+                    const UReplaceableCallbacks *funcCallback);
  
      virtual ~ReplaceableGlue();
  
@@ -88,7 +88,7 @@ protected:
  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue)
  
  ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable,
-                                 UReplaceableCallbacks *funcCallback)
+                                 const UReplaceableCallbacks *funcCallback)
    : Replaceable()
  {
      this->rep = replaceable;
@@ -398,7 +398,7 @@ utrans_openIDs(UErrorCode *pErrorCode) {
  U_CAPI void U_EXPORT2
  utrans_trans(const UTransliterator* trans,
               UReplaceable* rep,
-             UReplaceableCallbacks* repFunc,
+             const UReplaceableCallbacks* repFunc,
               int32_t start,
               int32_t* limit,
               UErrorCode* status) {
@@ -418,7 +418,7 @@ utrans_trans(const UTransliterator* trans,
  U_CAPI void U_EXPORT2
  utrans_transIncremental(const UTransliterator* trans,
                          UReplaceable* rep,
-                        UReplaceableCallbacks* repFunc,
+                        const UReplaceableCallbacks* repFunc,
                          UTransPosition* pos,
                          UErrorCode* status) {
  
diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp

index 5edf872d3b72170dfe9a15ce4c541f4e1b36cc7b..c45913796a461cd6c6a91527192819dae71c8f63 100644 (file)
--- a/icu4c/source/test/intltest/intltest.cpp
+++ b/icu4c/source/test/intltest/intltest.cpp
@@ -2030,6 +2030,25 @@ UBool IntlTest::assertEquals(const char* message,
      return TRUE;
  }
  
+
+/**
+ * Asserts that two ICU error codes are identical.
+ *
+ * @param message  Text included in the log line to identify the assertion.
+ * @param expected The error code the caller expects.
+ * @param actual   The error code that was actually produced.
+ * @return TRUE when the codes match. On a mismatch, a "FAIL" line containing the
+ *         u_errorName() of both codes is reported through errln() and FALSE is returned.
+ */
+UBool IntlTest::assertEquals(const char* message,
+                             UErrorCode expected,
+                             UErrorCode actual) {
+    if (expected != actual) {
+        errln((UnicodeString)"FAIL: " + message + "; got " +
+              u_errorName(actual) + 
+              "; expected " + u_errorName(expected));
+        return FALSE;
+    }
+#ifdef VERBOSE_ASSERTIONS
+    // Only builds that define VERBOSE_ASSERTIONS also log successful comparisons.
+    else {
+        logln((UnicodeString)"Ok: " + message + "; got " + u_errorName(actual));
+    }
+#endif
+    return TRUE;
+}
+
+
  #if !UCONFIG_NO_FORMATTING
  UBool IntlTest::assertEquals(const char* message,
                               const Formattable& expected,
@@ -2105,6 +2124,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message,
                               int64_t actual) {
      return assertEquals(extractToAssertBuf(message), expected, actual);
  }
+/**
+ * UnicodeString-message convenience overload for comparing two doubles.
+ * Passes the message through extractToAssertBuf() and forwards to another
+ * assertEquals overload (presumably the const char* / double one), returning its result.
+ */
+UBool IntlTest::assertEquals(const UnicodeString& message,
+                             double expected,
+                             double actual) {
+    return assertEquals(extractToAssertBuf(message), expected, actual);
+}
+/**
+ * UnicodeString-message convenience overload for comparing two UErrorCode values.
+ * Passes the message through extractToAssertBuf() and forwards to another
+ * assertEquals overload (presumably the const char* / UErrorCode one), returning its result.
+ */
+UBool IntlTest::assertEquals(const UnicodeString& message,
+                             UErrorCode expected,
+                             UErrorCode actual) {
+    return assertEquals(extractToAssertBuf(message), expected, actual);
+}
  
  #if !UCONFIG_NO_FORMATTING
  UBool IntlTest::assertEquals(const UnicodeString& message,
diff --git a/icu4c/source/test/intltest/intltest.h b/icu4c/source/test/intltest/intltest.h

index 1f7c80d4794ebc80f8e5e49a3ab443606cf2eac2..08765b707d0ebed84291831dc023bfde055bbad6 100644 (file)
--- a/icu4c/source/test/intltest/intltest.h
+++ b/icu4c/source/test/intltest/intltest.h
@@ -289,13 +289,12 @@ public:
      UBool assertSuccess(const char* message, UErrorCode ec, UBool possibleDataError=FALSE, const char *file=NULL, int line=0);
      UBool assertEquals(const char* message, const UnicodeString& expected,
                         const UnicodeString& actual, UBool possibleDataError=FALSE);
-    UBool assertEquals(const char* message, const char* expected,
-                       const char* actual);
-    UBool assertEquals(const char* message, UBool expected,
-                       UBool actual);
+    UBool assertEquals(const char* message, const char* expected, const char* actual);
+    UBool assertEquals(const char* message, UBool expected, UBool actual);
      UBool assertEquals(const char* message, int32_t expected, int32_t actual);
      UBool assertEquals(const char* message, int64_t expected, int64_t actual);
      UBool assertEquals(const char* message, double expected, double actual);
+    UBool assertEquals(const char* message, UErrorCode expected, UErrorCode actual);
  #if !UCONFIG_NO_FORMATTING
      UBool assertEquals(const char* message, const Formattable& expected,
                         const Formattable& actual, UBool possibleDataError=FALSE);
@@ -307,11 +306,12 @@ public:
      UBool assertSuccess(const UnicodeString& message, UErrorCode ec);
      UBool assertEquals(const UnicodeString& message, const UnicodeString& expected,
                         const UnicodeString& actual, UBool possibleDataError=FALSE);
-    UBool assertEquals(const UnicodeString& message, const char* expected,
-                       const char* actual);
+    UBool assertEquals(const UnicodeString& message, const char* expected, const char* actual);
      UBool assertEquals(const UnicodeString& message, UBool expected, UBool actual);
      UBool assertEquals(const UnicodeString& message, int32_t expected, int32_t actual);
      UBool assertEquals(const UnicodeString& message, int64_t expected, int64_t actual);
+    UBool assertEquals(const UnicodeString& message, double expected, double actual);
+    UBool assertEquals(const UnicodeString& message, UErrorCode expected, UErrorCode actual);
  
      virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); // overide !
  
diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h

index 9d4ffb7cef0d2afbc832b536f2785fad1610fe8b..5b4030a94942961d731fc64c03a6eed03b391698 100644 (file)
--- a/icu4c/source/test/intltest/numbertest.h
+++ b/icu4c/source/test/intltest/numbertest.h
@@ -63,6 +63,7 @@ class NumberFormatterApiTest : public IntlTest {
      void locale();
      void formatTypes();
      void errors();
+    void validRanges();
  
      void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
  
diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp

index 62db705eac794598e4b211e1be6d174ee2923d5e..2d625877f304b0725dfa6f3830a4ae95b7f0d82f 100644 (file)
--- a/icu4c/source/test/intltest/numbertest_api.cpp
+++ b/icu4c/source/test/intltest/numbertest_api.cpp
@@ -76,6 +76,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
          TESTCASE_AUTO(locale);
          TESTCASE_AUTO(formatTypes);
          TESTCASE_AUTO(errors);
+        TESTCASE_AUTO(validRanges);
      TESTCASE_AUTO_END;
  }
  
@@ -1748,27 +1749,83 @@ void NumberFormatterApiTest::errors() {
          UErrorCode status2 = U_ZERO_ERROR;
          FormattedNumber fn = lnf.formatInt(1, status1);
          assertEquals(
-                "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal",
-                U_ILLEGAL_ARGUMENT_ERROR,
-                status1);
+                "Should fail since rounder is not legal",
+                (UBool) TRUE,
+                (UBool) U_FAILURE(status1));
          FieldPosition fp;
          fn.populateFieldPosition(fp, status2);
          assertEquals(
-                "Should fail with U_ILLEGAL_ARGUMENT_ERROR on terminal method",
-                U_ILLEGAL_ARGUMENT_ERROR,
-                status2);
+                "Should fail on terminal method",
+                (UBool) TRUE,
+                (UBool) U_FAILURE(status2));
      }
  
      {
          UErrorCode status = U_ZERO_ERROR;
          lnf.copyErrorTo(status);
          assertEquals(
-                "Should fail with U_ILLEGAL_ARGUMENT_ERROR since rounder is not legal",
-                U_ILLEGAL_ARGUMENT_ERROR,
-                status);
+                "Should fail since rounder is not legal",
+                (UBool) TRUE,
+                (UBool) U_FAILURE(status));
      }
  }
  
+/**
+ * Checks that each range-limited NumberFormatter setting reports the expected status
+ * for arguments below, on, inside and above its expected bounds.
+ *
+ * Every setter is driven with each integer from -2 through EXPECTED_MAX_INT_FRAC_SIG + 2.
+ * U_ZERO_ERROR is expected when lowerBound <= argument <= EXPECTED_MAX_INT_FRAC_SIG, and
+ * U_NUMBER_ARG_OUTOFBOUNDS_ERROR otherwise. The status is read back through copyErrorTo().
+ */
+void NumberFormatterApiTest::validRanges() {
+
+// Largest argument value this test expects the settings to accept.
+#define EXPECTED_MAX_INT_FRAC_SIG 999
+
+// Asserts that `status` is the status expected for `argument`, given the setting's `lowerBound`.
+// `method` is only stringified into the failure message.
+#define VALID_RANGE_ASSERT(status, method, lowerBound, argument) { \
+    UErrorCode expectedStatus = (((lowerBound) <= (argument)) && ((argument) <= EXPECTED_MAX_INT_FRAC_SIG)) \
+        ? U_ZERO_ERROR \
+        : U_NUMBER_ARG_OUTOFBOUNDS_ERROR; \
+    assertEquals( \
+        UnicodeString(u"Incorrect status for " #method " on input ") \
+            + Int64ToUnicodeString(argument), \
+        expectedStatus, \
+        status); \
+}
+
+// Sweeps a one-argument factory `method`, applied through NumberFormatter::with().setting(...).
+#define VALID_RANGE_ONEARG(setting, method, lowerBound) { \
+    for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \
+        UErrorCode status = U_ZERO_ERROR; \
+        NumberFormatter::with().setting(method(argument)).copyErrorTo(status); \
+        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
+    } \
+}
+
+// Sweeps a two-argument (min, max) factory `method`: once varying the first argument, once
+// varying the second, and once with max < min. The status is cleared before each of the three
+// probes so that no probe can pass merely because an earlier one already failed.
+#define VALID_RANGE_TWOARGS(setting, method, lowerBound) { \
+    for (int32_t argument = -2; argument <= EXPECTED_MAX_INT_FRAC_SIG + 2; argument++) { \
+        UErrorCode status = U_ZERO_ERROR; \
+        /* Pass EXPECTED_MAX_INT_FRAC_SIG as the second argument so arg1 <= arg2 in expected cases */ \
+        NumberFormatter::with().setting(method(argument, EXPECTED_MAX_INT_FRAC_SIG)).copyErrorTo(status); \
+        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
+        status = U_ZERO_ERROR; \
+        /* Pass lowerBound as the first argument so arg1 <= arg2 in expected cases */ \
+        NumberFormatter::with().setting(method(lowerBound, argument)).copyErrorTo(status); \
+        VALID_RANGE_ASSERT(status, method, lowerBound, argument); \
+        /* NOTE(review): copyErrorTo() presumably leaves an already-failing status untouched, */ \
+        /* so clear it to make the max < min probe independent of the previous one. */ \
+        status = U_ZERO_ERROR; \
+        /* Check that first argument must be less than or equal to second argument */ \
+        NumberFormatter::with().setting(method(argument, argument - 1)).copyErrorTo(status); \
+        assertEquals("Incorrect status for " #method " on max < min input", \
+            U_NUMBER_ARG_OUTOFBOUNDS_ERROR, \
+            status); \
+    } \
+}
+
+    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction, 0);
+    VALID_RANGE_ONEARG(rounding, Rounder::minFraction, 0);
+    VALID_RANGE_ONEARG(rounding, Rounder::maxFraction, 0);
+    VALID_RANGE_TWOARGS(rounding, Rounder::minMaxFraction, 0);
+    VALID_RANGE_ONEARG(rounding, Rounder::fixedDigits, 1);
+    VALID_RANGE_ONEARG(rounding, Rounder::minDigits, 1);
+    VALID_RANGE_ONEARG(rounding, Rounder::maxDigits, 1);
+    VALID_RANGE_TWOARGS(rounding, Rounder::minMaxDigits, 1);
+    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMinDigits, 1);
+    VALID_RANGE_ONEARG(rounding, Rounder::fixedFraction(1).withMaxDigits, 1);
+    VALID_RANGE_ONEARG(notation, Notation::scientific().withMinExponentDigits, 1);
+    VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo, 0);
+    VALID_RANGE_ONEARG(integerWidth, IntegerWidth::zeroFillTo(0).truncateAt, -1);
+
+// The helper macros are local to this test; do not let them leak into the rest of the file.
+#undef VALID_RANGE_TWOARGS
+#undef VALID_RANGE_ONEARG
+#undef VALID_RANGE_ASSERT
+#undef EXPECTED_MAX_INT_FRAC_SIG
+}
+
  
  void NumberFormatterApiTest::assertFormatDescending(const UnicodeString &message,
                                                   const UnlocalizedNumberFormatter &f,
diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp

index c65859b873c7ccdaa939ee246f5ad727db571d1b..78b1029cf28663cdfcd83a20657f9be56a996b50 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@@ -8923,20 +8923,23 @@ void NumberFormatTest::checkExceptionIssue11735() {
  }
  
  void NumberFormatTest::Test11035_FormatCurrencyAmount() {
-    UErrorCode status;
+    UErrorCode status = U_ZERO_ERROR;
      double amount = 12345.67;
      const char16_t* expected = u"12,345$67 ";
  
      // Test two ways to set a currency via API
  
      Locale loc1 = Locale("pt_PT");
-    NumberFormat* fmt1 = NumberFormat::createCurrencyInstance(loc1, status);
+    LocalPointer<NumberFormat> fmt1(NumberFormat::createCurrencyInstance(loc1, status));
+    assertSuccess("Creating fmt1", status);
      fmt1->setCurrency(u"PTE", status);
+    assertSuccess("Setting currency on fmt1", status);
      UnicodeString actualSetCurrency;
      fmt1->format(amount, actualSetCurrency);
  
      Locale loc2 = Locale("pt_PT@currency=PTE");
-    NumberFormat* fmt2 = NumberFormat::createCurrencyInstance(loc2, status);
+    LocalPointer<NumberFormat> fmt2(NumberFormat::createCurrencyInstance(loc2, status));
+    assertSuccess("Creating fmt2", status);
      UnicodeString actualLocaleString;
      fmt2->format(amount, actualLocaleString);
  
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp

index b28723f4564c015c85bc6e313b466ace4bf797d3..fd150617a7914b2efec5dae7281d65b6f261a9b0 100644 (file)
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -4469,16 +4469,15 @@ void RBBITest::TestTableRedundancies() {
                  "!!forward; \n"
                  "($s0 | '?')*; \n"
                  "($s1 | $s2 | $s3)*; \n" };
-    RuleBasedBreakIterator *lbi = 
+
+    RuleBasedBreakIterator *lbi =
          (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
      //lbi->dumpTables();
-    rules = lbi->getRules();
+    UnicodeString lbRules = lbi->getRules();
      delete lbi;
  
      UParseError pe {};
-    RuleBasedBreakIterator *bi =
-    //         (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
-           new RuleBasedBreakIterator(rules, pe, status);
+    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
      assertSuccess(WHERE, status);
      if (U_FAILURE(status)) return;
      bi->dumpTables();
diff --git a/icu4c/source/test/intltest/tsdcfmsy.cpp b/icu4c/source/test/intltest/tsdcfmsy.cpp

index 90198e070f4f513f4191029e08f12eaed581eac1..1ed6d760e677fcc592920a65f97987876bb4f498 100644 (file)
--- a/icu4c/source/test/intltest/tsdcfmsy.cpp
+++ b/icu4c/source/test/intltest/tsdcfmsy.cpp
@@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co
      TESTCASE_AUTO_BEGIN;
      TESTCASE_AUTO(testSymbols);
      TESTCASE_AUTO(testLastResortData);
+    TESTCASE_AUTO(testDigitSymbols);
      TESTCASE_AUTO(testNumberingSystem);
      TESTCASE_AUTO_END;
  }
@@ -249,6 +250,102 @@ void IntlTestDecimalFormatSymbols::testLastResortData() {
      Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25");
  }
  
+/**
+ * Exercises DecimalFormatSymbols::getCodePointZero() and getConstDigitSymbol() while the
+ * digit symbols are switched between ASCII digits and the Osmanya digits U+104A0..U+104A9.
+ * The stages are labelled i. through vi. in the assertion messages.
+ */
+void IntlTestDecimalFormatSymbols::testDigitSymbols() {
+    // This test does more in ICU4J than in ICU4C right now.
+    // In ICU4C, it is basically just a test for codePointZero and getConstDigitSymbol.
+    UChar defZero = u'0';
+    UChar32 osmanyaZero = U'\U000104A0';
+    // One string per digit 0..9, used both to set the symbols and as the expected values.
+    static const UChar* osmanyaDigitStrings[] = {
+        u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4",
+        u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9"
+    };
+
+    IcuTestErrorCode status(*this, "testDigitSymbols()");
+    DecimalFormatSymbols symbols(Locale("en"), status);
+
+    // i. Symbols for locale "en": code point zero is expected to be ASCII '0' and the
+    // digit symbols the ASCII digits.
+    if (defZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be ASCII 0");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("i. ASCII Digit at index ") + Int64ToUnicodeString(i),
+            UnicodeString(u'0' + i),
+            symbols.getConstDigitSymbol(i));
+    }
+
+    // ii. Replace every digit individually, passing FALSE as the third setSymbol argument.
+    // Digit 0 is addressed with kZeroDigitSymbol; digits 1..9 are addressed as offsets from
+    // kOneDigitSymbol (presumably because the zero value is not adjacent in the enum).
+    for (int32_t i=0; i<=9; i++) {
+        DecimalFormatSymbols::ENumberFormatSymbol key =
+            i == 0
+            ? DecimalFormatSymbols::kZeroDigitSymbol
+            : static_cast<DecimalFormatSymbols::ENumberFormatSymbol>
+                (DecimalFormatSymbols::kOneDigitSymbol + i - 1);
+        symbols.setSymbol(key, UnicodeString(osmanyaDigitStrings[i]), FALSE);
+    }
+    // NOTE: in ICU4J, the calculation of codePointZero is smarter;
+    // in ICU4C, it is more conservative and is only set if propogateDigits is true.
+    // Hence -1 is expected here even though all ten digits are now Osmanya.
+    if (-1 != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be invalid");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("ii. Osmanya digit at index ") + Int64ToUnicodeString(i),
+            UnicodeString(osmanyaDigitStrings[i]),
+            symbols.getConstDigitSymbol(i));
+    }
+
+    // Check Osmanya codePointZero
+    // iii. Setting only the zero digit with TRUE as the third argument is expected to yield
+    // the Osmanya code point zero while all ten digit strings stay Osmanya.
+    symbols.setSymbol(
+        DecimalFormatSymbols::kZeroDigitSymbol,
+        UnicodeString(osmanyaDigitStrings[0]), TRUE);
+    if (osmanyaZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be Osmanya code point zero");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("iii. Osmanya digit at index ") + Int64ToUnicodeString(i),
+            UnicodeString(osmanyaDigitStrings[i]),
+            symbols.getConstDigitSymbol(i));
+    }
+
+    // Check after copy
+    // iv. A copy-constructed object must report the same code point zero and digit strings.
+    DecimalFormatSymbols copy(symbols);
+    if (osmanyaZero != copy.getCodePointZero()) {
+        errln("ERROR: Code point zero be Osmanya code point zero");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("iv. After copy at index ") + Int64ToUnicodeString(i),
+            UnicodeString(osmanyaDigitStrings[i]),
+            copy.getConstDigitSymbol(i));
+    }
+
+    // Check when loaded from resource bundle
+    // v. Same expectations for symbols constructed for the locale "en@numbers=osma".
+    DecimalFormatSymbols fromData(Locale("en@numbers=osma"), status);
+    if (osmanyaZero != fromData.getCodePointZero()) {
+        errln("ERROR: Code point zero be Osmanya code point zero");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("v. Resource bundle at index ") + Int64ToUnicodeString(i),
+            UnicodeString(osmanyaDigitStrings[i]),
+            fromData.getConstDigitSymbol(i));
+    }
+
+    // Setting a digit somewhere in the middle should invalidate codePointZero
+    symbols.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"foo", FALSE);
+    if (-1 != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be invalid");
+    }
+
+    // Reset digits to Latin
+    // vi. Only the zero digit is set, using setSymbol's default third argument; the checks
+    // below expect all ten digits to come back as ASCII (presumably the default propagates
+    // the digits -- the declaration is not visible here).
+    symbols.setSymbol(
+        DecimalFormatSymbols::kZeroDigitSymbol,
+        UnicodeString(defZero));
+    if (defZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be ASCII 0");
+    }
+    for (int32_t i=0; i<=9; i++) {
+        assertEquals(UnicodeString("vi. ASCII Digit at index ") + Int64ToUnicodeString(i),
+            UnicodeString(u'0' + i),
+            symbols.getConstDigitSymbol(i));
+    }
+}
+
  void IntlTestDecimalFormatSymbols::testNumberingSystem() {
      IcuTestErrorCode errorCode(*this, "testNumberingSystem");
      struct testcase {
diff --git a/icu4c/source/test/intltest/tsdcfmsy.h b/icu4c/source/test/intltest/tsdcfmsy.h

index 1fd1dfdfba3dc357ecdffe6287d4c5a33bb0826d..1922941b847d65205fb0de9683d660a38b05f53d 100644 (file)
--- a/icu4c/source/test/intltest/tsdcfmsy.h
+++ b/icu4c/source/test/intltest/tsdcfmsy.h
@@ -28,6 +28,7 @@ private:
       */
      void testSymbols(/*char *par*/);
      void testLastResortData();
+    void testDigitSymbols();
      void testNumberingSystem();
  
       /** helper functions**/
diff --git a/icu4c/source/tools/escapesrc/escapesrc.cpp b/icu4c/source/tools/escapesrc/escapesrc.cpp

index 5e9648476e90c5adbc6697ced7546b074af4305c..53f6a40db485b1625857e9358fcffebd48230945 100644 (file)
--- a/icu4c/source/tools/escapesrc/escapesrc.cpp
+++ b/icu4c/source/tools/escapesrc/escapesrc.cpp
@@ -4,39 +4,76 @@
  #include <stdio.h>
  #include <string>
  #include <stdlib.h>
-#include <unistd.h>
  #include <errno.h>
  #include <string.h>
  #include <iostream>
  #include <fstream>
  
-// with caution:
+// Include this even though we aren't linking against it.
  #include "unicode/utf8.h"
  
+// Include this here, to avoid needing to compile and link part of common lib
+// (bootstrapping problem)
+#include "utf_impl.cpp"
+
+/**
+ * What is this?
+ *  or even:
+ * what IS this??
+ * 
+ * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code
+ * in utf-8 into.. something else. Something consumable by certain compilers (Solaris, xlC)
+ * which aren't quite there.
+ *
+ * - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN'
+ * - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc.
+ * - if the system is EBCDIC-based, well, that's taken into account.
+ *
+ * Usage:
+ *   escapesrc infile.cpp outfile.cpp
+ * Normally this is invoked by the build stage, with a rule such as:
+ *
+ * _%.cpp: $(srcdir)/%.cpp
+ *       @$(BINDIR)/escapesrc$(EXEEXT) $< $@
+ * %.o: _%.cpp
+ *       $(COMPILE.cc) ... $@ $<
+ *
+ * Naturally, 'escapesrc' has to be excluded from said build rule.
+
+ */
+
+
  static const char
    kSPACE   = 0x20,
    kTAB     = 0x09,
    kLF      = 0x0A,
    kCR      = 0x0D;
-  // kHASH    = 0x23,
-  // kSLASH   = 0x2f,
-  // kSTAR    = 0x2A,
  
+// This contains a codepage and ISO 14882:1998 illegality table.
+// Use "make gen-table" to rebuild it.
  # include "cptbl.h"
  
+// For convenience
  # define cp1047_to_8859(c) cp1047_8859_1[c]
  
+// Our app's name
  std::string prog;
  
+/**
+ * Print the usual 1-line usage message to stderr (this function does not itself exit)
+ */
  void usage() {
    fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str());
  }
  
-
+/**
+ * Delete the output file (if any)
+ * We want to delete even if we didn't generate, because it might be stale.
+ */
  int cleanup(const std::string &outfile) {
    const char *outstr = outfile.c_str();
    if(outstr && *outstr) {
-    int rc = unlink(outstr);
+    int rc = std::remove(outstr);
      if(rc == 0) {
        fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr);
        return 0;
@@ -44,7 +81,7 @@ int cleanup(const std::string &outfile) {
        if( errno == ENOENT ) {
          return 0; // File did not exist - no error.
        } else {
-        perror("unlink");
+        perror("std::remove");
          return 1;
        }
      }
@@ -52,16 +89,12 @@ int cleanup(const std::string &outfile) {
    return 0;
  }
  
-// inline bool hasNonAscii(const char *line, size_t len) {
-//   const unsigned char *uline = reinterpret_cast<const unsigned char*>(line);
-//   for(size_t i=0;i<len; i++) {
-//     if( uline[i] > 0x7F) {
-//       return true;
-//     }
-//   }
-//   return false;
-// }
-
+/**
+ * Skip across any known whitespace.
+ * @param p startpoint
+ * @param e limit
+ * @return first non-whitespace char
+ */
  inline const char *skipws(const char *p, const char *e) {
    for(;p<e;p++) {
      switch(*p) {
@@ -77,30 +110,11 @@ inline const char *skipws(const char *p, const char *e) {
    return p;
  }
  
-// inline bool isCommentOrEmpty(const char* line, size_t len) {
-//   const char *p = line;
-//   const char *e = line+len;
-//   p = skipws(p,e);
-//   if(p==e) {
-//     return true; // whitespace only
-//   }
-//   p++;
-//   switch(*p) {
-//   case kHASH: return true; // #directive
-//   case kSLASH:
-//     p++;
-//     if(p==e) return false; // single slash
-//     switch(*p) {
-//     case kSLASH: // '/ /'
-//     case kSTAR: // '/ *'
-//       return true; // start of comment
-//     default: return false; // something else
-//     }
-//   default: return false; // something else
-//   }
-//   /*NOTREACHED*/
-// }
-
+/**
+ * Append a byte, hex encoded
+ * @param outstr string to append to
+ * @param byte the byte to append
+ */
  void appendByte(std::string &outstr,
                  uint8_t byte) {
      char tmp2[5];
@@ -109,6 +123,11 @@ void appendByte(std::string &outstr,
  }
  
  /**
+ * Append the bytes from 'linestr' into outstr, with escaping
+ * @param outstr the output buffer
+ * @param linestr the input buffer
+ * @param pos in/out: the current char under consideration
+ * @param chars the number of chars to consider
   * @return true on failure
   */
  bool appendUtf8(std::string &outstr,
@@ -141,6 +160,7 @@ bool appendUtf8(std::string &outstr,
  }
  
  /**
+ * Fixup u8"x"
   * @param linestr string to mutate. Already escaped into \u format.
   * @param origpos beginning, points to 'u8"'
   * @param pos end, points to "
@@ -184,9 +204,11 @@ bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
  }
  
  /**
- * fix the string at the position
- * false = no err
- * true = had err
+ * fix the u"x"/u'x'/u8"x" string at the position
+ * u8'x' is not supported, sorry.
+ * @param linestr the input string
+ * @param pos the position
+ * @return false = no err, true = had err
   */
  bool fixAt(std::string &linestr, size_t pos) {
    size_t origpos = pos;
@@ -292,8 +314,12 @@ bool fixAt(std::string &linestr, size_t pos) {
  }
  
  /**
+ * Fixup an entire line
   * false = no err
   * true = had err
+ * @param no the line number (not used)
+ * @param linestr the string to fix
+ * @return true if any err, else false
   */
  bool fixLine(int /*no*/, std::string &linestr) {
    const char *line = linestr.c_str();
@@ -304,17 +330,6 @@ bool fixLine(int /*no*/, std::string &linestr) {
      return false; // Nothing to do. No u' or u" detected
    }
  
-  // lines such as u8"\u0308" are all ASCII.
-  // // Quick Check: all ascii?
-  // if(!hasNonAscii(line, len)) {
-  //   return false; // ASCII
-  // }
-
-  // // comment or empty line?
-  // if(isCommentOrEmpty(line, len)) {
-  //   return false; // Comment or just empty
-  // }
-
    // start from the end and find all u" cases
    size_t pos = len = linestr.size();
    while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) {
@@ -345,6 +360,12 @@ bool fixLine(int /*no*/, std::string &linestr) {
    return false;
  }
  
+/**
+ * Convert a whole file
+ * @param infile
+ * @param outfile
+ * @return 1 on err, 0 otherwise
+ */
  int convert(const std::string &infile, const std::string &outfile) {
    fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
  
@@ -386,6 +407,9 @@ int convert(const std::string &infile, const std::string &outfile) {
    return 0;
  }
  
+/**
+ * Main function
+ */
  int main(int argc, const char *argv[]) {
    prog = argv[0];
  
@@ -399,6 +423,3 @@ int main(int argc, const char *argv[]) {
  
    return convert(infile, outfile);
  }
-
-
-#include "utf_impl.cpp"
author	Andy Heninger <andy.heninger@gmail.com>
	Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Sat, 10 Feb 2018 01:31:35 +0000 (01:31 +0000)
icu4c/source/common/rbbirb.cpp		patch \| blob \| history
icu4c/source/common/rbbitblb.cpp		patch \| blob \| history
icu4c/source/common/rbbitblb.h		patch \| blob \| history
icu4c/source/common/unicode/utypes.h		patch \| blob \| history
icu4c/source/common/utypes.cpp		patch \| blob \| history
icu4c/source/i18n/dcfmtsym.cpp		patch \| blob \| history
icu4c/source/i18n/number_integerwidth.cpp		patch \| blob \| history
icu4c/source/i18n/number_notation.cpp		patch \| blob \| history
icu4c/source/i18n/number_padding.cpp		patch \| blob \| history
icu4c/source/i18n/number_rounding.cpp		patch \| blob \| history
icu4c/source/i18n/number_types.h		patch \| blob \| history
icu4c/source/i18n/unicode/dcfmtsym.h		patch \| blob \| history
icu4c/source/i18n/unicode/numberformatter.h		patch \| blob \| history
icu4c/source/i18n/unicode/utrans.h		patch \| blob \| history
icu4c/source/i18n/utrans.cpp		patch \| blob \| history
icu4c/source/test/intltest/intltest.cpp		patch \| blob \| history
icu4c/source/test/intltest/intltest.h		patch \| blob \| history
icu4c/source/test/intltest/numbertest.h		patch \| blob \| history
icu4c/source/test/intltest/numbertest_api.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/rbbitst.cpp		patch \| blob \| history
icu4c/source/test/intltest/tsdcfmsy.cpp		patch \| blob \| history
icu4c/source/test/intltest/tsdcfmsy.h		patch \| blob \| history
icu4c/source/tools/escapesrc/escapesrc.cpp		patch \| blob \| history