]> granicus.if.org Git - icu/commitdiff
ICU-8610 Adding tests for number skeletons in C++. Adding error code handling to...
authorShane Carr <shane@unicode.org>
Sat, 24 Mar 2018 05:46:28 +0000 (05:46 +0000)
committerShane Carr <shane@unicode.org>
Sat, 24 Mar 2018 05:46:28 +0000 (05:46 +0000)
X-SVN-Rev: 41152

17 files changed:
icu4c/source/i18n/fmtable.cpp
icu4c/source/i18n/number_decimalquantity.cpp
icu4c/source/i18n/number_decimalquantity.h
icu4c/source/i18n/number_fluent.cpp
icu4c/source/i18n/number_integerwidth.cpp
icu4c/source/i18n/number_skeletons.cpp
icu4c/source/i18n/number_skeletons.h
icu4c/source/i18n/numparse_stringsegment.cpp
icu4c/source/i18n/plurrule.cpp
icu4c/source/i18n/unicode/numberformatter.h
icu4c/source/test/intltest/Makefile.in
icu4c/source/test/intltest/numbertest.h
icu4c/source/test/intltest/numbertest_skeletons.cpp [new file with mode: 0644]
icu4c/source/test/intltest/numfmtst.cpp
icu4c/source/test/intltest/plurults.cpp
icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java

index 051eccf33d1da74cba0d14df111611e9c1a548cb..c65c5c2e095301e79a4f3bf75181918614ec2060 100644 (file)
@@ -795,7 +795,7 @@ Formattable::setDecimalNumber(StringPiece numberString, UErrorCode &status) {
     dispose();
 
     DecimalQuantity* dq = new DecimalQuantity();
-    dq->setToDecNumber(numberString);
+    dq->setToDecNumber(numberString, status);
     adoptDecimalQuantity(dq);
 
     // Note that we do not hang on to the caller's input string.
index c00d1a126aeebbc783b98be9182b69abcd0c6c0e..cd5df22f485d1106f66723cdf62076bb2f450925 100644 (file)
@@ -16,6 +16,7 @@
 #include "number_roundingutils.h"
 #include "double-conversion.h"
 #include "unicode/plurrule.h"
+#include "charstr.h"
 
 using namespace icu;
 using namespace icu::number;
@@ -33,19 +34,29 @@ static constexpr int32_t DEFAULT_DIGITS = 34;
 typedef MaybeStackHeaderAndArray<decNumber, char, DEFAULT_DIGITS> DecNumberWithStorage;
 
 /** Helper function to convert a decNumber-compatible string into a decNumber. */
-void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn) {
+void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn, UErrorCode& status) {
     decContext set;
     uprv_decContextDefault(&set, DEC_INIT_BASE);
     uprv_decContextSetRounding(&set, DEC_ROUND_HALF_EVEN);
-    set.traps = 0; // no traps, thank you
+    set.traps = 0; // no traps: conversion problems are detected by checking set.status below
     if (n.length() > DEFAULT_DIGITS) {
         dn.resize(n.length(), 0);
         set.digits = n.length();
     } else {
         set.digits = DEFAULT_DIGITS;
     }
-    uprv_decNumberFromString(dn.getAlias(), n.data(), &set);
-    U_ASSERT(DECDPUN == 1);
+
+    // Make sure that the string is NUL-terminated; CharString guarantees this, but not StringPiece.
+    CharString cs(n, status);
+    if (U_FAILURE(status)) { return; }
+
+    static_assert(DECDPUN == 1, "Assumes that DECDPUN is set to 1");
+    uprv_decNumberFromString(dn.getAlias(), cs.data(), &set);
+
+    // Check for invalid syntax and set the corresponding error code.
+    if ((set.status & DEC_Conversion_syntax) != 0) {
+        status = U_DECIMAL_NUMBER_SYNTAX_ERROR;
+    }
 }
 
 /** Helper function for safe subtraction (no overflow). */
@@ -329,7 +340,9 @@ void DecimalQuantity::_setToLong(int64_t n) {
     if (n == INT64_MIN) {
         static const char *int64minStr = "9.223372036854775808E+18";
         DecNumberWithStorage dn;
-        stringToDecNumber(int64minStr, dn);
+        UErrorCode localStatus = U_ZERO_ERROR;
+        stringToDecNumber(int64minStr, dn, localStatus);
+        if (U_FAILURE(localStatus)) { return; } // unexpected
         readDecNumberToBcd(dn.getAlias());
     } else if (n <= INT32_MAX) {
         readIntToBcd(static_cast<int32_t>(n));
@@ -429,12 +442,13 @@ void DecimalQuantity::convertToAccurateDouble() {
     explicitExactDouble = true;
 }
 
-DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n) {
+DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n, UErrorCode& status) {
     setBcdToZero();
     flags = 0;
 
     DecNumberWithStorage dn;
-    stringToDecNumber(n, dn);
+    stringToDecNumber(n, dn, status);
+    if (U_FAILURE(status)) { return *this; }
 
     // The code path for decNumber is modeled after BigDecimal in Java.
     if (decNumberIsNegative(dn.getAlias())) {
index 495ba80ec1ccf69f08b0e804690dd586ec7d3e5d..10f2e669b8a91580c0aae131fa2882b74f6365ab 100644 (file)
@@ -145,7 +145,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory {
 
     /** decNumber is similar to BigDecimal in Java. */
 
-    DecimalQuantity &setToDecNumber(StringPiece n);
+    DecimalQuantity &setToDecNumber(StringPiece n, UErrorCode& status);
 
     /**
      * Appends a digit, optionally with one or more leading zeros, to the end of the value represented
index 372e6f18d8345769b97770cf18a33e8d02c87624..e0ba258e3cf85369ecb60f4027aed2aefc820b32 100644 (file)
@@ -11,6 +11,7 @@
 #include "number_decimalquantity.h"
 #include "number_formatimpl.h"
 #include "umutex.h"
+#include "number_skeletons.h"
 
 using namespace icu;
 using namespace icu::number;
@@ -287,6 +288,11 @@ Derived NumberFormatterSettings<Derived>::macros(impl::MacroProps&& macros) && {
     return move;
 }
 
+template<typename Derived>
+UnicodeString NumberFormatterSettings<Derived>::toSkeleton(UErrorCode& status) const {
+    return skeleton::generate(fMacros, status);
+}
+
 // Declare all classes that implement NumberFormatterSettings
 // See https://stackoverflow.com/a/495056/1407170
 template
@@ -304,6 +310,11 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) {
     return with().locale(locale);
 }
 
+UnlocalizedNumberFormatter
+NumberFormatter::fromSkeleton(const UnicodeString& skeleton, UErrorCode& status) {
+    return skeleton::create(skeleton, status);
+}
+
 
 template<typename T>
 using NFS = NumberFormatterSettings<T>;
@@ -563,7 +574,7 @@ FormattedNumber LocalizedNumberFormatter::formatDecimal(StringPiece value, UErro
         status = U_MEMORY_ALLOCATION_ERROR;
         return FormattedNumber(status);
     }
-    results->quantity.setToDecNumber(value);
+    results->quantity.setToDecNumber(value, status);
     return formatImpl(results, status);
 }
 
index 464c2230fffe108308b6d3d43320e651b8c11e1f..87e543622cc9c2548a5d58ae9bbb158058c02588 100644 (file)
@@ -39,7 +39,7 @@ IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
     }
 }
 
-void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) const {
+void IntegerWidth::apply(impl::DecimalQuantity& quantity, UErrorCode& status) const {
     if (fHasError) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
     } else if (fUnion.minMaxInt.fMaxInt == -1) {
@@ -50,12 +50,13 @@ void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) co
 }
 
 bool IntegerWidth::operator==(const IntegerWidth& other) const {
-    if (fHasError) {
-        return other.fHasError && fUnion.errorCode == other.fUnion.errorCode;
-    } else {
-        return !other.fHasError && fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt &&
-               fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt;
-    }
+    // Private operator==; do error and bogus checking first!
+    U_ASSERT(!fHasError);
+    U_ASSERT(!other.fHasError);
+    U_ASSERT(!isBogus());
+    U_ASSERT(!other.isBogus());
+    return fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt &&
+           fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt;
 }
 
 #endif /* #if !UCONFIG_NO_FORMATTING */
index 3efae97f20ab491cb90d2b138e3f2af0bda71e97..357c443455ec8f428988a49c95d3e32f1c95f0f4 100644 (file)
 #include "number_utils.h"
 #include "number_decimalquantity.h"
 #include "unicode/numberformatter.h"
+#include "uinvchar.h"
+#include "charstr.h"
 
 using namespace icu;
 using namespace icu::number;
 using namespace icu::number::impl;
 using namespace icu::number::impl::skeleton;
 
-static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-
 namespace {
 
 icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER;
@@ -107,7 +107,7 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
 }
 
 
-#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wraping */ \
+#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \
 { \
     if ((seen).field) { \
         (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
@@ -117,8 +117,24 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
 }
 
 
+#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
+{ \
+    UErrorCode conversionStatus = U_ZERO_ERROR; \
+    (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
+    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
+        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
+        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
+        return; \
+    } else if (U_FAILURE(conversionStatus)) { \
+        (status) = conversionStatus; \
+        return; \
+    } \
+}
+
+
+// NOTE: The order of these strings must be consistent with UNumberFormatRoundingMode
 const char16_t* const kRoundingModeStrings[] = {
-        u"up", u"down", u"ceiling", u"floor", u"half-up", u"half-down", u"half-even", u"unnecessary"};
+        u"ceiling", u"floor", u"down", u"up", u"half-even", u"half-down", u"half-up", u"unnecessary"};
 
 constexpr int32_t kRoundingModeCount = 8;
 static_assert(
@@ -357,14 +373,14 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo
 
     SeenMacroProps seen;
     MacroProps macros;
-    StringSegment segment(skeletonString, false);
+    StringSegment segment(tempSkeletonString, false);
     UCharsTrie stemTrie(kSerializedStemTrie);
     ParseState stem = STATE_NULL;
-    int offset = 0;
+    int32_t offset = 0;
 
     // Primary skeleton parse loop:
     while (offset < segment.length()) {
-        int cp = segment.codePointAt(offset);
+        UChar32 cp = segment.codePointAt(offset);
         bool isTokenSeparator = PatternProps::isWhiteSpace(cp);
         bool isOptionSeparator = (cp == u'/');
 
@@ -772,21 +788,17 @@ blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroPr
 
 void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros,
                                             UErrorCode& status) {
-    if (segment.length() != 3) {
-        // throw new SkeletonSyntaxException("Invalid currency", segment);
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
     const UChar* currencyCode = segment.toUnicodeString().getTerminatedBuffer();
-    // Check that the currency code is valid:
-    int32_t numericCode = ucurr_getNumericCode(currencyCode);
-    if (numericCode == 0) {
+    UErrorCode localStatus = U_ZERO_ERROR;
+    CurrencyUnit currency(currencyCode, localStatus);
+    if (U_FAILURE(localStatus)) {
+        // Not 3 ascii chars
         // throw new SkeletonSyntaxException("Invalid currency", segment);
         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
         return;
     }
     // Slicing is OK
-    macros.unit = CurrencyUnit(currencyCode, status); // NOLINT
+    macros.unit = currency; // NOLINT
 }
 
 void
@@ -796,48 +808,40 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS
 
 void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros,
                                                UErrorCode& status) {
+    UnicodeString stemString = segment.toUnicodeString();
+
     // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
     // http://unicode.org/reports/tr35/#Validity_Data
     int firstHyphen = 0;
-    while (firstHyphen < segment.length() && segment.charAt(firstHyphen) != '-') {
+    while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') {
         firstHyphen++;
     }
-    if (firstHyphen == segment.length()) {
+    if (firstHyphen == stemString.length()) {
         // throw new SkeletonSyntaxException("Invalid measure unit option", segment);
         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
         return;
     }
 
-    // MeasureUnit is in char space; we need to convert.
-    // Note: the longest type/subtype as of this writing (March 2018) is 24 chars.
-    static constexpr int32_t CAPACITY = 30;
-    char type[CAPACITY];
-    char subType[CAPACITY];
-    const int32_t typeLen = firstHyphen;
-    const int32_t subTypeLen = segment.length() - firstHyphen - 1;
-    if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) {
-        // Type or subtype longer than 30?
-        // The capacity should be increased if this is a problem with a real CLDR unit.
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), type, typeLen);
-    u_UCharsToChars(segment.toUnicodeString().getBuffer() + firstHyphen + 1, subType, subTypeLen);
-    type[typeLen] = 0;
-    subType[subTypeLen] = 0;
+    // Need to do char <-> UChar conversion...
+    if (U_FAILURE(status)) { return; }
+    CharString type;
+    SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status);
+    CharString subType;
+    SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status);
 
     // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units.
-    MeasureUnit units[30];
+    static constexpr int32_t CAPACITY = 30;
+    MeasureUnit units[CAPACITY];
     UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t numUnits = MeasureUnit::getAvailable(type, units, 30, localStatus);
+    int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus);
     if (U_FAILURE(localStatus)) {
         // More than 30 units in this type?
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
+        status = U_INTERNAL_PROGRAM_ERROR;
         return;
     }
     for (int32_t i = 0; i < numUnits; i++) {
         auto& unit = units[i];
-        if (uprv_strcmp(subType, unit.getSubtype()) == 0) {
+        if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) {
             macros.unit = unit;
             return;
         }
@@ -848,26 +852,11 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac
 }
 
 void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb,
-                                                  UErrorCode& status) {
-    // We need to convert from char* to UChar*...
-    // See comments in the previous function about the capacity setting.
-    static constexpr int32_t CAPACITY = 30;
-    char16_t type16[CAPACITY];
-    char16_t subType16[CAPACITY];
-    const auto typeLen = static_cast<int32_t>(uprv_strlen(measureUnit.getType()));
-    const auto subTypeLen = static_cast<int32_t>(uprv_strlen(measureUnit.getSubtype()));
-    if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) {
-        // Type or subtype longer than 30?
-        // The capacity should be increased if this is a problem with a real CLDR unit.
-        status = U_UNSUPPORTED_ERROR;
-        return;
-    }
-    u_charsToUChars(measureUnit.getType(), type16, typeLen);
-    u_charsToUChars(measureUnit.getSubtype(), subType16, subTypeLen);
-
-    sb.append(type16, typeLen);
+                                                  UErrorCode&) {
+    // Need to do char <-> UChar conversion...
+    sb.append(UnicodeString(measureUnit.getType(), -1, US_INV));
     sb.append(u'-');
-    sb.append(subType16, subTypeLen);
+    sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV));
 }
 
 void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros,
@@ -1052,17 +1041,19 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr
 
 void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros,
                                              UErrorCode& status) {
+    // Need to do char <-> UChar conversion...
+    CharString buffer;
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);
+
     // Utilize DecimalQuantity/decNumber to parse this for us.
-    static constexpr int32_t CAPACITY = 30;
-    char buffer[CAPACITY];
-    if (segment.length() > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
+    DecimalQuantity dq;
+    UErrorCode localStatus = U_ZERO_ERROR;
+    dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus);
+    if (U_FAILURE(localStatus)) {
+        // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e);
         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
         return;
     }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length());
-    DecimalQuantity dq;
-    dq.setToDecNumber({buffer, segment.length()});
     double increment = dq.toDouble();
     macros.rounder = Rounder::increment(increment);
 }
@@ -1146,17 +1137,10 @@ void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxIn
 void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros,
                                                    UErrorCode& status) {
     // Need to do char <-> UChar conversion...
-    static constexpr int32_t CAPACITY = 30;
-    char buffer[CAPACITY];
-    if (segment.length() + 1 > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length());
-    buffer[segment.length()] = 0;
+    CharString buffer;
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);
 
-    NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer, status);
+    NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status);
     if (ns == nullptr) {
         // throw new SkeletonSyntaxException("Unknown numbering system", segment);
         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
@@ -1166,18 +1150,9 @@ void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment,
 }
 
 void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb,
-                                                      UErrorCode& status) {
+                                                      UErrorCode&) {
     // Need to do char <-> UChar conversion...
-    static constexpr int32_t CAPACITY = 30;
-    char16_t buffer16[CAPACITY];
-    const auto len = static_cast<int32_t>(uprv_strlen(ns.getName()));
-    if (len > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
-        status = U_UNSUPPORTED_ERROR;
-        return;
-    }
-    u_charsToUChars(ns.getName(), buffer16, len);
-    sb.append(buffer16, len);
+    sb.append(UnicodeString(ns.getName(), -1, US_INV));
 }
 
 
@@ -1243,7 +1218,15 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC
 
 bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
     // Per-units are currently expected to be only MeasureUnits.
-    if (unitIsCurrency(macros.perUnit) || unitIsNoUnit(macros.perUnit)) {
+    if (unitIsNoUnit(macros.perUnit)) {
+        if (unitIsPercent(macros.perUnit) || unitIsPermille(macros.perUnit)) {
+            status = U_UNSUPPORTED_ERROR;
+            return false;
+        } else {
+            // Default value: ok to ignore
+            return false;
+        }
+    } else if (unitIsCurrency(macros.perUnit)) {
         status = U_UNSUPPORTED_ERROR;
         return false;
     } else {
@@ -1298,7 +1281,9 @@ bool GeneratorHelpers::rounding(const MacroProps& macros, UnicodeString& sb, UEr
 }
 
 bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
-    if (macros.grouper.isBogus() || macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
+    if (macros.grouper.isBogus()) {
+        return false; // No value
+    } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
         status = U_UNSUPPORTED_ERROR;
         return false;
     } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) {
@@ -1310,7 +1295,8 @@ bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UEr
 }
 
 bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
-    if (macros.integerWidth.fHasError || macros.integerWidth == IntegerWidth::standard()) {
+    if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() ||
+        macros.integerWidth == IntegerWidth::standard()) {
         // Error or Default
         return false;
     }
index e874d6acc33e26d05baa0044d4683938621f2d08..6a15d9efb40f6e87a909d41f0b20f9d0c01f3414 100644 (file)
@@ -16,6 +16,8 @@ using icu::numparse::impl::StringSegment;
 U_NAMESPACE_BEGIN namespace number {
 namespace impl {
 
+static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
+
 // Forward-declaration
 struct SeenMacroProps;
 
index aa8e62beefb4b2e4b895187e41ca56e99b64a930..0a6e4fd104916ba247c4c25cf934d4171c543b29 100644 (file)
@@ -133,5 +133,9 @@ bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
     return cp1 == cp2;
 }
 
+bool StringSegment::operator==(const UnicodeString& other) const {
+    return toUnicodeString() == other;
+}
+
 
 #endif /* #if !UCONFIG_NO_FORMATTING */
index eb77905b0c3cd129975ab54d641833e8991a69b2..1b0089f9c9d2d28f72bdc62b533b25a4423cde74 100644 (file)
@@ -1458,7 +1458,7 @@ FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
     CharString cs;
     cs.appendInvariantChars(num, status);
     DecimalQuantity dl;
-    dl.setToDecNumber(cs.toStringPiece());
+    dl.setToDecNumber(cs.toStringPiece(), status);
     if (U_FAILURE(status)) {
         init(0, 0, 0);
         return;
index fdc6bdc285cda9e285023b3afceddea88b383db2..c0131eaa71266a238f09eae3c9ccbe7b9f9f2bb0 100644 (file)
@@ -2179,6 +2179,25 @@ class U_I18N_API NumberFormatterSettings {
 
 #endif  /* U_HIDE_INTERNAL_API */
 
+    /**
+     * Creates a skeleton string representation of this number formatter. A skeleton string is a
+     * locale-agnostic serialized form of a number formatter.
+     * <p>
+     * Not all options are capable of being represented in the skeleton string; for example, a
+     * DecimalFormatSymbols object. If any such option is encountered, a failure code is
+     * written to the status parameter.
+     * <p>
+     * The returned skeleton is in normalized form, such that two number formatters with equivalent
+     * behavior should produce the same skeleton.
+     * <p>
+     * Sets an error code if the number formatter has an option that cannot be represented in a skeleton
+     * string.
+     *
+     * @return A number skeleton string with behavior corresponding to this number formatter.
+     * @draft ICU 62
+     */
+    UnicodeString toSkeleton(UErrorCode& status) const;
+
     /**
      * Sets the UErrorCode if an error occurred in the fluent chain.
      * Preserves older error codes in the outErrorCode.
@@ -2192,7 +2211,7 @@ class U_I18N_API NumberFormatterSettings {
         }
         fMacros.copyErrorTo(outErrorCode);
         return U_FAILURE(outErrorCode);
-    }
+    };
 
     // NOTE: Uses default copy and move constructors.
 
@@ -2588,6 +2607,18 @@ class U_I18N_API NumberFormatter final {
      */
     static LocalizedNumberFormatter withLocale(const Locale &locale);
 
+    /**
+     * Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
+     * on a given number skeleton string.
+     *
+     * @param skeleton
+     *            The skeleton string off of which to base this NumberFormatter.
+     * @return An UnlocalizedNumberFormatter, to be used for chaining.
+     * @param status Set to a failure code if the given string is not a valid number formatting skeleton.
+     * @draft ICU 62
+     */
+    static UnlocalizedNumberFormatter fromSkeleton(const UnicodeString& skeleton, UErrorCode& status);
+
     /**
      * Use factory methods instead of the constructor to create a NumberFormatter.
      * @draft ICU 60
index 6699d319fef7c905935bd899113f585ac34df6c4..55d1fb21421fb1f69ce7249ff58b94061c719d7f 100644 (file)
@@ -65,7 +65,7 @@ numberformattesttuple.o pluralmaptest.o \
 numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
 numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
 numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
-numbertest_parse.o numbertest_doubleconversion.o
+numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o
 
 DEPS = $(OBJECTS:.o=.d)
 
index 410d9ba316bbd88350c3d85fd7f944d9716c1c82..595a954e28e4250891cbd2d14eb9fbbf82bb6fd7 100644 (file)
@@ -229,6 +229,23 @@ class NumberParserTest : public IntlTest {
     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
 };
 
+class NumberSkeletonTest : public IntlTest {
+  public:
+    void validTokens();
+    void invalidTokens();
+    void unknownTokens();
+    void unexpectedTokens();
+    void duplicateValues();
+    void stemsRequiringOption();
+    void defaultTokens();
+    void flexibleSeparators();
+
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
+
+  private:
+    void expectedErrorSkeleton(const char16_t** cases, int32_t casesLen);
+};
+
 
 // NOTE: This macro is identical to the one in itformat.cpp
 #define TESTCLASS(id, TestClass)          \
@@ -261,6 +278,7 @@ class NumberTest : public IntlTest {
         TESTCLASS(8, StringSegmentTest);
         TESTCLASS(9, UniSetsTest);
         TESTCLASS(10, NumberParserTest);
+        TESTCLASS(11, NumberSkeletonTest);
         default: name = ""; break; // needed to end loop
         }
     }
diff --git a/icu4c/source/test/intltest/numbertest_skeletons.cpp b/icu4c/source/test/intltest/numbertest_skeletons.cpp
new file mode 100644 (file)
index 0000000..80196f9
--- /dev/null
@@ -0,0 +1,245 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "putilimp.h"
+#include "unicode/dcfmtsym.h"
+#include "numbertest.h"
+#include "number_utils.h"
+#include "number_skeletons.h"
+
+using namespace icu::number::impl;
+
+
+void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
+    if (exec) {
+        logln("TestSuite AffixUtilsTest: ");
+    }
+    TESTCASE_AUTO_BEGIN;
+        TESTCASE_AUTO(validTokens);
+        TESTCASE_AUTO(invalidTokens);
+        TESTCASE_AUTO(unknownTokens);
+        TESTCASE_AUTO(unexpectedTokens);
+        TESTCASE_AUTO(duplicateValues);
+        TESTCASE_AUTO(stemsRequiringOption);
+        TESTCASE_AUTO(defaultTokens);
+        TESTCASE_AUTO(flexibleSeparators);
+    TESTCASE_AUTO_END;
+}
+
+void NumberSkeletonTest::validTokens() {
+    // This tests only if the tokens are valid, not their behavior.
+    // Most of these are from the design doc.
+    static const char16_t* cases[] = {
+            u"round-integer",
+            u"round-unlimited",
+            u"@@@##",
+            u"@@+",
+            u".000##",
+            u".00+",
+            u".",
+            u".+",
+            u".######",
+            u".00/@@+",
+            u".00/@##",
+            u"round-increment/3.14",
+            u"round-currency-standard",
+            u"round-integer/half-up",
+            u".00#/ceiling",
+            u".00/@@+/floor",
+            u"scientific",
+            u"scientific/+ee",
+            u"scientific/sign-always",
+            u"scientific/+ee/sign-always",
+            u"scientific/sign-always/+ee",
+            u"scientific/sign-except-zero",
+            u"engineering",
+            u"engineering/+eee",
+            u"compact-short",
+            u"compact-long",
+            u"notation-simple",
+            u"percent",
+            u"permille",
+            u"measure-unit/length-meter",
+            u"measure-unit/area-square-meter",
+            u"measure-unit/energy-joule per-measure-unit/length-meter",
+            u"currency/XXX",
+            u"currency/ZZZ",
+            u"group-off",
+            u"group-min2",
+            u"group-auto",
+            u"group-on-aligned",
+            u"group-thousands",
+            u"integer-width/00",
+            u"integer-width/#0",
+            u"integer-width/+00",
+            u"sign-always",
+            u"sign-auto",
+            u"sign-never",
+            u"sign-accounting",
+            u"sign-accounting-always",
+            u"sign-except-zero",
+            u"sign-accounting-except-zero",
+            u"unit-width-narrow",
+            u"unit-width-short",
+            u"unit-width-iso-code",
+            u"unit-width-full-name",
+            u"unit-width-hidden",
+            u"decimal-auto",
+            u"decimal-always",
+            u"latin",
+            u"numbering-system/arab",
+            u"numbering-system/latn",
+            u"round-integer/@##",
+            u"round-integer/ceiling",
+            u"round-currency-cash/ceiling"};
+
+    for (auto& cas : cases) {
+        UnicodeString skeletonString(cas);
+        UErrorCode status = U_ZERO_ERROR;
+        NumberFormatter::fromSkeleton(skeletonString, status);
+        assertSuccess(skeletonString, status);
+    }
+}
+
+void NumberSkeletonTest::invalidTokens() {
+    static const char16_t* cases[] = {
+            u".00x",
+            u".00##0",
+            u".##+",
+            u".00##+",
+            u".0#+",
+            u"@@x",
+            u"@@##0",
+            u"@#+",
+            u".00/@",
+            u".00/@@",
+            u".00/@@x",
+            u".00/@@#",
+            u".00/@@#+",
+            u".00/floor/@@+", // wrong order
+            u"round-currency-cash/XXX",
+            u"scientific/ee",
+            u"round-increment/xxx",
+            u"round-increment/0.1.2",
+            u"currency/dummy",
+            u"measure-unit/foo",
+            u"integer-width/xxx",
+            u"integer-width/0+",
+            u"integer-width/+0#",
+            u"scientific/foo"};
+
+    expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases));
+}
+
+void NumberSkeletonTest::unknownTokens() {
+    static const char16_t* cases[] = {
+            u"maesure-unit",
+            u"measure-unit/foo-bar",
+            u"numbering-system/dummy",
+            u"français",
+            u"measure-unit/français-français", // non-invariant characters for C++
+            u"numbering-system/français", // non-invariant characters for C++
+            u"round-increment/français", // non-invariant characters for C++
+            u"currency-USD"};
+
+    expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases));
+}
+
+void NumberSkeletonTest::unexpectedTokens() {
+    static const char16_t* cases[] = {
+            u"group-thousands/foo",
+            u"round-integer//ceiling group-off",
+            u"round-integer//ceiling  group-off",
+            u"round-integer/ group-off",
+            u"round-integer// group-off"};
+
+    expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases));
+}
+
+void NumberSkeletonTest::duplicateValues() {
+    static const char16_t* cases[] = {
+            u"round-integer round-integer",
+            u"round-integer .00+",
+            u"round-integer round-unlimited",
+            u"round-integer @@@",
+            u"scientific engineering",
+            u"engineering compact-long",
+            u"sign-auto sign-always"};
+
+    expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases));
+}
+
+void NumberSkeletonTest::stemsRequiringOption() {
+    static const char16_t* stems[] = {u"round-increment", u"currency", u"measure-unit", u"integer-width",};
+    static const char16_t* suffixes[] = {u"", u"/ceiling", u" scientific", u"/ceiling scientific"};
+
+    for (auto& stem : stems) {
+        for (auto& suffix : suffixes) {
+            UnicodeString skeletonString = UnicodeString(stem) + suffix;
+            UErrorCode status = U_ZERO_ERROR;
+            NumberFormatter::fromSkeleton(skeletonString, status);
+            assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
+        }
+    }
+}
+
+void NumberSkeletonTest::defaultTokens() {
+    IcuTestErrorCode status(*this, "defaultTokens");
+
+    static const char16_t* cases[] = {
+            u"notation-simple",
+            u"base-unit",
+            u"group-auto",
+            u"integer-width/+0",
+            u"sign-auto",
+            u"unit-width-short",
+            u"decimal-auto"};
+
+    for (auto& cas : cases) {
+        UnicodeString skeletonString(cas);
+        status.setScope(skeletonString);
+        UnicodeString normalized = NumberFormatter::fromSkeleton(
+                skeletonString, status).toSkeleton(status);
+        // Skeleton should become empty when normalized
+        assertEquals(skeletonString, u"", normalized);
+    }
+}
+
+void NumberSkeletonTest::flexibleSeparators() {
+    IcuTestErrorCode status(*this, "flexibleSeparators");
+
+    static struct TestCase {
+        const char16_t* skeleton;
+        const char16_t* expected;
+    } cases[] = {{u"round-integer group-off", u"5142"},
+                 {u"round-integer  group-off", u"5142"},
+                 {u"round-integer/ceiling group-off", u"5143"},
+                 {u"round-integer/ceiling  group-off", u"5143"}};
+
+    for (auto& cas : cases) {
+        UnicodeString skeletonString(cas.skeleton);
+        UnicodeString expected(cas.expected);
+        status.setScope(skeletonString);
+        UnicodeString actual = NumberFormatter::fromSkeleton(skeletonString, status).locale("en")
+                .formatDouble(5142.3, status)
+                .toString();
+        assertEquals(skeletonString, expected, actual);
+    }
+}
+
+// In C++, there is no distinguishing between "invalid", "unknown", and "unexpected" tokens.
+void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) {
+    for (int32_t i = 0; i < casesLen; i++) {
+        UnicodeString skeletonString(cases[i]);
+        UErrorCode status = U_ZERO_ERROR;
+        NumberFormatter::fromSkeleton(skeletonString, status);
+        assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
+    }
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
index 35825b62cf368ddda9aed9d5c7d8ed6714dc0dd7..3487d432f772b33ba28adff371b6fde57d1032de 100644 (file)
@@ -105,7 +105,7 @@ static DecimalQuantity &strToDigitList(
     }
     CharString formatValue;
     formatValue.appendInvariantChars(str, status);
-    digitList.setToDecNumber(StringPiece(formatValue.data()));
+    digitList.setToDecNumber(StringPiece(formatValue.data()), status);
     return digitList;
 }
 
@@ -7027,7 +7027,7 @@ void NumberFormatTest::TestDecimal() {
             UnicodeString formattedResult;
             DecimalQuantity dl;
             StringPiece num("123.4566666666666666666666666666666666621E+40");
-            dl.setToDecNumber(num);
+            dl.setToDecNumber(num, status);
             ASSERT_SUCCESS(status);
             fmtr->format(dl, formattedResult, NULL, status);
             ASSERT_SUCCESS(status);
@@ -7035,7 +7035,7 @@ void NumberFormatTest::TestDecimal() {
 
             status = U_ZERO_ERROR;
             num.set("666.666");
-            dl.setToDecNumber(num);
+            dl.setToDecNumber(num, status);
             FieldPosition pos(NumberFormat::FRACTION_FIELD);
             ASSERT_SUCCESS(status);
             formattedResult.remove();
index 57221263039c7d102be48be95a97bc590117f0c7..c6b1a046a0df7dc488fb3183f87b68e49501cfff 100644 (file)
@@ -636,7 +636,7 @@ void PluralRulesTest::checkSelect(const LocalPointer<PluralRules> &rules, UError
 
         // DigitList is a convenient way to parse the decimal number string and get a double.
         DecimalQuantity  dl;
-        dl.setToDecNumber(StringPiece(num));
+        dl.setToDecNumber(StringPiece(num), status);
         if (U_FAILURE(status)) {
             errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status));
             status = U_ZERO_ERROR;
index 6b8d77c9ac829ef25725f7111ec414da26c9b3d7..e6138e280df933951babcd566deb5e13ef43d439 100644 (file)
@@ -850,11 +850,14 @@ class NumberSkeletonImpl {
 
         private static void parseCurrencyOption(StringSegment segment, MacroProps macros) {
             String currencyCode = segment.subSequence(0, segment.length()).toString();
+            Currency currency;
             try {
-                macros.unit = Currency.getInstance(currencyCode);
+                currency = Currency.getInstance(currencyCode);
             } catch (IllegalArgumentException e) {
+                // Not 3 ascii chars
                 throw new SkeletonSyntaxException("Invalid currency", segment, e);
             }
+            macros.unit = currency;
         }
 
         private static void generateCurrencyOption(Currency currency, StringBuilder sb) {
index 4d5509f4f1c178c76f0ae03652fdffca73c3d2dd..37815f69157b4b611087abb98f62e56d5f683fb0 100644 (file)
@@ -21,16 +21,6 @@ import com.ibm.icu.util.ULocale;
  */
 public class NumberSkeletonTest {
 
-    @Test
-    public void duplicateValues() {
-        try {
-            NumberFormatter.fromSkeleton("round-integer round-integer");
-            fail();
-        } catch (SkeletonSyntaxException expected) {
-            assertTrue(expected.getMessage(), expected.getMessage().contains("Duplicated setting"));
-        }
-    }
-
     @Test
     public void validTokens() {
         // This tests only if the tokens are valid, not their behavior.
@@ -69,6 +59,7 @@ public class NumberSkeletonTest {
                 "measure-unit/area-square-meter",
                 "measure-unit/energy-joule per-measure-unit/length-meter",
                 "currency/XXX",
+                "currency/ZZZ",
                 "group-off",
                 "group-min2",
                 "group-auto",
@@ -138,7 +129,7 @@ public class NumberSkeletonTest {
         for (String cas : cases) {
             try {
                 NumberFormatter.fromSkeleton(cas);
-                fail("Skeleton parses, but it should have failed: " + cas);
+                fail(cas);
             } catch (SkeletonSyntaxException expected) {
                 assertTrue(expected.getMessage(), expected.getMessage().contains("Invalid"));
             }
@@ -147,12 +138,20 @@ public class NumberSkeletonTest {
 
     @Test
     public void unknownTokens() {
-        String[] cases = { "maesure-unit", "measure-unit/foo-bar", "numbering-system/dummy" };
+        String[] cases = {
+                "maesure-unit",
+                "measure-unit/foo-bar",
+                "numbering-system/dummy",
+                "français",
+                "measure-unit/français-français", // non-invariant characters for C++
+                "numbering-system/français", // non-invariant characters for C++
+                "round-increment/français", // non-invariant characters for C++
+                "currency-USD" };
 
         for (String cas : cases) {
             try {
                 NumberFormatter.fromSkeleton(cas);
-                fail();
+                fail(cas);
             } catch (SkeletonSyntaxException expected) {
                 assertTrue(expected.getMessage(), expected.getMessage().contains("Unknown"));
             }
@@ -171,13 +170,34 @@ public class NumberSkeletonTest {
         for (String cas : cases) {
             try {
                 NumberFormatter.fromSkeleton(cas);
-                fail();
+                fail(cas);
             } catch (SkeletonSyntaxException expected) {
                 assertTrue(expected.getMessage(), expected.getMessage().contains("Unexpected"));
             }
         }
     }
 
+    /**
+     * Checks that each skeleton in the table is rejected by
+     * {@code NumberFormatter.fromSkeleton} with a {@code SkeletonSyntaxException}
+     * whose message contains "Duplicated".
+     */
+    @Test
+    public void duplicateValues() {
+        String[] cases = {
+                "round-integer round-integer",
+                "round-integer .00+",
+                "round-integer round-unlimited",
+                "round-integer @@@",
+                "scientific engineering",
+                "engineering compact-long",
+                "sign-auto sign-always" };
+
+        for (String cas : cases) {
+            try {
+                NumberFormatter.fromSkeleton(cas);
+                // Reaching this line means parsing succeeded; report the skeleton.
+                fail(cas);
+            } catch (SkeletonSyntaxException expected) {
+                assertTrue(expected.getMessage(), expected.getMessage().contains("Duplicated"));
+            }
+        }
+    }
+
     @Test
     public void stemsRequiringOption() {
         String[] stems = { "round-increment", "currency", "measure-unit", "integer-width", };