From eb3bb792cd3ad8ad929b00efdf195852faa09bda Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Tue, 20 Oct 2020 21:26:17 +0000 Subject: [PATCH] ICU-21339 toSkeleton: treat percent and permille correctly See #1414 --- icu4c/source/i18n/number_skeletons.cpp | 28 ++++---- icu4c/source/test/intltest/numbertest.h | 1 + icu4c/source/test/intltest/numbertest_api.cpp | 46 ++++++++++++- .../test/intltest/numbertest_skeletons.cpp | 56 +++++++++++++++ .../ibm/icu/number/NumberSkeletonImpl.java | 33 +++++---- .../test/number/NumberFormatterApiTest.java | 69 ++++++++++++++++++- .../dev/test/number/NumberSkeletonTest.java | 33 +++++++++ 7 files changed, 235 insertions(+), 31 deletions(-) diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index e6d94d27b2b..028525a589d 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -732,6 +732,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_CURRENCY: CHECK_NULL(seen, unit, status); + CHECK_NULL(seen, perUnit, status); return STATE_CURRENCY_UNIT; case STEM_INTEGER_WIDTH: @@ -1500,32 +1501,33 @@ bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UEr } bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - if (utils::unitIsCurrency(macros.unit)) { + MeasureUnit unit = macros.unit; + if (!utils::unitIsBaseUnit(macros.perUnit)) { + if (utils::unitIsCurrency(macros.unit) || utils::unitIsCurrency(macros.perUnit)) { + status = U_UNSUPPORTED_ERROR; + return false; + } + unit = unit.product(macros.perUnit.reciprocal(status), status); + } + + if (utils::unitIsCurrency(unit)) { sb.append(u"currency/", -1); - CurrencyUnit currency(macros.unit, status); + CurrencyUnit currency(unit, status); if (U_FAILURE(status)) { return false; } blueprint_helpers::generateCurrencyOption(currency, sb, status); return true; - } else if (utils::unitIsBaseUnit(macros.unit)) { + } else if (utils::unitIsBaseUnit(unit)) { // Default value is not shown in normalized form return false; - } else if (utils::unitIsPercent(macros.unit)) { + } else if (utils::unitIsPercent(unit)) { sb.append(u"percent", -1); return true; - } else if (utils::unitIsPermille(macros.unit)) { + } else if (utils::unitIsPermille(unit)) { sb.append(u"permille", -1); return true; } else { - MeasureUnit unit = macros.unit; - if (utils::unitIsCurrency(macros.perUnit)) { - status = U_UNSUPPORTED_ERROR; - return false; - } - if (!utils::unitIsBaseUnit(macros.perUnit)) { - unit = unit.product(macros.perUnit.reciprocal(status), status); - } sb.append(u"unit/", -1); sb.append(unit.getIdentifier()); return true; diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 39c1a12b250..bd4c0e28cc5 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -274,6 +274,7 @@ class NumberSkeletonTest : public IntlTest { void flexibleSeparators(); void wildcardCharacters(); void perUnitInArabic(); + void perUnitToSkeleton(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 45805647ff4..35b45321508 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -974,6 +974,7 @@ void NumberFormatterApiTest::unitCompoundMeasure() { u"2.4 m/s\u00B2"); } +// TODO: merge these tests into numbertest_skeletons.cpp instead of here: void NumberFormatterApiTest::unitSkeletons() { const struct TestCase { const char *msg; @@ -1016,6 +1017,22 @@ void NumberFormatterApiTest::unitSkeletons() { u"unit/meter-per-hectosecond", // u"unit/meter-per-hectosecond"}, + {"percent compound skeletons handled correctly", // + u"unit/percent-per-meter", // + u"unit/percent-per-meter"}, + + {"permille compound skeletons handled correctly", // + u"measure-unit/concentr-permille per-measure-unit/length-meter", // + u"unit/permille-per-meter"}, + + {"percent simple unit is not actually considered a unit", // + u"unit/percent", // + u"percent"}, + + {"permille simple unit is not actually considered a unit", // + u"measure-unit/concentr-permille", // + u"permille"}, + // // TODO: binary prefixes not supported yet! // {"Round-trip example from icu-units#35", // // u"unit/kibijoule-per-furlong", // @@ -1049,20 +1066,33 @@ void NumberFormatterApiTest::unitSkeletons() { u"measure-unit/meter per-measure-unit/hectosecond", // U_NUMBER_SKELETON_SYNTAX_ERROR, // U_ZERO_ERROR}, + + {"\"currency/EUR measure-unit/length-meter\" fails, conflicting skeleton.", + u"currency/EUR measure-unit/length-meter", // + U_NUMBER_SKELETON_SYNTAX_ERROR, // + U_ZERO_ERROR}, + + {"\"measure-unit/length-meter currency/EUR\" fails, conflicting skeleton.", + u"measure-unit/length-meter currency/EUR", // + U_NUMBER_SKELETON_SYNTAX_ERROR, // + U_ZERO_ERROR}, + + {"\"currency/EUR per-measure-unit/meter\" fails, conflicting skeleton.", + u"currency/EUR per-measure-unit/length-meter", // + U_NUMBER_SKELETON_SYNTAX_ERROR, // + U_ZERO_ERROR}, }; for (auto &cas : failCases) { IcuTestErrorCode status(*this, cas.msg); auto nf = NumberFormatter::forSkeleton(cas.inputSkeleton, status); if (status.expectErrorAndReset(cas.expectedForSkelStatus, cas.msg)) { - continue; + continue; } nf.toSkeleton(status); status.expectErrorAndReset(cas.expectedToSkelStatus, cas.msg); } IcuTestErrorCode status(*this, "unitSkeletons"); - MeasureUnit METER_PER_SECOND = MeasureUnit::forIdentifier("meter-per-second", status); - assertEquals( // ".unit(METER_PER_SECOND) normalization", // u"unit/meter-per-second", // @@ -1084,6 +1114,16 @@ void NumberFormatterApiTest::unitSkeletons() { .unit(METER) .perUnit(MeasureUnit::forIdentifier("hectosecond", status)) .toSkeleton(status)); + + status.assertSuccess(); + assertEquals( // + ".unit(CURRENCY) produces a currency/CURRENCY skeleton", // + u"currency/GBP", // + NumberFormatter::with().unit(GBP).toSkeleton(status)); + status.assertSuccess(); + // .unit(CURRENCY).perUnit(ANYTHING) is not supported. + NumberFormatter::with().unit(GBP).perUnit(METER).toSkeleton(status); + status.expectErrorAndReset(U_UNSUPPORTED_ERROR); } void NumberFormatterApiTest::unitUsage() { diff --git a/icu4c/source/test/intltest/numbertest_skeletons.cpp b/icu4c/source/test/intltest/numbertest_skeletons.cpp index d5e4bcfd512..07a864a6a4b 100644 --- a/icu4c/source/test/intltest/numbertest_skeletons.cpp +++ b/icu4c/source/test/intltest/numbertest_skeletons.cpp @@ -31,6 +31,7 @@ void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& TESTCASE_AUTO(flexibleSeparators); TESTCASE_AUTO(wildcardCharacters); TESTCASE_AUTO(perUnitInArabic); + TESTCASE_AUTO(perUnitToSkeleton); TESTCASE_AUTO_END; } @@ -436,4 +437,59 @@ void NumberSkeletonTest::perUnitInArabic() { } } +void NumberSkeletonTest::perUnitToSkeleton() { + IcuTestErrorCode status(*this, "perUnitToSkeleton"); + struct TestCase { + const char16_t* type; + const char16_t* subtype; + } cases[] = { + {u"area", u"acre"}, + {u"concentr", u"percent"}, + {u"concentr", u"permille"}, + {u"concentr", u"permillion"}, + {u"concentr", u"permyriad"}, + {u"digital", u"bit"}, + {u"length", u"yard"}, + }; + + for (const auto& cas1 : cases) { + for (const auto& cas2 : cases) { + UnicodeString skeleton(u"measure-unit/"); + skeleton += cas1.type; + skeleton += u"-"; + skeleton += cas1.subtype; + skeleton += u" "; + skeleton += u"per-measure-unit/"; + skeleton += cas2.type; + skeleton += u"-"; + skeleton += cas2.subtype; + + status.setScope(skeleton); + if (cas1.type != cas2.type && cas1.subtype != cas2.subtype) { + UnicodeString toSkeleton = NumberFormatter::forSkeleton( + skeleton, status).toSkeleton(status); + if (status.errIfFailureAndReset()) { + continue; + } + // Ensure both subtype are in the toSkeleton. + UnicodeString msg; + msg.append(toSkeleton) + .append(" should contain '") + .append(UnicodeString(cas1.subtype)) + .append("' when constructed from ") + .append(skeleton); + assertTrue(msg, toSkeleton.indexOf(cas1.subtype) >= 0); + + msg.remove(); + msg.append(toSkeleton) + .append(" should contain '") + .append(UnicodeString(cas2.subtype)) + .append("' when constructed from ") + .append(skeleton); + assertTrue(msg, toSkeleton.indexOf(cas2.subtype) >= 0); + } + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java index 2071e3321cc..9b0f1ed3f52 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java @@ -620,6 +620,7 @@ class NumberSkeletonImpl { case STATE_INCREMENT_PRECISION: case STATE_MEASURE_UNIT: case STATE_PER_MEASURE_UNIT: + case STATE_IDENTIFIER_UNIT: case STATE_UNIT_USAGE: case STATE_CURRENCY_UNIT: case STATE_INTEGER_WIDTH: @@ -661,7 +662,7 @@ class NumberSkeletonImpl { BlueprintHelpers.parseScientificStem(segment, macros); return ParseState.STATE_NULL; case '0': - checkNull(macros.notation, segment); + checkNull(macros.integerWidth, segment); BlueprintHelpers.parseIntegerStem(segment, macros); return ParseState.STATE_NULL; } @@ -786,6 +787,11 @@ class NumberSkeletonImpl { return ParseState.STATE_MEASURE_UNIT; case STEM_PER_MEASURE_UNIT: + // In C++, STEM_CURRENCY's checks mark perUnit as "seen". Here we do + // the inverse: checking that macros.unit is not set to a currency. + if (macros.unit instanceof Currency) { + throw new SkeletonSyntaxException("Duplicated setting", segment); + } checkNull(macros.perUnit, segment); return ParseState.STATE_PER_MEASURE_UNIT; @@ -800,6 +806,7 @@ class NumberSkeletonImpl { case STEM_CURRENCY: checkNull(macros.unit, segment); + checkNull(macros.perUnit, segment); return ParseState.STATE_CURRENCY_UNIT; case STEM_INTEGER_WIDTH: @@ -1484,25 +1491,25 @@ class NumberSkeletonImpl { } private static boolean unit(MacroProps macros, StringBuilder sb) { - if (macros.unit instanceof Currency) { + MeasureUnit unit = macros.unit; + if (macros.perUnit != null) { + if (macros.unit instanceof Currency || macros.perUnit instanceof Currency) { + throw new UnsupportedOperationException( + "Cannot generate number skeleton with currency unit and per-unit"); + } + unit = unit.product(macros.perUnit.reciprocal()); + } + if (unit instanceof Currency) { sb.append("currency/"); - BlueprintHelpers.generateCurrencyOption((Currency) macros.unit, sb); + BlueprintHelpers.generateCurrencyOption((Currency)unit, sb); return true; - } else if (macros.unit == MeasureUnit.PERCENT) { + } else if (unit.equals(MeasureUnit.PERCENT)) { sb.append("percent"); return true; - } else if (macros.unit == MeasureUnit.PERMILLE) { + } else if (unit.equals(MeasureUnit.PERMILLE)) { sb.append("permille"); return true; } else { - MeasureUnit unit = macros.unit; - if (macros.perUnit != null) { - if (macros.perUnit instanceof Currency) { - throw new UnsupportedOperationException( - "Cannot generate number skeleton with per-unit that is not a standard measure unit"); - } - unit = unit.product(macros.perUnit.reciprocal()); - } sb.append("unit/"); sb.append(unit.getIdentifier()); return true; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java index 3613126d414..15f0e9688fa 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java @@ -936,6 +936,7 @@ public class NumberFormatterApiTest extends TestFmwk { "2.4 m/s\u00B2"); } + // TODO: merge these tests into NumberSkeletonTest.java instead of here: @Test public void unitSkeletons() { Object[][] cases = { @@ -960,11 +961,11 @@ public class NumberFormatterApiTest extends TestFmwk { "unit/meter-per-second"}, {"short-form compound units stay as is", // - "unit/square-meter-per-square-meter", // + "unit/square-meter-per-square-meter", // "unit/square-meter-per-square-meter"}, {"short-form compound units stay as is", // - "unit/joule-per-furlong", // + "unit/joule-per-furlong", // "unit/joule-per-furlong"}, {"short-form that doesn't consist of built-in units", // @@ -975,6 +976,22 @@ public class NumberFormatterApiTest extends TestFmwk { "unit/meter-per-hectosecond", // "unit/meter-per-hectosecond"}, + {"percent compound skeletons handled correctly", // + "unit/percent-per-meter", // + "unit/percent-per-meter"}, + + {"permille compound skeletons handled correctly", // + "measure-unit/concentr-permille per-measure-unit/length-meter", // + "unit/permille-per-meter"}, + + {"percent simple unit is not actually considered a unit", // + "unit/percent", // + "percent"}, + + {"permille simple unit is not actually considered a unit", // + "measure-unit/concentr-permille", // + "permille"}, + // // TODO: binary prefixes not supported yet! // {"Round-trip example from icu-units#35", // // "unit/kibijoule-per-furlong", // @@ -998,6 +1015,21 @@ public class NumberFormatterApiTest extends TestFmwk { "measure-unit/meter per-measure-unit/hectosecond", // true, // false}, + + {"\"currency/EUR measure-unit/length-meter\" fails, conflicting skeleton.", + "currency/EUR measure-unit/length-meter", // + true, // + false}, + + {"\"measure-unit/length-meter currency/EUR\" fails, conflicting skeleton.", + "measure-unit/length-meter currency/EUR", // + true, // + false}, + + {"\"currency/EUR per-measure-unit/meter\" fails, conflicting skeleton.", + "currency/EUR per-measure-unit/length-meter", // + true, // + false}, }; for (Object[] cas : failCases) { String msg = (String)cas[0]; @@ -1027,6 +1059,39 @@ public class NumberFormatterApiTest extends TestFmwk { } } } + + assertEquals( // + ".unit(METER_PER_SECOND) normalization", // + "unit/meter-per-second", // + NumberFormatter.with().unit(MeasureUnit.METER_PER_SECOND).toSkeleton()); + assertEquals( // + ".unit(METER).perUnit(SECOND) normalization", // + "unit/meter-per-second", + NumberFormatter.with().unit(MeasureUnit.METER).perUnit(MeasureUnit.SECOND).toSkeleton()); + assertEquals( // + ".unit(MeasureUnit.forIdentifier(\"hectometer\")) normalization", // + "unit/hectometer", + NumberFormatter.with().unit(MeasureUnit.forIdentifier("hectometer")).toSkeleton()); + assertEquals( // + ".unit(MeasureUnit.forIdentifier(\"hectometer\")) normalization", // + "unit/meter-per-hectosecond", + NumberFormatter.with() + .unit(MeasureUnit.METER) + .perUnit(MeasureUnit.forIdentifier("hectosecond")) + .toSkeleton()); + + assertEquals( // + ".unit(CURRENCY) produces a currency/CURRENCY skeleton", // + "currency/GBP", // + NumberFormatter.with().unit(GBP).toSkeleton()); + + // .unit(CURRENCY).perUnit(ANYTHING) is not supported. + try { + NumberFormatter.with().unit(GBP).perUnit(MeasureUnit.METER).toSkeleton(); + fail("should give an error, unit(currency) with perUnit() is invalid."); + } catch (UnsupportedOperationException e) { + // Pass + } } @Test diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java index d921c9a9dc4..bda900be151 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java @@ -411,4 +411,37 @@ public class NumberSkeletonTest { } } } + + @Test + public void perUnitToSkeleton() { + String[][] cases = { + {"area", "acre"}, + {"concentr", "percent"}, + {"concentr", "permille"}, + {"concentr", "permillion"}, + {"concentr", "permyriad"}, + {"digital", "bit"}, + {"length", "yard"}, + }; + + for (String[] cas1 : cases) { + for (String[] cas2 : cases) { + String skeleton = "measure-unit/" + cas1[0] + "-" + cas1[1] + " per-measure-unit/" + + cas2[0] + "-" + cas2[1]; + + if (cas1[0] != cas2[0] && cas1[1] != cas2[1]) { + String toSkeleton = NumberFormatter.forSkeleton(skeleton).toSkeleton(); + + // Ensure both subtype are in the toSkeleton. + String msg; + msg = toSkeleton + " should contain '" + cas1[1] + "' when constructed from " + + skeleton; + assertTrue(msg, toSkeleton.indexOf(cas1[1]) >= 0); + msg = toSkeleton + " should contain '" + cas2[1] + "' when constructed from " + + skeleton; + assertTrue(msg, toSkeleton.indexOf(cas2[1]) >= 0); + } + } + } + } } -- 2.40.0