From: Shane Carr Date: Thu, 6 Sep 2018 04:46:37 +0000 (-0700) Subject: ICU-11276 Adding number range spacing heuristic and fixing data loading. X-Git-Tag: release-63-rc~63^2~20 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7155e1fbcfe71887078ec919095eb1053fb9e50f;p=icu ICU-11276 Adding number range spacing heuristic and fixing data loading. --- diff --git a/icu4c/source/i18n/numrange_fluent.cpp b/icu4c/source/i18n/numrange_fluent.cpp index 0deb352055e..d2c5edcd2bf 100644 --- a/icu4c/source/i18n/numrange_fluent.cpp +++ b/icu4c/source/i18n/numrange_fluent.cpp @@ -22,7 +22,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterBoth(const UnlocalizedNumberFormatter& formatter) const& { Derived copy(*this); copy.fMacros.formatter1 = formatter; - copy.fMacros.formatter2 = formatter; + copy.fMacros.singleFormatter = true; return copy; } @@ -30,23 +30,23 @@ template Derived NumberRangeFormatterSettings::numberFormatterBoth(const UnlocalizedNumberFormatter& formatter) && { Derived move(std::move(*this)); move.fMacros.formatter1 = formatter; - move.fMacros.formatter2 = formatter; + move.fMacros.singleFormatter = true; return move; } template Derived NumberRangeFormatterSettings::numberFormatterBoth(UnlocalizedNumberFormatter&& formatter) const& { Derived copy(*this); - copy.fMacros.formatter1 = formatter; - copy.fMacros.formatter2 = std::move(formatter); + copy.fMacros.formatter1 = std::move(formatter); + copy.fMacros.singleFormatter = true; return copy; } template Derived NumberRangeFormatterSettings::numberFormatterBoth(UnlocalizedNumberFormatter&& formatter) && { Derived move(std::move(*this)); - move.fMacros.formatter1 = formatter; - move.fMacros.formatter2 = std::move(formatter); + move.fMacros.formatter1 = std::move(formatter); + move.fMacros.singleFormatter = true; return move; } @@ -54,6 +54,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterFirst(const UnlocalizedNumberFormatter& formatter) const& { Derived copy(*this); copy.fMacros.formatter1 = formatter; + copy.fMacros.singleFormatter = false; return copy; } @@ -61,6 +62,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterFirst(const UnlocalizedNumberFormatter& formatter) && { Derived move(std::move(*this)); move.fMacros.formatter1 = formatter; + move.fMacros.singleFormatter = false; return move; } @@ -68,6 +70,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterFirst(UnlocalizedNumberFormatter&& formatter) const& { Derived copy(*this); copy.fMacros.formatter1 = std::move(formatter); + copy.fMacros.singleFormatter = false; return copy; } @@ -75,6 +78,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterFirst(UnlocalizedNumberFormatter&& formatter) && { Derived move(std::move(*this)); move.fMacros.formatter1 = std::move(formatter); + move.fMacros.singleFormatter = false; return move; } @@ -82,6 +86,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterSecond(const UnlocalizedNumberFormatter& formatter) const& { Derived copy(*this); copy.fMacros.formatter2 = formatter; + copy.fMacros.singleFormatter = false; return copy; } @@ -89,6 +94,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterSecond(const UnlocalizedNumberFormatter& formatter) && { Derived move(std::move(*this)); move.fMacros.formatter2 = formatter; + move.fMacros.singleFormatter = false; return move; } @@ -96,6 +102,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterSecond(UnlocalizedNumberFormatter&& formatter) const& { Derived copy(*this); copy.fMacros.formatter2 = std::move(formatter); + copy.fMacros.singleFormatter = false; return copy; } @@ -103,6 +110,7 @@ template Derived NumberRangeFormatterSettings::numberFormatterSecond(UnlocalizedNumberFormatter&& formatter) && { Derived move(std::move(*this)); move.fMacros.formatter2 = std::move(formatter); + move.fMacros.singleFormatter = false; return move; } diff --git a/icu4c/source/i18n/numrange_impl.cpp b/icu4c/source/i18n/numrange_impl.cpp index 022d8faa76f..6e52d3498b0 100644 --- a/icu4c/source/i18n/numrange_impl.cpp +++ b/icu4c/source/i18n/numrange_impl.cpp @@ -35,12 +35,12 @@ class NumberRangeDataSink : public ResourceSink { if (U_FAILURE(status)) { return; } for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) { if (uprv_strcmp(key, "range") == 0) { - if (fData.rangePattern.getArgumentLimit() == 0) { + if (fData.rangePattern.getArgumentLimit() != 0) { continue; // have already seen this pattern } fData.rangePattern = {value.getUnicodeString(status), status}; } else if (uprv_strcmp(key, "approximately") == 0) { - if (fData.approximatelyPattern.getArgumentLimit() == 0) { + if (fData.approximatelyPattern.getArgumentLimit() != 0) { continue; // have already seen this pattern } fData.approximatelyPattern = {value.getUnicodeString(status), status}; @@ -65,12 +65,7 @@ void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeD ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status); if (U_FAILURE(status)) { return; } - if (uprv_strcmp(nsName, "latn") != 0 && (data.rangePattern.getArgumentLimit() == 0 - || data.approximatelyPattern.getArgumentLimit() == 0)) { - // fall back to Latin data - ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status); - if (U_FAILURE(status)) { return; } - } + // TODO: Is it necessary to maually fall back to latn, or does the data sink take care of that? if (data.rangePattern.getArgumentLimit() == 0) { // No data! @@ -88,10 +83,16 @@ void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeD NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status) : formatterImpl1(macros.formatter1.fMacros, status), formatterImpl2(macros.formatter2.fMacros, status), - fSameFormatters(true), // FIXME + fSameFormatters(macros.singleFormatter), fCollapse(macros.collapse), fIdentityFallback(macros.identityFallback) { - // TODO: get local ns + + // TODO: As of this writing (ICU 63), there is no locale that has different number miscPatterns + // based on numbering system. Therefore, data is loaded only from latn. If this changes, + // this part of the code should be updated to load from the local numbering system. + // The numbering system could come from the one specified in the NumberFormatter passed to + // numberFormatterBoth() or similar. + NumberRangeData data; getNumberRangeData(macros.locale.getName(), "latn", data, status); if (U_FAILURE(status)) { return; } @@ -104,15 +105,17 @@ void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equa return; } - // Identity case 1: equal before rounding - if (equalBeforeRounding) { - } - MicroProps micros1; MicroProps micros2; - formatterImpl1.preProcess(data.quantity1, micros1, status); - formatterImpl2.preProcess(data.quantity2, micros2, status); - if (U_FAILURE(status)) { + if (fSameFormatters) { + formatterImpl1.preProcess(data.quantity1, micros1, status); + formatterImpl1.preProcess(data.quantity2, micros2, status); + } else { + // If the formatters are different, an identity is not possible. + // Always use formatRange(). + formatterImpl1.preProcess(data.quantity1, micros1, status); + formatterImpl2.preProcess(data.quantity2, micros2, status); + formatRange(data, micros1, micros2, status); return; } @@ -169,6 +172,7 @@ void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equa void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data, MicroProps& micros1, MicroProps& micros2, UErrorCode& status) const { + if (U_FAILURE(status)) { return; } if (fSameFormatters) { formatterImpl1.format(data.quantity1, data.string, status); } else { @@ -180,6 +184,7 @@ void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data, MicroProps& micros1, MicroProps& micros2, UErrorCode& status) const { + if (U_FAILURE(status)) { return; } if (fSameFormatters) { int32_t length = formatterImpl1.format(data.quantity1, data.string, status); fApproximatelyModifier.apply(data.string, 0, length, status); @@ -192,6 +197,7 @@ void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& d void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, MicroProps& micros1, MicroProps& micros2, UErrorCode& status) const { + if (U_FAILURE(status)) { return; } // modInner is always notation (scientific); collapsable in ALL. // modOuter is always units; collapsable in ALL, AUTO, and UNIT. @@ -283,6 +289,19 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, if (U_FAILURE(status)) { return; } lengthInfix = lengthRange - lengthPrefix - lengthSuffix; + // SPACING HEURISTIC + // Add spacing unless all modifiers are collapsed. + { + bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0; + bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0; + bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0; + if (repeatInner || repeatMiddle || repeatOuter) { + // Add spacing + lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status); + lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status); + } + } + length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status); length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status); @@ -292,21 +311,21 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, lengthInfix += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); } else { length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros1.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); } if (collapseMiddle) { lengthInfix += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); } else { length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros1.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); } if (collapseOuter) { - micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthInfix += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); } else { - micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - micros1.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); + length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); } } diff --git a/icu4c/source/i18n/unicode/numberrangeformatter.h b/icu4c/source/i18n/unicode/numberrangeformatter.h index 7f4000f4423..583361df1da 100644 --- a/icu4c/source/i18n/unicode/numberrangeformatter.h +++ b/icu4c/source/i18n/unicode/numberrangeformatter.h @@ -192,6 +192,9 @@ struct U_I18N_API RangeMacroProps : public UMemory { /** @internal */ UnlocalizedNumberFormatter formatter2; // = NumberFormatter::with(); + /** @internal */ + bool singleFormatter = true; + /** @internal */ UNumberRangeCollapse collapse = UNUM_RANGE_COLLAPSE_AUTO; diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 2618b97ddf3..74e5987156d 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -248,12 +248,24 @@ class NumberSkeletonTest : public IntlTest { class NumberRangeFormatterTest : public IntlTest { public: + NumberRangeFormatterTest(); + NumberRangeFormatterTest(UErrorCode &status); + void testSanity(); void testBasic(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); private: + CurrencyUnit USD; + CurrencyUnit GBP; + CurrencyUnit PTE; + + MeasureUnit METER; + MeasureUnit KILOMETER; + MeasureUnit FAHRENHEIT; + MeasureUnit KELVIN; + void assertFormatRange( const char16_t* message, const UnlocalizedNumberRangeFormatter& f, diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 59c0d20b653..0604a9d9db1 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -17,6 +17,7 @@ #include "unicode/utypes.h" // Horrible workaround for the lack of a status code in the constructor... +// (Also affects numbertest_range.cpp) UErrorCode globalNumberFormatterApiTestStatus = U_ZERO_ERROR; NumberFormatterApiTest::NumberFormatterApiTest() diff --git a/icu4c/source/test/intltest/numbertest_range.cpp b/icu4c/source/test/intltest/numbertest_range.cpp index 9e47bab6e98..6257e03f968 100644 --- a/icu4c/source/test/intltest/numbertest_range.cpp +++ b/icu4c/source/test/intltest/numbertest_range.cpp @@ -11,7 +11,31 @@ #include #include -using icu::unisets::get; +// Horrible workaround for the lack of a status code in the constructor... +// (Also affects numbertest_api.cpp) +UErrorCode globalNumberRangeFormatterTestStatus = U_ZERO_ERROR; + +NumberRangeFormatterTest::NumberRangeFormatterTest() + : NumberRangeFormatterTest(globalNumberRangeFormatterTestStatus) { +} + +NumberRangeFormatterTest::NumberRangeFormatterTest(UErrorCode& status) + : USD(u"USD", status), + GBP(u"GBP", status), + PTE(u"PTE", status) { + + // Check for error on the first MeasureUnit in case there is no data + LocalPointer unit(MeasureUnit::createMeter(status)); + if (U_FAILURE(status)) { + dataerrln("%s %d status = %s", __FILE__, __LINE__, u_errorName(status)); + return; + } + METER = *unit; + + KILOMETER = *LocalPointer(MeasureUnit::createKilometer(status)); + FAHRENHEIT = *LocalPointer(MeasureUnit::createFahrenheit(status)); + KELVIN = *LocalPointer(MeasureUnit::createKelvin(status)); +} void NumberRangeFormatterTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { if (exec) { @@ -37,16 +61,49 @@ void NumberRangeFormatterTest::testBasic() { u"Basic", NumberRangeFormatter::with(), Locale("en-us"), - u"1 --- 5", + u"1–5", u"~5", u"~5", - u"0 --- 3", + u"0–3", u"~0", - u"3 --- 3,000", - u"3,000 --- 5,000", - u"4,999 --- 5,001", + u"3–3,000", + u"3,000–5,000", + u"4,999–5,001", u"~5,000", - u"5,000 --- 5,000,000"); + u"5,000–5,000,000"); + + assertFormatRange( + u"Basic with units", + NumberRangeFormatter::with() + .numberFormatterBoth(NumberFormatter::with().unit(METER)), + Locale("en-us"), + u"1–5 m", + u"~5 m", + u"~5 m", + u"0–3 m", + u"~0 m", + u"3–3,000 m", + u"3,000–5,000 m", + u"4,999–5,001 m", + u"~5,000 m", + u"5,000–5,000,000 m"); + + assertFormatRange( + u"Basic with different units", + NumberRangeFormatter::with() + .numberFormatterFirst(NumberFormatter::with().unit(METER)) + .numberFormatterSecond(NumberFormatter::with().unit(KILOMETER)), + Locale("en-us"), + u"1 m – 5 km", + u"5 m – 5 km", + u"5 m – 5 km", + u"0 m – 3 km", + u"0 m – 0 km", + u"3 m – 3,000 km", + u"3,000 m – 5,000 km", + u"4,999 m – 5,001 km", + u"5,000 m – 5,000 km", + u"5,000 m – 5,000,000 km"); } void NumberRangeFormatterTest::assertFormatRange(