From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Wed, 17 Feb 2021 15:58:16 +0000 (+0000) Subject: ICU-21123 Support unit inflections in ICU4C X-Git-Tag: cldr/2021-03-09~59 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1dbe70ac189328afc3b6515b2f1964445c07d1da;p=icu ICU-21123 Support unit inflections in ICU4C See #1574 --- diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp index 8569a36e5b2..0e27405132c 100644 --- a/icu4c/source/i18n/number_fluent.cpp +++ b/icu4c/source/i18n/number_fluent.cpp @@ -288,6 +288,20 @@ Derived NumberFormatterSettings<Derived>::usage(const StringPiece usage)&& { return move; } +template<typename Derived> +Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase) const& { + Derived copy(*this); + copy.fMacros.unitDisplayCase.set(unitDisplayCase); + return copy; +} + +template<typename Derived> +Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase)&& { + Derived move(std::move(*this)); + move.fMacros.unitDisplayCase.set(unitDisplayCase); + return move; +} + template<typename Derived> Derived NumberFormatterSettings<Derived>::padding(const Padder& padder) const& { Derived copy(*this); diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp index 4661c27fa1c..fb0eec93e8f 100644 --- a/icu4c/source/i18n/number_formatimpl.cpp +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -39,6 +39,7 @@ int32_t NumberFormatterImpl::formatStatic(const MacroProps &macros, UFormattedNu int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); results->outputUnit = std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -63,6 +64,7 @@ int32_t NumberFormatterImpl::format(UFormattedNumberData *results, UErrorCode &s int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); results->outputUnit =
std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -177,6 +179,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, uprv_strncpy(fMicros.nsName, nsName, 8); fMicros.nsName[8] = 0; // guarantee NUL-terminated + // Default gender: none. + fMicros.gender = ""; + // Resolve the symbols. Do this here because currency may need to customize them. if (macros.symbols.isDecimalFormatSymbols()) { fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); @@ -246,7 +251,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, return nullptr; } auto usagePrefsHandler = - new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fUsage, chain, status); + new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fValue, chain, status); fUsagePrefsHandler.adoptInsteadAndCheckErrorCode(usagePrefsHandler, status); chain = fUsagePrefsHandler.getAlias(); } else if (isMixedUnit) { @@ -370,10 +375,14 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { + StringPiece unitDisplayCase(""); + if (macros.unitDisplayCase.isSet()) { + unitDisplayCase = macros.unitDisplayCase.fValue; + } if (macros.usage.isSet()) { fLongNameMultiplexer.adoptInsteadAndCheckErrorCode( LongNameMultiplexer::forMeasureUnits( - macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, + macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, status), status); chain = fLongNameMultiplexer.getAlias(); @@ -381,7 +390,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, fMixedUnitLongNameHandler.adoptInsteadAndCheckErrorCode(new MixedUnitLongNameHandler(), status); MixedUnitLongNameHandler::forMeasureUnit( - macros.locale, macros.unit, unitWidth, + macros.locale, macros.unit, unitWidth, 
unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, fMixedUnitLongNameHandler.getAlias(), status); chain = fMixedUnitLongNameHandler.getAlias(); @@ -391,7 +400,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, unit = unit.product(macros.perUnit.reciprocal(status), status); } fLongNameHandler.adoptInsteadAndCheckErrorCode(new LongNameHandler(), status); - LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth, + LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth, unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, fLongNameHandler.getAlias(), status); chain = fLongNameHandler.getAlias(); diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index 98637e803d5..e7402725c35 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -46,6 +46,34 @@ constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; // Number of keys in the array populated by PluralTableSink. constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; +// TODO(inflections): load this list from resources, after creating a "&set" +// function for use in ldml2icu rules. 
+const int32_t GENDER_COUNT = 7; +const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", + "masculine", "neuter", "personal"}; + +const char *getGenderString(UnicodeString uGender, UErrorCode status) { + CharString gender; + gender.appendInvariantChars(uGender, status); + if (U_FAILURE(status)) { + return ""; + } + int32_t first = 0; + int32_t last = GENDER_COUNT; + while (first < last) { + int32_t mid = (first + last) / 2; + int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]); + if (cmp == 0) { + return gGenders[mid]; + } else if (cmp > 0) { + first = mid + 1; + } else if (cmp < 0) { + last = mid; + } + } + return ""; +} + static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { // pluralKeyword can also be "dnam", "per", or "gender" switch (*pluralKeyword) { @@ -109,7 +137,6 @@ class PluralTableSink : public ResourceSink { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { - // TODO(ICU-21123): Load the correct inflected form, possibly from the "case" structure. if (uprv_strcmp(key, "case") == 0) { continue; } @@ -137,12 +164,19 @@ class PluralTableSink : public ResourceSink { * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". * - * @param unit must have a type and subtype (i.e. it must be a unit listed in - * gTypes and gSubTypes in measunit.cpp). + * @param unit must be a built-in unit, i.e. must have a type and subtype, + * listed in gTypes and gSubTypes in measunit.cpp. + * @param unitDisplayCase the empty string and "nominative" are treated the + * same. For other cases, strings for the requested case are used if found. + * (For any missing case-specific data, we fall back to nominative.) * @param outArray must be of fixed length ARRAY_LENGTH. 
*/ -void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, - UnicodeString *outArray, UErrorCode &status) { +void getMeasureData(const Locale &locale, + const MeasureUnit &unit, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + UnicodeString *outArray, + UErrorCode &status) { PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return; } @@ -159,6 +193,7 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber CharString key; key.append("units", status); + // TODO(icu-units#140): support gender for other unit widths. if (width == UNUM_UNIT_WIDTH_NARROW) { key.append("Narrow", status); } else if (width == UNUM_UNIT_WIDTH_SHORT) { @@ -169,6 +204,23 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber key.append("/", status); key.append(subtypeForResource, status); + // Grab desired case first, if available. Then grab no-case data to fill in + // the gaps. + if (width == UNUM_UNIT_WIDTH_FULL_NAME && !unitDisplayCase.empty()) { + CharString caseKey; + caseKey.append(key, status); + caseKey.append("/case/", status); + caseKey.append(unitDisplayCase, status); + + UErrorCode localStatus = U_ZERO_ERROR; + ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); + // TODO(icu-units#138): our fallback logic is not spec-compliant: we + // check the given case, then go straight to the no-case data. The spec + // states we should first look for case="nominative". As part of #138, + // either get the spec changed, or add unit tests that warn us if + // case="nominative" data differs from no-case data? 
+ } + UErrorCode localStatus = U_ZERO_ERROR; ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); if (width == UNUM_UNIT_WIDTH_SHORT) { @@ -232,15 +284,156 @@ UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &wid return UnicodeString(ptr, len); } +/** + * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. + * + * Consider a deriveComponent rule that looks like this: + * + * + * + * Instantiating an instance as follows: + * + * DerivedComponents d(loc, "case", "per", "foo"); + * + * Applying the rule in the XML element above, `d.value0()` will be "foo", and + * `d.value1()` will be "nominative". + * + * In case of any kind of failure, value0() and value1() will simply return "". + */ +class DerivedComponents { + public: + /** + * Constructor. + * + * The feature and structure parameters must be null-terminated. The string + * referenced by compoundValue must exist for longer than the + * DerivedComponents instance. + */ + DerivedComponents(const Locale &locale, + const char *feature, + const char *structure, + const StringPiece compoundValue) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), + &status); + if (U_FAILURE(status)) { + return; + } + UErrorCode localStatus = U_ZERO_ERROR; + // TODO: use standard normal locale resolution algorithms rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), + &localStatus); + // TODO: + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. 
+ // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? + if (localStatus == U_MISSING_RESOURCE_ERROR) { + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } else { + status = localStatus; + } + ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); + UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); + UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); + if (U_SUCCESS(status)) { + if (val0.compare(UnicodeString(u"compound")) == 0) { + sp0 = compoundValue; + } else { + memory0.appendInvariantChars(val0, status); + sp0 = memory0.toStringPiece(); + } + if (val1.compare(UnicodeString(u"compound")) == 0) { + sp1 = compoundValue; + } else { + memory1.appendInvariantChars(val1, status); + sp1 = memory1.toStringPiece(); + } + } + } + // The returned StringPiece is only valid as long as both the instance + // exists, and the compoundValue passed to the constructor is valid. + StringPiece value0() const { + return sp0; + } + // The returned StringPiece is only valid as long as both the instance + // exists, and the compoundValue passed to the constructor is valid. 
+ StringPiece value1() const { + return sp1; + } + + private: + UErrorCode status = U_ZERO_ERROR; + + // Holds strings referred to by value0 and value1; + CharString memory0, memory1; + StringPiece sp0, sp1; +}; + +UnicodeString +getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); + // TODO: use standard normal locale resolution algorithms rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status); + // TODO: + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. + // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? + if (status == U_MISSING_RESOURCE_ERROR) { + status = U_ZERO_ERROR; + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } + ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + return ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status); +} + //////////////////////// /// END DATA LOADING /// //////////////////////// +// TODO: promote this somewhere? 
It's based on patternprops.cpp' trimWhitespace +const UChar *trimSpaceChars(const UChar *s, int32_t &length) { + if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) { + return s; + } + int32_t start = 0; + int32_t limit = length; + while (start < limit && u_isJavaSpaceChar(s[start])) { + ++start; + } + if (start < limit) { + // There is non-white space at start; we will not move limit below that, + // so we need not test startparent = parent; fillIn->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + if (!simpleFormats[GENDER_INDEX].isBogus()) { + fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); + } } -void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, - const MeasureUnit &perUnit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, - LongNameHandler *fillIn, UErrorCode &status) { +void LongNameHandler::forCompoundUnit(const Locale &loc, + const MeasureUnit &unit, + const MeasureUnit &perUnit, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } if (uprv_strcmp(unit.getType(), "") == 0 || uprv_strcmp(perUnit.getType(), "") == 0) { // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an // error code. 
Once we support not-built-in units here, unitRef may be @@ -295,17 +502,24 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit status = U_INTERNAL_PROGRAM_ERROR; return; } + + DerivedComponents derivedPerCases(loc, "case", "per", unitDisplayCase); + UnicodeString primaryData[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, primaryData, status); + getMeasureData(loc, unit, width, derivedPerCases.value0(), primaryData, status); if (U_FAILURE(status)) { return; } UnicodeString secondaryData[ARRAY_LENGTH]; - getMeasureData(loc, perUnit, width, secondaryData, status); + getMeasureData(loc, perUnit, width, derivedPerCases.value1(), secondaryData, status); if (U_FAILURE(status)) { return; } + // TODO(icu-units#139): implement these rules: + // + // This has impact on multiSimpleFormatsToModifiers(...) below too. + // These rules are currently (ICU 69) all the same and hard-coded below. UnicodeString perUnitFormat; if (!secondaryData[PER_INDEX].isBogus()) { perUnitFormat = secondaryData[PER_INDEX]; @@ -314,7 +528,7 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit if (U_FAILURE(status)) { return; } - // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. + // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. 
SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); if (U_FAILURE(status)) { return; @@ -328,7 +542,11 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit if (U_FAILURE(status)) { return; } - UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); + UnicodeString secondaryFormatString = secondaryCompiled.getTextWithNoArguments(); + int32_t trimmedSecondaryLen = secondaryFormatString.length(); + const UChar *trimmedSecondaryString = + trimSpaceChars(secondaryFormatString.getBuffer(), trimmedSecondaryLen); + UnicodeString secondaryString(false, trimmedSecondaryString, trimmedSecondaryLen); // TODO: Why does UnicodeString need to be explicit in the following line? compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); if (U_FAILURE(status)) { @@ -339,6 +557,24 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit fillIn->parent = parent; fillIn->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + + // Gender + UnicodeString uVal = getDeriveCompoundRule(loc, "gender", "per", status); + if (U_FAILURE(status)) { + return; + } + U_ASSERT(!uVal.isBogus() && uVal.length() == 1); + switch (uVal[0]) { + case u'0': + fillIn->gender = getGenderString(primaryData[GENDER_INDEX], status); + break; + case u'1': + fillIn->gender = getGenderString(secondaryData[GENDER_INDEX], status); + break; + default: + // Data error. Assert-fail in debug mode, else return no gender. 
+ U_ASSERT(false); + } } UnicodeString LongNameHandler::getUnitDisplayName( @@ -350,7 +586,7 @@ UnicodeString LongNameHandler::getUnitDisplayName( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); return simpleFormats[DNAM_INDEX]; } @@ -364,7 +600,7 @@ UnicodeString LongNameHandler::getUnitPattern( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); // The above already handles fallback from other widths to short if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); @@ -387,6 +623,7 @@ LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return nullptr; } result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); + // TODO(inflections): currency gender? 
return result; } @@ -426,29 +663,41 @@ void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &mic } StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); micros.modOuter = &fModifiers[pluralForm]; + micros.gender = gender; } const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { return &fModifiers[plural]; } -void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, - const UNumberUnitWidth &width, const PluralRules *rules, +void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, const MicroPropsGenerator *parent, - MixedUnitLongNameHandler *fillIn, UErrorCode &status) { + MixedUnitLongNameHandler *fillIn, + UErrorCode &status) { U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); U_ASSERT(fillIn != nullptr); MeasureUnitImpl temp; - const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); + const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); fillIn->fMixedUnitCount = impl.singleUnits.length(); fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { // Grab data for each of the components. 
UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; - getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitData, status); + // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this + // propagation of unitDisplayCase is correct: + getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, + status); } + // TODO(icu-units#120): Make sure ICU doesn't output zero-valued + // high-magnitude fields + // * for mixed units count N, produce N listFormatters, one for each subset + // that might be formatted. UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; if (width == UNUM_UNIT_WIDTH_NARROW) { listWidth = ULISTFMT_WIDTH_NARROW; @@ -458,6 +707,8 @@ void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUn } fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); + // TODO(ICU-21494): grab gender of each unit, calculate the gender + // associated with this list formatter, save it for later. fillIn->rules = rules; fillIn->parent = parent; @@ -541,7 +792,11 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity & } } - + // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we + // can set micros.gender to the gender associated with the list formatter in + // use below (once we have correct support for that). And then document this + // appropriately? "getMixedUnitModifier" doesn't sound like it would do + // something like this. // Combine list into a "premixed" pattern UnicodeString premixedFormatPattern; @@ -560,16 +815,19 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity & const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form /*plural*/) const { // TODO(units): investigate this method when investigating where - // LongNameHandler::getModifier() gets used. 
To be sure it remains + // ModifierStore::getModifier() gets used. To be sure it remains // unreachable: UPRV_UNREACHABLE; return nullptr; } -LongNameMultiplexer * -LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector &units, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status) { +LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, + const MaybeStackVector &units, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status) { LocalPointer result(new LongNameMultiplexer(parent), status); if (U_FAILURE(status)) { return nullptr; @@ -581,15 +839,16 @@ LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVectorfMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); for (int32_t i = 0, length = units.length(); i < length; i++) { - const MeasureUnit& unit = *units[i]; + const MeasureUnit &unit = *units[i]; result->fMeasureUnits[i] = unit; if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); - MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, rules, NULL, mlnh, status); + MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, + mlnh, status); result->fHandlers[i] = mlnh; } else { LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status); - LongNameHandler::forMeasureUnit(loc, unit, width, rules, NULL, lnh, status); + LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status); result->fHandlers[i] = lnh; } if (U_FAILURE(status)) { diff --git a/icu4c/source/i18n/number_longnames.h b/icu4c/source/i18n/number_longnames.h index 9d34c3cdd5f..8b004f02710 100644 --- a/icu4c/source/i18n/number_longnames.h +++ b/icu4c/source/i18n/number_longnames.h @@ -16,6 +16,8 
@@ U_NAMESPACE_BEGIN namespace number { namespace impl { +// LongNameHandler takes care of formatting currency and measurement unit names, +// as well as populating the gender of measure units. class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory { public: static UnicodeString getUnitDisplayName( @@ -24,6 +26,8 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UNumberUnitWidth width, UErrorCode& status); + // This function does not support inflections or other newer NumberFormatter + // features: it exists to support the older not-recommended MeasureFormat. static UnicodeString getUnitPattern( const Locale& loc, const MeasureUnit& unit, @@ -47,13 +51,21 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public * @param loc The desired locale. * @param unitRef The measure unit to construct a LongNameHandler for. * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. The empty + * string and "nominative" are treated the same. For other cases, + * strings for the requested case are used if found. (For any missing + * case-specific data, we fall back to nominative.) * @param rules Does not take ownership. * @param parent Does not take ownership. * @param fillIn Required. 
*/ - static void forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, LongNameHandler *fillIn, + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, UErrorCode &status); /** @@ -63,10 +75,6 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public void processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE; - // TODO(units): investigate whether we might run into Mixed Unit trouble - // with this. This override for ModifierStore::getModifier does not support - // mixed units: investigate under which circumstances it gets called (check - // both ImmutablePatternModifier and in NumberRangeFormatterImpl). const Modifier* getModifier(Signum signum, StandardPlural::Form plural) const U_OVERRIDE; private: @@ -76,6 +84,9 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public const PluralRules *rules; // Not owned const MicroPropsGenerator *parent; + // Grammatical gender of the formatted result. Not owned: must point at + // static or global strings. + const char *gender = ""; LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) : rules(rules), parent(parent) { @@ -94,9 +105,14 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public // Fills in LongNameHandler fields for formatting compound units identified // via `unit` and `perUnit`. Both `unit` and `perUnit` need to be built-in // units (for which data exists).
- static void forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, LongNameHandler *fillIn, + static void forCompoundUnit(const Locale &loc, + const MeasureUnit &unit, + const MeasureUnit &perUnit, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, UErrorCode &status); // Sets fModifiers to use the patterns from `simpleFormats`. @@ -126,13 +142,21 @@ class MixedUnitLongNameHandler : public MicroPropsGenerator, public ModifierStor * @param mixedUnit The mixed measure unit to construct a * MixedUnitLongNameHandler for. * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. The empty + * string and "nominative" are treated the same. For other cases, + * strings for the requested case are used if found. (For any missing + * case-specific data, we fall back to nominative.) * @param rules Does not take ownership. * @param parent Does not take ownership. * @param fillIn Required. */ - static void forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, MixedUnitLongNameHandler *fillIn, + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + MixedUnitLongNameHandler *fillIn, UErrorCode &status); /** @@ -205,8 +229,11 @@ class LongNameMultiplexer : public MicroPropsGenerator, public UMemory { // `units`. An individual unit might be a mixed unit. 
static LongNameMultiplexer *forMeasureUnits(const Locale &loc, const MaybeStackVector &units, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status); + const UNumberUnitWidth &width, + StringPiece unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status); // The output unit must be provided via `micros.outputUnit`, it must match // one of the units provided to the factory function. diff --git a/icu4c/source/i18n/number_microprops.h b/icu4c/source/i18n/number_microprops.h index 98bfa40d0a8..a18d5fc470e 100644 --- a/icu4c/source/i18n/number_microprops.h +++ b/icu4c/source/i18n/number_microprops.h @@ -83,6 +83,11 @@ struct MicroProps : public MicroPropsGenerator { bool useCurrency; char nsName[9]; + // No ownership: must point at a string which will outlive MicroProps + // instances, e.g. a string with static storage duration, or just a string + // that will never be deallocated or modified. + const char *gender; + // Note: This struct has no direct ownership of the following pointers. 
const DecimalFormatSymbols* symbols; diff --git a/icu4c/source/i18n/number_output.cpp b/icu4c/source/i18n/number_output.cpp index 77e2e302a2a..2c2c25eaedb 100644 --- a/icu4c/source/i18n/number_output.cpp +++ b/icu4c/source/i18n/number_output.cpp @@ -39,6 +39,11 @@ MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const { return fData->outputUnit; } +const char *FormattedNumber::getGender(UErrorCode &status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD("") + return fData->gender; +} + void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG) output = fData->quantity; diff --git a/icu4c/source/i18n/number_roundingutils.h b/icu4c/source/i18n/number_roundingutils.h index e85cbae9fdd..11e70abda07 100644 --- a/icu4c/source/i18n/number_roundingutils.h +++ b/icu4c/source/i18n/number_roundingutils.h @@ -204,7 +204,7 @@ class RoundingImpl { * - see blueprint_helpers::parseIncrementOption(). * * Referencing MacroProps means needing to pull in the .o files that have the - * destructors for the SymbolsWrapper, Usage, and Scale classes. + * destructors for the SymbolsWrapper, StringProp, and Scale classes. 
*/ void parseIncrementOption(const StringSegment &segment, Precision &outPrecision, UErrorCode &status); diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 958bf129f6a..53ebf80c042 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -890,6 +890,10 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& status = U_UNSUPPORTED_ERROR; return; } + if (macros.unitDisplayCase.isSet()) { + status = U_UNSUPPORTED_ERROR; + return; + } if (macros.affixProvider != nullptr) { status = U_UNSUPPORTED_ERROR; return; @@ -1512,7 +1516,7 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& /* status */) { if (macros.usage.isSet()) { sb.append(u"usage/", -1); - sb.append(UnicodeString(macros.usage.fUsage, -1, US_INV)); + sb.append(UnicodeString(macros.usage.fValue, -1, US_INV)); return true; } return false; diff --git a/icu4c/source/i18n/number_usageprefs.cpp b/icu4c/source/i18n/number_usageprefs.cpp index a0f265da652..ff285dbf972 100644 --- a/icu4c/source/i18n/number_usageprefs.cpp +++ b/icu4c/source/i18n/number_usageprefs.cpp @@ -28,79 +28,81 @@ using icu::StringSegment; using icu::units::ConversionRates; // Copy constructor -Usage::Usage(const Usage &other) : Usage() { +StringProp::StringProp(const StringProp &other) : StringProp() { this->operator=(other); } // Copy assignment operator -Usage &Usage::operator=(const Usage &other) { +StringProp &StringProp::operator=(const StringProp &other) { fLength = 0; fError = other.fError; - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } - if (other.fUsage == nullptr) { + if (other.fValue == nullptr) { return *this; } if (U_FAILURE(other.fError)) { // We don't bother trying to allocating memory if we're in any case 
busy - // copying an errored Usage. + // copying an errored StringProp. return *this; } - fUsage = (char *)uprv_malloc(other.fLength + 1); - if (fUsage == nullptr) { + fValue = (char *)uprv_malloc(other.fLength + 1); + if (fValue == nullptr) { fError = U_MEMORY_ALLOCATION_ERROR; return *this; } fLength = other.fLength; - uprv_strncpy(fUsage, other.fUsage, fLength + 1); + uprv_strncpy(fValue, other.fValue, fLength + 1); return *this; } // Move constructor -Usage::Usage(Usage &&src) U_NOEXCEPT : fUsage(src.fUsage), fLength(src.fLength), fError(src.fError) { +StringProp::StringProp(StringProp &&src) U_NOEXCEPT : fValue(src.fValue), + fLength(src.fLength), + fError(src.fError) { // Take ownership away from src if necessary - src.fUsage = nullptr; + src.fValue = nullptr; } // Move assignment operator -Usage &Usage::operator=(Usage &&src) U_NOEXCEPT { +StringProp &StringProp::operator=(StringProp &&src) U_NOEXCEPT { if (this == &src) { return *this; } - if (fUsage != nullptr) { - uprv_free(fUsage); + if (fValue != nullptr) { + uprv_free(fValue); } - fUsage = src.fUsage; + fValue = src.fValue; fLength = src.fLength; fError = src.fError; // Take ownership away from src if necessary - src.fUsage = nullptr; + src.fValue = nullptr; return *this; } -Usage::~Usage() { - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; +StringProp::~StringProp() { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } } -void Usage::set(StringPiece value) { - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; +void StringProp::set(StringPiece value) { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } fLength = value.length(); - fUsage = (char *)uprv_malloc(fLength + 1); - if (fUsage == nullptr) { + fValue = (char *)uprv_malloc(fLength + 1); + if (fValue == nullptr) { fLength = 0; fError = U_MEMORY_ALLOCATION_ERROR; return; } - uprv_strncpy(fUsage, value.data(), fLength); - fUsage[fLength] = 0; + uprv_strncpy(fValue, value.data(), 
fLength); + fValue[fLength] = 0; } // Populates micros.mixedMeasures and modifies quantity, based on the values in diff --git a/icu4c/source/i18n/number_utypes.h b/icu4c/source/i18n/number_utypes.h index d97eadc5cdb..50c861787f4 100644 --- a/icu4c/source/i18n/number_utypes.h +++ b/icu4c/source/i18n/number_utypes.h @@ -42,6 +42,9 @@ public: // TODO(units,hugovdm): populate this correctly for the general case - it's // currently only implemented for the .usage() use case. MeasureUnit outputUnit; + + // The gender of the formatted output. + const char *gender = ""; }; diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index 06329b8e7aa..5bbb837d5f9 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -1131,33 +1131,35 @@ class U_I18N_API Scale : public UMemory { namespace impl { -// Do not enclose entire Usage with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +// Do not enclose entire StringProp with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** * Manages NumberFormatterSettings::usage()'s char* instance on the heap. * @internal */ -class U_I18N_API Usage : public UMemory { +class U_I18N_API StringProp : public UMemory { #ifndef U_HIDE_INTERNAL_API public: /** @internal */ - Usage(const Usage& other); + StringProp(const StringProp &other); /** @internal */ - Usage& operator=(const Usage& other); + StringProp &operator=(const StringProp &other); /** @internal */ - Usage(Usage &&src) U_NOEXCEPT; + StringProp(StringProp &&src) U_NOEXCEPT; /** @internal */ - Usage& operator=(Usage&& src) U_NOEXCEPT; + StringProp &operator=(StringProp &&src) U_NOEXCEPT; /** @internal */ - ~Usage(); + ~StringProp(); /** @internal */ - int16_t length() const { return fLength; } + int16_t length() const { + return fLength; + } /** @internal * Makes a copy of value. Set to "" to unset. 
@@ -1165,16 +1167,19 @@ class U_I18N_API Usage : public UMemory { void set(StringPiece value); /** @internal */ - bool isSet() const { return fLength > 0; } + bool isSet() const { + return fLength > 0; + } #endif // U_HIDE_INTERNAL_API private: - char *fUsage; + char *fValue; int16_t fLength; UErrorCode fError; - Usage() : fUsage(nullptr), fLength(0), fError(U_ZERO_ERROR) {} + StringProp() : fValue(nullptr), fLength(0), fError(U_ZERO_ERROR) { + } /** @internal */ UBool copyErrorTo(UErrorCode &status) const { @@ -1185,7 +1190,7 @@ class U_I18N_API Usage : public UMemory { return false; } - // Allow NumberFormatterImpl to access fUsage. + // Allow NumberFormatterImpl to access fValue. friend class impl::NumberFormatterImpl; // Allow skeleton generation code to access private members. @@ -1480,7 +1485,10 @@ struct U_I18N_API MacroProps : public UMemory { Scale scale; // = Scale(); (benign value) /** @internal */ - Usage usage; // = Usage(); (no usage) + StringProp usage; // = StringProp(); (no usage) + + /** @internal */ + StringProp unitDisplayCase; // = StringProp(); (nominative) /** @internal */ const AffixPatternProvider* affixProvider = nullptr; // no ownership @@ -1503,7 +1511,8 @@ struct U_I18N_API MacroProps : public UMemory { bool copyErrorTo(UErrorCode &status) const { return notation.copyErrorTo(status) || precision.copyErrorTo(status) || padder.copyErrorTo(status) || integerWidth.copyErrorTo(status) || - symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status); + symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status) || + unitDisplayCase.copyErrorTo(status); } }; @@ -2169,6 +2178,21 @@ class U_I18N_API NumberFormatterSettings { * @draft ICU 68 */ Derived usage(StringPiece usage) &&; + + /** + * Specifies the desired case for a unit formatter's output (e.g. + * accusative, dative, genitive). 
+ * + * @internal ICU 69 technology preview + */ + Derived unitDisplayCase(StringPiece unitDisplayCase) const &; + + /** + * Overload of unitDisplayCase() for use on an rvalue reference. + * + * @internal ICU 69 technology preview + */ + Derived unitDisplayCase(StringPiece unitDisplayCase) &&; #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_INTERNAL_API @@ -2658,6 +2682,14 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { * @draft ICU 68 */ MeasureUnit getOutputUnit(UErrorCode& status) const; + + /** + * Gets the gender of the formatted output. Returns "" when the gender is + * unknown, or for ungendered languages. + * + * @internal ICU 69 technology preview. + */ + const char *getGender(UErrorCode& status) const; #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_INTERNAL_API diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index 59d2c460bbd..a905436bb89 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -1974,6 +1974,8 @@ UBool IntlTest::assertEquals(const char* message, UBool IntlTest::assertEquals(const char* message, const char* expected, const char* actual) { + U_ASSERT(expected != nullptr); + U_ASSERT(actual != nullptr); if (uprv_strcmp(expected, actual) != 0) { errln((UnicodeString)"FAIL: " + message + "; got \"" + actual + diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index bd4c0e28cc5..12ce0450fe7 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -64,6 +64,8 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition { void unitUsageErrorCodes(); void unitUsageSkeletons(); void unitCurrency(); + void unitInflections(); + void unitGender(); void unitPercent(); void percentParity(); void roundingFraction(); @@ -170,6 +172,19 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition { const FormattedNumber& formattedNumber, const 
UFieldPosition* expectedFieldPositions, int32_t length); + + struct UnitInflectionTestCase { + const char *locale; + const char *unitDisplayCase; + double value; + const UChar *expected; + }; + + void runUnitInflectionsTestCases(UnlocalizedNumberFormatter unf, + const UChar *skeleton, + const UChar *conciseSkeleton, + const UnitInflectionTestCase *cases, + int32_t numCases); }; class DecimalQuantityTest : public IntlTest { diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 1208cfcbd60..8ddaa532a3b 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -85,6 +85,8 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha TESTCASE_AUTO(unitUsageErrorCodes); TESTCASE_AUTO(unitUsageSkeletons); TESTCASE_AUTO(unitCurrency); + TESTCASE_AUTO(unitInflections); + TESTCASE_AUTO(unitGender); TESTCASE_AUTO(unitPercent); if (!quick) { // Slow test: run in exhaustive mode only @@ -1926,6 +1928,197 @@ void NumberFormatterApiTest::unitCurrency() { u"123,12 CN¥"); } +void NumberFormatterApiTest::runUnitInflectionsTestCases(UnlocalizedNumberFormatter unf, + const UChar *skeleton, + const UChar *conciseSkeleton, + const UnitInflectionTestCase *cases, + int32_t numCases) { + for (int32_t i = 0; i < numCases; i++) { + UnitInflectionTestCase t = cases[i]; + const UChar *skel; + const UChar *cSkel; + if (t.unitDisplayCase == nullptr || t.unitDisplayCase[0] == 0) { + unf = unf.unitDisplayCase(""); + skel = skeleton; + cSkel = conciseSkeleton; + } else { + unf = unf.unitDisplayCase(t.unitDisplayCase); + skel = nullptr; + cSkel = nullptr; + } + assertFormatSingle((UnicodeString("\"") + skeleton + u"\", locale=\"" + t.locale + + u"\", case=\"" + (t.unitDisplayCase ?
t.unitDisplayCase : "") + + u"\", value=" + t.value) + .getTerminatedBuffer(), + skel, cSkel, unf, Locale(t.locale), t.value, t.expected); + } +} + +void NumberFormatterApiTest::unitInflections() { + IcuTestErrorCode status(*this, "unitInflections"); + + UnlocalizedNumberFormatter unf; + const UChar *skeleton; + const UChar *conciseSkeleton; + { + // Simple inflected form test - test case based on the example in CLDR's + // grammaticalFeatures.xml + unf = NumberFormatter::with().unit(NoUnit::percent()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"percent unit-width-full-name"; + conciseSkeleton = u"% unit-width-full-name"; + const UnitInflectionTestCase percentCases[] = { + {"ru", nullptr, 10, u"10 процентов"}, // many + {"ru", "genitive", 10, u"10 процентов"}, // many + {"ru", nullptr, 33, u"33 процента"}, // few + {"ru", "genitive", 33, u"33 процентов"}, // few + {"ru", nullptr, 1, u"1 процент"}, // one + {"ru", "genitive", 1, u"1 процента"}, // one + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, percentCases, + UPRV_LENGTHOF(percentCases)); + } + { + // Testing "de" rules: + // + // + // + // per-patterns use accusative, but happen to match nominative, so we're + // not testing value1 in the first rule above. 
+ + unf = NumberFormatter::with().unit(MeasureUnit::getMeter()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"unit/meter unit-width-full-name"; + conciseSkeleton = u"unit/meter unit-width-full-name"; + const UnitInflectionTestCase meterCases[] = { + {"de", nullptr, 1, u"1 Meter"}, + {"de", "genitive", 1, u"1 Meters"}, + {"de", nullptr, 2, u"2 Meter"}, + {"de", "dative", 2, u"2 Metern"}, + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterCases, + UPRV_LENGTHOF(meterCases)); + + unf = NumberFormatter::with().unit(MeasureUnit::getDay()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"unit/day unit-width-full-name"; + conciseSkeleton = u"unit/day unit-width-full-name"; + const UnitInflectionTestCase dayCases[] = { + {"de", nullptr, 1, u"1 Tag"}, + {"de", "genitive", 1, u"1 Tages"}, + {"de", nullptr, 2, u"2 Tage"}, + {"de", "dative", 2, u"2 Tagen"}, + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, dayCases, UPRV_LENGTHOF(dayCases)); + + // Day has a perUnitPattern + unf = NumberFormatter::with() + .unit(MeasureUnit::forIdentifier("meter-per-day", status)) + .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"unit/meter-per-day unit-width-full-name"; + conciseSkeleton = u"unit/meter-per-day unit-width-full-name"; + const UnitInflectionTestCase meterPerDayCases[] = { + {"de", nullptr, 1, u"1 Meter pro Tag"}, + {"de", "genitive", 1, u"1 Meters pro Tag"}, + {"de", nullptr, 2, u"2 Meter pro Tag"}, + {"de", "dative", 2, u"2 Metern pro Tag"}, + // testing code path that falls back to "root" but does not inflect: + {"af", nullptr, 1, u"1 meter per dag"}, + {"af", "dative", 1, u"1 meter per dag"}, + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterPerDayCases, + UPRV_LENGTHOF(meterPerDayCases)); + + // Decade does not have a perUnitPattern at this time (CLDR 39 / ICU + // 69), so we can test for the correct form of the per part: + unf = NumberFormatter::with() + 
.unit(MeasureUnit::forIdentifier("parsec-per-decade", status)) + .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"unit/parsec-per-decade unit-width-full-name"; + conciseSkeleton = u"unit/parsec-per-decade unit-width-full-name"; + // Fragile test cases: these cases will break when whitespace is more + // consistently applied. + const UnitInflectionTestCase parsecPerDecadeCases[] = { + {"de", nullptr, 1, u"1\u00A0Parsec pro Jahrzehnt"}, + {"de", "genitive", 1, u"1 Parsec pro Jahrzehnt"}, + {"de", nullptr, 2, u"2\u00A0Parsec pro Jahrzehnt"}, + {"de", "dative", 2, u"2 Parsec pro Jahrzehnt"}, + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, parsecPerDecadeCases, + UPRV_LENGTHOF(parsecPerDecadeCases)); + } + { + // Testing inflection of mixed units: + unf = NumberFormatter::with() + .unit(MeasureUnit::forIdentifier("meter-and-centimeter", status)) + .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); + skeleton = u"unit/meter-and-centimeter unit-width-full-name"; + conciseSkeleton = u"unit/meter-and-centimeter unit-width-full-name"; + const UnitInflectionTestCase meterPerDayCases[] = { + // TODO(CLDR-14502): check that these inflections are correct, and + // whether CLDR needs any rules for them (presumably CLDR spec + // should mention it, if it's a consistent rule): + {"de", nullptr, 1.01, u"1 Meter, 1 Zentimeter"}, + {"de", "genitive", 1.01, u"1 Meters, 1 Zentimeters"}, + {"de", "genitive", 1.1, u"1 Meters, 10 Zentimeter"}, + {"de", "dative", 1.1, u"1 Meter, 10 Zentimetern"}, + {"de", "dative", 2.1, u"2 Metern, 10 Zentimetern"}, + }; + runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterPerDayCases, + UPRV_LENGTHOF(meterPerDayCases)); + } + // TODO: add a usage case that selects between preferences with different + // genders (e.g. year, month, day, hour). + // TODO: look at "↑↑↑" cases: check that inheritance is done right. 
+} + +void NumberFormatterApiTest::unitGender() { + IcuTestErrorCode status(*this, "unitGender"); + + const struct TestCase { + const char *locale; + const char *unitIdentifier; + const char *expectedGender; + } cases[] = { + {"de", "meter", "masculine"}, + {"de", "minute", "feminine"}, + {"de", "hour", "feminine"}, + {"de", "day", "masculine"}, + {"de", "year", "neuter"}, + {"fr", "minute", "feminine"}, + {"fr", "hour", "feminine"}, + {"fr", "day", "masculine"}, + // grammaticalFeatures deriveCompound "per" rule: + {"de", "meter-per-hour", "masculine"}, + {"af", "meter-per-hour", ""}, + // TODO(ICU-21494): determine whether list genders behave as follows, + // and implement proper getListGender support (covering more than just + // two genders): + // // gender rule for lists of people: de "neutral", fr "maleTaints" + // {"de", "day-and-hour-and-minute", "neuter"}, + // {"de", "hour-and-minute", "feminine"}, + // {"fr", "day-and-hour-and-minute", "masculine"}, + // {"fr", "hour-and-minute", "feminine"}, + }; + LocalizedNumberFormatter formatter; + FormattedNumber fn; + for (const TestCase &t : cases) { + // TODO(icu-units#140): make this work for more than just UNUM_UNIT_WIDTH_FULL_NAME + formatter = NumberFormatter::with() + .unit(MeasureUnit::forIdentifier(t.unitIdentifier, status)) + .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME) + .locale(Locale(t.locale)); + fn = formatter.formatDouble(1.1, status); + assertEquals(UnicodeString("Testing gender, unit: ") + t.unitIdentifier + + ", locale: " + t.locale, + t.expectedGender, fn.getGender(status)); + status.assertSuccess(); + } + + // Make sure getGender does not return garbage for genderless languages + formatter = NumberFormatter::with().locale(Locale::getEnglish()); + fn = formatter.formatDouble(1.1, status); + status.assertSuccess(); + assertEquals("getGender for a genderless language", "", fn.getGender(status)); +} + void NumberFormatterApiTest::unitPercent() { assertFormatDescending( u"Percent", diff --git 
a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt index 8001cf2f8e6..a8afd6d5a5c 100644 --- a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt +++ b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt @@ -320,6 +320,7 @@ //ldml/units/unitLength[@type="short"]/unit[@type="(\w++)-(%A)"]/displayName ; /unitsShort/$1/$2/dnam //ldml/units/unitLength[@type="long"]/unit[@type="(\w++)-(%A)"]/displayName ; /units/$1/$2/dnam +# TODO(icu-units#138): homogenize with compoundUnitPattern1 rules below by using "_" as case when case is absent in XML. //ldml/units/unitLength[@type="narrow"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /unitsNarrow/$1/$2/case/$4/$3 //ldml/units/unitLength[@type="short"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /unitsShort/$1/$2/case/$4/$3 //ldml/units/unitLength[@type="long"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /units/$1/$2/case/$4/$3 @@ -338,6 +339,7 @@ //ldml/units/unitLength[@type="short"]/compoundUnit[@type="(%A)"]/compoundUnitPattern ; /unitsShort/compound/$1 //ldml/units/unitLength[@type="long"]/compoundUnit[@type="(%A)"]/compoundUnitPattern ; /units/compound/$1 +# TODO(icu-units#138): the style of output paths used in these rules is the proposed format for all count/gender/case lateral inheritance rules. //ldml/units/unitLength[@type="narrow"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /unitsNarrow/compound/$1/$2/$3/$4 //ldml/units/unitLength[@type="short"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /unitsShort/compound/$1/$2/$3/$4 //ldml/units/unitLength[@type="long"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /units/compound/$1/$2/$3/$4