From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Wed, 25 Mar 2020 09:48:38 +0000 (+0100) Subject: Implement getConversionRatesInfo, plus cleanups&improvements. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8928fb9951e5c24240c061bb269f0f9d7bb38bc2;p=icu Implement getConversionRatesInfo, plus cleanups&improvements. --- diff --git a/icu4c/source/i18n/getunitsdata.cpp b/icu4c/source/i18n/getunitsdata.cpp index a53aaa3bad3..7217f02d49f 100644 --- a/icu4c/source/i18n/getunitsdata.cpp +++ b/icu4c/source/i18n/getunitsdata.cpp @@ -23,27 +23,33 @@ using icu::number::impl::DecimalQuantity; /** * A ResourceSink that collects conversion rate information. * - * Calls to put() collects ConversionRateInfo instances for into the vector - * passed to the constructor, but only if an identical instance isn't already - * present. - * * This class is for use by ures_getAllItemsWithFallback. Example code for * collecting conversion info for "mile" and "foot" into conversionInfoOutput: * * UErrorCode status = U_ZERO_ERROR; + * ures_getByKey(unitsBundle, "convertUnits", &fillIn, &status); * MaybeStackVector conversionInfoOutput; * ConversionRateDataSink convertSink(conversionInfoOutput); - * ures_getByKey(unitsBundle, "convertUnits", &fillIn, &status); * ures_getAllItemsWithFallback(fillIn, "mile", convertSink, status); * ures_getAllItemsWithFallback(fillIn, "foot", convertSink, status); */ class ConversionRateDataSink : public ResourceSink { public: + /** + * Constructor. + * @param out The vector to which ConversionRateInfo instances are to be + * added. + */ explicit ConversionRateDataSink(MaybeStackVector &out) : outVector(out) {} /** * Adds the conversion rate information found in value to the output vector. - * @param key The source unit identifier. + * + * Each call to put() collects a ConversionRateInfo instance for the + * specified source unit identifier into the vector passed to the + * constructor, but only if an identical instance isn't already present. + * + * @param source The source unit identifier. * @param value A resource containing conversion rate info (a target and * factor, and possibly an offset). * @param noFallback Ignored. @@ -82,12 +88,12 @@ class ConversionRateDataSink : public ResourceSink { // category. I should make this code more efficient after // double-checking we're fine with relying on such a detail from the // CLDR spec? - CharString tmpTarget; - tmpTarget.appendInvariantChars(target, lenTarget, status); + fLastTarget.clear(); + fLastTarget.appendInvariantChars(target, lenTarget, status); if (U_FAILURE(status)) return; for (int32_t i = 0, len = outVector.length(); i < len; i++) { if (strcmp(outVector[i]->source.data(), source) == 0 && - strcmp(outVector[i]->target.data(), tmpTarget.data()) == 0) { + strcmp(outVector[i]->target.data(), fLastTarget.data()) == 0) { return; } } @@ -100,20 +106,37 @@ class ConversionRateDataSink : public ResourceSink { return; } else { cr->source.append(source, lenSource, status); - cr->target.append(tmpTarget.data(), tmpTarget.length(), status); + cr->target.append(fLastTarget.data(), fLastTarget.length(), status); cr->factor.appendInvariantChars(factor, lenFactor, status); if (offset != NULL) cr->offset.appendInvariantChars(offset, lenOffset, status); } } + /** + * Returns the MeasureUnit that was the conversion target of the most recent + * call to put() - typically meaning the most recent call to + * ures_getAllItemsWithFallback(). + */ + MeasureUnit getLastTarget(UErrorCode &status) { + return MeasureUnit::forIdentifier(fLastTarget.data(), status); + } + private: MaybeStackVector &outVector; + + // TODO(review): felt like a hack: provides easy access to the most recent + // target. This hack is another point making me wonder if doing this + // ResourceSink thing is worthwhile. Functional style is not more verbose, + // and IMHO more readable than this object-based approach where the output + // seems/feels like a side-effect. + CharString fLastTarget; }; -// WIP/FIXME: this class is currently unused code (dead?) - putUnitPref() has -// all the features we need whereas this doesn't handle fallback to -// usage="default" and region="001" yet. If we want to fix that here, this code -// will get quite a bit more complicated. +// WIP/FIXME: partially due to my skepticism of using the ResourceSink design +// for units resources, this class is currently unused code (dead?) - +// collectUnitPrefs() has all the features we need whereas this doesn't handle +// fallback to usage="default" and region="001" yet. If we want to fix that +// here, this code will get quite a bit more complicated. class UnitPreferencesSink : public ResourceSink { public: explicit UnitPreferencesSink(MaybeStackVector &out) : outVector(out) {} @@ -165,8 +188,8 @@ class UnitPreferencesSink : public ResourceSink { * down to a particular usage and region (example: * "unitPreferenceData/length/road/GB"). */ -void putUnitPref(UResourceBundle *usageData, MaybeStackVector &outVector, - UErrorCode &status) { +void collectUnitPrefs(UResourceBundle *usageData, MaybeStackVector &outVector, + UErrorCode &status) { if (U_FAILURE(status)) return; StackUResourceBundle prefBundle; @@ -219,8 +242,72 @@ void putUnitPref(UResourceBundle *usageData, MaybeStackVector &o } } +void processSingleUnit(const MeasureUnit &unit, const UResourceBundle *convertUnitsBundle, + ConversionRateDataSink &convertSink, MeasureUnit *baseSingleUnit, + UErrorCode &status) { + int32_t dimensionality = unit.getDimensionality(status); + + MeasureUnit simple = unit; + if (dimensionality != 1 || simple.getSIPrefix(status) != UMEASURE_SI_PREFIX_ONE) { + simple = unit.withDimensionality(1, status).withSIPrefix(UMEASURE_SI_PREFIX_ONE, status); + } + ures_getAllItemsWithFallback(convertUnitsBundle, simple.getIdentifier(), convertSink, status); + + if (baseSingleUnit != NULL) { + *baseSingleUnit = convertSink.getLastTarget(status).withDimensionality(dimensionality, status); + } +} + } // namespace +MaybeStackVector getConversionRatesInfo(const MeasureUnit source, const MeasureUnit target, + MeasureUnit *baseCompoundUnit, + UErrorCode &status) { + MaybeStackVector result; + + int32_t sourceUnitsLength, targetUnitsLength; + LocalArray sourceUnits = source.splitToSingleUnits(sourceUnitsLength, status); + LocalArray targetUnits = target.splitToSingleUnits(targetUnitsLength, status); + + LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); + StackUResourceBundle convertUnitsBundle; + ures_getByKey(unitsBundle.getAlias(), "convertUnits", convertUnitsBundle.getAlias(), &status); + + ConversionRateDataSink convertSink(result); + if (baseCompoundUnit != NULL) { + *baseCompoundUnit = MeasureUnit(); + } + for (int i = 0; i < sourceUnitsLength; i++) { + MeasureUnit baseUnit; + processSingleUnit(sourceUnits[i], convertUnitsBundle.getAlias(), convertSink, &baseUnit, status); + if (baseCompoundUnit != NULL) { + if (source.getComplexity(status) == UMEASURE_UNIT_SEQUENCE) { + // TODO(hugovdm): add consistency checks. + *baseCompoundUnit = baseUnit; + } else { + *baseCompoundUnit = baseCompoundUnit->product(baseUnit, status); + } + } + } + if (baseCompoundUnit != NULL) { + fprintf(stderr, "source base: %s\n", baseCompoundUnit->getIdentifier()); + *baseCompoundUnit = MeasureUnit(); + } + for (int i = 0; i < targetUnitsLength; i++) { + MeasureUnit baseUnit; + processSingleUnit(targetUnits[i], convertUnitsBundle.getAlias(), convertSink, &baseUnit, status); + if (baseCompoundUnit != NULL) { + if (source.getComplexity(status) == UMEASURE_UNIT_SEQUENCE) { + // TODO(hugovdm): add consistency checks. + *baseCompoundUnit = baseUnit; + } else { + *baseCompoundUnit = baseCompoundUnit->product(baseUnit, status); + } + } + } + return result; +} + /** * Fetches the units data that would be needed for the given usage. * @@ -286,7 +373,7 @@ void getUnitsData(const char *outputRegion, const char *usage, const MeasureUnit } // Collect all the preferences into unitPreferences - putUnitPref(stackBundle.getAlias(), unitPreferences, status); + collectUnitPrefs(stackBundle.getAlias(), unitPreferences, status); // Load ConversionRateInfo for each of the units in unitPreferences for (int32_t i = 0; i < unitPreferences.length(); i++) { @@ -300,4 +387,4 @@ void getUnitsData(const char *outputRegion, const char *usage, const MeasureUnit U_NAMESPACE_END -#endif /* #if !UCONFIG_NO_FORMATTING */ \ No newline at end of file +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/getunitsdata.h b/icu4c/source/i18n/getunitsdata.h index cdfb77a25d1..78b985431a7 100644 --- a/icu4c/source/i18n/getunitsdata.h +++ b/icu4c/source/i18n/getunitsdata.h @@ -16,14 +16,28 @@ U_NAMESPACE_BEGIN -struct U_I18N_API ConversionRateInfo { +// Encapsulates "convertUnits" information from units resources, specifying how +// to convert from one unit to another. +class U_I18N_API ConversionRateInfo { + public: + ConversionRateInfo() {}; + ConversionRateInfo(StringPiece source, StringPiece target, StringPiece factor, StringPiece offset, + UErrorCode &status) + : source(), target(), factor(), offset() { + this->source.append(source, status); + this->target.append(target, status); + this->factor.append(factor, status); + this->offset.append(offset, status); + }; CharString source; - CharString target; + CharString target; // FIXME/WIP: baseUnit CharString factor; CharString offset; bool reciprocal = false; }; +// Encapsulates unitPreferenceData information from units resources, specifying +// a sequence of output unit preferences. struct U_I18N_API UnitPreference { UnitPreference() : geq(1) {} CharString unit; @@ -31,15 +45,58 @@ struct U_I18N_API UnitPreference { CharString skeleton; }; -// TODO(hugo): Add a comment. +/** + * Collects and returns ConversionRateInfo needed to convert from source to + * target. + * + * @param source The source unit (the unit type converted from). + * @param target The target unit (the unit type converted to). + * @param baseCompoundUnit Output parameter: if not NULL, it will be set to the + * base unit type used as pivot for converting from source to target. + * @param status Receives status. + */ +MaybeStackVector U_I18N_API getConversionRatesInfo(MeasureUnit source, + MeasureUnit target, + MeasureUnit *baseCompoundUnit, + UErrorCode &status); + +/** + * Collects the data needed for converting the inputUnit type to output units + * for the given region and usage. + * + * WARNING: This function only supports simple units presently. + * WIP/TODO(hugovdm): add support for UMEASURE_UNIT_SEQUENCE and + * UMEASURE_UNIT_COMPOUND complexities. + * + * @param outputRegion The region code for which output preferences are desired + * (e.g. US or 001). + * @param usage The "usage" parameter, such as "person", "road", or "fluid". + * Unrecognised usages are treated as "default". + * @param inputUnit The input unit type: the type of the units that will be + * converted. + * @param category Output parameter, this will be set to the category associated + * with the inputUnit. TODO(hugovdm): this might get specified instead of + * requested. Or it may be unnecessary to return it. + * @param baseUnit Output parameter, this will be set to the base unit through + * which conversions are made (e.g. "kilogram", "meter", or "year"). + * TODO(hugovdm): find out if this is needed/useful. + * @param conversionInfo Output parameter, a vector of ConversionRateInfo + * instances needed to be able to convert from inputUnit to all the output units + * found in unitPreferences. + * @param unitPreferences Output parameter, a vector of all the output + * preferences for the given region, usage, and input unit type (which + * determines the category). + * @param status Receives status. + */ void U_I18N_API getUnitsData(const char *outputRegion, const char *usage, const MeasureUnit &inputUnit, CharString &category, MeasureUnit &baseUnit, MaybeStackVector &conversionInfo, MaybeStackVector &unitPreferences, UErrorCode &status); -// TODO(hugo): Implement -MaybeStackVector - U_I18N_API getConversionRatesInfo(MeasureUnit source, MeasureUnit target, UErrorCode &status); +// // TODO(hugo): Implement +// // Compound units as source and target, conversion rates for each piece. +// MaybeStackVector +// U_I18N_API getConversionRatesInfo(MeasureUnit source, MeasureUnit target, UErrorCode &status); U_NAMESPACE_END diff --git a/icu4c/source/i18n/unitconverter.cpp b/icu4c/source/i18n/unitconverter.cpp index 7366613ab65..392e95aa8af 100644 --- a/icu4c/source/i18n/unitconverter.cpp +++ b/icu4c/source/i18n/unitconverter.cpp @@ -194,7 +194,11 @@ const ConversionRateInfo& extractConversionRateInfo(StringPiece source, } status = U_INTERNAL_PROGRAM_ERROR; - return ConversionRateInfo(); + // WIP/TODO(review): cargo-culting or magic-incantation, this fixes the warning: + // unitconverter.cpp:197:12: warning: returning reference to local temporary object [-Wreturn-stack-address] + // But I'm not confident in what I'm doing, having only done some casual + // reading about the possible negative consequencies of returning std::move. + return std::move(ConversionRateInfo("pound", "kilogram", "0.453592", "0", status)); } /*/ diff --git a/icu4c/source/test/intltest/unitstest.cpp b/icu4c/source/test/intltest/unitstest.cpp index e7719f22ef2..2086f458e9f 100644 --- a/icu4c/source/test/intltest/unitstest.cpp +++ b/icu4c/source/test/intltest/unitstest.cpp @@ -37,6 +37,7 @@ class UnitsTest : public IntlTest { void testConversions(); void testPreferences(); + void testGetConversionRateInfo(); void testGetUnitsData(); void testBasic(); @@ -58,6 +59,7 @@ void UnitsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(testConversions); TESTCASE_AUTO(testPreferences); + TESTCASE_AUTO(testGetConversionRateInfo); TESTCASE_AUTO(testGetUnitsData); TESTCASE_AUTO(testBasic); @@ -75,6 +77,8 @@ void UnitsTest::verifyTestCase(const UnitConversionTestCase &testCase) { MeasureUnit sourceUnit = MeasureUnit::forIdentifier(testCase.source, status); MeasureUnit targetUnit = MeasureUnit::forIdentifier(testCase.target, status); + // TODO(younies): enable this again + // UnitConverter converter(sourceUnit, targetUnit, status); // double actual = converter.convert(testCase.inputValue); @@ -95,6 +99,8 @@ void UnitsTest::testBasic() { MeasureUnit sourceUnit = MeasureUnit::forIdentifier(testCase.source, status); MeasureUnit targetUnit = MeasureUnit::forIdentifier(testCase.target, status); + // TODO(younies): enable this again + // UnitConverter converter(sourceUnit, targetUnit, status); // double actual = converter.convert(testCase.inputValue); @@ -273,6 +279,8 @@ void runDataDrivenConversionTest(void *context, char *fields[][2], int32_t field quantity.length(), quantity.data(), x.length(), x.data(), y.length(), y.data(), expected, commentConversionFormula.length(), commentConversionFormula.data()); } else { + // TODO(younies): enable this again + // // UnitConverter converter(sourceUnit, targetUnit, status); // if (status.errIfFailureAndReset("constructor: UnitConverter(<%s>, <%s>, status)", // sourceUnit.getIdentifier(), targetUnit.getIdentifier())) { @@ -572,6 +580,44 @@ void UnitsTest::testPreferences() { } } +void UnitsTest::testGetConversionRateInfo() { + struct { + const char *sourceUnit; + const char *targetUnit; + const char *threeExpectedOutputs[3]; + const char *baseUnit; + } testCases[]{ + {"centimeter-per-square-milligram", + "inch-per-square-ounce", + {"pound", "stone", "ton"}, + "kilogram"}, + {"liter", "gallon", {"liter", "gallon", NULL}, "cubic-meter"}, + {"stone-and-pound", "ton", {"pound", "stone", "ton"}, "kilogram"}, + {"mile-per-hour", "dekameter-per-hour", {"mile", "hour", "meter"}, "meter-per-second"}, + }; + for (const auto &t : testCases) { + logln("---testing: source=\"%s\", target=\"%s\", expectedBaseUnit=\"%s\"", t.sourceUnit, + t.targetUnit, t.baseUnit); + IcuTestErrorCode status(*this, "testGetConversionRateInfo"); + + MeasureUnit baseCompoundUnit; + MeasureUnit sourceUnit = MeasureUnit::forIdentifier(t.sourceUnit, status); + MeasureUnit targetUnit = MeasureUnit::forIdentifier(t.targetUnit, status); + MaybeStackVector conversionInfo = + getConversionRatesInfo(sourceUnit, targetUnit, &baseCompoundUnit, status); + + logln("---found BaseUnit=\"%s\"", baseCompoundUnit.getIdentifier()); + for (int i = 0; i < conversionInfo.length(); i++) { + ConversionRateInfo *cri; + cri = conversionInfo[i]; + logln("* conversionInfo %d: source=\"%s\", target=\"%s\", factor=\"%s\", offset=\"%s\"", i, + cri->source.data(), cri->target.data(), cri->factor.data(), cri->offset.data()); + assertTrue("ConversionRateInfo has source, target, and factor", + cri->source.length() > 0 && cri->target.length() > 0 && cri->factor.length() > 0); + } + } +} + // We test "successfully loading some data", not specific output values, since // this would duplicate some of the input data. We leave end-to-end testing to // take care of that. Running `intltest` with `-v` will print out the loaded