granicus.if.org Git - icu/commitdiff
Manual cherry-pick of getConversionRatesInfo
author Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com>
Sat, 28 Mar 2020 18:47:32 +0000 (19:47 +0100)
committer Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com>
Sat, 28 Mar 2020 19:13:04 +0000 (20:13 +0100)
icu4c/source/i18n/Makefile.in
icu4c/source/i18n/i18n.vcxproj
icu4c/source/i18n/i18n.vcxproj.filters
icu4c/source/i18n/i18n_uwp.vcxproj
icu4c/source/i18n/unitsdata.cpp [new file with mode: 0644]
icu4c/source/i18n/unitsdata.h [new file with mode: 0644]
icu4c/source/test/intltest/Makefile.in
icu4c/source/test/intltest/itformat.cpp
icu4c/source/test/intltest/unitsdatatest.cpp [new file with mode: 0644]

index 827f2c207badf43e5cd36b2acbac6340f4d4d710..fe525c510122ab1b0d1c10ed5371b3b8da9a1a70 100644 (file)
@@ -115,7 +115,7 @@ numparse_affixes.o numparse_compositions.o numparse_validators.o \
 numrange_fluent.o numrange_impl.o \
 erarules.o \
 formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \
-unitconverter.o
+unitconverter.o unitsdata.o
 
 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
index f5cad11f5228967f799b4729aa74f306c0fedd38..8d8c2d265ae732af0075821ed53958849a40286f 100644 (file)
     <ClCompile Include="ulocdata.cpp" />
     <ClCompile Include="umsg.cpp" />
     <ClCompile Include="unitconverter.cpp" />
+    <ClCompile Include="unitsdata.cpp" />
     <ClCompile Include="unum.cpp" />
     <ClCompile Include="unumsys.cpp" />
     <ClCompile Include="upluralrules.cpp" />
     <ClInclude Include="numrange_impl.h" />
     <ClInclude Include="formattedval_impl.h" />
     <ClInclude Include="unitconverter.h" />
+    <ClInclude Include="unitsdata.h" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="i18n.rc" />
index 5feb9f252fbf277d6886c8392516493faf4606e1..319648d785b87f77316135285654a0f88f6ca996 100644 (file)
     <ClCompile Include="unitconverter.cpp">
       <Filter>formatting</Filter>
     </ClCompile>
+    <ClCompile Include="unitsdata.cpp">
+      <Filter>formatting</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="bocsu.cpp">
     <ClInclude Include="unitconveter.h">
       <Filter>formatting</Filter>
     </ClInclude>
+    <ClInclude Include="unitsdata.h">
+      <Filter>formatting</Filter>
+    </ClInclude>
     <ClInclude Include="vzone.h">
       <Filter>formatting</Filter>
     </ClInclude>
index 39b80d454d4226e96f540dad8b9c463ee2560cea..94381983ab74f4fa649b7b1403b9b06fabd3206e 100644 (file)
     <ClCompile Include="ulocdata.cpp" />
     <ClCompile Include="umsg.cpp" />
     <ClCompile Include="unitconverter.cpp" />
+    <ClCompile Include="unitsdata.cpp" />
     <ClCompile Include="unum.cpp" />
     <ClCompile Include="unumsys.cpp" />
     <ClCompile Include="upluralrules.cpp" />
     <ClInclude Include="numrange_impl.h" />
     <ClInclude Include="formattedval_impl.h" />
     <ClInclude Include="unitconverter.h" />
+    <ClInclude Include="unitsdata.h" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="i18n.rc" />
diff --git a/icu4c/source/i18n/unitsdata.cpp b/icu4c/source/i18n/unitsdata.cpp
new file mode 100644 (file)
index 0000000..b39005b
--- /dev/null
@@ -0,0 +1,238 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include <utility>
+
+#include "cstring.h"
+#include "resource.h"
+#include "unitsdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * A ResourceSink that collects conversion rate information.
+ *
+ * This class is for use by ures_getAllItemsWithFallback. Example code for
+ * collecting conversion info for "mile" and "foot" into conversionInfoOutput:
+ *
+ *     UErrorCode status = U_ZERO_ERROR;
+ *     ures_getByKey(unitsBundle, "convertUnits", &fillIn, &status);
+ *     MaybeStackVector<ConversionRateInfo> conversionInfoOutput;
+ *     ConversionRateDataSink convertSink(conversionInfoOutput);
+ *     ures_getAllItemsWithFallback(fillIn, "mile", convertSink, status);
+ *     ures_getAllItemsWithFallback(fillIn, "foot", convertSink, status);
+ */
+class ConversionRateDataSink : public ResourceSink {
+  public:
+    /**
+     * Constructor.
+     * @param out The vector to which ConversionRateInfo instances are to be
+     * added.
+     */
+    explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> &out) : outVector(out) {}
+
+    /**
+     * Adds the conversion rate information found in value to the output vector.
+     *
+     * Each call to put() collects a ConversionRateInfo instance for the
+     * specified source unit identifier into the vector passed to the constructor,
+     * unless an entry with the same source unit and base unit is already present.
+     *
+     * @param source The source unit identifier.
+     * @param value A resource table with "target" (base unit) and "factor", and optionally "offset".
+     * @param noFallback Ignored.
+     * @param status ICU error code; set to U_MISSING_RESOURCE_ERROR if "target" or "factor"
+     * is missing, or to U_MEMORY_ALLOCATION_ERROR if emplaceBack() returns NULL.
+     */
+    void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
+        ResourceTable conversionRateTable = value.getTable(status);
+        if (U_FAILURE(status)) return;
+
+        // Collect base unit, factor and offset from the resource.
+        int32_t lenSource = uprv_strlen(source);
+        const UChar *baseUnit = NULL, *factor = NULL, *offset = NULL;
+        int32_t lenBaseUnit, lenFactor, lenOffset;
+        const char *key;
+        for (int32_t i = 0; conversionRateTable.getKeyAndValue(i, key, value); ++i) {
+            if (uprv_strcmp(key, "target") == 0) {
+                baseUnit = value.getString(lenBaseUnit, status);
+            } else if (uprv_strcmp(key, "factor") == 0) {
+                factor = value.getString(lenFactor, status);
+            } else if (uprv_strcmp(key, "offset") == 0) {
+                offset = value.getString(lenOffset, status);
+            }
+        }
+        if (baseUnit == NULL || factor == NULL) {
+            status = U_MISSING_RESOURCE_ERROR;
+            return;
+        }
+
+        // Check if we already have the conversion rate in question.
+        //
+        // TODO(review): We could do this skip-check *before* we fetch
+        // baseUnit/factor/offset based only on the source unit, but only if
+        // we're certain we'll never get two different baseUnits for a given
+        // source. This should be the case, since convertUnit entries in CLDR's
+        // units.xml should all point at a defined base unit for the unit
+        // category. I should make this code more efficient after
+        // double-checking we're fine with relying on such a detail from the
+        // CLDR spec?
+        fLastBaseUnit.clear();
+        fLastBaseUnit.appendInvariantChars(baseUnit, lenBaseUnit, status);
+        if (U_FAILURE(status)) return;
+        for (int32_t i = 0, len = outVector.length(); i < len; i++) {
+            if (strcmp(outVector[i]->sourceUnit.data(), source) == 0 &&
+                strcmp(outVector[i]->baseUnit.data(), fLastBaseUnit.data()) == 0) {
+                return;
+            }
+        }
+        if (U_FAILURE(status)) return; // NOTE(review): redundant - nothing in the loop above touches status.
+
+        // We don't have this ConversionRateInfo yet: add it.
+        ConversionRateInfo *cr = outVector.emplaceBack();
+        if (!cr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        } else {
+            cr->sourceUnit.append(source, lenSource, status);
+            cr->baseUnit.append(fLastBaseUnit.data(), fLastBaseUnit.length(), status);
+            cr->factor.appendInvariantChars(factor, lenFactor, status);
+            if (offset != NULL) cr->offset.appendInvariantChars(offset, lenOffset, status);
+        }
+    }
+
+    /**
+     * Returns the MeasureUnit that was the conversion base unit of the most
+     * recent call to put() - typically meaning the most recent call to
+     * ures_getAllItemsWithFallback().
+     */
+    MeasureUnit getLastBaseUnit(UErrorCode &status) {
+        return MeasureUnit::forIdentifier(fLastBaseUnit.data(), status);
+    }
+
+  private:
+    MaybeStackVector<ConversionRateInfo> &outVector;
+
+    // TODO(review): felt like a hack: provides easy access to the most recent
+    // baseUnit. This hack is another point making me wonder if doing this
+    // ResourceSink thing is worthwhile. Functional style is not more verbose,
+    // and IMHO more readable than this object-based approach where the output
+    // seems/feels like a side-effect.
+    CharString fLastBaseUnit;
+};
+
+// The input must be a single (non-compound) unit, but may have dimensionality != 1 and an SI prefix.
+void processSingleUnit(const MeasureUnit &unit, const UResourceBundle *convertUnitsBundle,
+                       ConversionRateDataSink &convertSink, MeasureUnit *baseSingleUnit,
+                       UErrorCode &status) {
+    int32_t dimensionality = unit.getDimensionality(status);
+
+    MeasureUnit simple = unit;
+    if (dimensionality != 1 || simple.getSIPrefix(status) != UMEASURE_SI_PREFIX_ONE) {
+        simple = unit.withDimensionality(1, status).withSIPrefix(UMEASURE_SI_PREFIX_ONE, status);
+    }
+    ures_getAllItemsWithFallback(convertUnitsBundle, simple.getIdentifier(), convertSink, status); // Look up the stripped-down unit.
+
+    if (baseSingleUnit != NULL) {
+        MeasureUnit baseUnit = convertSink.getLastBaseUnit(status);
+
+        if (dimensionality == 1) {
+            *baseSingleUnit = baseUnit;
+        } else if (baseUnit.getComplexity(status) == UMEASURE_UNIT_SINGLE) {
+            // TODO(hugovdm): find examples where we're converting a *-per-* to
+            // a square-*? Does one ever square frequency? What about
+            // squared-speed in the case of mv^2? Or F=ma^2?
+            //
+            // baseUnit might also have dimensionality, e.g. cubic-meter -
+            // retain this instead of overriding with input unit dimensionality:
+            dimensionality *= baseUnit.getDimensionality(status);
+            *baseSingleUnit = baseUnit.withDimensionality(dimensionality, status);
+        } else {
+            // We only support higher dimensionality input units if they map to
+            // simple base units, so that the base unit can have the
+            // dimensionality easily applied.
+            //
+            // TODO(hugovdm): produce succeeding examples of simple input unit
+            // mapped to a different simple target/base unit.
+            //
+            // TODO(hugovdm): produce failing examples of higher-dimensionality
+            // or inverted input units that map to compound output units.
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+    }
+}
+
+} // namespace
+
+MaybeStackVector<ConversionRateInfo> getConversionRatesInfo(const MeasureUnit source, const MeasureUnit target,
+                                                            MeasureUnit *baseCompoundUnit,
+                                                            UErrorCode &status) {
+    MaybeStackVector<ConversionRateInfo> result; // Filled in by convertSink as each single unit is processed.
+
+    int32_t sourceUnitsLength, targetUnitsLength; // NOTE(review): uninitialized; assumes splitToSingleUnits() always sets them - confirm.
+    LocalArray<MeasureUnit> sourceUnits = source.splitToSingleUnits(sourceUnitsLength, status);
+    LocalArray<MeasureUnit> targetUnits = target.splitToSingleUnits(targetUnitsLength, status);
+
+    LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
+    StackUResourceBundle convertUnitsBundle;
+    ures_getByKey(unitsBundle.getAlias(), "convertUnits", convertUnitsBundle.getAlias(), &status); // NOTE(review): status is not checked before the loops below - confirm this is safe.
+
+    ConversionRateDataSink convertSink(result);
+    MeasureUnit sourceBaseUnit;
+    for (int i = 0; i < sourceUnitsLength; i++) {
+        MeasureUnit baseUnit;
+        processSingleUnit(sourceUnits[i], convertUnitsBundle.getAlias(), convertSink, &baseUnit, status);
+        if (source.getComplexity(status) == UMEASURE_UNIT_SEQUENCE) { // Sequence: every part must map to the same base unit.
+            if (i == 0) {
+                sourceBaseUnit = baseUnit;
+            } else {
+                if (baseUnit != sourceBaseUnit) {
+                    status = U_ILLEGAL_ARGUMENT_ERROR;
+                    return result;
+                }
+            }
+        } else { // Not a sequence: accumulate the product of the parts' base units.
+            sourceBaseUnit = sourceBaseUnit.product(baseUnit, status);
+        }
+    }
+    MeasureUnit targetBaseUnit;
+    for (int i = 0; i < targetUnitsLength; i++) {
+        MeasureUnit baseUnit;
+        processSingleUnit(targetUnits[i], convertUnitsBundle.getAlias(), convertSink, &baseUnit, status);
+        if (target.getComplexity(status) == UMEASURE_UNIT_SEQUENCE) {
+            // WIP/TODO(hugovdm): add consistency checks.
+            if (baseUnit != sourceBaseUnit) {
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return result;
+            }
+            targetBaseUnit = baseUnit;
+        } else {
+            // WIP/FIXME(hugovdm): I think I found a bug in targetBaseUnit.product():
+            // Target Base: <kilogram-square-meter-per-square-second> x <one-per-meter> => <meter>
+            //
+            // fprintf(stderr, "Target Base: <%s> x <%s> => ", targetBaseUnit.getIdentifier(),
+            //         baseUnit.getIdentifier());
+            targetBaseUnit = targetBaseUnit.product(baseUnit, status);
+            // fprintf(stderr, "<%s>\n", targetBaseUnit.getIdentifier());
+            // fprintf(stderr, "Status: %s\n", u_errorName(status));
+        }
+    }
+    if (targetBaseUnit != sourceBaseUnit) { // Source and target must reduce to the same base unit.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return result;
+    }
+    if (baseCompoundUnit != NULL) { *baseCompoundUnit = sourceBaseUnit; }
+    return result;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/i18n/unitsdata.h b/icu4c/source/i18n/unitsdata.h
new file mode 100644 (file)
index 0000000..a3e106d
--- /dev/null
@@ -0,0 +1,60 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+#ifndef __GETUNITSDATA_H__
+#define __GETUNITSDATA_H__
+
+#include "charstr.h"
+#include "cmemory.h"
+#include "unicode/measunit.h"
+#include "unicode/stringpiece.h"
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+// Encapsulates one "convertUnits" entry from the units resources, specifying how
+// to convert a source unit to its base unit.
+class U_I18N_API ConversionRateInfo {
+  public:
+    ConversionRateInfo(){};
+    ConversionRateInfo(StringPiece sourceUnit, StringPiece baseUnit, StringPiece factor,
+                       StringPiece offset, UErrorCode &status)
+        : sourceUnit(), baseUnit(), factor(), offset() {
+        this->sourceUnit.append(sourceUnit, status);
+        this->baseUnit.append(baseUnit, status);
+        this->factor.append(factor, status);
+        this->offset.append(offset, status);
+    };
+    CharString sourceUnit; // Identifier of the unit being converted from.
+    CharString baseUnit; // Base unit identifier (the resource's "target" value). FIXME/WIP: baseUnit
+    CharString factor; // Value of the resource's "factor" key, kept as a string.
+    CharString offset; // Value of the resource's "offset" key; nothing is appended when the key is absent.
+    bool reciprocal = false; // NOTE(review): not assigned by either constructor or by ConversionRateDataSink::put() - purpose to be confirmed.
+};
+
+/**
+ * Collects and returns the ConversionRateInfo entries needed to convert the
+ * single units of both source and target to their base units.
+ *
+ * If source and target are not compatible for conversion, status will be set to
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param source The source unit (the unit type converted from).
+ * @param target The target unit (the unit type converted to).
+ * @param baseCompoundUnit Output parameter: if not NULL, it will be set to the
+ * compound base unit type used as pivot for converting from source to target.
+ * @param status Receives status.
+ */
+MaybeStackVector<ConversionRateInfo> U_I18N_API getConversionRatesInfo(MeasureUnit source,
+                                                                       MeasureUnit target,
+                                                                       MeasureUnit *baseCompoundUnit,
+                                                                       UErrorCode &status);
+
+U_NAMESPACE_END
+
+#endif //__GETUNITSDATA_H__
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
index 594d491f6f73efe65eb534e3ae019c0857556b24..74a6f90756482b69856756583959afd122e5c931 100644 (file)
@@ -69,7 +69,7 @@ string_segment_test.o \
 numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
 static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
 formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
-unitstest.o
+unitsdatatest.o unitstest.o
 
 DEPS = $(OBJECTS:.o=.d)
 
index 9126d528fa25ff60bd47beb2f5ffcb17630f2a6a..bd804888fbf3e8b9e3cba98f5084996df7d533e3 100644 (file)
@@ -74,6 +74,7 @@ extern IntlTest *createScientificNumberFormatterTest();
 extern IntlTest *createFormattedValueTest();
 extern IntlTest *createFormattedStringBuilderTest();
 extern IntlTest *createStringSegmentTest();
+extern IntlTest *createUnitsDataTest();
 extern IntlTest *createUnitsTest();
 
 
@@ -257,6 +258,15 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
             callTest(*test, par);
           }
           break;
+        case 57:
+          name = "UnitsDataTest";
+          if (exec) {
+            logln("UnitsDataTest test---");
+            logln((UnicodeString)"");
+            LocalPointer<IntlTest> test(createUnitsDataTest());
+            callTest(*test, par);
+          }
+          break;
         default: name = ""; break; //needed to end loop
     }
     if (exec) {
diff --git a/icu4c/source/test/intltest/unitsdatatest.cpp b/icu4c/source/test/intltest/unitsdatatest.cpp
new file mode 100644 (file)
index 0000000..225482d
--- /dev/null
@@ -0,0 +1,123 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "intltest.h"
+#include "unitsdata.h"
+
+class UnitsDataTest : public IntlTest { // Test suite for the functions declared in unitsdata.h.
+    public:
+    UnitsDataTest() {}
+
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL);
+
+    void testGetConversionRateInfo(); // Exercises getConversionRatesInfo().
+};
+
+extern IntlTest *createUnitsDataTest() { return new UnitsDataTest(); } // Returns a newly allocated suite; IntlTestFormat::runIndexedTest wraps it in a LocalPointer.
+
+void UnitsDataTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+    if (exec) { logln("TestSuite UnitsDataTest: "); } // Log the suite banner only when exec is set.
+    TESTCASE_AUTO_BEGIN;
+    TESTCASE_AUTO(testGetConversionRateInfo); // The only test case in this suite.
+    TESTCASE_AUTO_END;
+}
+
+void UnitsDataTest::testGetConversionRateInfo() {
+    const int MAX_NUM_RATES = 5;
+    struct {
+        // The source unit passed to getConversionRatesInfo.
+        const char *sourceUnit;
+        // The target unit passed to getConversionRatesInfo.
+        const char *targetUnit;
+        // Expected: units whose conversion rates are expected in the results (NULL-padded).
+        const char *expectedOutputs[MAX_NUM_RATES];
+        // Expected "base unit", to serve as pivot between source and target.
+        const char *expectedBaseUnit;
+    } testCases[]{
+        {"centimeter-per-square-milligram",
+         "inch-per-square-ounce",
+         {"meter", "gram", "inch", "ounce", NULL},
+         "meter-per-square-kilogram"},
+
+        {"liter", "gallon", {"liter", "gallon", NULL, NULL, NULL}, "cubic-meter"},
+
+        // Sequence
+        {"stone-and-pound", "ton", {"pound", "stone", "ton", NULL, NULL}, "kilogram"},
+
+        {"mile-per-hour",
+         "dekameter-per-hour",
+         {"mile", "hour", "meter", NULL, NULL},
+         "meter-per-second"},
+
+        // Power: watt
+        {"watt",
+         "horsepower",
+         {"watt", "horsepower", NULL, NULL, NULL},
+         "kilogram-square-meter-per-cubic-second"},
+
+        // Energy: joule
+        {"therm-us",
+         "kilogram-square-meter-per-square-second",
+         {"therm-us", "kilogram", "meter", "second", NULL},
+         "kilogram-square-meter-per-square-second"},
+
+        // WIP/FIXME(hugovdm): I think I found a bug in targetBaseUnit.product():
+        // Target Base: <kilogram-square-meter-per-square-second> x <one-per-meter> => <meter>
+        //
+        // // Joule-per-meter
+        // {"therm-us-per-meter",
+        //  "joule-per-meter",
+        //  {"therm-us", "joule", "meter", NULL, NULL},
+        //  "kilogram-meter-per-square-second"},
+
+        // TODO: include capacitance test case with base unit:
+        // pow4-second-square-ampere-per-kilogram-square-meter;
+    };
+    for (const auto &t : testCases) {
+        logln("---testing: source=\"%s\", target=\"%s\", expectedBaseUnit=\"%s\"", t.sourceUnit,
+              t.targetUnit, t.expectedBaseUnit);
+        IcuTestErrorCode status(*this, "testGetConversionRateInfo");
+
+        MeasureUnit baseCompoundUnit;
+        MeasureUnit sourceUnit = MeasureUnit::forIdentifier(t.sourceUnit, status);
+        MeasureUnit targetUnit = MeasureUnit::forIdentifier(t.targetUnit, status);
+        MaybeStackVector<ConversionRateInfo> conversionInfo =
+            getConversionRatesInfo(sourceUnit, targetUnit, &baseCompoundUnit, status);
+        if (status.errIfFailureAndReset("getConversionRatesInfo(<%s>, <%s>, ...)",
+                                        sourceUnit.getIdentifier(), targetUnit.getIdentifier())) {
+            continue;
+        }
+
+        assertEquals("baseCompoundUnit returned by getConversionRatesInfo", t.expectedBaseUnit,
+                     baseCompoundUnit.getIdentifier());
+        int countExpected;
+        for (countExpected = 0; countExpected < MAX_NUM_RATES; countExpected++) {
+            auto expected = t.expectedOutputs[countExpected];
+            if (expected == NULL) break;
+            // Check that this expected conversion rate is present in the results
+            bool found = false;
+            for (int i = 0; i < conversionInfo.length(); i++) {
+                auto cri = conversionInfo[i];
+                if (strcmp(expected, cri->sourceUnit.data()) == 0) {
+                    found = true;
+                    break;
+                }
+            }
+            assertTrue(UnicodeString("<") + expected + "> expected", found);
+        }
+        assertEquals("number of conversion rates", countExpected, conversionInfo.length()); // Catches unexpected extra entries.
+
+        // Convenience output for debugging
+        for (int i = 0; i < conversionInfo.length(); i++) {
+            ConversionRateInfo *cri = conversionInfo[i];
+            logln("* conversionInfo %d: source=\"%s\", baseUnit=\"%s\", factor=\"%s\", "
+                  "offset=\"%s\"",
+                  i, cri->sourceUnit.data(), cri->baseUnit.data(), cri->factor.data(),
+                  cri->offset.data());
+        }
+    }
+}
+
+#endif /* #if !UCONFIG_NO_FORMATTING */