}
}
unitQuantities{
- ampere{"electric-current"}
- ampere-per-meter{"magnetic-field-strength"}
- ampere-per-square-meter{"current-density"}
- bit{"digital"}
- candela{"luminous-intensity"}
- candela-per-square-meter{"illuminance"}
- candela-square-meter-per-square-meter{"luminous-flux"}
- cubic-meter{"volume"}
- cubic-meter-per-kilogram{"specific-volume"}
- cubic-meter-per-meter{"consumption"}
- cubic-second-square-ampere-per-kilogram-square-meter{"electric-conductance"}
- em{"typewidth"}
- item{"substance-amount"}
- item-per-cubic-meter{"concentration"}
- item-per-kilogram{"concentration-mass"}
- kelvin{"temperature"}
- kilogram{"mass"}
- kilogram-meter-per-square-second{"force"}
- kilogram-per-cubic-meter{"mass-density"}
- kilogram-per-kilogram{"mass-fraction"}
- kilogram-per-meter-square-second{"pressure"}
- kilogram-per-square-meter-square-second{"pressure-per-length"}
- kilogram-per-square-second-ampere{"magnetic-induction"}
- kilogram-square-meter-per-cubic-second{"power"}
- kilogram-square-meter-per-cubic-second-ampere{"voltage"}
- kilogram-square-meter-per-cubic-second-square-ampere{"electric-resistance"}
- kilogram-square-meter-per-square-second{"energy"}
- kilogram-square-meter-per-square-second-ampere{"magnetic-flux"}
- kilogram-square-meter-per-square-second-square-ampere{"electric-inductance"}
- meter{"length"}
- meter-per-second{"speed"}
- meter-per-square-second{"acceleration"}
- pixel{"graphics"}
- pixel-per-meter{"resolution"}
- portion{"portion"}
- pow4-second-square-ampere-per-kilogram-square-meter{"electric-capacitance"}
- revolution{"angle"}
- revolution-per-meter{"wave-number"}
- revolution-per-second{"frequency"}
- second{"duration"}
- second-ampere{"electric-charge"}
- square-meter{"area"}
- square-meter-per-square-second{"dose"}
- square-revolution{"solid-angle"}
- year{"year-duration"}
+ {
+ candela{"luminous-intensity"}
+ }
+ {
+ candela-per-square-meter{"illuminance"}
+ }
+ {
+ candela-square-meter-per-square-meter{"luminous-flux"}
+ }
+ {
+ kilogram{"mass"}
+ }
+ {
+ kilogram-per-kilogram{"mass-fraction"}
+ }
+ {
+ kilogram-per-cubic-meter{"mass-density"}
+ }
+ {
+ kilogram-per-meter-square-second{"pressure"}
+ }
+ {
+ kilogram-per-square-second-ampere{"magnetic-induction"}
+ }
+ {
+ kilogram-meter-per-square-second{"force"}
+ }
+ {
+ kilogram-square-meter-per-cubic-second{"power"}
+ }
+ {
+ kilogram-square-meter-per-cubic-second-ampere{"voltage"}
+ }
+ {
+ kilogram-square-meter-per-cubic-second-square-ampere{"electric-resistance"}
+ }
+ {
+ kilogram-square-meter-per-square-second{"energy"}
+ }
+ {
+ kilogram-square-meter-per-square-second-ampere{"magnetic-flux"}
+ }
+ {
+ kilogram-square-meter-per-square-second-square-ampere{"electric-inductance"}
+ }
+ {
+ cubic-meter{"volume"}
+ }
+ {
+ cubic-meter-per-kilogram{"specific-volume"}
+ }
+ {
+ cubic-meter-per-meter{"consumption"}
+ }
+ {
+ square-meter{"area"}
+ }
+ {
+ square-meter-per-square-second{"dose"}
+ }
+ {
+ meter{"length"}
+ }
+ {
+ meter-per-second{"speed"}
+ }
+ {
+ meter-per-square-second{"acceleration"}
+ }
+ {
+ kilogram-per-square-meter-square-second{"pressure-per-length"}
+ }
+ {
+ pow4-second-square-ampere-per-kilogram-square-meter{"electric-capacitance"}
+ }
+ {
+ cubic-second-square-ampere-per-kilogram-square-meter{"electric-conductance"}
+ }
+ {
+ second{"duration"}
+ }
+ {
+ second-ampere{"electric-charge"}
+ }
+ {
+ year{"year-duration"}
+ }
+ {
+ ampere{"electric-current"}
+ }
+ {
+ ampere-per-square-meter{"current-density"}
+ }
+ {
+ ampere-per-meter{"magnetic-field-strength"}
+ }
+ {
+ kelvin{"temperature"}
+ }
+ {
+ square-revolution{"solid-angle"}
+ }
+ {
+ revolution{"angle"}
+ }
+ {
+ revolution-per-meter{"wave-number"}
+ }
+ {
+ revolution-per-second{"frequency"}
+ }
+ {
+ item{"substance-amount"}
+ }
+ {
+ item-per-kilogram{"concentration-mass"}
+ }
+ {
+ item-per-cubic-meter{"concentration"}
+ }
+ {
+ portion{"portion"}
+ }
+ {
+ bit{"digital"}
+ }
+ {
+ pixel{"graphics"}
+ }
+ {
+ pixel-per-meter{"resolution"}
+ }
+ {
+ em{"typewidth"}
+ }
}
}
* A ResourceSink that collects simple unit identifiers from the keys of the
* convertUnits table into an array, and adds these values to a TrieBuilder,
* with associated values being their index into this array plus a specified
- * offset, to a trie.
+ * offset.
*
* Example code:
*
* UErrorCode status = U_ZERO_ERROR;
* BytesTrieBuilder b(status);
- * const char *unitIdentifiers[200];
- * SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset);
+ * int32_t ARR_SIZE = 200;
+ * const char *unitIdentifiers[ARR_SIZE];
+ * int32_t unitCategories[ARR_SIZE];
+ * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
+ * unitCategories, ARR_SIZE, b, kTrieValueOffset);
* LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
* ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
*/
public:
/**
* Constructor.
- * @param out Array of char* to which the simple unit identifiers will be
- * saved.
- * @param outSize The size of `out`.
+ * @param quantitiesTrieData The data for constructing a quantitiesTrie,
+ * which maps from a simple unit identifier to an index into the
+ * gCategories array.
+ * @param out Array of char* to which pointers to the simple unit
+ * identifiers will be saved. (Does not take ownership.)
+ * @param outCategories Array of int32_t to which category indexes will be
+ * saved: this corresponds to simple unit IDs saved to `out`, mapping
+ * from the ID to the value produced by the quantitiesTrie (which is an
+ * index into the gCategories array).
+ * @param outSize The size of `out` and `outCategories`.
* @param trieBuilder The trie builder to which the simple unit identifier
* should be added. The trie builder must outlive this resource sink.
* @param trieValueOffset This is added to the index of the identifier in
* the `out` array, before adding to `trieBuilder` as the value
* associated with the identifier.
*/
- explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder,
- int32_t trieValueOffset)
- : outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset),
- outIndex(0) {
- }
+    // Stores the output arrays, their shared size, the trie builder and the
+    // serialized quantities trie data; nothing is copied or owned here.
+    // Initializers are listed in member declaration order.
+    explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
+                                       int32_t *outCategories, int32_t outSize,
+                                       BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
+        : outArray(out),
+          outCategories(outCategories),
+          outSize(outSize),
+          trieBuilder(trieBuilder),
+          trieValueOffset(trieValueOffset),
+          quantitiesTrieData(quantitiesTrieData),
+          outIndex(0) {
+    }
/**
* Adds the table keys found in value to the output vector.
return;
}
+ BytesTrie quantitiesTrie(quantitiesTrieData.data());
+
// Collect keys from the table resource.
- const char *key;
- for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) {
+ const char *simpleUnitID;
+ for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
U_ASSERT(i < table.getSize());
U_ASSERT(outIndex < outSize);
- if (uprv_strcmp(key, "kilogram") == 0) {
+ if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
// For parsing, we use "gram", the prefixless metric mass unit. We
// thus ignore the SI Base Unit of Mass: it exists due to being the
// mass conversion target unit, but not needed for MeasureUnit
// parsing.
continue;
}
- outArray[outIndex] = key;
- trieBuilder.add(key, trieValueOffset + outIndex, status);
+ outArray[outIndex] = simpleUnitID;
+ trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
+
+ // Find the base target unit for this simple unit
+ ResourceTable table = value.getTable(status);
+ if (U_FAILURE(status)) { return; }
+ if (!table.findValue("target", value)) {
+ status = U_INVALID_FORMAT_ERROR;
+ break;
+ }
+ int32_t len;
+ const UChar* uTarget = value.getString(len, status);
+ CharString target;
+ target.appendInvariantChars(uTarget, len, status);
+ if (U_FAILURE(status)) { return; }
+ quantitiesTrie.reset();
+ UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
+ if (!USTRINGTRIE_HAS_VALUE(result)) {
+ status = U_INVALID_FORMAT_ERROR;
+ break;
+ }
+ outCategories[outIndex] = quantitiesTrie.getValue();
+
outIndex++;
}
}
private:
const char **outArray;
+ int32_t *outCategories;
int32_t outSize;
BytesTrieBuilder &trieBuilder;
int32_t trieValueOffset;
+ StringPiece quantitiesTrieData;
+
+ int32_t outIndex;
+};
+
+/**
+ * A ResourceSink that collects information from `unitQuantities` in the `units`
+ * resource to provide key->value lookups from base unit to category, as well as
+ * preserving ordering information for these categories. See `units.txt`.
+ *
+ * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
+ *
+ * In C++ unitQuantity values are collected in order into a UChar* array, while
+ * unitQuantity keys are added to a TrieBuilder, with associated values
+ * being the index into the aforementioned UChar* array.
+ */
+class CategoriesSink : public icu::ResourceSink {
+  public:
+    /**
+     * Constructor.
+     * @param out Array of UChar* to which unitQuantity values will be saved.
+     *     The pointers stored there are not owned: they point directly at the
+     *     resource strings in static memory.
+     * @param outSize The size of the `out` array.
+     * @param trieBuilder The trie builder to which the keys (base units) of
+     *     each unitQuantity will be added, each with value being the offset
+     *     into `out`.
+     */
+    explicit CategoriesSink(const UChar **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
+        : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
+
+    /**
+     * Consumes an array resource whose elements are single-entry tables
+     * (base unit -> category). For each element, in order, the category
+     * string is stored in the output array and the base unit key is added to
+     * the trie builder with the array index as its value.
+     *
+     * On return with a failing `status`, the output array and the trie
+     * builder may have been partially populated.
+     *
+     * @param value The array resource to read.
+     * @param status Set to U_INDEX_OUTOFBOUNDS_ERROR if the array does not
+     *     fit into the remaining output space, to U_INVALID_FORMAT_ERROR if
+     *     an element is not a single-entry table, or to whatever error the
+     *     resource accessors or the trie builder report.
+     */
+    void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
+        ResourceArray array = value.getArray(status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        if (outIndex + array.getSize() > outSize) {
+            status = U_INDEX_OUTOFBOUNDS_ERROR;
+            return;
+        }
+
+        for (int32_t i = 0; array.getValue(i, value); ++i) {
+            U_ASSERT(outIndex < outSize);
+            ResourceTable table = value.getTable(status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            if (table.getSize() != 1) {
+                status = U_INVALID_FORMAT_ERROR;
+                return;
+            }
+            const char *key;
+            if (!table.getKeyAndValue(0, key, value)) {
+                // Would leave `key` uninitialized; treat as malformed data.
+                status = U_INVALID_FORMAT_ERROR;
+                return;
+            }
+            int32_t uTmpLen;
+            const UChar *category = value.getString(uTmpLen, status);
+            if (U_FAILURE(status)) {
+                // Don't record an entry whose category string is unavailable.
+                return;
+            }
+            outQuantitiesArray[outIndex] = category;
+            trieBuilder.add(key, outIndex, status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            outIndex++;
+        }
+    }
+
+  private:
+    const UChar **outQuantitiesArray;
+    int32_t &outSize;
+    BytesTrieBuilder &trieBuilder;
+
    int32_t outIndex;
};
// by SingleUnitImpl::getSimpleUnitID().)
const char **gSimpleUnits = nullptr;
+// Maps from the value associated with each simple unit ID to an index into the
+// gCategories array.
+int32_t *gSimpleUnitCategories = nullptr;
+
char *gSerializedUnitExtrasStemTrie = nullptr;
+// Array of UChar* pointing at the unit categories (aka "quantities", aka
+// "types"), as found in the `unitQuantities` resource. The array memory itself
+// is owned by this pointer, but the individual UChar* in that array point at
+// static memory.
+const UChar **gCategories = nullptr;
+// Number of items in `gCategories`.
+int32_t gCategoriesCount = 0;
+// TODO: rather save an index into gCategories?
+// Category name that getUnitQuantity() returns for "meter-per-cubic-meter".
+// Declared as a const array so that it cannot be reassigned and its length is
+// known at compile time.
+const char kConsumption[] = "consumption";
+// Length of kConsumption, excluding the terminating NUL.
+const size_t kConsumptionLen = sizeof(kConsumption) - 1;
+// Serialized BytesTrie for mapping from base units to indices into gCategories.
+char *gSerializedUnitCategoriesTrie = nullptr;
+
UBool U_CALLCONV cleanupUnitExtras() {
+ uprv_free(gSerializedUnitCategoriesTrie);
+ gSerializedUnitCategoriesTrie = nullptr;
+ uprv_free(gCategories);
+ gCategories = nullptr;
uprv_free(gSerializedUnitExtrasStemTrie);
gSerializedUnitExtrasStemTrie = nullptr;
+ uprv_free(gSimpleUnitCategories);
+ gSimpleUnitCategories = nullptr;
uprv_free(gSimpleUnits);
gSimpleUnits = nullptr;
gUnitExtrasInitOnce.reset();
void U_CALLCONV initUnitExtras(UErrorCode& status) {
ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
+ LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
+
+ // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.
+ const char *CATEGORY_TABLE_NAME = "unitQuantities";
+ LocalUResourceBundlePointer unitQuantities(
+ ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));
+ if (U_FAILURE(status)) { return; }
+ gCategoriesCount = unitQuantities.getAlias()->fSize;
+ size_t quantitiesMallocSize = sizeof(UChar *) * gCategoriesCount;
+ gCategories = static_cast<const UChar **>(uprv_malloc(quantitiesMallocSize));
+ if (gCategories == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memset(gCategories, 0, quantitiesMallocSize);
+ BytesTrieBuilder quantitiesBuilder(status);
+ CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
+ ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);
+ StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
+ if (U_FAILURE(status)) { return; }
+ // Copy the result into the global constant pointer
+ size_t numBytesQuantities = resultQuantities.length();
+ gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));
+ if (gSerializedUnitCategoriesTrie == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);
+
+ // Build the BytesTrie that Parser needs for parsing unit identifiers.
BytesTrieBuilder b(status);
if (U_FAILURE(status)) { return; }
// Add sanctioned simple units by offset: simple units all have entries in
// units/convertUnits resources.
- // TODO(ICU-21059): confirm whether this is clean enough, or whether we need to
- // filter units' validity list instead.
- LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
LocalUResourceBundlePointer convertUnits(
- ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status));
+ ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
if (U_FAILURE(status)) { return; }
// Allocate enough space: with identifierSink below skipping kilogram, we're
return;
}
uprv_memset(gSimpleUnits, 0, arrayMallocSize);
+ arrayMallocSize = sizeof(int32_t) * simpleUnitsCount;
+ gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize));
+ if (gSimpleUnitCategories == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize);
// Populate gSimpleUnits and build the associated trie.
- SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset);
+ SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,
+ simpleUnitsCount, b, kSimpleUnitOffset);
ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
// Build the CharsTrie
return (*realLeft)->compareTo(**realRight);
}
+// Looks up the given base unit identifier in the serialized unit categories
+// trie and returns the value stored for it: an index into the gCategories
+// array, identifying the "unitQuantity" (aka "type" or "category").
+//
+// Returns -1 on failure. If the identifier has no value in the trie, status is
+// set to U_UNSUPPORTED_ERROR; if status is already a failure after the
+// init-once call, it is left as is.
+int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) {
+    umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
+    if (U_SUCCESS(status)) {
+        BytesTrie categoriesTrie(gSerializedUnitCategoriesTrie);
+        const UStringTrieResult match =
+            categoriesTrie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
+        if (USTRINGTRIE_HAS_VALUE(match)) {
+            return categoriesTrie.getValue();
+        }
+        status = U_UNSUPPORTED_ERROR;
+    }
+    return -1;
+}
+
} // namespace
U_CAPI int32_t U_EXPORT2
return 10;
}
+// Returns the category ("unitQuantity") of the given base unit identifier.
+//
+// The returned CharString is empty whenever the lookup fails. An identifier
+// without a category entry yields U_INVALID_FORMAT_ERROR, except for
+// "meter-per-cubic-meter", which is mapped to "consumption". If `status` is
+// already a failure on entry, nothing is looked up.
+CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) {
+    CharString result;
+    U_ASSERT(result.length() == 0);
+    if (U_FAILURE(status)) {
+        return result;
+    }
+    // Use a local status so that a failed lookup can still be rescued by the
+    // special case below without having touched the caller's status.
+    UErrorCode localStatus = U_ZERO_ERROR;
+    int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus);
+    if (U_FAILURE(localStatus)) {
+        // TODO(icu-units#130): support inverting any unit, with correct
+        // fallback logic: inversion and fallback may depend on presence or
+        // absence of a usage for that category.
+        //
+        // Compare as StringPieces (pointer + length): a StringPiece's data()
+        // is not guaranteed to be NUL-terminated, so a strcmp() on it could
+        // read past the end of the identifier.
+        if (baseUnitIdentifier == StringPiece("meter-per-cubic-meter")) {
+            result.append(kConsumption, (int32_t)kConsumptionLen, status);
+            return result;
+        }
+        status = U_INVALID_FORMAT_ERROR;
+        return result;
+    }
+    // Defensive range check before indexing into gCategories.
+    if (idx < 0 || idx >= gCategoriesCount) {
+        status = U_INVALID_FORMAT_ERROR;
+        return result;
+    }
+    result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
+    return result;
+}
+
// In ICU4J, this is MeasureUnit.getSingleUnitImpl().
SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
MeasureUnitImpl temp;
result.append(StringPiece(this->getSimpleUnitID()), status);
}
+// Returns this unit's entry in gSimpleUnitCategories, or -1 if this unit has
+// no simple unit index.
+int32_t SingleUnitImpl::getUnitCategoryIndex() const {
+    // index is -1 for the dimensionless unit: it has no entry in
+    // gSimpleUnitCategories, so don't read before the start of the array.
+    if (index < 0) {
+        return -1;
+    }
+    return gSimpleUnitCategories[index];
+}
+
MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) {
*this = other.copy(status);
}
: index(index), unitImpl(unitImpl) {}
};
+/**
+ * Looks up the "unitQuantity" (aka "type" or "category") of a base unit
+ * identifier. The category is returned as a CharString, which is left empty
+ * when the lookup fails.
+ *
+ * This only supports base units: other units must be resolved to base units
+ * before passing to this function, otherwise U_INVALID_FORMAT_ERROR status
+ * will be returned.
+ *
+ * Categories are found in `unitQuantities` in the `units` resource (see
+ * `units.txt`).
+ */
+CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status);
+
/**
* A struct representing a single unit (optional SI or binary prefix, and dimensionality).
*/
*/
void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
+ /**
+ * Returns the index of this unit's "quantity" in unitQuantities (in
+ * measunit_extra.cpp). The value of this index determines sort order for
+ * normalization of unit identifiers.
+ */
+ int32_t getUnitCategoryIndex() const;
+
/**
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.
*
+ * Sort order of units is specified by UTS #35
+ * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
+ *
* Takes the sign of dimensionality into account, but not the absolute
* value: per-meter is not considered the same as meter, but meter is
* considered the same as square-meter.
if (dimensionality > 0 && other.dimensionality < 0) {
return -1;
}
+ // Sort by official quantity order
+ int32_t thisQuantity = this->getUnitCategoryIndex();
+ int32_t otherQuantity = other.getUnitCategoryIndex();
+ if (thisQuantity < otherQuantity) {
+ return -1;
+ }
+ if (thisQuantity > otherQuantity) {
+ return 1;
+ }
+ // If quantity order didn't help, then we go by index.
if (index < other.index) {
return -1;
}
/**
* Simple unit index, unique for every simple unit, -1 for the dimensionless
- * unit. This is an index into a string list in measunit_extra.cpp.
+ * unit. This is an index into a string list in measunit_extra.cpp, as
+ * loaded by SimpleUnitIdentifiersSink.
*
* The default value is -1, meaning the dimensionless unit:
* isDimensionless() will return true, until index is changed.
return cmp;
}
-CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status) {
- CharString result;
- LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
- LocalUResourceBundlePointer unitQuantities(
- ures_getByKey(unitsBundle.getAlias(), "unitQuantities", NULL, &status));
- int32_t categoryLength;
- if (U_FAILURE(status)) { return result; }
- const UChar *uCategory =
- ures_getStringByKey(unitQuantities.getAlias(), baseUnitIdentifier, &categoryLength, &status);
- if (U_FAILURE(status)) {
- // TODO(icu-units#130): support inverting any unit, with correct
- // fallback logic: inversion and fallback may depend on presence or
- // absence of a usage for that category.
- if (uprv_strcmp(baseUnitIdentifier, "meter-per-cubic-meter") == 0) {
- status = U_ZERO_ERROR;
- result.append("consumption", status);
- return result;
- }
- }
- result.appendInvariantChars(uCategory, categoryLength, status);
- return result;
-}
-
// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
U_NAMESPACE_BEGIN
namespace units {
-/**
- * Looks up the unit category of a base unit identifier.
- *
- * Only supports base units, other units must be resolved to base units before
- * passing to this function.
- *
- * Categories are found in `unitQuantities` in the `units` resource (see
- * `units.txt`).
- *
- * TODO(hugovdm): if we give units_data.cpp access to the functionality of
- * `extractCompoundBaseUnit` which is currently in units_converter.cpp, we could
- * support all units for which there is a category. Does it make sense to move
- * that function to units_data.cpp?
- */
-CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status);
-
/**
* Encapsulates "convertUnits" information from units resources, specifying how
* to convert from one unit to another.
MeasureUnitImpl inputUnitImpl = MeasureUnitImpl::forMeasureUnitMaybeCopy(inputUnit, status);
MeasureUnit baseUnit =
(extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)).build(status);
- CharString category = getUnitCategory(baseUnit.getIdentifier(), status);
+ CharString category = getUnitQuantity(baseUnit.getIdentifier(), status);
+ if (U_FAILURE(status)) {
+ return;
+ }
const UnitPreference *const *unitPreferences;
int32_t preferencesCount = 0;
- prefs.getPreferencesFor(category.data(), usage, region, unitPreferences, preferencesCount, status);
+ prefs.getPreferencesFor(category.toStringPiece(), usage, region, unitPreferences, preferencesCount,
+ status);
for (int i = 0; i < preferencesCount; ++i) {
+ U_ASSERT(unitPreferences[i] != nullptr);
const auto &preference = *unitPreferences[i];
MeasureUnitImpl complexTargetUnitImpl =
{"kilometer-per-second-per-megaparsec", "kilometer-per-megaparsec-second"},
// TODO(ICU-21284): Add more test cases once the proper ranking is available.
- // TODO(ICU-21284,icu-units#70): These cases are the wrong way around:
- {"pound-force-foot", "foot-pound-force"},
- {"foot-pound-force", "foot-pound-force"},
- {"kilowatt-hour", "hour-kilowatt"},
- {"hour-kilowatt", "hour-kilowatt"},
- {"newton-meter", "meter-newton"},
- {"meter-newton", "meter-newton"},
+ {"newton-meter", "newton-meter"},
+ {"meter-newton", "newton-meter"},
+ {"pound-force-foot", "pound-force-foot"},
+ {"foot-pound-force", "pound-force-foot"},
+ {"kilowatt-hour", "kilowatt-hour"},
+ {"hour-kilowatt", "kilowatt-hour"},
// Testing prefixes are parsed and produced correctly (ensures no
// collisions in the enum values)
#if !UCONFIG_NO_FORMATTING
+#include "measunit_impl.h"
#include "units_data.h"
+
#include "intltest.h"
using namespace ::icu::units;
IcuTestErrorCode status(*this, "testGetUnitCategory");
for (const auto &t : testCases) {
- CharString category = getUnitCategory(t.unit, status);
- status.errIfFailureAndReset("getUnitCategory(%s)", t.unit);
- assertEquals("category", t.expectedCategory, category.data());
+ CharString category = getUnitQuantity(t.unit, status);
+ if (!status.errIfFailureAndReset("getUnitCategory(%s)", t.unit)) {
+ assertEquals("category", t.expectedCategory, category.data());
+ }
}
}
import com.ibm.icu.util.MeasureUnit;
+// TODO: revisit documentation in this file. E.g. we don't do dimensionless
+// units in Java? We use null instead.
+
/**
* A class representing a single unit (optional SI or binary prefix, and dimensionality).
*/
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.
* <p>
+ * Sort order of units is specified by UTS #35
+ * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
+ * <p>
* Takes the sign of dimensionality into account, but not the absolute
* value: per-meter is not considered the same as meter, but meter is
* considered the same as square-meter.
if (dimensionality > 0 && other.dimensionality < 0) {
return -1;
}
+ // Sort by official quantity order
+ int thisCategoryIndex = UnitsData.getCategoryIndexOfSimpleUnit(index);
+ int otherCategoryIndex = UnitsData.getCategoryIndexOfSimpleUnit(other.index);
+ if (thisCategoryIndex < otherCategoryIndex) {
+ return -1;
+ }
+ if (thisCategoryIndex > otherCategoryIndex) {
+ return 1;
+ }
+ // If quantity order didn't help, then we go by index.
if (index < other.index) {
return -1;
}
this.unitPrefix = unitPrefix;
}
+ // TODO: unused? Delete?
public int getIndex() {
return index;
}
// © 2020 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
-
package com.ibm.icu.impl.units;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
* Responsible for all units data operations (retriever, analysis, extraction certain data ... etc.).
*/
public class UnitsData {
- private volatile static String[] simpleUnits = null;
+ // TODO(icu-units#122): this class can use static initialization to load the
+ // data once, and provide access to it via static methods. (Partial change
+ // has been done already.)
+
+ // Array of simple unit IDs.
+ private static String[] simpleUnits = null;
+
+ // Maps from the value associated with each simple unit ID to a category
+ // index number.
+ private static int[] simpleUnitCategories = null;
+
private ConversionRates conversionRates;
private UnitPreferences unitPreferences;
- /**
- * Pairs of categories and the corresponding base units.
- */
- private Categories categories;
+
public UnitsData() {
this.conversionRates = new ConversionRates();
this.unitPreferences = new UnitPreferences();
- this.categories = new Categories();
}
public static String[] getSimpleUnits() {
- if (simpleUnits != null) {
- return simpleUnits;
- }
+ return simpleUnits;
+ }
+ static {
// Read simple units
ICUResourceBundle resource;
resource = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, "units");
SimpleUnitIdentifiersSink sink = new SimpleUnitIdentifiersSink();
resource.getAllItemsWithFallback("convertUnits", sink);
simpleUnits = sink.simpleUnits;
-
- return simpleUnits;
+ simpleUnitCategories = sink.simpleUnitCategories;
}
public ConversionRates getConversionRates() {
return unitPreferences;
}
+    /**
+     * Returns the category index recorded for a simple unit.
+     *
+     * @param simpleUnitIndex index into the simpleUnitCategories array
+     * @return the value stored in simpleUnitCategories at that index
+     */
+    public static int getCategoryIndexOfSimpleUnit(int simpleUnitIndex) {
+        final int[] categoryIndexes = simpleUnitCategories;
+        return categoryIndexes[simpleUnitIndex];
+    }
+
/**
* @param measureUnit An instance of MeasureUnitImpl.
* @return the corresponding category.
return "consumption";
}
- return this.categories.mapFromUnitToCategory.get(baseUnitIdentifier);
+ int index = Categories.baseUnitToIndex.get(baseUnitIdentifier);
+ return Categories.indexToCategory[index];
}
public UnitPreferences.UnitPreference[] getPreferencesFor(String category, String usage, String region) {
public static class SimpleUnitIdentifiersSink extends UResource.Sink {
    String[] simpleUnits = null;
+    // Parallel to simpleUnits: for each simple unit, the index that
+    // Categories.baseUnitToIndex holds for that unit's "target" base unit.
+    int[] simpleUnitCategories = null;
    @Override
    public void put(UResource.Key key, UResource.Value value, boolean noFallback) {
        UResource.Table simpleUnitsTable = value.getTable();
        ArrayList<String> simpleUnits = new ArrayList<>();
+        ArrayList<Integer> simpleUnitCategories = new ArrayList<>();
        for (int i = 0; simpleUnitsTable.getKeyAndValue(i, key, value); i++) {
            if (key.toString().equals("kilogram")) {
                continue;
            }
+            // Find the base target unit for this simple unit
+            UResource.Table table = value.getTable();
+            if (!table.findValue("target", value)) {
+                // TODO: is there a more idiomatic way to deal with Resource
+                // Sink data errors in ICU4J? For now we just assert-fail,
+                // and otherwise skip bad data:
+                assert false : "Could not find \"target\" for simple unit: " + key;
+                continue;
+            }
+            String target = value.getString();
+            // The map lookup returns null for a target without a category.
+            // Apply the same policy as for a missing "target", so that no
+            // null reaches the unboxing loop below and the two lists stay
+            // the same length.
+            Integer categoryIndex = Categories.baseUnitToIndex.get(target);
+            if (categoryIndex == null) {
+                assert false : "Could not find category for target \"" + target
+                        + "\" of simple unit: " + key;
+                continue;
+            }
+
            simpleUnits.add(key.toString());
+            simpleUnitCategories.add(categoryIndex);
        }
        this.simpleUnits = simpleUnits.toArray(new String[0]);
+        this.simpleUnitCategories = new int[simpleUnitCategories.size()];
+        Iterator<Integer> iter = simpleUnitCategories.iterator();
+        for (int i = 0; i < this.simpleUnitCategories.length; i++)
+        {
+            this.simpleUnitCategories[i] = iter.next().intValue();
+        }
    }
}
public static final String DEFAULT_USAGE = "default";
}
+ // Deals with base units and categories, e.g. "meter-per-second" --> "speed".
public static class Categories {
-
/**
- * Contains the map between units in their base units into their category.
- * For example: meter-per-second --> "speed"
+ * Maps from base unit to an index value: an index into the
+ * indexToCategory array.
*/
- HashMap<String, String> mapFromUnitToCategory;
+ static HashMap<String, Integer> baseUnitToIndex;
+ /**
+ * Our official array of category strings - categories are identified by
+ * indices into this array.
+ */
+ static String[] indexToCategory;
- public Categories() {
+ static {
// Read unit Categories
ICUResourceBundle resource;
resource = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, "units");
CategoriesSink sink = new CategoriesSink();
resource.getAllItemsWithFallback(Constants.CATEGORY_TABLE_NAME, sink);
- this.mapFromUnitToCategory = sink.getMapFromUnitToCategory();
+ baseUnitToIndex = sink.mapFromUnitToIndex;
+ indexToCategory = sink.categories.toArray(new String[0]);
}
}
+ /**
+ * A Resource Sink that collects information from `unitQuantities` in the
+ * `units` resource to provide key->value lookups from base unit to
+ * category, as well as preserving ordering information for these
+ * categories. See `units.txt`.
+ *
+ * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
+ *
+ * In Java unitQuantity values are collected in order into an ArrayList,
+ * while unitQuantity key-to-index lookups are handled with a HashMap.
+ */
public static class CategoriesSink extends UResource.Sink {
/**
* Contains the map between units in their base units into their category.
* For example: meter-per-second --> "speed"
*/
- HashMap<String, String> mapFromUnitToCategory;
+ HashMap<String, Integer> mapFromUnitToIndex;
+ ArrayList<String> categories;
public CategoriesSink() {
- mapFromUnitToCategory = new HashMap<>();
+ mapFromUnitToIndex = new HashMap<>();
+ categories = new ArrayList<>();
}
@Override
public void put(UResource.Key key, UResource.Value value, boolean noFallback) {
assert (key.toString().equals(Constants.CATEGORY_TABLE_NAME));
- assert (value.getType() == UResourceBundle.TABLE);
-
- UResource.Table categoryTable = value.getTable();
- for (int i = 0; categoryTable.getKeyAndValue(i, key, value); i++) {
- assert (value.getType() == UResourceBundle.STRING);
- mapFromUnitToCategory.put(key.toString(), value.toString());
+ assert (value.getType() == UResourceBundle.ARRAY);
+
+ UResource.Array categoryArray = value.getArray();
+ for (int i=0; categoryArray.getValue(i, value); i++) {
+ assert (value.getType() == UResourceBundle.TABLE);
+ UResource.Table table = value.getTable();
+ assert (table.getSize() == 1)
+ : "expecting single-entry table, got size: " + table.getSize();
+ table.getKeyAndValue(0, key, value);
+ assert value.getType() == UResourceBundle.STRING : "expecting category string";
+ mapFromUnitToIndex.put(key.toString(), categories.size());
+ categories.add(value.toString());
}
}
-
- public HashMap<String, String> getMapFromUnitToCategory() {
- return mapFromUnitToCategory;
- }
}
}
version https://git-lfs.github.com/spec/v1
-oid sha256:e738e530bcd2dcafff1de1d603c79d5a1edc04c095ca52366259c354f19e56ed
-size 13306751
+oid sha256:f4a144335f9c6c6a6df5a95d882d8841de82be4e86db650c643c67ac84ef8f84
+size 13306908
version https://git-lfs.github.com/spec/v1
-oid sha256:19f02ee2a2dc722a729fa9258175a738fc6021d252769b85c023a927135c7c26
+oid sha256:09736746668a9d57494331b4533ae8ba1e38f55f433f5ecd9026e1c57735a413
size 95080
version https://git-lfs.github.com/spec/v1
-oid sha256:1970fbcc18ec8a8b86702fe73ffbba842e9379bd973edbfb4e189ac6ac6d2a83
+oid sha256:056761b1169f3ba2b2c63e3f71c8bce2e61a7a80d7e21bcd9c38e98fbd3414a0
size 723496
new TestCase("kilometer-per-second-per-megaparsec", "kilometer-per-megaparsec-second"),
// TODO(ICU-21284): Add more test cases once the proper ranking is available.
- // TODO(ICU-21284,icu-units#70): These cases are the wrong way around:
- new TestCase("pound-force-foot", "foot-pound-force"),
- new TestCase("foot-pound-force", "foot-pound-force"),
- new TestCase("kilowatt-hour", "hour-kilowatt"),
- new TestCase("hour-kilowatt", "hour-kilowatt"),
- new TestCase("newton-meter", "meter-newton"),
- new TestCase("meter-newton", "meter-newton"),
+ new TestCase("newton-meter", "newton-meter"),
+ new TestCase("meter-newton", "newton-meter"),
+ new TestCase("pound-force-foot", "pound-force-foot"),
+ new TestCase("foot-pound-force", "pound-force-foot"),
+ new TestCase("kilowatt-hour", "kilowatt-hour"),
+ new TestCase("hour-kilowatt", "kilowatt-hour"),
// Testing prefixes are parsed and produced correctly (ensures no
// collisions in the enum values)
; /unitConstants/$1 ; values="$2"
//supplementalData/unitQuantities/unitQuantity[@baseUnit="(%W)"][@quantity="(%W)"](?:[@status="%W"])?
- ; /unitQuantities/$1 ; values="$2"
+ ; /unitQuantities/<FIFO>/$1 ; values="$2"
//supplementalData/convertUnits/convertUnit[@source="(%W)"][@baseUnit="(%W)"](?:[@systems="%W"])?
; /convertUnits/$1/target ; values=$2