From cb3e32625e740f7ea4fe59ada02186ec01ab1526 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Wed, 29 Apr 2020 23:30:04 +0200 Subject: [PATCH] UnitPreferences class in unitsdata.cpp --- icu4c/source/i18n/unitsdata.cpp | 246 +++++++++++++++++++ icu4c/source/i18n/unitsdata.h | 53 ++++ icu4c/source/test/intltest/unitsdatatest.cpp | 77 ++++++ 3 files changed, 376 insertions(+) diff --git a/icu4c/source/i18n/unitsdata.cpp b/icu4c/source/i18n/unitsdata.cpp index c92e151604a..4cc85d131ec 100644 --- a/icu4c/source/i18n/unitsdata.cpp +++ b/icu4c/source/i18n/unitsdata.cpp @@ -6,6 +6,7 @@ #if !UCONFIG_NO_FORMATTING #include "cstring.h" +#include "number_decimalquantity.h" #include "resource.h" #include "unitsdata.h" #include "uresimp.h" @@ -15,6 +16,8 @@ U_NAMESPACE_BEGIN namespace { +using number::impl::DecimalQuantity; + /** * A ResourceSink that collects conversion rate information. * @@ -94,8 +97,213 @@ class ConversionRateDataSink : public ResourceSink { MaybeStackVector *outVector; }; +/** + * A ResourceSink that collects unit preferences information. + * + * This class is for use by ures_getAllItemsWithFallback. + */ +class UnitPreferencesSink : public ResourceSink { + public: + /** + * Constructor. + * @param outPrefs The vector to which UnitPreference instances are to be + * added. This vector must outlive the use of the ResourceSink. + * @param outMetadata The vector to which UnitPreferenceMetadata instances + * are to be added. This vector must outlive the use of the ResourceSink. + */ + explicit UnitPreferencesSink(MaybeStackVector *outPrefs, + MaybeStackVector *outMetadata) + : preferences(outPrefs), metadata(outMetadata) {} + + // WIP/FIXME: document me! 
+ void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { + if (U_FAILURE(status)) { return; } + if (uprv_strcmp(key, "unitPreferenceData") != 0) { + // This is very strict, however it is the cheapest way to be sure + // that with `value`, we're looking at the unitPreferenceData table. + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + ResourceTable unitPreferenceDataTable = value.getTable(status); + const char *category; + for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) { + ResourceTable categoryTable = value.getTable(status); + const char *usage; + for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) { + ResourceTable regionTable = value.getTable(status); + const char *region; + for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) { + // Asserting that unit preferences are sorted (and there are + // no duplicates: region check is "strictly ascending", when + // category and usage match): + U_ASSERT( + metadata->length() == 0 || + 0 < uprv_strcmp(category, + (*metadata)[metadata->length() - 1]->category.data()) || + (0 == uprv_strcmp(category, + (*metadata)[metadata->length() - 1]->category.data()) && + (0 < uprv_strcmp(usage, (*metadata)[metadata->length() - 1]->usage.data()) || + (0 == uprv_strcmp(usage, (*metadata)[metadata->length() - 1]->usage.data()) && + 0 < uprv_strcmp(region, + (*metadata)[metadata->length() - 1]->region.data()))))); + ResourceArray unitPrefs = value.getArray(status); + if (U_FAILURE(status)) { return; } + int32_t prefLen = unitPrefs.getSize(); + UnitPreferenceMetadata *meta = metadata->emplaceBack(); + if (!meta) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + meta->category.append(category, status); + meta->usage.append(usage, status); + meta->region.append(region, status); + meta->prefsOffset = preferences->length(); + meta->prefsCount = prefLen; + for (int32_t i = 0; unitPrefs.getValue(i, value); i++) { + UnitPreference 
*up = preferences->emplaceBack(); + if (!up) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ResourceTable unitPref = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "unit") == 0) { + int32_t length; + const UChar *u = value.getString(length, status); + up->unit.appendInvariantChars(u, length, status); + } else if (uprv_strcmp(key, "geq") == 0) { + int32_t length; + const UChar *g = value.getString(length, status); + CharString geq; + geq.appendInvariantChars(g, length, status); + DecimalQuantity dq; + dq.setToDecNumber(geq.data(), status); + up->geq = dq.toDouble(); + } else if (uprv_strcmp(key, "skeleton") == 0) { + int32_t length; + const UChar *s = value.getString(length, status); + up->skeleton.appendInvariantChars(s, length, status); + } + } + } + } + } + } + } + + private: + MaybeStackVector *preferences; + MaybeStackVector *metadata; +}; + +/** + * Finds the UnitPreferenceMetadata instance that matches the given category, + * usage and region: if missing, region falls back to "001", and usage falls + * back to "default". + * + * This is implemented as a binary search, with fallback restarting the search + * from the search range at which the parent in the category/usage/region + * hierarchy was found. + * + * @param metadata The full list of UnitPreferenceMetadata instances. + * @param category The category to search for. If category is not known, it can + * be resolved from the baseunit of the input (for supported unit categories). + * TODO(hugovdm): implement the unit->category lookup (via "unitQuantities" in + * the units resource bundle). + * @param usage The usage for which formatting preferences is needed. If the + * given usage is not known, this function automatically falls back to "default" + * usage. + * @param region The region for which preferences are needed. 
If there are no + * region-specific preferences, this function automatically falls back to the + * "001" region (global). + * @param status The standard ICU error code output parameter. If an invalid + * category is given, status will be U_ILLEGAL_ARGUMENT_ERROR. If fallback to + * "default" or "001" didn't resolve, status will be U_MISSING_RESOURCE_ERROR. + * @return The index into the metadata vector which represents the appropriate + * preferences. If appropriate preferences are not found, -1 is returned. + */ +int32_t getPreferenceIndex(const MaybeStackVector *metadata, + const char *category, const char *usage, const char *region, + UErrorCode &status) { + if (U_FAILURE(status)) { return -1; } + // SearchStage stage, const char *category, const char *usage, const char *region) { + int32_t start = 0; + int32_t end = metadata->length(); + bool foundCategory = false; + bool foundUsage = false; + int32_t checkpointStart = start; + int32_t checkpointEnd = end; + while (start < end) { + int32_t mid = (start + end) / 2; + int32_t cmp; + cmp = uprv_strcmp((*metadata)[mid]->category.data(), category); + if (cmp == 0) { + if (!foundCategory) { + foundCategory = true; + checkpointStart = start; + checkpointEnd = end; + } + cmp = uprv_strcmp((*metadata)[mid]->usage.data(), usage); + if (cmp == 0) { + if (!foundUsage) { + foundUsage = true; + checkpointStart = start; + checkpointEnd = end; + } + cmp = uprv_strcmp((*metadata)[mid]->region.data(), region); + if (cmp == 0) { + // We found a full match. + return mid; + } + } + } + if (cmp < 0) { + start = mid + 1; + } else { + U_ASSERT(cmp > 0); + end = mid; + } + if (start >= end) { + if (!foundCategory) { + // We don't do fallback categories - an invalid category was + // requested. + status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } else if (!foundUsage) { + if (uprv_strcmp(usage, "default") != 0) { + // Try "default" usage. Every category should have at least + // this usage. 
+ usage = "default"; + start = checkpointStart; + end = checkpointEnd; + } else { + // Usage did not match. This is actually a data problem. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + } else { // We didn't find region (else we'd have returned already): + if (uprv_strcmp(region, "001") != 0) { + // Try region "001" - every usage should have at least this + // region. + region = "001"; + start = checkpointStart; + end = checkpointEnd; + } else { + // Region did not match. This is actually a data problem. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + } + } + } + // We should never get here. (FYI: unit test code coverage analysis.) + UPRV_UNREACHABLE; +} + } // namespace +// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? void U_I18N_API getAllConversionRates(MaybeStackVector &result, UErrorCode &status) { LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); ConversionRateDataSink sink(&result); @@ -112,6 +320,44 @@ const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece sou return nullptr; } +U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); + UnitPreferencesSink sink(&unitPrefs_, &metadata_); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status); +} + +void U_I18N_API +UnitPreferences::getPreferencesFor(const char *category, const char *usage, const char *region, + MaybeStackVector *outPreferences, + UErrorCode &status) { + // UnitPreferenceMetadata *m = getMetadata(category, usage, region); + int32_t idx = getPreferenceIndex(&metadata_, category, usage, region, status); + if (U_FAILURE(status)) { return; } + if (idx < 0) { // Unnecessary if "status" was set correctly. TODO(review): should I delete this? + // (Excessively "defensive"?) 
+ status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + UnitPreferenceMetadata *m = metadata_[idx]; + for (int32_t pref = m->prefsOffset; pref < m->prefsOffset + m->prefsCount; pref++) { + UnitPreference *p = unitPrefs_[pref]; + // TODO(review): we're making a full copy of the preferences here. + // Considering UnitPreferences instances should simply stick around, we + // could also simply return pointers at these instances. What is the + // appropriate data structure (array/vector) for variable set of + // pointers? MaybeStackVector could probably work, but + // ugly as a double-dereference?) + UnitPreference *outP = outPreferences->emplaceBack(); + if (!outP) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + outP->unit.copyFrom(p->unit, status); + outP->geq = p->geq; + outP->skeleton.copyFrom(p->skeleton, status); + } +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/unitsdata.h b/icu4c/source/i18n/unitsdata.h index 99383574c7b..a7481207b22 100644 --- a/icu4c/source/i18n/unitsdata.h +++ b/icu4c/source/i18n/unitsdata.h @@ -72,6 +72,59 @@ class U_I18N_API ConversionRates { MaybeStackVector conversionInfo_; }; +// Encapsulates unitPreferenceData information from units resources, specifying +// a sequence of output unit preferences. +struct U_I18N_API UnitPreference { + UnitPreference() : geq(1) {} + CharString unit; + double geq; + CharString skeleton; +}; + +namespace { + +// UnitPreferenceMetadata lives in the anonymous namespace, because it should +// only be useful to internal code and unit testing code. +struct U_I18N_API UnitPreferenceMetadata { + CharString category; + CharString usage; + CharString region; + int32_t prefsOffset; + int32_t prefsCount; +}; + +} // namespace + +/** + * Unit Preferences information for various locales and usages. + */ +class U_I18N_API UnitPreferences { + public: + /** + * Constructor that loads data. + * + * @param status Receives status. 
+ */ + UnitPreferences(UErrorCode &status); + + /** + * FIXME/WIP document me! + * + * If region can't be found, falls back to global (001). If usage can't be found, falls back to + * "default". + * + * Copies the preferences structures. Consider returning pointers (references) instead? + */ + void getPreferencesFor(const char *category, const char *usage, const char *region, + MaybeStackVector *outPreferences, UErrorCode &status); + + protected: + int32_t binarySearch(const char *category, const char *usage, const char *region); + + MaybeStackVector metadata_; + MaybeStackVector unitPrefs_; +}; + U_NAMESPACE_END #endif //__GETUNITSDATA_H__ diff --git a/icu4c/source/test/intltest/unitsdatatest.cpp b/icu4c/source/test/intltest/unitsdatatest.cpp index 817082212a7..7827b0ef8e5 100644 --- a/icu4c/source/test/intltest/unitsdatatest.cpp +++ b/icu4c/source/test/intltest/unitsdatatest.cpp @@ -13,6 +13,7 @@ class UnitsDataTest : public IntlTest { void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL); void testGetAllConversionRates(); + void testGetPreferences(); }; extern IntlTest *createUnitsDataTest() { return new UnitsDataTest(); } @@ -21,6 +22,7 @@ void UnitsDataTest::runIndexedTest(int32_t index, UBool exec, const char *&name, if (exec) { logln("TestSuite UnitsDataTest: "); } TESTCASE_AUTO_BEGIN; TESTCASE_AUTO(testGetAllConversionRates); + TESTCASE_AUTO(testGetPreferences); TESTCASE_AUTO_END; } @@ -40,4 +42,79 @@ void UnitsDataTest::testGetAllConversionRates() { } } +class UnitPreferencesOpenedUp : public UnitPreferences { + public: + UnitPreferencesOpenedUp(UErrorCode &status) : UnitPreferences(status) {}; + const MaybeStackVector *getInternalMetadata() const { return &metadata_; } + const MaybeStackVector *getInternalUnitPrefs() const { return &unitPrefs_; } +}; + +/** + * This test is dependent upon CLDR Data: when the preferences change, the test + * may fail: see the constants for expected Max/Min unit identifiers, for US and + * 
World, and for Roads and default lengths. + */ +void UnitsDataTest::testGetPreferences() { + const char* USRoadMax = "mile"; + const char* USRoadMin = "foot"; + const char* USLenMax = "mile"; + const char* USLenMin = "inch"; + const char* WorldRoadMax = "kilometer"; + const char* WorldRoadMin = "meter"; + const char* WorldLenMax = "kilometer"; + const char* WorldLenMin = "centimeter"; + struct TestCase { + const char *name; + const char *category; + const char *usage; + const char *region; + const char *expectedBiggest; + const char *expectedSmallest; + } testCases[]{ + {"US road", "length", "road", "US", USRoadMax, USRoadMin}, + {"001 road", "length", "road", "001", WorldRoadMax, WorldRoadMin}, + {"US lengths", "length", "default", "US", USLenMax, USLenMin}, + {"001 lengths", "length", "default", "001", WorldLenMax, WorldLenMin}, + {"XX road falls back to 001", "length", "road", "XX", WorldRoadMax, WorldRoadMin}, + {"XX default falls back to 001", "length", "default", "XX", WorldLenMax, WorldLenMin}, + {"Unknown usage US", "length", "foobar", "US", USLenMax, USLenMin}, + {"Unknown usage 001", "length", "foobar", "XX", WorldLenMax, WorldLenMin}, + }; + IcuTestErrorCode status(*this, "testGetPreferences"); + UnitPreferencesOpenedUp preferences(status); + auto *metadata = preferences.getInternalMetadata(); + auto *unitPrefs = preferences.getInternalUnitPrefs(); + assertTrue(UnicodeString("Metadata count: ") + metadata->length() + " > 200", + metadata->length() > 200); + assertTrue(UnicodeString("Preferences count: ") + unitPrefs->length() + " > 250", + unitPrefs->length() > 250); + + // Dump all preferences... TODO: remove? This was just debugging/development output. 
+ logln("Unit Preferences:"); + for (int32_t i = 0; i < metadata->length(); i++) { + logln("%d: category %s, usage %s, region %s, offset %d, count %d", i, + (*metadata)[i]->category.data(), (*metadata)[i]->usage.data(), + (*metadata)[i]->region.data(), (*metadata)[i]->prefsOffset, (*metadata)[i]->prefsCount); + for (int32_t j = (*metadata)[i]->prefsOffset; + j < (*metadata)[i]->prefsOffset + (*metadata)[i]->prefsCount; j++) { + auto p = (*unitPrefs)[j]; + logln(" %d: unit %s, geq %f, skeleton \"%s\"", j, p->unit.data(), p->geq, p->skeleton.data()); + } + } + + for (const auto &t : testCases) { + MaybeStackVector prefs; + logln(t.name); + preferences.getPreferencesFor(t.category, t.usage, t.region, &prefs, status); + if (prefs.length() > 0) { + assertEquals(UnicodeString(t.name) + " - max unit", t.expectedBiggest, + prefs[0]->unit.data()); + assertEquals(UnicodeString(t.name) + " - min unit", t.expectedSmallest, + prefs[prefs.length() - 1]->unit.data()); + } else { + errln(UnicodeString(t.name) + ": failed to find preferences"); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ -- 2.40.0