]> granicus.if.org Git - icu/commitdiff
Adding measunit_extra.cpp with basic trie
author: Shane F. Carr <shane@unicode.org>
Tue, 14 Jan 2020 15:47:18 +0000 (16:47 +0100)
committer: Shane F. Carr <shane@unicode.org>
Fri, 17 Jan 2020 16:14:18 +0000 (17:14 +0100)
icu4c/source/i18n/Makefile.in
icu4c/source/i18n/i18n.vcxproj
icu4c/source/i18n/i18n.vcxproj.filters
icu4c/source/i18n/i18n_uwp.vcxproj
icu4c/source/i18n/measunit_extra.cpp [new file with mode: 0644]
icu4c/source/i18n/ucln_in.h
icu4c/source/i18n/unicode/measunit.h
icu4c/source/test/depstest/dependencies.txt

index f22c20eabcaa0c7665bf3172b5db6cc3876176a6..f6f9860fb3a68d491ddd486ef006f1d93e86d67f 100644 (file)
@@ -114,7 +114,8 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o
 numparse_affixes.o numparse_compositions.o numparse_validators.o \
 numrange_fluent.o numrange_impl.o \
 erarules.o \
-formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o
+formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \
+measunit_extra.o
 
 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
index 5c6760d220e0f314ab19c433e70466c71dfbb7dc..f22fd2ff09760ea303425a8a0c56fad257524704 100644 (file)
     <ClCompile Include="ulistformatter.cpp" />
     <ClCompile Include="measfmt.cpp" />
     <ClCompile Include="measunit.cpp" />
+    <ClCompile Include="measunit_extra.cpp" />
     <ClCompile Include="measure.cpp" />
     <ClCompile Include="msgfmt.cpp" />
     <ClCompile Include="nfrs.cpp" />
index a1813fc0696f897a9d4372886274945f22fda5ff..eade0d07354ffa902da801a8a66c7b22c7ab105e 100644 (file)
     <ClCompile Include="measunit.cpp">
       <Filter>formatting</Filter>
     </ClCompile>
+    <ClCompile Include="measunit_extra.cpp">
+      <Filter>formatting</Filter>
+    </ClCompile>
     <ClCompile Include="measure.cpp">
       <Filter>formatting</Filter>
     </ClCompile>
index 989cef88fe5f499eb6ff457d47a693dabd83ecba..2466ad9586423c93c75e7523cbabb55793e4ab3f 100644 (file)
     <ClCompile Include="ulistformatter.cpp" />
     <ClCompile Include="measfmt.cpp" />
     <ClCompile Include="measunit.cpp" />
+    <ClCompile Include="measunit_extra.cpp" />
     <ClCompile Include="measure.cpp" />
     <ClCompile Include="msgfmt.cpp" />
     <ClCompile Include="nfrs.cpp" />
diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp
new file mode 100644 (file)
index 0000000..d84e50a
--- /dev/null
@@ -0,0 +1,231 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// Extra functions for MeasureUnit not needed for all clients.
+// Separate .o file so that it can be removed for modularity.
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "cstring.h"
+#include "ucln_in.h"
+#include "umutex.h"
+#include "unicode/errorcode.h"
+#include "unicode/measunit.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/ucharstriebuilder.h"
+
+U_NAMESPACE_BEGIN
+
+
+namespace {
+
+// This is to ensure we only insert positive integers into the trie
+// (SI prefixes are stored as kSIPrefixOffset + UMEASURE_SI_PREFIX_*;
+// presumably some of those enum values are negative -- confirm in measunit.h).
+constexpr int32_t kSIPrefixOffset = 64;
+
+// Base trie value for syntax parts ("-per-", "square-", ...): the first
+// SyntaxPart enumerator is defined to be equal to this offset.
+constexpr int32_t kSyntaxPartOffset = 256;
+
+// Trie values for the syntax parts of a unit identifier. The values are
+// consecutive and start at kSyntaxPartOffset; the string noted next to each
+// enumerator is the key under which initUnitExtras() adds it to the trie.
+enum SyntaxPart {
+    SYNTAX_PART_PER    = kSyntaxPartOffset + 0,   // "-per-"
+    SYNTAX_PART_SQUARE = kSyntaxPartOffset + 1,   // "square-"
+    SYNTAX_PART_CUBIC  = kSyntaxPartOffset + 2,   // "cubic-"
+    SYNTAX_PART_P1     = kSyntaxPartOffset + 3,   // "p1"
+    SYNTAX_PART_P2     = kSyntaxPartOffset + 4,   // "p2"
+    SYNTAX_PART_P3     = kSyntaxPartOffset + 5,   // "p3"
+    SYNTAX_PART_P4     = kSyntaxPartOffset + 6,   // "p4"
+    SYNTAX_PART_P5     = kSyntaxPartOffset + 7,   // "p5"
+    SYNTAX_PART_P6     = kSyntaxPartOffset + 8,   // "p6"
+    SYNTAX_PART_P7     = kSyntaxPartOffset + 9,   // "p7"
+    SYNTAX_PART_P8     = kSyntaxPartOffset + 10,  // "p8"
+    SYNTAX_PART_P9     = kSyntaxPartOffset + 11,  // "p9"
+};
+
+// Base trie value for simple units: initUnitExtras() stores the i-th entry of
+// gSimpleUnits (0-based) under the value kSimpleUnitOffset + i.
+constexpr int32_t kSimpleUnitOffset = 512;
+
+// FIXME: Get this list from data
+const char16_t* gSimpleUnits[] = {
+    u"100kilometer",
+    u"acre",
+    u"ampere",
+    u"arc-minute",
+    u"arc-second",
+    u"astronomical-unit",
+    u"atmosphere",
+    u"bar",
+    u"barrel",
+    u"bit",
+    u"british-thermal-unit",
+    u"bushel",
+    u"byte",
+    u"calorie",
+    u"carat",
+    u"celsius",
+    u"century",
+    u"cup",
+    u"cup-metric",
+    u"dalton",
+    u"day",
+    u"day-person",
+    u"decade",
+    u"degree",
+    u"dot", // (as in "dot-per-inch")
+    u"dunam",
+    u"earth-mass",
+    u"electronvolt",
+    u"em",
+    u"fahrenheit",
+    u"fathom",
+    u"fluid-ounce",
+    u"fluid-ounce-imperial",
+    u"foodcalorie",
+    u"foot",
+    u"furlong",
+    u"g-force",
+    u"gallon",
+    u"gallon-imperial",
+    u"generic", // (i.e., "temperature-generic")
+    u"gram",
+    u"hectare", // (note: other "are" derivatives are uncommon)
+    u"hertz",
+    u"horsepower",
+    u"hour",
+    u"inch",
+    u"inch-hg",
+    u"joule",
+    u"karat",
+    u"kelvin",
+    u"knot",
+    u"light-year",
+    u"liter",
+    u"lux",
+    u"meter",
+    u"meter-of-mercury", // (not "millimeter-of-mercury")
+    u"metric-ton",
+    u"mile",
+    u"mile-scandinavian",
+    u"minute",
+    u"mole",
+    u"month",
+    u"month-person",
+    u"nautical-mile",
+    u"newton",
+    u"ohm",
+    u"ounce",
+    u"ounce-troy",
+    u"parsec",
+    u"pascal",
+    u"percent",
+    u"permille",
+    u"permillion",
+    u"permyriad",
+    u"pint",
+    u"pint-metric",
+    u"pixel",
+    u"point",
+    u"pound",
+    u"pound-force",
+    u"quart",
+    u"radian",
+    u"revolution",
+    u"second",
+    u"solar-luminosity",
+    u"solar-mass",
+    u"solar-radius",
+    u"stone",
+    u"tablespoon",
+    u"teaspoon",
+    u"therm-us",
+    u"ton",
+    u"volt",
+    u"watt",
+    u"week",
+    u"week-person",
+    u"yard",
+    u"year",
+    u"year-person",
+};
+
+// Init-once state for the unit-extras data; cleanupUnitExtras() resets it.
+icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
+
+// Serialized stem trie. nullptr until initUnitExtras() allocates it with
+// uprv_malloc(); released and set back to nullptr by cleanupUnitExtras().
+// NOTE(review): despite the "k" prefix this is a mutable global, not a constant.
+char16_t* kSerializedUnitExtrasStemTrie = nullptr;
+
+/**
+ * Cleanup callback that initUnitExtras() registers for UCLN_I18N_UNIT_EXTRAS.
+ * Releases the serialized stem trie, clears the global pointer to it, and
+ * resets gUnitExtrasInitOnce.
+ *
+ * @return Always TRUE.
+ */
+UBool U_CALLCONV cleanupUnitExtras() {
+    // Detach the buffer from the global before releasing it.
+    char16_t* serializedTrie = kSerializedUnitExtrasStemTrie;
+    kSerializedUnitExtrasStemTrie = nullptr;
+    uprv_free(serializedTrie);
+
+    gUnitExtrasInitOnce.reset();
+    return TRUE;
+}
+
+void U_CALLCONV initUnitExtras(UErrorCode& status) {
+    ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
+
+    UCharsTrieBuilder b(status);
+    if (U_FAILURE(status)) { return; }
+
+    // Add SI prefixes
+    b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status);
+    b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status);
+    b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status);
+    b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status);
+    b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status);
+    b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status);
+    b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status);
+    b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status);
+    b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status);
+    b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status);
+    b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status);
+    b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status);
+    b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status);
+    b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status);
+    b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status);
+    b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status);
+    b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status);
+    b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status);
+    b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status);
+    b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status);
+    if (U_FAILURE(status)) { return; }
+
+    // Add syntax parts (per, power prefixes)
+    b.add(u"-per-", SYNTAX_PART_PER, status);
+    b.add(u"square-", SYNTAX_PART_SQUARE, status);
+    b.add(u"cubic-", SYNTAX_PART_CUBIC, status);
+    b.add(u"p1", SYNTAX_PART_P1, status);
+    b.add(u"p2", SYNTAX_PART_P2, status);
+    b.add(u"p3", SYNTAX_PART_P3, status);
+    b.add(u"p4", SYNTAX_PART_P4, status);
+    b.add(u"p5", SYNTAX_PART_P5, status);
+    b.add(u"p6", SYNTAX_PART_P6, status);
+    b.add(u"p7", SYNTAX_PART_P7, status);
+    b.add(u"p8", SYNTAX_PART_P8, status);
+    b.add(u"p9", SYNTAX_PART_P9, status);
+    if (U_FAILURE(status)) { return; }
+
+    // Add sanctioned simple units by offset
+    int32_t simpleUnitOffset = kSimpleUnitOffset;
+    for (auto simpleUnit : gSimpleUnits) {
+        b.add(simpleUnit, simpleUnitOffset++, status);
+    }
+
+    // Build the CharsTrie
+    // TODO: Use SLOW or FAST here?
+    UnicodeString result;
+    b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
+    if (U_FAILURE(status)) { return; }
+
+    // Copy the result into the global constant pointer
+    size_t numBytes = result.length() * sizeof(char16_t);
+    kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
+    uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
+}
+
+} // namespace
+
+
+U_NAMESPACE_END
+
+#endif /* !UCONFIG_NO_FORMATTING */
index 2f70a8500e1c09eaff3011342c390b799fa76c87..765cdd559fb4e240928ccc339ef5cc78ae7a0f3e 100644 (file)
@@ -26,6 +26,7 @@ as the functions are suppose to be called.
 It's usually best to have child dependencies called first. */
 typedef enum ECleanupI18NType {
     UCLN_I18N_START = -1,
+    UCLN_I18N_UNIT_EXTRAS,
     UCLN_I18N_NUMBER_SKELETONS,
     UCLN_I18N_CURRENCY_SPACING,
     UCLN_I18N_SPOOF,
index 16790f8a3d9d792bc0abb9c7ac9478b045d2fbaa..1c5355404671ce8cca5895137de0f3c7b56eb558 100644 (file)
@@ -29,7 +29,6 @@
 U_NAMESPACE_BEGIN
 
 class StringEnumeration;
-class MeasureUnitFields;
 
 /**
  * Enumeration for SI prefixes, such as "kilo".
index 1d726b6ea32a6c61b53a363213b14ddc857e7d66..080beeff637895b1583eb0635dc33c3f666ac354 100644 (file)
@@ -870,6 +870,7 @@ library: i18n
     listformatter
     formatting formattable_cnv regex regex_cnv translit
     double_conversion number_representation number_output numberformatter numberparser
+    units_extra
     universal_time_scale
     uclean_i18n
 
@@ -1053,6 +1054,11 @@ group: sharedbreakiterator
   deps
     breakiterator
 
+group: units_extra
+    measunit_extra.o
+  deps
+    units
+
 group: units
     measunit.o currunit.o nounit.o
   deps