#define UNISTR_FROM_STRING_EXPLICIT
#include "cstring.h"
+#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/errorcode.h"
#include "unicode/ucharstrie.h"
#include "unicode/ucharstriebuilder.h"
+#include "cstr.h"
+
U_NAMESPACE_BEGIN
// This is to ensure we only insert positive integers into the trie
constexpr int32_t kSIPrefixOffset = 64;
-constexpr int32_t kSyntaxPartOffset = 256;
-
-enum SyntaxPart {
- SYNTAX_PART_PER = kSyntaxPartOffset,
- SYNTAX_PART_SQUARE,
- SYNTAX_PART_CUBIC,
- SYNTAX_PART_P1,
- SYNTAX_PART_P2,
- SYNTAX_PART_P3,
- SYNTAX_PART_P4,
- SYNTAX_PART_P5,
- SYNTAX_PART_P6,
- SYNTAX_PART_P7,
- SYNTAX_PART_P8,
- SYNTAX_PART_P9,
+constexpr int32_t kCompoundPartOffset = 128;
+
+enum CompoundPart {
+ COMPOUND_PART_PER = kCompoundPartOffset,
+ COMPOUND_PART_TIMES,
+ COMPOUND_PART_ONE_PER,
+ COMPOUND_PART_PLUS,
+};
+
+constexpr int32_t kPowerPartOffset = 256;
+
+enum PowerPart {
+ POWER_PART_P2 = kPowerPartOffset + 2,
+ POWER_PART_P3,
+ POWER_PART_P4,
+ POWER_PART_P5,
+ POWER_PART_P6,
+ POWER_PART_P7,
+ POWER_PART_P8,
+ POWER_PART_P9,
+ POWER_PART_P10,
+ POWER_PART_P11,
+ POWER_PART_P12,
+ POWER_PART_P13,
+ POWER_PART_P14,
+ POWER_PART_P15,
};
constexpr int32_t kSimpleUnitOffset = 512;
+const struct SIPrefixStrings {
+ const char* const string;
+ UMeasureSIPrefix value;
+} gSIPrefixStrings[] = {
+ { "yotta", UMEASURE_SI_PREFIX_YOTTA },
+ { "zetta", UMEASURE_SI_PREFIX_ZETTA },
+ { "exa", UMEASURE_SI_PREFIX_EXA },
+ { "peta", UMEASURE_SI_PREFIX_PETA },
+ { "tera", UMEASURE_SI_PREFIX_TERA },
+ { "giga", UMEASURE_SI_PREFIX_GIGA },
+ { "mega", UMEASURE_SI_PREFIX_MEGA },
+ { "kilo", UMEASURE_SI_PREFIX_KILO },
+ { "hecto", UMEASURE_SI_PREFIX_HECTO },
+ { "deka", UMEASURE_SI_PREFIX_DEKA },
+ { "deci", UMEASURE_SI_PREFIX_DECI },
+ { "centi", UMEASURE_SI_PREFIX_CENTI },
+ { "milli", UMEASURE_SI_PREFIX_MILLI },
+ { "micro", UMEASURE_SI_PREFIX_MICRO },
+ { "nano", UMEASURE_SI_PREFIX_NANO },
+ { "pico", UMEASURE_SI_PREFIX_PICO },
+ { "femto", UMEASURE_SI_PREFIX_FEMTO },
+ { "atto", UMEASURE_SI_PREFIX_ATTO },
+ { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
+ { "yocto", UMEASURE_SI_PREFIX_YOCTO },
+};
+
// FIXME: Get this list from data
-const char16_t* gSimpleUnits[] = {
+const char16_t* const gSimpleUnits[] = {
u"100kilometer",
u"acre",
u"ampere",
if (U_FAILURE(status)) { return; }
// Add SI prefixes
- b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status);
- b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status);
- b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status);
- b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status);
- b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status);
- b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status);
- b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status);
- b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status);
- b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status);
- b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status);
- b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status);
- b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status);
- b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status);
- b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status);
- b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status);
- b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status);
- b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status);
- b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status);
- b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status);
- b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status);
+ for (const auto& siPrefixInfo : gSIPrefixStrings) {
+ UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
+ b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
+ }
if (U_FAILURE(status)) { return; }
- // Add syntax parts (per, power prefixes)
- b.add(u"-per-", SYNTAX_PART_PER, status);
- b.add(u"square-", SYNTAX_PART_SQUARE, status);
- b.add(u"cubic-", SYNTAX_PART_CUBIC, status);
- b.add(u"p1", SYNTAX_PART_P1, status);
- b.add(u"p2", SYNTAX_PART_P2, status);
- b.add(u"p3", SYNTAX_PART_P3, status);
- b.add(u"p4", SYNTAX_PART_P4, status);
- b.add(u"p5", SYNTAX_PART_P5, status);
- b.add(u"p6", SYNTAX_PART_P6, status);
- b.add(u"p7", SYNTAX_PART_P7, status);
- b.add(u"p8", SYNTAX_PART_P8, status);
- b.add(u"p9", SYNTAX_PART_P9, status);
+ // Add syntax parts (compound, power prefixes)
+ b.add(u"-per-", COMPOUND_PART_PER, status);
+ b.add(u"-", COMPOUND_PART_TIMES, status);
+ b.add(u"one-per-", COMPOUND_PART_ONE_PER, status);
+ b.add(u"+", COMPOUND_PART_PLUS, status);
+ b.add(u"square-", POWER_PART_P2, status);
+ b.add(u"cubic-", POWER_PART_P3, status);
+ b.add(u"p2-", POWER_PART_P2, status);
+ b.add(u"p3-", POWER_PART_P3, status);
+ b.add(u"p4-", POWER_PART_P4, status);
+ b.add(u"p5-", POWER_PART_P5, status);
+ b.add(u"p6-", POWER_PART_P6, status);
+ b.add(u"p7-", POWER_PART_P7, status);
+ b.add(u"p8-", POWER_PART_P8, status);
+ b.add(u"p9-", POWER_PART_P9, status);
+ b.add(u"p10-", POWER_PART_P10, status);
+ b.add(u"p11-", POWER_PART_P11, status);
+ b.add(u"p12-", POWER_PART_P12, status);
+ b.add(u"p13-", POWER_PART_P13, status);
+ b.add(u"p14-", POWER_PART_P14, status);
+ b.add(u"p15-", POWER_PART_P15, status);
if (U_FAILURE(status)) { return; }
// Add sanctioned simple units by offset
uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
}
+class UnitIdentifierParser {
+public:
+ static UnitIdentifierParser from(StringPiece source, UErrorCode& status) {
+ umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
+ if (U_FAILURE(status)) {
+ return UnitIdentifierParser();
+ }
+ return UnitIdentifierParser(source);
+ }
+
+ int32_t nextToken(UErrorCode& status) {
+ fTrie.reset();
+ int32_t match = -1;
+ int32_t previ = -1;
+ do {
+ fTrie.next(fSource.data()[fIndex++]);
+ if (fTrie.current() == USTRINGTRIE_NO_MATCH) {
+ break;
+ } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) {
+ continue;
+ } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) {
+ match = fTrie.getValue();
+ previ = fIndex;
+ break;
+ } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) {
+ match = fTrie.getValue();
+ previ = fIndex;
+ continue;
+ } else {
+ UPRV_UNREACHABLE;
+ }
+ } while (fIndex < fSource.length());
+
+ if (match < 0) {
+ // TODO: Make a new status code?
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ } else {
+ fIndex = previ;
+ }
+ return match;
+ }
+
+ bool hasNext() const {
+ return fIndex < fSource.length();
+ }
+
+ int32_t currentIndex() const {
+ return fIndex;
+ }
+
+private:
+ int32_t fIndex = 0;
+ StringPiece fSource;
+ UCharsTrie fTrie;
+
+ UnitIdentifierParser() : fSource(""), fTrie(u"") {}
+
+ UnitIdentifierParser(StringPiece source)
+ : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}
+};
+
} // namespace
+MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) {
+ UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status);
+ if (U_FAILURE(status)) {
+ // Unrecoverable error
+ return MeasureUnit();
+ }
+
+ while (parser.hasNext()) {
+ parser.nextToken(status);
+ if (U_FAILURE(status)) {
+ // Invalid syntax
+ return MeasureUnit();
+ }
+
+ // if (match < kCompoundPartOffset) {
+ // // SI Prefix
+ // auto prefix = static_cast<UMeasureSIPrefix>(match - kSIPrefixOffset);
+ // } else if (match < kPowerPartOffset) {
+ // // Compound part
+ // const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus";
+ // } else if (match < kSimpleUnitOffset) {
+ // // Power part
+ // int32_t power = match - kPowerPartOffset;
+ // } else {
+ // // Simple unit
+ // const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset];
+ // }
+ }
+
+ // Success
+ return MeasureUnit(uprv_strdup(identifier));
+}
+
+UMeasureSIPrefix MeasureUnit::getSIPrefix() const {
+ ErrorCode status;
+ const char* id = toString();
+ UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return UMEASURE_SI_PREFIX_ONE;
+ }
+
+ int32_t match = parser.nextToken(status);
+ if (status.isFailure()) {
+ // Invalid syntax
+ return UMEASURE_SI_PREFIX_ONE;
+ }
+
+ if (match >= kPowerPartOffset && match < kSimpleUnitOffset) {
+ // Skip the power part
+ match = parser.nextToken(status);
+ if (status.isFailure()) {
+ // Invalid syntax
+ return UMEASURE_SI_PREFIX_ONE;
+ }
+ }
+
+ if (match >= kCompoundPartOffset) {
+ // No SI prefix
+ return UMEASURE_SI_PREFIX_ONE;
+ }
+
+ return static_cast<UMeasureSIPrefix>(match - kSIPrefixOffset);
+}
+
+MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const {
+ ErrorCode status;
+ const char* id = toString();
+ UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return *this;
+ }
+
+ int32_t match = parser.nextToken(status);
+ if (status.isFailure()) {
+ // Invalid syntax
+ return *this;
+ }
+
+ CharString builder;
+ int32_t unitStart = 0;
+ if (match >= kPowerPartOffset && match < kSimpleUnitOffset) {
+ // Skip the power part
+ unitStart = parser.currentIndex();
+ builder.append(id, unitStart, status);
+ match = parser.nextToken(status);
+ }
+
+ // Append the new SI prefix
+ for (const auto& siPrefixInfo : gSIPrefixStrings) {
+ if (siPrefixInfo.value == prefix) {
+ builder.append(siPrefixInfo.string, status);
+ break;
+ }
+ }
+
+ if (match < kCompoundPartOffset) {
+ // Remove the old SI prefix
+ unitStart = parser.currentIndex();
+ }
+ builder.append(id + unitStart, status);
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return *this;
+ }
+
+ return MeasureUnit(builder.cloneData(status));
+}
+
+int8_t MeasureUnit::getPower() const {
+ ErrorCode status;
+ const char* id = toString();
+ UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return 0;
+ }
+
+ int32_t match = parser.nextToken(status);
+ if (status.isFailure()) {
+ // Invalid syntax
+ return 0;
+ }
+
+ if (match < kPowerPartOffset || match >= kSimpleUnitOffset) {
+ // No power part
+ return 0;
+ }
+
+ return static_cast<int8_t>(match - kPowerPartOffset);
+}
+
+MeasureUnit MeasureUnit::withPower(int8_t power) const {
+ if (power < 0) {
+ // Don't know how to handle this yet
+ U_ASSERT(FALSE);
+ }
+
+ ErrorCode status;
+ const char* id = toString();
+ UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return *this;
+ }
+
+ int32_t match = parser.nextToken(status);
+ if (status.isFailure()) {
+ // Invalid syntax
+ return *this;
+ }
+
+ // Append the new power
+ CharString builder;
+ if (power == 2) {
+ builder.append("square-", status);
+ } else if (power == 3) {
+ builder.append("cubic-", status);
+ } else if (power < 10) {
+ builder.append('p', status);
+ builder.append(power + '0', status);
+ builder.append('-', status);
+ } else {
+ builder.append("p1", status);
+ builder.append('0' + (power % 10), status);
+ builder.append('-', status);
+ }
+
+ if (match < kCompoundPartOffset) {
+ // Remove the old power
+ builder.append(id + parser.currentIndex(), status);
+ } else {
+ // Append the whole identifier
+ builder.append(id, status);
+ }
+ if (status.isFailure()) {
+ // Unrecoverable error
+ return *this;
+ }
+
+ return MeasureUnit(builder.cloneData(status));
+}
+
+
U_NAMESPACE_END
#endif /* !UNCONFIG_NO_FORMATTING */