From: Shane Carr
Date: Thu, 12 Dec 2019 04:46:18 +0000 (-0800)
Subject: ICU-20418 Adding *internal* parse method for core unit identifiers.
X-Git-Tag: release-67-rc~116
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=df8841aa6fb6798d1d98bf6b7acce7a3830770a7;p=icu

ICU-20418 Adding *internal* parse method for core unit identifiers.

Also see ICU-20286
---

diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp
index 7a0a6060536..66ddd0a1d54 100644
--- a/icu4c/source/i18n/measunit.cpp
+++ b/icu4c/source/i18n/measunit.cpp
@@ -1990,10 +1990,10 @@ MeasureUnit MeasureUnit::getTeaspoon() {
 // End generated code
 
 static int32_t binarySearch(
-        const char * const * array, int32_t start, int32_t end, const char * key) {
+        const char * const * array, int32_t start, int32_t end, StringPiece key) {
     while (start < end) {
         int32_t mid = (start + end) / 2;
-        int32_t cmp = uprv_strcmp(array[mid], key);
+        int32_t cmp = StringPiece(array[mid]).compare(key);
         if (cmp < 0) {
             start = mid + 1;
             continue;
@@ -2138,6 +2138,53 @@ int32_t MeasureUnit::internalGetIndexForTypeAndSubtype(const char *type, const c
     return gIndexes[t] + st - gOffsets[t];
 }
 
+bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) {
+    for (int32_t t = 0; t < UPRV_LENGTHOF(gOffsets) - 1; t++) {
+        // Skip currency units
+        if (gIndexes[t] == gIndexes[t + 1]) {
+            continue;
+        }
+        int32_t st = binarySearch(gSubTypes, gOffsets[t], gOffsets[t + 1], subType);
+        if (st >= 0) {
+            output->setTo(t, st - gOffsets[t]);
+            return true;
+        }
+    }
+    return false;
+}
+
+bool MeasureUnit::parseCoreUnitIdentifier(
+        StringPiece coreUnitIdentifier,
+        MeasureUnit* numerator,
+        MeasureUnit* denominator,
+        UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return false;
+    }
+
+    // First search for the whole code unit identifier as a subType
+    if (findBySubType(coreUnitIdentifier, numerator)) {
+        return false; // found a numerator but not denominator
+    }
+
+    // If not found, try breaking apart numerator and denominator
+    int32_t perIdx = coreUnitIdentifier.find("-per-", 0);
+    if (perIdx == -1) {
+        // String does not contain "-per-"
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return false;
+    }
+    StringPiece numeratorStr(coreUnitIdentifier, 0, perIdx);
+    StringPiece denominatorStr(coreUnitIdentifier, perIdx + 5);
+    if (findBySubType(numeratorStr, numerator) && findBySubType(denominatorStr, denominator)) {
+        return true; // found both a numerator and denominator
+    }
+
+    // The numerator or denominator were invalid
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return false;
+}
+
 MeasureUnit MeasureUnit::resolveUnitPerUnit(
         const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) {
     int32_t unitOffset = unit.getOffset();
diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h
index dbcad62f229..ebcf6914fdb 100644
--- a/icu4c/source/i18n/unicode/measunit.h
+++ b/icu4c/source/i18n/unicode/measunit.h
@@ -194,6 +194,26 @@ class U_I18N_API MeasureUnit: public UObject {
      */
     static int32_t internalGetIndexForTypeAndSubtype(const char *type, const char *subtype);
 
+    /**
+     * ICU use only.
+     * @return Whether subType is known to ICU.
+     * @internal
+     */
+    static bool findBySubType(StringPiece subType, MeasureUnit* output);
+
+    /**
+     * ICU use only.
+     * Parse a core unit identifier into a numerator and denominator unit.
+     * @param coreUnitIdentifier The string to parse.
+     * @param numerator Output: set to the numerator unit.
+     * @param denominator Output: set to the denominator unit, if present.
+     * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the core unit identifier is not known.
+     * @return Whether both a numerator and denominator are returned.
+     * @internal
+     */
+    static bool parseCoreUnitIdentifier(
+            StringPiece coreUnitIdentifier, MeasureUnit* numerator, MeasureUnit* denominator, UErrorCode& status);
+
     /**
      * ICU use only.
      * @internal