From 41fed01112875f8d1b0d284f180dcb0e8d2d9cb3 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Tue, 31 May 2016 23:49:11 +0000 Subject: [PATCH] ICU-11782 Optimizing storage of skeleton field strings in DateTimePatternGenerator (C++). X-SVN-Rev: 38780 --- icu4c/source/i18n/dtptngen.cpp | 274 +++++++++++++++------------ icu4c/source/i18n/dtptngen_impl.h | 51 ++++- icu4c/source/i18n/unicode/dtptngen.h | 5 +- 3 files changed, 197 insertions(+), 133 deletions(-) diff --git a/icu4c/source/i18n/dtptngen.cpp b/icu4c/source/i18n/dtptngen.cpp index 0a6d0fe2cb0..5754ba32f9f 100644 --- a/icu4c/source/i18n/dtptngen.cpp +++ b/icu4c/source/i18n/dtptngen.cpp @@ -420,16 +420,17 @@ DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) { skipMatcher = NULL; fAvailableFormatKeyHash=NULL; - addCanonicalItems(); - addICUPatterns(locale, status); - if (U_FAILURE(status)) { - return; - } + addCanonicalItems(status); + UnicodeString hackPattern = addICUPatterns(locale, status); addCLDRData(locale, status); setDateTimeFromCalendar(locale, status); setDecimalSymbols(locale, status); umtx_initOnce(initOnce, loadAllowedHourFormatsData, status); getAllowedHourFormats(locale, status); + + if (!hackPattern.isEmpty()) { + hackTimes(hackPattern, status); + } } // DateTimePatternGenerator::initData namespace { @@ -517,7 +518,6 @@ void DateTimePatternGenerator::loadAllowedHourFormatsData(UErrorCode &status) { void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) { if (U_FAILURE(status)) { return; } - const char *localeID = locale.getName(); char maxLocaleID[ULOC_FULLNAME_CAPACITY]; int32_t length = uloc_addLikelySubtags(localeID, maxLocaleID, ULOC_FULLNAME_CAPACITY, &status); @@ -596,14 +596,16 @@ DateTimePatternGenerator::staticGetBaseSkeleton( return localSkeleton.getBaseSkeleton(); } -void +UnicodeString DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& status) { + UnicodeString hackPattern; + if (U_FAILURE(status)) { return hackPattern; } UnicodeString dfPattern; UnicodeString conflictingString; DateFormat* df; if (U_FAILURE(status)) { - return; + return hackPattern; } // Load with ICU patterns @@ -617,7 +619,7 @@ DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& statu // TODO Maybe we should return an error when the date format isn't simple. delete df; if (U_FAILURE(status)) { - return; + return hackPattern; } df = DateFormat::createTimeInstance(style, locale); @@ -631,9 +633,11 @@ DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& statu // TODO Maybe we should return an error when the date format isn't simple. delete df; if (U_FAILURE(status)) { - return; + return hackPattern; } } + + return hackPattern; } void @@ -688,6 +692,7 @@ static const UChar hourFormatChars[] = { CAP_H, LOW_H, CAP_K, LOW_K, 0 }; // HhK void DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& err) { + if (U_FAILURE(err)) { return; } UResourceBundle *rb, *calTypeBundle, *calBundle; UResourceBundle *patBundle, *fieldBundle, *fBundle; UnicodeString rbPattern, value, field; @@ -898,10 +903,6 @@ DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& err) { } // Go to the top of the loop to process contents of calTypeBundle } - - if (hackPattern.length()>0) { - hackTimes(hackPattern, err); - } } void @@ -915,7 +916,6 @@ DateTimePatternGenerator::initHashtable(UErrorCode& err) { } } - void DateTimePatternGenerator::setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value) { appendItemFormats[field] = value; @@ -1062,12 +1062,15 @@ DateTimePatternGenerator::getDecimal() const { } void -DateTimePatternGenerator::addCanonicalItems() { +DateTimePatternGenerator::addCanonicalItems(UErrorCode& status) { + if (U_FAILURE(status)) { return; } UnicodeString conflictingPattern; - UErrorCode status = U_ZERO_ERROR; for (int32_t i=0; i 0) { + addPattern(UnicodeString(Canonical_Items[i]), FALSE, conflictingPattern, status); + } + if (U_FAILURE(status)) { return; } } } @@ -1089,9 +1092,10 @@ DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCo int32_t resStrLen = 0; Calendar* fCalendar = Calendar::createInstance(locale, status); + if (U_FAILURE(status)) { return; } CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status); UResourceBundle *dateTimePatterns = calData.getByKey(DT_DateTimePatternsTag, status); - if (U_FAILURE(status)) return; + if (U_FAILURE(status)) { return; } if (ures_getSize(dateTimePatterns) <= DateFormat::kDateTime) { @@ -1290,8 +1294,8 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, } if ((flags & kDTPGFixFractionalSeconds) != 0 && typeValue == UDATPG_SECOND_FIELD) { - UnicodeString newField=dtMatcher->skeleton.original[UDATPG_FRACTIONAL_SECOND_FIELD]; - field = field + decimal + newField; + field += decimal; + dtMatcher->skeleton.original.appendFieldTo(UDATPG_FRACTIONAL_SECOND_FIELD, field); } else if (dtMatcher->skeleton.type[typeValue]!=0) { // Here: // - "reqField" is the field from the originally requested skeleton, with length @@ -1315,9 +1319,9 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, // a) The length of the field in the skeleton (skelFieldLen) is equal to reqFieldLen. // b) The pattern field is numeric and the skeleton field is not, or vice versa. - UnicodeString reqField = dtMatcher->skeleton.original[typeValue]; - int32_t reqFieldLen = reqField.length(); - if (reqField.charAt(0) == CAP_E && reqFieldLen < 3) + UChar reqFieldChar = dtMatcher->skeleton.original.getFieldChar(typeValue); + int32_t reqFieldLen = dtMatcher->skeleton.original.getFieldLength(typeValue); + if (reqFieldChar == CAP_E && reqFieldLen < 3) reqFieldLen = 3; // 1-3 for E are equivalent to 3 for c,e int32_t adjFieldLen = reqFieldLen; if ( (typeValue==UDATPG_HOUR_FIELD && (options & UDATPG_MATCH_HOUR_FIELD_LENGTH)==0) || @@ -1325,8 +1329,7 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, (typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) { adjFieldLen = field.length(); } else if (specifiedSkeleton) { - UnicodeString skelField = specifiedSkeleton->original[typeValue]; - int32_t skelFieldLen = skelField.length(); + int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue); UBool patFieldIsNumeric = (row->type > 0); UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0); if (skelFieldLen == reqFieldLen || (patFieldIsNumeric && !skelFieldIsNumeric) || (skelFieldIsNumeric && !patFieldIsNumeric)) { @@ -1334,9 +1337,12 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, adjFieldLen = field.length(); } } - UChar c = (typeValue!= UDATPG_HOUR_FIELD && typeValue!= UDATPG_MONTH_FIELD && - typeValue!= UDATPG_WEEKDAY_FIELD && (typeValue!= UDATPG_YEAR_FIELD || reqField.charAt(0)==CAP_Y))? - reqField.charAt(0): field.charAt(0); + UChar c = (typeValue!= UDATPG_HOUR_FIELD + && typeValue!= UDATPG_MONTH_FIELD + && typeValue!= UDATPG_WEEKDAY_FIELD + && (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y)) + ? reqFieldChar + : field.charAt(0); if (typeValue == UDATPG_HOUR_FIELD && (flags & kDTPGSkeletonUsesCapJ) != 0) { c = fDefaultHourFormatChar; } @@ -1699,36 +1705,19 @@ PatternMap::getPatternFromSkeleton(PtnSkeleton& skeleton, const PtnSkeleton** sp } // find boot entry - UChar baseChar='\0'; - for (int32_t i=0; iskeleton->original[i].compare(skeleton.original[i]) != 0 ) - { - break; - } - } + equal = curElem->skeleton->original == skeleton.original; } else { // called from DateTimePatternGenerator::getRedundants, use baseOriginal - for (i=0; iskeleton->baseOriginal[i].compare(skeleton.baseOriginal[i]) != 0 ) - { - break; - } - } + equal = curElem->skeleton->baseOriginal == skeleton.baseOriginal; } - if (i == UDATPG_FIELD_COUNT) { + if (equal) { if (specifiedSkeletonPtr && curElem->skeletonWasSpecified) { *specifiedSkeletonPtr = curElem->skeleton; } @@ -1832,37 +1821,35 @@ void DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeletonResult) { int32_t i; for (i=0; iset(pattern); for (i=0; i < fp->itemNumber; i++) { - UnicodeString field = fp->items[i]; - if ( field.charAt(0) == LOW_A ) { + const UnicodeString& value = fp->items[i]; + if ( value.charAt(0) == LOW_A ) { continue; // skip 'a' } - if ( fp->isQuoteLiteral(field) ) { + if ( fp->isQuoteLiteral(value) ) { UnicodeString quoteLiteral; fp->getQuoteLiteral(quoteLiteral, &i); continue; } - int32_t canonicalIndex = fp->getCanonicalIndex(field); + int32_t canonicalIndex = fp->getCanonicalIndex(value); if (canonicalIndex < 0 ) { continue; } const dtTypeElem *row = &dtTypes[canonicalIndex]; - int32_t typeValue = row->field; - skeletonResult.original[typeValue]=field; + int32_t field = row->field; + skeletonResult.original.populate(field, value); UChar repeatChar = row->patternChar; int32_t repeatCount = row->minLen; // #7930 removes cap at 3 - while (repeatCount-- > 0) { - skeletonResult.baseOriginal[typeValue] += repeatChar; - } - int16_t subTypeValue = row->type; + skeletonResult.baseOriginal.populate(field, repeatChar, repeatCount); + int16_t subField = row->type; if ( row->type > 0) { - subTypeValue += field.length(); + subField += value.length(); } - skeletonResult.type[typeValue] = subTypeValue; + skeletonResult.type[field] = subField; } copyFrom(skeletonResult); } @@ -1870,23 +1857,13 @@ DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton void DateTimeMatcher::getBasePattern(UnicodeString &result ) { result.remove(); // Reset the result first. - for (int32_t i=0; iskeleton.type[i]=newSkeleton.type[i]; - this->skeleton.original[i]=newSkeleton.original[i]; - this->skeleton.baseOriginal[i]=newSkeleton.baseOriginal[i]; - } + skeleton.copyFrom(newSkeleton); } void DateTimeMatcher::copyFrom() { // same as clear - for (int32_t i=0; iskeleton.type[i]=0; - this->skeleton.original[i].remove(); - this->skeleton.baseOriginal[i].remove(); - } + skeleton.clear(); } UBool DateTimeMatcher::equals(const DateTimeMatcher* other) const { - if (other==NULL) { - return FALSE; - } - for (int32_t i=0; iskeleton.original[i]!=other->skeleton.original[i] ) { - return FALSE; - } - } - return TRUE; + if (other==NULL) { return FALSE; } + return skeleton.original == other->skeleton.original; } int32_t @@ -2212,52 +2174,116 @@ PatternMapIterator::next() { return *matcher; } -PtnSkeleton::PtnSkeleton() { + +SkeletonFields::SkeletonFields() { + // Set initial values to zero + clear(); } +void SkeletonFields::clear() { + uprv_memset(chars, 0, sizeof(chars)); + uprv_memset(lengths, 0, sizeof(lengths)); +} -PtnSkeleton::PtnSkeleton(const PtnSkeleton& other) { - for (int32_t i=0; itype[i]=other.type[i]; - this->original[i]=other.original[i]; - this->baseOriginal[i]=other.baseOriginal[i]; +void SkeletonFields::copyFrom(const SkeletonFields& other) { + uprv_memcpy(chars, other.chars, sizeof(chars)); + uprv_memcpy(lengths, other.lengths, sizeof(lengths)); +} + +void SkeletonFields::clearField(int32_t field) { + chars[field] = 0; + lengths[field] = 0; +} + +UChar SkeletonFields::getFieldChar(int32_t field) const { + return chars[field]; +} + +int32_t SkeletonFields::getFieldLength(int32_t field) const { + return lengths[field]; +} + +void SkeletonFields::populate(int32_t field, const UnicodeString& value) { + populate(field, value.charAt(0), value.length()); +} + +void SkeletonFields::populate(int32_t field, UChar ch, int32_t length) { + chars[field] = (int8_t) ch; + lengths[field] = (int8_t) length; +} + +UBool SkeletonFields::isFieldEmpty(int32_t field) const { + return lengths[field] == 0; +} + +UnicodeString& SkeletonFields::appendTo(UnicodeString& string) const { + for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) { + appendFieldTo(i, string); } + return string; } -UBool -PtnSkeleton::equals(const PtnSkeleton& other) { - for (int32_t i=0; i