From 22bfb7668a1085fde2c935f32bcc12fa5fe93b2c Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Wed, 7 Sep 2011 14:30:32 +0000 Subject: [PATCH] ICU-8801 fix for performance regression of parse (mostly malloc/free) X-SVN-Rev: 30628 --- icu4c/source/i18n/decfmtst.cpp | 10 ++-- icu4c/source/i18n/decfmtst.h | 2 +- icu4c/source/i18n/decimfmt.cpp | 86 +++++++++++++++++----------- icu4c/source/i18n/unicode/decimfmt.h | 10 ++++ 4 files changed, 69 insertions(+), 39 deletions(-) diff --git a/icu4c/source/i18n/decfmtst.cpp b/icu4c/source/i18n/decfmtst.cpp index 687188e1a0d..c991c0098e0 100644 --- a/icu4c/source/i18n/decfmtst.cpp +++ b/icu4c/source/i18n/decfmtst.cpp @@ -212,15 +212,14 @@ void DecimalFormatStaticSets::initSets(UErrorCode *status) } } -UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse, UnicodeSet *fallback) +const UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse) { UErrorCode status = U_ZERO_ERROR; initSets(&status); if (U_FAILURE(status)) { - fallback->set(decimal, decimal); - return fallback; + return NULL; } if (gStaticSets->fDotEquivalents->contains(decimal)) { @@ -231,9 +230,8 @@ UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool s return strictParse ? 
gStaticSets->fStrictCommaEquivalents : gStaticSets->fCommaEquivalents; } - // if there is no match, return the character itself - fallback->set(decimal, decimal); - return fallback; + // if there is no match, return NULL + return NULL; } diff --git a/icu4c/source/i18n/decfmtst.h b/icu4c/source/i18n/decfmtst.h index d2db3f23d4c..16c5b7c950e 100644 --- a/icu4c/source/i18n/decfmtst.h +++ b/icu4c/source/i18n/decfmtst.h @@ -35,7 +35,7 @@ public: static void initSets(UErrorCode *status); static UBool cleanup(); - static UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse, UnicodeSet *fallback); + static const UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse); UnicodeSet *fDotEquivalents; UnicodeSet *fCommaEquivalents; diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index 8cb72361266..c8f3cf03e17 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -1945,6 +1945,9 @@ UBool DecimalFormat::subparse(const UnicodeString& text, const UnicodeString *groupingString = &getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); UChar32 groupingChar = groupingString->char32At(0); UBool sawDecimal = FALSE; + UChar32 sawDecimalChar = 0xFFFF; + UBool sawGrouping = FALSE; + UChar32 sawGroupingChar = 0xFFFF; UBool sawDigit = FALSE; int32_t backup = -1; int32_t digit; @@ -1955,33 +1958,24 @@ UBool DecimalFormat::subparse(const UnicodeString& text, int32_t groupingCharLength = U16_LENGTH(groupingChar); // equivalent grouping and decimal support - // TODO markdavis Cache these if it makes a difference in performance. 
- UnicodeSet decimalFallback; - UnicodeSet *decimalSet = NULL; - UnicodeSet *groupingSet = NULL; + const UnicodeSet *decimalSet = NULL; + const UnicodeSet *groupingSet = NULL; if (decimalCharLength == decimalStringLength) { - decimalSet = (UnicodeSet *) DecimalFormatStaticSets::getSimilarDecimals(decimalChar, strictParse, &decimalFallback)->cloneAsThawed(); + decimalSet = DecimalFormatStaticSets::getSimilarDecimals(decimalChar, strictParse); } if (groupingCharLength == groupingStringLength) { if (strictParse) { - groupingSet = (UnicodeSet *) DecimalFormatStaticSets::gStaticSets->fStrictDefaultGroupingSeparators->cloneAsThawed(); + groupingSet = DecimalFormatStaticSets::gStaticSets->fStrictDefaultGroupingSeparators; } else { - groupingSet = (UnicodeSet *) DecimalFormatStaticSets::gStaticSets->fDefaultGroupingSeparators->cloneAsThawed(); - } - - groupingSet->add(groupingChar); - - if (decimalSet != NULL) { - groupingSet->removeAll(*decimalSet); + groupingSet = DecimalFormatStaticSets::gStaticSets->fDefaultGroupingSeparators; } } - // we are guaranteed that - // decimalSet contains the decimal, and - // groupingSet contains the groupingSeparator - // (unless decimal and grouping are the same, which should never happen. But in that case, groupingSet will just be empty.) + // We need to test groupingChar and decimalChar separately from groupingSet and decimalSet, if the sets are even initialized. + // If sawDecimal is TRUE, only consider sawDecimalChar and NOT decimalSet + // If a character matches decimalSet, don't consider it to be a member of the groupingSet. // We have to track digitCount ourselves, because digits.fCount will // pin when the maximum allowable digits is reached. 
@@ -2059,7 +2053,10 @@ UBool DecimalFormat::subparse(const UnicodeString& text, position += U16_LENGTH(ch); } - else if (groupingStringLength > 0 && matchSymbol(text, position, groupingStringLength, *groupingString, groupingSet, ch) && isGroupingUsed()) + else if (groupingStringLength > 0 && + matchGrouping(groupingChar, sawGrouping, sawGroupingChar, groupingSet, + decimalChar, decimalSet, + ch) && isGroupingUsed()) { if (sawDecimal) { break; @@ -2078,13 +2075,11 @@ UBool DecimalFormat::subparse(const UnicodeString& text, // reprocess them. backup = position; position += groupingStringLength; - - if (groupingSet != NULL) { - // Once we see a grouping character, we only accept that grouping character from then on. - groupingSet->set(ch, ch); - } + sawGrouping=TRUE; + // Once we see a grouping character, we only accept that grouping character from then on. + sawGroupingChar=ch; } - else if (matchSymbol(text, position, decimalStringLength, *decimalString, decimalSet, ch)) + else if (matchDecimal(decimalChar,sawDecimal,sawDecimalChar, decimalSet, ch)) { if (strictParse) { if (backup != -1 || @@ -2103,11 +2098,9 @@ UBool DecimalFormat::subparse(const UnicodeString& text, parsedNum.append('.', err); position += decimalStringLength; sawDecimal = TRUE; - - if (decimalSet != NULL) { - // Once we see a decimal character, we only accept that decimal character from then on. - decimalSet->set(ch, ch); - } + // Once we see a decimal character, we only accept that decimal character from then on. 
+                sawDecimalChar=ch;
+                // decimalSet is considered to consist of (ch,ch)
             }
             else {
                 const UnicodeString *tmp;
@@ -2168,9 +2161,6 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
             }
         }
 
-        delete groupingSet;
-        delete decimalSet;
-
         if (backup != -1)
         {
             position = backup;
@@ -2642,6 +2632,38 @@ UBool DecimalFormat::matchSymbol(const UnicodeString &text, int32_t position, in
     return text.compare(position, length, symbol) == 0;
 }
 
+UBool DecimalFormat::matchDecimal(UChar32 symbolChar,  // returns TRUE if schar can serve as the decimal separator
+                                  UBool sawDecimal, UChar32 sawDecimalChar,
+                                  const UnicodeSet *sset, UChar32 schar) {
+    if(sawDecimal) {
+        return schar==sawDecimalChar;  // a decimal was already accepted: only that same character matches
+    } else if(schar==symbolChar) {
+        return TRUE;  // the character passed in as the decimal symbol always matches
+    } else if(sset!=NULL) {
+        return sset->contains(schar);  // otherwise accept any member of the supplied set
+    } else {
+        return FALSE;  // no set was supplied and schar is not the symbol
+    }
+}
+// Returns TRUE if schar can serve as a grouping separator; a character accepted only via sset is refused if it could be the decimal separator.
+UBool DecimalFormat::matchGrouping(UChar32 groupingChar,
+                                   UBool sawGrouping, UChar32 sawGroupingChar,
+                                   const UnicodeSet *sset,
+                                   UChar32 decimalChar, const UnicodeSet *decimalSet,
+                                   UChar32 schar) {
+    if(sawGrouping) {
+        return schar==sawGroupingChar;  // previously found
+    } else if(schar==groupingChar) {
+        return TRUE; // char from symbols
+    } else if(sset!=NULL) {
+        return sset->contains(schar) && schar!=decimalChar &&  // in groupingSet, but not the decimal symbol itself (decimalSet may be NULL, so test it directly)...
+           ((decimalSet==NULL) || !decimalSet->contains(schar)); // ...and not in decimalSet: exclude decimalSet from groupingSet
+    } else {
+        return FALSE;
+    }
+}
+
+
 //------------------------------------------------------------------------------
 // Gets the pointer to the localized decimal format symbols
 
diff --git a/icu4c/source/i18n/unicode/decimfmt.h b/icu4c/source/i18n/unicode/decimfmt.h
index 1cdaa7ee3ec..ab81b2ce353 100644
--- a/icu4c/source/i18n/unicode/decimfmt.h
+++ b/icu4c/source/i18n/unicode/decimfmt.h
@@ -2006,6 +2006,16 @@ private:
     static UBool matchSymbol(const UnicodeString &text, int32_t position, int32_t length,
                              const UnicodeString &symbol, UnicodeSet *sset, UChar32 schar);
 
+    static UBool matchDecimal(UChar32 symbolChar,
+                              UBool sawDecimal, UChar32 sawDecimalChar,
+                              const UnicodeSet *sset, UChar32 schar);
+
+    static UBool matchGrouping(UChar32 groupingChar,
+                               UBool sawGrouping, UChar32 sawGroupingChar,
+                               const UnicodeSet *sset,
+                               UChar32 decimalChar, const UnicodeSet *decimalSet,
+                               UChar32 schar);
+
     /**
      * Get a decimal format symbol.
      * Returns a const reference to the symbol string.
-- 
2.40.0