From: Shane Carr Date: Sat, 10 Feb 2018 10:57:30 +0000 (+0000) Subject: ICU-13574 Improving object lifecycle of AffixPatternMatcher and helper classes. ... X-Git-Tag: release-62-rc~200^2~124 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=59587ad9dbc2ea554566152d67e9e0db94d7a741;p=icu ICU-13574 Improving object lifecycle of AffixPatternMatcher and helper classes. Should be safe now. X-SVN-Rev: 40892 --- diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 84a7f751633..2f317041905 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -18,14 +18,12 @@ using namespace icu::number::impl; AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern, - AffixTokenMatcherFactory& factory, + AffixTokenMatcherWarehouse& warehouse, IgnorablesMatcher* ignorables) : fMatchersLen(0), fLastTypeOrCp(0), - fCodePointMatchers(new CodePointMatcher[100]), - fCodePointMatchersLen(0), fPattern(pattern), - fFactory(factory), + fWarehouse(warehouse), fIgnorables(ignorables) {} void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) { @@ -42,16 +40,16 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, // Case 1: the token is a symbol. switch (type) { case TYPE_MINUS_SIGN: - addMatcher(fFactory.minusSign = {fFactory.dfs, true}); + addMatcher(fWarehouse.minusSign = {fWarehouse.dfs, true}); break; case TYPE_PLUS_SIGN: - addMatcher(fFactory.plusSign = {fFactory.dfs, true}); + addMatcher(fWarehouse.plusSign = {fWarehouse.dfs, true}); break; case TYPE_PERCENT: - addMatcher(fFactory.percent = {fFactory.dfs}); + addMatcher(fWarehouse.percent = {fWarehouse.dfs}); break; case TYPE_PERMILLE: - addMatcher(fFactory.permille = {fFactory.dfs}); + addMatcher(fWarehouse.permille = {fWarehouse.dfs}); break; case TYPE_CURRENCY_SINGLE: case TYPE_CURRENCY_DOUBLE: @@ -60,10 +58,12 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, case TYPE_CURRENCY_QUINT: // All currency symbols use the same matcher addMatcher( - fFactory.currency = { + fWarehouse.currency = { CurrencyNamesMatcher( - fFactory.locale, status), CurrencyCustomMatcher( - fFactory.currencyCode, fFactory.currency1, fFactory.currency2)}); + fWarehouse.locale, status), CurrencyCustomMatcher( + fWarehouse.currencyCode, + fWarehouse.currency1, + fWarehouse.currency2)}); break; default: U_ASSERT(FALSE); @@ -75,10 +75,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, } else { // Case 3: the token is a non-ignorable literal. - // TODO: This is really clunky. Just trying to get something that works. - fCodePointMatchers[fCodePointMatchersLen] = {cp}; - addMatcher(fCodePointMatchers[fCodePointMatchersLen]); - fCodePointMatchersLen++; + addMatcher(fWarehouse.nextCodePointMatcher(cp)); } fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp; } @@ -91,19 +88,50 @@ void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) { } AffixPatternMatcher AffixPatternMatcherBuilder::build() { - return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, fCodePointMatchers.orphan()); + return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern); } -AffixTokenMatcherFactory::AffixTokenMatcherFactory(const UChar* currencyCode, - const UnicodeString& currency1, - const UnicodeString& currency2, - const DecimalFormatSymbols& dfs, - IgnorablesMatcher* ignorables, const Locale& locale) - : currency1(currency1), currency2(currency2), dfs(dfs), ignorables(ignorables), locale(locale) { +AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode, + const UnicodeString& currency1, + const UnicodeString& currency2, + const DecimalFormatSymbols& dfs, + IgnorablesMatcher* ignorables, const Locale& locale) + : currency1(currency1), + currency2(currency2), + dfs(dfs), + ignorables(ignorables), + locale(locale), + codePointCount(0), + codePointNumBatches(0) { utils::copyCurrencyCode(this->currencyCode, currencyCode); } +AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() { + // Delete the variable number of batches of code point matchers + for (int32_t i=0; i= totalCapacity) { + // Need a new batch + auto* nextBatch = new CodePointMatcher[CODE_POINT_BATCH_SIZE]; + if (codePointNumBatches >= codePointsOverflow.getCapacity()) { + // Need more room for storing pointers to batches + codePointsOverflow.resize(codePointNumBatches * 2, codePointNumBatches); + } + codePointsOverflow[codePointNumBatches++] = nextBatch; + } + return codePointsOverflow[codePointNumBatches - 1][(codePointCount++ - CODE_POINT_STACK_CAPACITY) % + CODE_POINT_BATCH_SIZE] = {cp}; +} + CodePointMatcher::CodePointMatcher(UChar32 cp) : fCp(cp) {} @@ -127,9 +155,10 @@ const UnicodeSet& CodePointMatcher::getLeadCodePoints() { } -AffixPatternMatcher -AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherFactory& factory, - parse_flags_t parseFlags, bool* success, UErrorCode& status) { +AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, + AffixTokenMatcherWarehouse& warehouse, + parse_flags_t parseFlags, bool* success, + UErrorCode& status) { if (affixPattern.isEmpty()) { *success = false; return {}; @@ -140,19 +169,17 @@ AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTo if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) { ignorables = nullptr; } else { - ignorables = factory.ignorables; + ignorables = warehouse.ignorables; } - AffixPatternMatcherBuilder builder(affixPattern, factory, ignorables); + AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables); AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status); return builder.build(); } AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, - const UnicodeString& pattern, CodePointMatcher* codePointMatchers) - : ArraySeriesMatcher(matchers, matchersLen), - fPattern(pattern), - fCodePointMatchers(codePointMatchers) { + const UnicodeString& pattern) + : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) { } diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h index 460034e3fa8..77a5aa18daa 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -19,11 +19,45 @@ namespace impl { class AffixPatternMatcherBuilder; class AffixPatternMatcher; -class AffixTokenMatcherFactory { + +class CodePointMatcher : public NumberParseMatcher, public UMemory { public: - AffixTokenMatcherFactory(const UChar* currencyCode, const UnicodeString& currency1, - const UnicodeString& currency2, const DecimalFormatSymbols& dfs, - IgnorablesMatcher* ignorables, const Locale& locale); + CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state + + CodePointMatcher(UChar32 cp); + + bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; + + const UnicodeSet& getLeadCodePoints() override; + + private: + UChar32 fCp; +}; + + +/** + * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. + * + * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a + * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The + * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from + * the warehouse. + * + * @author sffc + */ +class AffixTokenMatcherWarehouse { + private: + static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack + static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation + + public: + AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1, + const UnicodeString& currency2, const DecimalFormatSymbols& dfs, + IgnorablesMatcher* ignorables, const Locale& locale); + + ~AffixTokenMatcherWarehouse(); + + CodePointMatcher& nextCodePointMatcher(UChar32 cp); private: UChar currencyCode[4]; @@ -40,29 +74,19 @@ class AffixTokenMatcherFactory { PermilleMatcher permille; CurrencyAnyMatcher currency; + CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value + MaybeStackArray codePointsOverflow; // On heap in "batches" + int32_t codePointCount; // Total for both the ones by value and on heap + int32_t codePointNumBatches; // Number of batches in codePointsOverflow + friend class AffixPatternMatcherBuilder; friend class AffixPatternMatcher; }; -class CodePointMatcher : public NumberParseMatcher, public UMemory { - public: - CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state - - CodePointMatcher(UChar32 cp); - - bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; - - const UnicodeSet& getLeadCodePoints() override; - - private: - UChar32 fCp; -}; - - class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { public: - AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherFactory& factory, + AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, IgnorablesMatcher* ignorables); void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; @@ -75,11 +99,8 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { int32_t fMatchersLen; int32_t fLastTypeOrCp; - LocalArray fCodePointMatchers; - int32_t fCodePointMatchersLen; - const UnicodeString& fPattern; - AffixTokenMatcherFactory& fFactory; + AffixTokenMatcherWarehouse& fWarehouse; IgnorablesMatcher* fIgnorables; void addMatcher(NumberParseMatcher& matcher); @@ -89,20 +110,16 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { class AffixPatternMatcher : public ArraySeriesMatcher { public: static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, - AffixTokenMatcherFactory& factory, + AffixTokenMatcherWarehouse& warehouse, parse_flags_t parseFlags, bool* success, UErrorCode& status); private: UnicodeString fPattern; - // We need to own the variable number of CodePointMatchers. - LocalArray fCodePointMatchers; - AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state - AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, - CodePointMatcher* codePointMatchers); + AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); friend class AffixPatternMatcherBuilder; }; diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index 776223044bd..78492aa391e 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -215,7 +215,7 @@ void NumberParserTest::testAffixPatternMatcher() { IcuTestErrorCode status(*this, "testAffixPatternMatcher"); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); - AffixTokenMatcherFactory factory(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en"); + AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en"); static const struct TestCase { bool exactMatch; @@ -237,7 +237,7 @@ void NumberParserTest::testAffixPatternMatcher() { bool success; AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern( - affixPattern, factory, parseFlags, &success, status); + affixPattern, warehouse, parseFlags, &success, status); assertTrue("Creation should be successful", success); // Check that the matcher has the expected number of children diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java index 142b29faefd..4c051572aa1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java @@ -7,7 +7,7 @@ import com.ibm.icu.util.Currency; import com.ibm.icu.util.ULocale; /** - * Small helper class that generates matchers for SeriesMatcher. + * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. * * @author sffc */