]> granicus.if.org Git - icu/commitdiff
ICU-13574 Improving object lifecycle of AffixPatternMatcher and helper classes. ...
authorShane Carr <shane@unicode.org>
Sat, 10 Feb 2018 10:57:30 +0000 (10:57 +0000)
committerShane Carr <shane@unicode.org>
Sat, 10 Feb 2018 10:57:30 +0000 (10:57 +0000)
X-SVN-Rev: 40892

icu4c/source/i18n/numparse_affixes.cpp
icu4c/source/i18n/numparse_affixes.h
icu4c/source/test/intltest/numbertest_parse.cpp
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java

index 84a7f751633ec4c82fa9bad344027c57233853fd..2f317041905f9582d1f97c8209d65e38ff9034f0 100644 (file)
@@ -18,14 +18,12 @@ using namespace icu::number::impl;
 
 
 AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern,
-                                                       AffixTokenMatcherFactory& factory,
+                                                       AffixTokenMatcherWarehouse& warehouse,
                                                        IgnorablesMatcher* ignorables)
         : fMatchersLen(0),
           fLastTypeOrCp(0),
-          fCodePointMatchers(new CodePointMatcher[100]),
-          fCodePointMatchersLen(0),
           fPattern(pattern),
-          fFactory(factory),
+          fWarehouse(warehouse),
           fIgnorables(ignorables) {}
 
 void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) {
@@ -42,16 +40,16 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
         // Case 1: the token is a symbol.
         switch (type) {
             case TYPE_MINUS_SIGN:
-                addMatcher(fFactory.minusSign = {fFactory.dfs, true});
+                addMatcher(fWarehouse.minusSign = {fWarehouse.dfs, true});
                 break;
             case TYPE_PLUS_SIGN:
-                addMatcher(fFactory.plusSign = {fFactory.dfs, true});
+                addMatcher(fWarehouse.plusSign = {fWarehouse.dfs, true});
                 break;
             case TYPE_PERCENT:
-                addMatcher(fFactory.percent = {fFactory.dfs});
+                addMatcher(fWarehouse.percent = {fWarehouse.dfs});
                 break;
             case TYPE_PERMILLE:
-                addMatcher(fFactory.permille = {fFactory.dfs});
+                addMatcher(fWarehouse.permille = {fWarehouse.dfs});
                 break;
             case TYPE_CURRENCY_SINGLE:
             case TYPE_CURRENCY_DOUBLE:
@@ -60,10 +58,12 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
             case TYPE_CURRENCY_QUINT:
                 // All currency symbols use the same matcher
                 addMatcher(
-                        fFactory.currency = {
+                        fWarehouse.currency = {
                                 CurrencyNamesMatcher(
-                                        fFactory.locale, status), CurrencyCustomMatcher(
-                                        fFactory.currencyCode, fFactory.currency1, fFactory.currency2)});
+                                        fWarehouse.locale, status), CurrencyCustomMatcher(
+                                        fWarehouse.currencyCode,
+                                        fWarehouse.currency1,
+                                        fWarehouse.currency2)});
                 break;
             default:
                 U_ASSERT(FALSE);
@@ -75,10 +75,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
 
     } else {
         // Case 3: the token is a non-ignorable literal.
-        // TODO: This is really clunky. Just trying to get something that works.
-        fCodePointMatchers[fCodePointMatchersLen] = {cp};
-        addMatcher(fCodePointMatchers[fCodePointMatchersLen]);
-        fCodePointMatchersLen++;
+        addMatcher(fWarehouse.nextCodePointMatcher(cp));
     }
     fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp;
 }
@@ -91,19 +88,50 @@ void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) {
 }
 
 AffixPatternMatcher AffixPatternMatcherBuilder::build() {
-    return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, fCodePointMatchers.orphan());
+    return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern);
 }
 
 
-AffixTokenMatcherFactory::AffixTokenMatcherFactory(const UChar* currencyCode,
-                                                   const UnicodeString& currency1,
-                                                   const UnicodeString& currency2,
-                                                   const DecimalFormatSymbols& dfs,
-                                                   IgnorablesMatcher* ignorables, const Locale& locale)
-        : currency1(currency1), currency2(currency2), dfs(dfs), ignorables(ignorables), locale(locale) {
+AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
+                                                       const UnicodeString& currency1,
+                                                       const UnicodeString& currency2,
+                                                       const DecimalFormatSymbols& dfs,
+                                                       IgnorablesMatcher* ignorables, const Locale& locale)
+        : currency1(currency1),
+          currency2(currency2),
+          dfs(dfs),
+          ignorables(ignorables),
+          locale(locale),
+          codePointCount(0),
+          codePointNumBatches(0) {
     utils::copyCurrencyCode(this->currencyCode, currencyCode);
 }
 
+AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
+    // Delete the variable number of batches of code point matchers
+    for (int32_t i=0; i<codePointNumBatches; i++) {
+        delete[] codePointsOverflow[i];
+    }
+}
+
+CodePointMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
+    if (codePointCount < CODE_POINT_STACK_CAPACITY) {
+        return codePoints[codePointCount++] = {cp};
+    }
+    int32_t totalCapacity = CODE_POINT_STACK_CAPACITY + codePointNumBatches * CODE_POINT_BATCH_SIZE;
+    if (codePointCount >= totalCapacity) {
+        // Need a new batch
+        auto* nextBatch = new CodePointMatcher[CODE_POINT_BATCH_SIZE];
+        if (codePointNumBatches >= codePointsOverflow.getCapacity()) {
+            // Need more room for storing pointers to batches
+            codePointsOverflow.resize(codePointNumBatches * 2, codePointNumBatches);
+        }
+        codePointsOverflow[codePointNumBatches++] = nextBatch;
+    }
+    return codePointsOverflow[codePointNumBatches - 1][(codePointCount++ - CODE_POINT_STACK_CAPACITY) %
+                                                        CODE_POINT_BATCH_SIZE] = {cp};
+}
+
 
 CodePointMatcher::CodePointMatcher(UChar32 cp)
         : fCp(cp) {}
@@ -127,9 +155,10 @@ const UnicodeSet& CodePointMatcher::getLeadCodePoints() {
 }
 
 
-AffixPatternMatcher
-AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherFactory& factory,
-                                      parse_flags_t parseFlags, bool* success, UErrorCode& status) {
+AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern,
+                                                          AffixTokenMatcherWarehouse& warehouse,
+                                                          parse_flags_t parseFlags, bool* success,
+                                                          UErrorCode& status) {
     if (affixPattern.isEmpty()) {
         *success = false;
         return {};
@@ -140,19 +169,17 @@ AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTo
     if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
         ignorables = nullptr;
     } else {
-        ignorables = factory.ignorables;
+        ignorables = warehouse.ignorables;
     }
 
-    AffixPatternMatcherBuilder builder(affixPattern, factory, ignorables);
+    AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables);
     AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
     return builder.build();
 }
 
 AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
-                                         const UnicodeString& pattern, CodePointMatcher* codePointMatchers)
-        : ArraySeriesMatcher(matchers, matchersLen),
-          fPattern(pattern),
-          fCodePointMatchers(codePointMatchers) {
+                                         const UnicodeString& pattern)
+        : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {
 }
 
 
index 460034e3fa84665a57f82fb5c97c8fa1ed0a44b9..77a5aa18daa56f0b9bb75fc8bb21e13d71aa03ee 100644 (file)
@@ -19,11 +19,45 @@ namespace impl {
 class AffixPatternMatcherBuilder;
 class AffixPatternMatcher;
 
-class AffixTokenMatcherFactory {
+
+class CodePointMatcher : public NumberParseMatcher, public UMemory {
   public:
-    AffixTokenMatcherFactory(const UChar* currencyCode, const UnicodeString& currency1,
-                             const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
-                             IgnorablesMatcher* ignorables, const Locale& locale);
+    CodePointMatcher() = default;  // WARNING: Leaves the object in an unusable state
+
+    CodePointMatcher(UChar32 cp);
+
+    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+    const UnicodeSet& getLeadCodePoints() override;
+
+  private:
+    UChar32 fCp;
+};
+
+
+/**
+ * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
+ *
+ * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
+ * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
+ * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
+ * the warehouse.
+ *
+ * @author sffc
+ */
+class AffixTokenMatcherWarehouse {
+  private:
+    static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack
+    static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
+
+  public:
+    AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1,
+                               const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
+                               IgnorablesMatcher* ignorables, const Locale& locale);
+
+    ~AffixTokenMatcherWarehouse();
+
+    CodePointMatcher& nextCodePointMatcher(UChar32 cp);
 
   private:
     UChar currencyCode[4];
@@ -40,29 +74,19 @@ class AffixTokenMatcherFactory {
     PermilleMatcher permille;
     CurrencyAnyMatcher currency;
 
+    CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value
+    MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
+    int32_t codePointCount; // Total for both the ones by value and on heap
+    int32_t codePointNumBatches; // Number of batches in codePointsOverflow
+
     friend class AffixPatternMatcherBuilder;
     friend class AffixPatternMatcher;
 };
 
 
-class CodePointMatcher : public NumberParseMatcher, public UMemory {
-  public:
-    CodePointMatcher() = default;  // WARNING: Leaves the object in an unusable state
-
-    CodePointMatcher(UChar32 cp);
-
-    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
-    const UnicodeSet& getLeadCodePoints() override;
-
-  private:
-    UChar32 fCp;
-};
-
-
 class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
   public:
-    AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherFactory& factory,
+    AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
                                IgnorablesMatcher* ignorables);
 
     void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
@@ -75,11 +99,8 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
     int32_t fMatchersLen;
     int32_t fLastTypeOrCp;
 
-    LocalArray<CodePointMatcher> fCodePointMatchers;
-    int32_t fCodePointMatchersLen;
-
     const UnicodeString& fPattern;
-    AffixTokenMatcherFactory& fFactory;
+    AffixTokenMatcherWarehouse& fWarehouse;
     IgnorablesMatcher* fIgnorables;
 
     void addMatcher(NumberParseMatcher& matcher);
@@ -89,20 +110,16 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
 class AffixPatternMatcher : public ArraySeriesMatcher {
   public:
     static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
-                                                AffixTokenMatcherFactory& factory,
+                                                AffixTokenMatcherWarehouse& warehouse,
                                                 parse_flags_t parseFlags, bool* success,
                                                 UErrorCode& status);
 
   private:
     UnicodeString fPattern;
 
-    // We need to own the variable number of CodePointMatchers.
-    LocalArray<CodePointMatcher> fCodePointMatchers;
-
     AffixPatternMatcher() = default;  // WARNING: Leaves the object in an unusable state
 
-    AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
-                        CodePointMatcher* codePointMatchers);
+    AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
 
     friend class AffixPatternMatcherBuilder;
 };
index 776223044bdfd16b04496572ac7da986f3a7c49f..78492aa391e3729ffdbf37f1bb1c9d27254ff381 100644 (file)
@@ -215,7 +215,7 @@ void NumberParserTest::testAffixPatternMatcher() {
     IcuTestErrorCode status(*this, "testAffixPatternMatcher");
 
     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
-    AffixTokenMatcherFactory factory(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
+    AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
 
     static const struct TestCase {
         bool exactMatch;
@@ -237,7 +237,7 @@ void NumberParserTest::testAffixPatternMatcher() {
 
         bool success;
         AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
-                affixPattern, factory, parseFlags, &success, status);
+                affixPattern, warehouse, parseFlags, &success, status);
         assertTrue("Creation should be successful", success);
 
         // Check that the matcher has the expected number of children
index 142b29faefd0405efbc842dce9072aca97b7b696..4c051572aa12523ebb1ded4c2252fde2704ac8dc 100644 (file)
@@ -7,7 +7,7 @@ import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.ULocale;
 
 /**
- * Small helper class that generates matchers for SeriesMatcher.
+ * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
  *
  * @author sffc
  */