// Constructs a builder for the given affix pattern.
// The pattern and the warehouse are kept by reference, so both must outlive the builder.
// The warehouse supplies the token matchers that consumeToken() adds; the builder itself
// no longer allocates an array of CodePointMatcher objects.
// Starts with no matchers and with fLastTypeOrCp set to 0.
AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern,
- AffixTokenMatcherFactory& factory,
+ AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables)
: fMatchersLen(0),
fLastTypeOrCp(0),
- fCodePointMatchers(new CodePointMatcher[100]),
- fCodePointMatchersLen(0),
fPattern(pattern),
- fFactory(factory),
+ fWarehouse(warehouse),
fIgnorables(ignorables) {}
// Handles one token of the affix pattern and adds the corresponding matcher.
// - Symbol tokens (minus, plus, percent, permille, currency) re-initialize the matching
//   member of the warehouse and add that member as the matcher.
// - Non-ignorable literal code points get a CodePointMatcher obtained from the warehouse.
// Afterwards fLastTypeOrCp records the token type, or the code point for TYPE_CODEPOINT tokens.
// NOTE(review): this excerpt elides part of the body (the condition that guards the switch
// and the "Case 2" branch are not visible here).
void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) {
// Case 1: the token is a symbol.
switch (type) {
case TYPE_MINUS_SIGN:
- addMatcher(fFactory.minusSign = {fFactory.dfs, true});
+ addMatcher(fWarehouse.minusSign = {fWarehouse.dfs, true});
break;
case TYPE_PLUS_SIGN:
- addMatcher(fFactory.plusSign = {fFactory.dfs, true});
+ addMatcher(fWarehouse.plusSign = {fWarehouse.dfs, true});
break;
case TYPE_PERCENT:
- addMatcher(fFactory.percent = {fFactory.dfs});
+ addMatcher(fWarehouse.percent = {fWarehouse.dfs});
break;
case TYPE_PERMILLE:
- addMatcher(fFactory.permille = {fFactory.dfs});
+ addMatcher(fWarehouse.permille = {fWarehouse.dfs});
break;
case TYPE_CURRENCY_SINGLE:
case TYPE_CURRENCY_DOUBLE:
case TYPE_CURRENCY_QUINT:
// All currency symbols use the same matcher
addMatcher(
- fFactory.currency = {
+ fWarehouse.currency = {
CurrencyNamesMatcher(
- fFactory.locale, status), CurrencyCustomMatcher(
- fFactory.currencyCode, fFactory.currency1, fFactory.currency2)});
+ fWarehouse.locale, status), CurrencyCustomMatcher(
+ fWarehouse.currencyCode,
+ fWarehouse.currency1,
+ fWarehouse.currency2)});
break;
default:
// Any other token type reaching this switch is treated as a programming error.
U_ASSERT(FALSE);
} else {
// Case 3: the token is a non-ignorable literal.
- // TODO: This is really clunky. Just trying to get something that works.
- fCodePointMatchers[fCodePointMatchersLen] = {cp};
- addMatcher(fCodePointMatchers[fCodePointMatchersLen]);
- fCodePointMatchersLen++;
// The warehouse stores the CodePointMatcher; only a reference to it is added here.
+ addMatcher(fWarehouse.nextCodePointMatcher(cp));
}
// Remember this token: its type for symbols, its code point for literals.
fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp;
}
}
// Creates the AffixPatternMatcher from the matchers collected so far and the pattern.
// The result no longer receives ownership of a CodePointMatcher array; code point matchers
// are obtained from the warehouse in consumeToken().
AffixPatternMatcher AffixPatternMatcherBuilder::build() {
- return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, fCodePointMatchers.orphan());
+ return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern);
}
// Initializes the currency strings, decimal format symbols, ignorables matcher and locale
// members from the arguments, and copies the currency code with utils::copyCurrencyCode.
// The warehouse starts with no code point matchers and no overflow batches.
-AffixTokenMatcherFactory::AffixTokenMatcherFactory(const UChar* currencyCode,
- const UnicodeString& currency1,
- const UnicodeString& currency2,
- const DecimalFormatSymbols& dfs,
- IgnorablesMatcher* ignorables, const Locale& locale)
- : currency1(currency1), currency2(currency2), dfs(dfs), ignorables(ignorables), locale(locale) {
+AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
+ const UnicodeString& currency1,
+ const UnicodeString& currency2,
+ const DecimalFormatSymbols& dfs,
+ IgnorablesMatcher* ignorables, const Locale& locale)
+ : currency1(currency1),
+ currency2(currency2),
+ dfs(dfs),
+ ignorables(ignorables),
+ locale(locale),
+ codePointCount(0),
+ codePointNumBatches(0) {
utils::copyCurrencyCode(this->currencyCode, currencyCode);
}
// Frees every heap batch that nextCodePointMatcher() allocated with new[].
// The first CODE_POINT_STACK_CAPACITY matchers are stored by value in the codePoints member
// and are not touched here.
+AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
+ // Delete the variable number of batches of code point matchers
+ for (int32_t i=0; i<codePointNumBatches; i++) {
+ delete[] codePointsOverflow[i];
+ }
+}
+
// Returns a reference to a CodePointMatcher for cp that is stored inside the warehouse.
// The first CODE_POINT_STACK_CAPACITY matchers go into the codePoints member array. Later
// ones go into heap batches of CODE_POINT_BATCH_SIZE entries, whose pointers are kept in
// codePointsOverflow and which are freed by the destructor.
// Existing batches are never reallocated here; only the array of batch pointers is resized.
// NOTE(review): neither the new[] result nor the resize() result is checked for failure, and
// there is no UErrorCode parameter to report it — confirm how out-of-memory should be handled.
+CodePointMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
+ if (codePointCount < CODE_POINT_STACK_CAPACITY) {
+ return codePoints[codePointCount++] = {cp};
+ }
// Slots available so far: the in-object array plus every batch already allocated.
+ int32_t totalCapacity = CODE_POINT_STACK_CAPACITY + codePointNumBatches * CODE_POINT_BATCH_SIZE;
+ if (codePointCount >= totalCapacity) {
+ // Need a new batch
+ auto* nextBatch = new CodePointMatcher[CODE_POINT_BATCH_SIZE];
+ if (codePointNumBatches >= codePointsOverflow.getCapacity()) {
+ // Need more room for storing pointers to batches
+ codePointsOverflow.resize(codePointNumBatches * 2, codePointNumBatches);
+ }
+ codePointsOverflow[codePointNumBatches++] = nextBatch;
+ }
// Slot within the newest batch: position past the in-object entries, modulo the batch size.
+ return codePointsOverflow[codePointNumBatches - 1][(codePointCount++ - CODE_POINT_STACK_CAPACITY) %
+ CODE_POINT_BATCH_SIZE] = {cp};
+}
+
// Stores cp as the code point of this matcher.
CodePointMatcher::CodePointMatcher(UChar32 cp)
: fCp(cp) {}
}
// Builds an AffixPatternMatcher for affixPattern, taking token matchers from the warehouse.
// An empty pattern sets *success to false and returns a default-constructed matcher.
// When PARSE_FLAG_EXACT_AFFIX is set no ignorables matcher is used; otherwise the
// warehouse's ignorables matcher is passed to the builder.
// NOTE(review): this excerpt elides part of the body (for example the declaration of
// `ignorables` and the end of the empty-pattern branch).
-AffixPatternMatcher
-AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherFactory& factory,
- parse_flags_t parseFlags, bool* success, UErrorCode& status) {
+AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern,
+ AffixTokenMatcherWarehouse& warehouse,
+ parse_flags_t parseFlags, bool* success,
+ UErrorCode& status) {
if (affixPattern.isEmpty()) {
*success = false;
return {};
if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) {
ignorables = nullptr;
} else {
- ignorables = factory.ignorables;
+ ignorables = warehouse.ignorables;
}
- AffixPatternMatcherBuilder builder(affixPattern, factory, ignorables);
+ AffixPatternMatcherBuilder builder(affixPattern, warehouse, ignorables);
// The builder receives each token of the pattern through consumeToken().
AffixUtils::iterateWithConsumer(UnicodeStringCharSequence(affixPattern), builder, status);
return builder.build();
}
// Passes the matcher array and its length to ArraySeriesMatcher and keeps a copy of the
// pattern. It no longer takes ownership of a CodePointMatcher array.
AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
- const UnicodeString& pattern, CodePointMatcher* codePointMatchers)
- : ArraySeriesMatcher(matchers, matchersLen),
- fPattern(pattern),
- fCodePointMatchers(codePointMatchers) {
+ const UnicodeString& pattern)
+ : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {
}
class AffixPatternMatcherBuilder;
class AffixPatternMatcher;
-class AffixTokenMatcherFactory {
+
// NumberParseMatcher holding a single code point. The affix pattern builder adds one of
// these for each non-ignorable literal code point in the pattern.
// The declaration now precedes AffixTokenMatcherWarehouse, which stores instances by value.
+class CodePointMatcher : public NumberParseMatcher, public UMemory {
public:
- AffixTokenMatcherFactory(const UChar* currencyCode, const UnicodeString& currency1,
- const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
- IgnorablesMatcher* ignorables, const Locale& locale);
+ CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state
+
+ CodePointMatcher(UChar32 cp);
+
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+ const UnicodeSet& getLeadCodePoints() override;
+
+ private:
// Code point given to the constructor; the default constructor does not set it.
+ UChar32 fCp;
+};
+
+
+/**
+ * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
+ *
+ * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
+ * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
+ * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
+ * the warehouse.
+ *
+ * @author sffc
+ */
+class AffixTokenMatcherWarehouse {
+ private:
+ static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries stored by value in the warehouse
+ static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
+
+ public:
+ AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1,
+ const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
+ IgnorablesMatcher* ignorables, const Locale& locale);
+
// Frees the heap batches of code point matchers.
+ ~AffixTokenMatcherWarehouse();
+
// Returns a CodePointMatcher for cp that is stored in, and owned by, this warehouse.
+ CodePointMatcher& nextCodePointMatcher(UChar32 cp);
private:
// NOTE(review): some member declarations are elided in this excerpt.
UChar currencyCode[4];
PermilleMatcher permille;
CurrencyAnyMatcher currency;
+ CodePointMatcher codePoints[CODE_POINT_STACK_CAPACITY]; // By value
+ MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches"
+ int32_t codePointCount; // Total for both the ones by value and on heap
+ int32_t codePointNumBatches; // Number of batches in codePointsOverflow
+
friend class AffixPatternMatcherBuilder;
friend class AffixPatternMatcher;
};
-class CodePointMatcher : public NumberParseMatcher, public UMemory {
- public:
- CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state
-
- CodePointMatcher(UChar32 cp);
-
- bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
-
- const UnicodeSet& getLeadCodePoints() override;
-
- private:
- UChar32 fCp;
-};
-
-
// TokenConsumer that receives affix pattern tokens and collects the matchers for them.
// NOTE(review): parts of this class declaration are elided in this excerpt.
class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
public:
- AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherFactory& factory,
+ AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables);
void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
// Length passed to AffixPatternMatcher along with fMatchers in build(); starts at 0.
int32_t fMatchersLen;
// Type of the last token consumed, or its code point for TYPE_CODEPOINT tokens.
int32_t fLastTypeOrCp;
- LocalArray<CodePointMatcher> fCodePointMatchers;
- int32_t fCodePointMatchersLen;
-
// Held by reference: the pattern and the warehouse must outlive the builder.
const UnicodeString& fPattern;
- AffixTokenMatcherFactory& fFactory;
+ AffixTokenMatcherWarehouse& fWarehouse;
// nullptr when fromAffixPattern() is called with PARSE_FLAG_EXACT_AFFIX.
IgnorablesMatcher* fIgnorables;
void addMatcher(NumberParseMatcher& matcher);
// ArraySeriesMatcher built from an affix pattern via fromAffixPattern().
// The token matchers it uses are owned by the AffixTokenMatcherWarehouse passed to
// fromAffixPattern(), so that warehouse must outlive this object.
// NOTE(review): parts of this class declaration are elided in this excerpt.
class AffixPatternMatcher : public ArraySeriesMatcher {
public:
static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
- AffixTokenMatcherFactory& factory,
+ AffixTokenMatcherWarehouse& warehouse,
parse_flags_t parseFlags, bool* success,
UErrorCode& status);
private:
// Copy of the affix pattern given to the constructor.
UnicodeString fPattern;
- // We need to own the variable number of CodePointMatchers.
- LocalArray<CodePointMatcher> fCodePointMatchers;
-
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
- AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern,
- CodePointMatcher* codePointMatchers);
+ AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
friend class AffixPatternMatcherBuilder;
};