number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o \
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
-numparse_impl.o numparse_symbols.o numparse_decimal.o
+numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o
## Header files to install
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
-// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+ parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
// parser.addMatcher(new RequireNumberMatcher());
#include "numparse_types.h"
#include "numparse_decimal.h"
#include "numparse_symbols.h"
+#include "numparse_scientific.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace numparse {
PermilleMatcher permille;
PlusSignMatcher plusSign;
DecimalMatcher decimal;
+ ScientificMatcher scientific;
} fLocalMatchers;
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_scientific.h"
+#include "numparse_unisets.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+/**
+ * Builds a matcher for the exponent part of a number.
+ *
+ * @param dfs     Symbols to read the exponent separator from
+ *                (DecimalFormatSymbols::kExponentialSymbol); also forwarded to the
+ *                nested digit matcher.
+ * @param grouper Grouping settings forwarded to the nested digit matcher.
+ */
+ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
+        // The separator string is copied out of the symbols object.
+        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
+          // The exponent digits are matched with the integer-only parse flag.
+          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY) {}
+
+/**
+ * Tries to match an exponent (separator, optional sign, digits) at the start of the segment.
+ *
+ * The segment offset is advanced only when at least one exponent digit was matched; on every
+ * other path the offset is left exactly where it was on entry, so that a dangling separator
+ * or a dangling "separator + sign" is never reported as consumed.
+ *
+ * @param segment The remaining input; its offset is advanced past the exponent on success.
+ * @param result  The number parsed so far; FLAG_HAS_EXPONENT is set when exponent digits match.
+ *                The exponent digits themselves are applied by the nested digit matcher.
+ * @param status  Error code passed through to the nested digit matcher.
+ * @return false when the matcher does not apply or the separator does not match; true when the
+ *         input ends on a full or partial separator; otherwise whatever the nested digit
+ *         matcher returns.
+ */
+bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    // Only accept scientific notation after the mantissa.
+    // Most places use result.hasNumber(), but we need a stronger condition here (i.e., exponent is
+    // not well-defined after NaN or infinity).
+    if (result.quantity.bogus) {
+        return false;
+    }
+
+    // Remember the entry position so that every "no exponent digits" path can restore it exactly,
+    // regardless of how many code units (separator and optional sign) were consumed on the way.
+    const int32_t initialOffset = segment.getOffset();
+
+    // First match the scientific separator, and then match another number after it.
+    const int32_t overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
+    if (overlap1 == fExponentSeparatorString.length()) {
+        // Full exponent separator match.
+
+        // If nothing follows the separator, report it like a partial match, but do not consume the
+        // separator: no exponent has actually been parsed yet.
+        if (segment.length() == overlap1) {
+            return true;
+        }
+        segment.adjustOffset(overlap1);
+
+        // Allow a sign, and then try to match digits.
+        int8_t exponentSign = 1;
+        if (segment.matches(*unisets::get(unisets::MINUS_SIGN))) {
+            exponentSign = -1;
+            segment.adjustOffsetByCodePoint();
+        } else if (segment.matches(*unisets::get(unisets::PLUS_SIGN))) {
+            segment.adjustOffsetByCodePoint();
+        }
+
+        const int32_t digitsOffset = segment.getOffset();
+        const bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
+        if (segment.getOffset() != digitsOffset) {
+            // At least one exponent digit was matched.
+            result.flags |= FLAG_HAS_EXPONENT;
+        } else {
+            // No exponent digits were matched; un-match the exponent separator AND the sign, if
+            // one was consumed. Rewinding by the separator length alone would leave the offset
+            // past the separator whenever a sign had been skipped.
+            segment.adjustOffset(initialOffset - segment.getOffset());
+        }
+        return digitsReturnValue;
+
+    } else if (overlap1 == segment.length()) {
+        // Partial exponent separator match
+        return true;
+    }
+
+    // No match
+    return false;
+}
+
+/**
+ * Returns a newly allocated set of code points that can begin an exponent.
+ *
+ * If the shared SCIENTIFIC_LEAD set contains the first code point of the exponent separator, a
+ * copy of that shared set is returned; otherwise a frozen set holding just that code point is
+ * returned.
+ * NOTE(review): the set is allocated with `new` and not freed here — presumably the caller takes
+ * ownership; confirm against the call site.
+ */
+const UnicodeSet* ScientificMatcher::getLeadCodePoints() const {
+    const UChar32 firstCodePoint = fExponentSeparatorString.char32At(0);
+    const UnicodeSet* sharedLeads = unisets::get(unisets::SCIENTIFIC_LEAD);
+
+    if (!sharedLeads->contains(firstCodePoint)) {
+        // The separator starts with a code point the shared set does not know about:
+        // build a dedicated one-element set for it.
+        UnicodeSet* singleLead = new UnicodeSet();
+        singleLead->add(firstCodePoint);
+        singleLead->freeze();
+        return singleLead;
+    }
+
+    // Common case: hand back a copy of the shared set.
+    return new UnicodeSet(*sharedLeads);
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_SCIENTIFIC_H__
+#define __NUMPARSE_SCIENTIFIC_H__
+
+#include "numparse_types.h"
+#include "numparse_decimal.h"
+#include "unicode/numberformatter.h"
+
+using icu::number::impl::Grouper;
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+
+
+/**
+ * Matcher for the exponent part of a number in scientific notation: an exponent separator
+ * string, an optional sign, and exponent digits.
+ */
+class ScientificMatcher : public NumberParseMatcher, public UMemory {
+  public:
+    /** Default constructor. WARNING: Leaves the object in an unusable state. */
+    ScientificMatcher() = default;
+
+    /**
+     * @param dfs     Symbols supplying the exponent separator string.
+     * @param grouper Grouping settings handed to the nested digit matcher.
+     */
+    ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper);
+
+    /** Tries to consume an exponent from the front of the segment. */
+    bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+    /** Returns the set of code points that may begin a match of this matcher. */
+    const UnicodeSet* getLeadCodePoints() const override;
+
+  private:
+    /** The exponent separator, taken from the symbols given at construction. */
+    UnicodeString fExponentSeparatorString;
+
+    /** Matches the digits that follow the separator and optional sign. */
+    DecimalMatcher fExponentMatcher;
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_SCIENTIFIC_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
// {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
// {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
// {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
-// {3, u"๐ฑ.๐ญ๐ฐ๐ฎE๐ฏ", u"0", 12, 5142.},
-// {3, u"๐ฑ.๐ญ๐ฐ๐ฎE-๐ฏ", u"0", 13, 0.005142},
-// {3, u"๐ฑ.๐ญ๐ฐ๐ฎe-๐ฏ", u"0", 13, 0.005142},
+ {3, u"๐ฑ.๐ญ๐ฐ๐ฎE๐ฏ", u"0", 12, 5142.},
+ {3, u"๐ฑ.๐ญ๐ฐ๐ฎE-๐ฏ", u"0", 13, 0.005142},
+ {3, u"๐ฑ.๐ญ๐ฐ๐ฎe-๐ฏ", u"0", 13, 0.005142},
// {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
// {3, u"๐บ1.23", u"๐บ0;๐ป0", 6, 1.23},