ICU-13574 Implementing final two SymbolMatchers in ICU4C (infinity and padding).

author Shane Carr <shane@unicode.org>

Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)

committer Shane Carr <shane@unicode.org>

Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)
author Shane Carr <shane@unicode.org>
Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)
committer Shane Carr <shane@unicode.org>
Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)
diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp

index 1df9c56b43b3ebc065829b6e25b37cdebd44d3ac..575e0e16799e698b83de7c2591e97d814be82953 100644 (file)
--- a/icu4c/source/i18n/numparse_impl.cpp
+++ b/icu4c/source/i18n/numparse_impl.cpp
@@ -5,6 +5,9 @@
  
  #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
  
+// Allow implicit conversion from char16_t* to UnicodeString for this file
+#define UNISTR_FROM_STRING_EXPLICIT
+
  #include "number_types.h"
  #include "number_patternstring.h"
  #include "numparse_types.h"
@@ -52,6 +55,8 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
      parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
      parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
      parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
+    parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
+    parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
  //    parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
  //    parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
  //    parser.addMatcher(new RequireNumberMatcher());
diff --git a/icu4c/source/i18n/numparse_impl.h b/icu4c/source/i18n/numparse_impl.h

index 105f8c71abe7dde80b8130a2a86011f1651f4d8d..3f9b5d4b355179622590b2884875deac97c08630 100644 (file)
--- a/icu4c/source/i18n/numparse_impl.h
+++ b/icu4c/source/i18n/numparse_impl.h
@@ -46,8 +46,10 @@ class NumberParserImpl {
      // You must use an assignment operator on them before using.
      struct {
          IgnorablesMatcher ignorables;
+        InfinityMatcher infinity;
          MinusSignMatcher minusSign;
          NanMatcher nan;
+        PaddingMatcher padding;
          PercentMatcher percent;
          PermilleMatcher permille;
          PlusSignMatcher plusSign;
diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp

index 5fabd2fb17f7b45e7a14733c4634a7e0d84fbd3a..8e192cf7736756416602b06cb1973c8da69097d5 100644 (file)
--- a/icu4c/source/i18n/numparse_symbols.cpp
+++ b/icu4c/source/i18n/numparse_symbols.cpp
@@ -85,6 +85,20 @@ void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {
  }
  
  
+InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs)
+        // Literal string to match is the locale's infinity symbol (kInfinitySymbol, not kNaNSymbol).
+        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY) {}
+
+bool InfinityMatcher::isDisabled(const ParsedNumber& result) const {
+    return (result.flags & FLAG_INFINITY) != 0;  // disabled once FLAG_INFINITY is already set
+}
+
+void InfinityMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
+    result.flags |= FLAG_INFINITY;  // mark the result as infinite
+    result.setCharsConsumed(segment);  // record consumption based on the segment
+}
+
+
  MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
          : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN),
            fAllowTrailing(allowTrailing) {
@@ -125,6 +139,22 @@ void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
  }
  
  
+PaddingMatcher::PaddingMatcher(const UnicodeString& padString)
+        : SymbolMatcher(padString, unisets::EMPTY) {}  // pad string paired with unisets::EMPTY
+
+bool PaddingMatcher::isFlexible() const {
+    return true;  // padding always reports itself as flexible
+}
+
+bool PaddingMatcher::isDisabled(const ParsedNumber&) const {
+    return false;  // never disabled; the parse result is not consulted, so the parameter is unnamed
+}
+
+void PaddingMatcher::accept(StringSegment&, ParsedNumber&) const {
+    // No-op: neither argument is read or modified, so both are left unnamed.
+}
+
+
  PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs)
          : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol), unisets::PERCENT_SIGN) {
  }
diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h

index c8ba913911f6bcb013b09ca5081d001b8ff29472..40a57f02baf7f046a75221572280b57c39d662dc 100644 (file)
--- a/icu4c/source/i18n/numparse_symbols.h
+++ b/icu4c/source/i18n/numparse_symbols.h
@@ -15,6 +15,11 @@ U_NAMESPACE_BEGIN namespace numparse {
  namespace impl {
  
  
+/**
+ * A base class for many matchers that performs a simple match against a UnicodeString and/or UnicodeSet.
+ *
+ * @author sffc
+ */
  class SymbolMatcher : public NumberParseMatcher, public UMemory {
    public:
      SymbolMatcher() = default;  // WARNING: Leaves the object in an unusable state
@@ -52,6 +57,19 @@ class IgnorablesMatcher : public SymbolMatcher {
  };
  
  
+class InfinityMatcher : public SymbolMatcher {  // SymbolMatcher whose accept() sets FLAG_INFINITY
+  public:
+    InfinityMatcher() = default;  // WARNING: Leaves the object in an unusable state
+
+    InfinityMatcher(const DecimalFormatSymbols& dfs);  // not explicit: assigned via `= {symbols}`
+
+  protected:
+    bool isDisabled(const ParsedNumber& result) const override;  // true if FLAG_INFINITY is already set
+
+    void accept(StringSegment& segment, ParsedNumber& result) const override;  // sets FLAG_INFINITY
+};
+
+
  class MinusSignMatcher : public SymbolMatcher {
    public:
      MinusSignMatcher() = default;  // WARNING: Leaves the object in an unusable state
@@ -83,6 +101,21 @@ class NanMatcher : public SymbolMatcher {
  };
  
  
+class PaddingMatcher : public SymbolMatcher {  // SymbolMatcher for a caller-supplied pad string
+  public:
+    PaddingMatcher() = default;  // WARNING: Leaves the object in an unusable state
+
+    PaddingMatcher(const UnicodeString& padString);  // not explicit: assigned via `= {u"@"}`
+
+    bool isFlexible() const override;  // always returns true
+
+  protected:
+    bool isDisabled(const ParsedNumber& result) const override;  // always returns false
+
+    void accept(StringSegment& segment, ParsedNumber& result) const override;  // no-op
+};
+
+
  class PercentMatcher : public SymbolMatcher {
    public:
      PercentMatcher() = default;  // WARNING: Leaves the object in an unusable state
diff --git a/icu4c/source/i18n/numparse_unisets.cpp b/icu4c/source/i18n/numparse_unisets.cpp

index f259f7a6467078a0f8d30ff92dac6a641ee90196..625e1ac31dc5306e5e9007790aa7ddbc6ba940c7 100644 (file)
--- a/icu4c/source/i18n/numparse_unisets.cpp
+++ b/icu4c/source/i18n/numparse_unisets.cpp
@@ -5,6 +5,10 @@
  
  #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
  
+// Allow implicit conversion from char16_t* to UnicodeString for this file
+// (useful for UnicodeSet constructor)
+#define UNISTR_FROM_STRING_EXPLICIT
+
  #include "numparse_unisets.h"
  #include "numparse_types.h"
  #include "umutex.h"
@@ -56,44 +60,42 @@ UBool U_CALLCONV cleanupNumberParseUniSets() {
  
  void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
      ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
-#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
  
      gUnicodeSets[EMPTY] = new UnicodeSet();
  
      // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
-    gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
+    gUnicodeSets[BIDI] = new UnicodeSet(u"[[\\u200E\\u200F\\u061C]]", status);
  
      // This set was decided after discussion with icu-design@. See ticket #13309.
      // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
-    gUnicodeSets[WHITESPACE] = NEW_UNISET(u"[[:Zs:][\\u0009]]", status);
+    gUnicodeSets[WHITESPACE] = new UnicodeSet(u"[[:Zs:][\\u0009]]", status);
  
      gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
      gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(*gUnicodeSets[BIDI]);
  
      // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
-    gUnicodeSets[COMMA] = NEW_UNISET(u"[,،٫、︐︑﹐﹑，､]", status);
-    gUnicodeSets[STRICT_COMMA] = NEW_UNISET(u"[,٫︐﹐，]", status);
-    gUnicodeSets[PERIOD] = NEW_UNISET(u"[.․。︒﹒．｡]", status);
-    gUnicodeSets[STRICT_PERIOD] = NEW_UNISET(u"[.․﹒．｡]", status);
-    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = NEW_UNISET(
-            u"['٬‘’＇\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
-            status);
+    gUnicodeSets[COMMA] = new UnicodeSet(u"[,،٫、︐︑﹐﹑，､]", status);
+    gUnicodeSets[STRICT_COMMA] = new UnicodeSet(u"[,٫︐﹐，]", status);
+    gUnicodeSets[PERIOD] = new UnicodeSet(u"[.․。︒﹒．｡]", status);
+    gUnicodeSets[STRICT_PERIOD] = new UnicodeSet(u"[.․﹒．｡]", status);
+    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
+            u"['٬‘’＇\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
      gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
      gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
              STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
  
-    gUnicodeSets[MINUS_SIGN] = NEW_UNISET(u"[-⁻₋−➖﹣－]", status);
-    gUnicodeSets[PLUS_SIGN] = NEW_UNISET(u"[+⁺₊➕﬩﹢＋]", status);
+    gUnicodeSets[MINUS_SIGN] = new UnicodeSet(u"[-⁻₋−➖﹣－]", status);
+    gUnicodeSets[PLUS_SIGN] = new UnicodeSet(u"[+⁺₊➕﬩﹢＋]", status);
  
-    gUnicodeSets[PERCENT_SIGN] = NEW_UNISET(u"[%٪]", status);
-    gUnicodeSets[PERMILLE_SIGN] = NEW_UNISET(u"[‰؉]", status);
-    gUnicodeSets[INFINITY] = NEW_UNISET(u"[∞]", status);
+    gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
+    gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
+    gUnicodeSets[INFINITY] = new UnicodeSet(u"[∞]", status);
  
-    gUnicodeSets[DIGITS] = NEW_UNISET(u"[:digit:]", status);
-    gUnicodeSets[NAN_LEAD] = NEW_UNISET(u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]",
-            status);
-    gUnicodeSets[SCIENTIFIC_LEAD] = NEW_UNISET(u"[Ee×·е\u0627]", status);
-    gUnicodeSets[CWCF] = NEW_UNISET(u"[:CWCF:]", status);
+    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
+    gUnicodeSets[NAN_LEAD] = new UnicodeSet(
+            u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
+    gUnicodeSets[SCIENTIFIC_LEAD] = new UnicodeSet(u"[Ee×·е\u0627]", status);
+    gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);
  
      gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
      gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp

index 4e091048d84ae7e6b5f45b8939f5bd6724399b6f..018296a2c447aacb2d62b87787475cba3c5b4e1d 100644 (file)
--- a/icu4c/source/test/intltest/numbertest_parse.cpp
+++ b/icu4c/source/test/intltest/numbertest_parse.cpp
@@ -61,6 +61,10 @@ void NumberParserTest::testBasic() {
                   {3, u"‰51423", u"0", 6, 51.423},
                   {3, u"51423‰", u"0", 6, 51.423},
                   {3, u"51423‰‰", u"0", 6, 51.423},
+                 {3, u"∞", u"0", 1, INFINITY},
+                 {3, u"-∞", u"0", 2, -INFINITY},
+                 {3, u"@@@123  @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
+                 {3, u"@@@123@@  ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
  //                 {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
  //                 {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
  //                 {3, u"514.23 USD", u"¤0", 10, 514.23},
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java

index 843a1c7db1f699bea20df95e0fbbe0aa0493100f..2317435947e9f905f5dfbd1d392b04a173c8bde6 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
@@ -42,6 +42,6 @@ public class InfinityMatcher extends SymbolMatcher {
  
      @Override
      public String toString() {
-        return "<PercentMatcher>";
+        return "<InfinityMatcher>";
      }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java

index ae5650c31191fd67b88c1afb2c5516b08c0b1de4..5060b9518d39a62247f2d1354763010dacc6c6ee 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@@ -92,6 +92,8 @@ public class NumberParserImpl {
          parser.addMatcher(PercentMatcher.getInstance(symbols));
          parser.addMatcher(PermilleMatcher.getInstance(symbols));
          parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
+        parser.addMatcher(InfinityMatcher.getInstance(symbols));
+        parser.addMatcher(PaddingMatcher.getInstance("@"));
          parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
          parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
          parser.addMatcher(new RequireNumberMatcher());
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java

index bf15d726b7a3d172f2687834f6398badf4888539..94f3035574cda5504b3fde7ac148cfd1294c3e3b 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
@@ -5,8 +5,9 @@ package com.ibm.icu.impl.number.parse;
  import com.ibm.icu.text.UnicodeSet;
  
  /**
- * @author sffc
+ * A base class for many matchers that performs a simple match against a String and/or UnicodeSet.
   *
+ * @author sffc
   */
  public abstract class SymbolMatcher implements NumberParseMatcher {
      protected final String string;
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java

index 541ead1945a236b56871eb8ed12138331ac1f2ff..912529479a513e41631c123765922a7100e708a1 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@@ -67,6 +67,10 @@ public class NumberParserTest {
                  { 3, "‰51423", "0", 6, 51.423 },
                  { 3, "51423‰", "0", 6, 51.423 },
                  { 3, "51423‰‰", "0", 6, 51.423 },
+                { 3, "∞", "0", 1, Double.POSITIVE_INFINITY },
+                { 3, "-∞", "0", 2, Double.NEGATIVE_INFINITY },
+                { 3, "@@@123  @@", "0", 6, 123. }, // TODO: Should padding be strong instead of weak?
+                { 3, "@@@123@@  ", "0", 6, 123. }, // TODO: Should padding be strong instead of weak?
                  { 3, "a51423US dollars", "a0¤¤¤", 16, 51423. },
                  { 3, "a 51423 US dollars", "a0¤¤¤", 18, 51423. },
                  { 3, "514.23 USD", "¤0", 10, 514.23 },
author	Shane Carr <shane@unicode.org>
	Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)
committer	Shane Carr <shane@unicode.org>
	Fri, 9 Feb 2018 06:30:40 +0000 (06:30 +0000)
icu4c/source/i18n/numparse_impl.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_impl.h		patch \| blob \| history
icu4c/source/i18n/numparse_symbols.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_symbols.h		patch \| blob \| history
icu4c/source/i18n/numparse_unisets.cpp		patch \| blob \| history
icu4c/source/test/intltest/numbertest_parse.cpp		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java		patch \| blob \| history