From 48a633f41f36a42f9331fdeb6813bd6181d07313 Mon Sep 17 00:00:00 2001
From: Shane Carr <shane@unicode.org>
Date: Tue, 6 Feb 2018 09:43:37 +0000
Subject: [PATCH] ICU-13574 Defining more fundamental parsing types.

X-SVN-Rev: 40843
---
 icu4c/source/i18n/Makefile.in                 |   2 +-
 icu4c/source/i18n/number_decimalquantity.cpp  |  30 +++
 icu4c/source/i18n/number_decimalquantity.h    |  13 +-
 icu4c/source/i18n/numparse_parsednumber.cpp   |  75 ++++++
 icu4c/source/i18n/numparse_stringsegment.h    |  55 -----
 icu4c/source/i18n/numparse_types.h            | 216 +++++++++++++++++-
 icu4c/source/i18n/plurrule.cpp                |   2 +
 icu4c/source/i18n/plurrule_impl.h             | 126 +++++-----
 .../icu/impl/number/parse/AffixMatcher.java   |   2 +-
 .../impl/number/parse/NumberParseMatcher.java |   2 +
 .../impl/number/parse/NumberParserImpl.java   |  16 +-
 .../icu/impl/number/parse/ParsedNumber.java   |   6 +-
 .../number/parse/UnicodeSetStaticCache.java   |   5 -
 .../icu/dev/test/number/NumberParserTest.java |  17 +-
 .../number/UnicodeSetStaticCacheTest.java     |   2 +-
 15 files changed, 423 insertions(+), 146 deletions(-)
 create mode 100644 icu4c/source/i18n/numparse_parsednumber.cpp

diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in
index 2b9cca70556..a5752781f2f 100644
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@@ -108,7 +108,7 @@ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
 number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
 number_padding.o number_patternmodifier.o number_patternstring.o \
 number_rounding.o number_scientific.o number_stringbuilder.o \
-numparse_stringsegment.o numparse_unisets.o
+numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o
 
 
 ## Header files to install
diff --git a/icu4c/source/i18n/number_decimalquantity.cpp b/icu4c/source/i18n/number_decimalquantity.cpp
index 6f6ac9def65..3342771b987 100644
--- a/icu4c/source/i18n/number_decimalquantity.cpp
+++ b/icu4c/source/i18n/number_decimalquantity.cpp
@@ -103,6 +103,7 @@ DecimalQuantity &DecimalQuantity::operator=(const DecimalQuantity &other) {
         return *this;
     }
     copyBcdFrom(other);
+    bogus = other.bogus;
     lOptPos = other.lOptPos;
     lReqPos = other.lReqPos;
     rReqPos = other.rReqPos;
@@ -466,6 +467,35 @@ int64_t DecimalQuantity::toFractionLong(bool includeTrailingZeros) const {
     return result;
 }
 
+bool DecimalQuantity::fitsInLong() const {
+    // Decimal digits of INT64_MAX (9,223,372,036,854,775,807), most significant first.
+    static const int8_t INT64_BCD[] = { 9, 2, 2, 3, 3, 7, 2, 0, 3, 6, 8, 5, 4, 7, 7, 5, 8, 0, 7 };
+    if (isZero()) {
+        return true;
+    }
+    if (scale < 0) {
+        return false;
+    }
+    int magnitude = getMagnitude();
+    if (magnitude < 18) {
+        return true;
+    }
+    if (magnitude > 18) {
+        return false;
+    }
+    // Hard case: the leading digit is at 10^18, so compare digit by digit against INT64_MAX.
+    for (int p = 0; p < precision; p++) {
+        int8_t digit = getDigit(18 - p);
+        if (digit < INT64_BCD[p]) {
+            return true;
+        } else if (digit > INT64_BCD[p]) {
+            return false;
+        }
+    }
+    // Every stored digit matched, so the value is at most INT64_MAX.
+    return true;
+}
+
 double DecimalQuantity::toDouble() const {
     if (isApproximate) {
         return toDoubleFromOriginal();
diff --git a/icu4c/source/i18n/number_decimalquantity.h b/icu4c/source/i18n/number_decimalquantity.h
index 3ff9fbeffef..aea66fdb7cd 100644
--- a/icu4c/source/i18n/number_decimalquantity.h
+++ b/icu4c/source/i18n/number_decimalquantity.h
@@ -35,7 +35,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory {
 
     DecimalQuantity();
 
-    ~DecimalQuantity();
+    ~DecimalQuantity() override;
 
     /**
      * Sets this instance to be equal to another instance.
@@ -128,6 +128,12 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory {
 
     int64_t toFractionLong(bool includeTrailingZeros) const;
 
+    /**
+     * Returns whether or not an int64_t can fully represent the value stored in this DecimalQuantity.
+     * Assumes that the DecimalQuantity is non-negative.
+     */
+    bool fitsInLong() const;
+
     /** @return The value contained in this {@link DecimalQuantity} approximated as a double. */
     double toDouble() const;
 
@@ -235,6 +241,11 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory {
     /** Visible for testing */
     inline bool isExplicitExactDouble() { return explicitExactDouble; };
 
+    /**
+     * Bogus flag for when a DecimalQuantity is stored on the stack.
+     */
+    bool bogus = false;
+
   private:
     /**
      * The power of ten corresponding to the least significant digit in the BCD. For example, if this
diff --git a/icu4c/source/i18n/numparse_parsednumber.cpp b/icu4c/source/i18n/numparse_parsednumber.cpp
new file mode 100644
index 00000000000..9db933502a3
--- /dev/null
+++ b/icu4c/source/i18n/numparse_parsednumber.cpp
@@ -0,0 +1,75 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include <cmath>
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+ParsedNumber::ParsedNumber() {
+    clear(); // start out in the same empty state that clear() produces
+}
+
+void ParsedNumber::clear() {
+    currencyCode.setToBogus();
+    suffix.setToBogus();
+    prefix.setToBogus();
+    flags = 0;
+    charEnd = 0; // nothing consumed yet, so success() reports false
+    quantity.bogus = true; // no numeric value yet; seenNumber() checks this flag
+}
+
+void ParsedNumber::setCharsConsumed(const StringSegment& segment) {
+    charEnd = segment.getOffset(); // remember the segment's current offset as the end of consumed input
+}
+
+bool ParsedNumber::success() const {
+    return (flags & FLAG_FAIL) == 0 && charEnd > 0; // no failure flag, and at least one char consumed
+}
+
+bool ParsedNumber::seenNumber() const {
+    return (flags & (FLAG_NAN | FLAG_INFINITY)) != 0 || !quantity.bogus; // NaN, infinity, or a quantity
+}
+
+double ParsedNumber::getDouble() const {
+    // The sign is tracked in FLAG_NEGATIVE and has to be applied on every return path.
+    const bool sawNegative = 0 != (flags & FLAG_NEGATIVE);
+    const bool sawNaN = 0 != (flags & FLAG_NAN);
+    const bool sawInfinity = 0 != (flags & FLAG_INFINITY);
+
+    // Check for NaN, infinity, and -0.0
+    if (sawNaN) {
+        return NAN;
+    }
+    if (sawInfinity) {
+        return sawNegative ? -INFINITY : INFINITY;
+    }
+    if (quantity.isZero() && sawNegative) {
+        return -0.0;
+    }
+
+    if (quantity.fitsInLong()) {
+        // int64_t, not long: long is only 32 bits wide on LLP64 targets such as Windows.
+        int64_t l = quantity.toLong();
+        if (sawNegative) {
+            l = -l;
+        }
+        return static_cast<double>(l);
+    }
+
+    // Fractional or too large for int64_t: negate here too, as the integer branch does.
+    // TODO: MIN_LONG
+    const double d = quantity.toDouble();
+    return sawNegative ? -d : d;
+}
+
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT */
diff --git a/icu4c/source/i18n/numparse_stringsegment.h b/icu4c/source/i18n/numparse_stringsegment.h
index 30f11af7a19..d2e6154cd3d 100644
--- a/icu4c/source/i18n/numparse_stringsegment.h
+++ b/icu4c/source/i18n/numparse_stringsegment.h
@@ -15,61 +15,6 @@ U_NAMESPACE_BEGIN
 namespace numparse {
 namespace impl {
 
-/**
- * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
- * subSequence methods all operate relative to the fixed offset into the String.
- *
- * @author sffc
- */
-class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
-  public:
-    explicit StringSegment(const UnicodeString &str);
-
-    int32_t getOffset() const;
-
-    void setOffset(int32_t start);
-
-    /**
-     * Equivalent to <code>setOffset(getOffset()+delta)</code>.
-     *
-     * <p>
-     * This method is usually called by a Matcher to register that a char was consumed. If the char is
-     * strong (it usually is, except for things like whitespace), follow this with a call to
-     * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
-     */
-    void adjustOffset(int32_t delta);
-
-    void setLength(int32_t length);
-
-    void resetLength();
-
-    int32_t length() const override;
-
-    char16_t charAt(int32_t index) const override;
-
-    UChar32 codePointAt(int32_t index) const override;
-
-    UnicodeString toUnicodeString() const override;
-
-    /**
-     * Returns the first code point in the string segment, or -1 if the string starts with an invalid
-     * code point.
-     */
-    UChar32 getCodePoint() const;
-
-    /**
-     * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
-     * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
-     * since the first 2 characters are the same.
-     */
-    int32_t getCommonPrefixLength(const UnicodeString &other);
-
-  private:
-    const UnicodeString fStr;
-    int32_t fStart;
-    int32_t fEnd;
-};
-
 
 } // namespace impl
 } // namespace numparse
diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h
index b607f36cc99..92957204baa 100644
--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@@ -8,11 +8,223 @@
 #define __NUMPARSE_TYPES_H__
 
 #include "unicode/uobject.h"
+#include "number_decimalquantity.h"
 
-U_NAMESPACE_BEGIN
-namespace numparse {
+U_NAMESPACE_BEGIN namespace numparse {
 namespace impl {
 
+// Forward-declarations
+class StringSegment;
+class ParsedNumber;
+
+
+/**
+ * Struct-like class to hold the results of a parsing routine.
+ *
+ * @author sffc
+ */
+class ParsedNumber {
+  public:
+    enum ParsedNumberFlags {
+        FLAG_NEGATIVE = 0x0001,
+        FLAG_PERCENT = 0x0002,
+        FLAG_PERMILLE = 0x0004,
+        FLAG_HAS_EXPONENT = 0x0008,
+        FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
+        FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
+        FLAG_NAN = 0x0040,
+        FLAG_INFINITY = 0x0080,
+        FLAG_FAIL = 0x0100,
+    };
+
+    /**
+     * The numerical value that was parsed.
+     */
+    ::icu::number::impl::DecimalQuantity quantity;
+
+    /**
+     * The index of the last char consumed during parsing. If parsing started at index 0, this is equal
+     * to the number of chars consumed. This is NOT necessarily the same as the StringSegment offset;
+     * "weak" chars, like whitespace, change the offset, but charEnd is not touched until a
+     * "strong" char is encountered.
+     */
+    int32_t charEnd;
+
+    /**
+     * Boolean flags (a bitwise OR of ParsedNumberFlags values, declared above).
+     */
+    int32_t flags;
+
+    /**
+     * The pattern string corresponding to the prefix that got consumed.
+     */
+    UnicodeString prefix;
+
+    /**
+     * The pattern string corresponding to the suffix that got consumed.
+     */
+    UnicodeString suffix;
+
+    /**
+     * The currency that got consumed.
+     */
+    UnicodeString currencyCode;
+
+    ParsedNumber();
+
+    ParsedNumber(const ParsedNumber& other) = default;
+
+    ParsedNumber& operator=(const ParsedNumber& other) = default;
+
+    void clear();
+
+    /**
+     * Call this method to register that a "strong" char was consumed. This should be done after calling
+     * {@link StringSegment#setOffset} or {@link StringSegment#adjustOffset} except when the char is
+     * "weak", like whitespace.
+     *
+     * <p>
+     * <strong>What is a strong versus weak char?</strong> The behavior of number parsing is to "stop"
+     * after reading the number, even if there is other content following the number. For example, after
+     * parsing the string "123 " (123 followed by a space), the cursor should be set to 3, not 4, even
+     * though there are matchers that accept whitespace. In this example, the digits are strong, whereas
+     * the whitespace is weak. Grouping separators are weak, whereas decimal separators are strong. Most
+     * other chars are strong.
+     *
+     * @param segment
+     *            The current StringSegment, usually immediately following a call to setOffset.
+     */
+    void setCharsConsumed(const StringSegment& segment);
+
+    /**
+     * Returns whether the parse was successful. To be successful, at least one char must have been
+     * consumed, and the failure flag must not be set.
+     */
+    bool success() const;
+    /** Returns whether a quantity has been stored or the NaN or infinity flag has been set. */
+    bool seenNumber() const;
+    /** Converts the result to a double; the NaN and infinity flags take precedence over quantity. */
+    double getDouble() const;
+};
+
+
+/**
+ * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
+ * subSequence methods all operate relative to the fixed offset into the String.
+ *
+ * @author sffc
+ */
+class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
+  public:
+    explicit StringSegment(const UnicodeString& str);
+
+    int32_t getOffset() const;
+
+    void setOffset(int32_t start);
+
+    /**
+     * Equivalent to <code>setOffset(getOffset()+delta)</code>.
+     *
+     * <p>
+     * This method is usually called by a Matcher to register that a char was consumed. If the char is
+     * strong (it usually is, except for things like whitespace), follow this with a call to
+     * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
+     */
+    void adjustOffset(int32_t delta);
+
+    void setLength(int32_t length);
+
+    void resetLength();
+
+    int32_t length() const override;
+
+    char16_t charAt(int32_t index) const override;
+
+    UChar32 codePointAt(int32_t index) const override;
+
+    UnicodeString toUnicodeString() const override;
+
+    /**
+     * Returns the first code point in the string segment, or -1 if the string starts with an invalid
+     * code point.
+     */
+    UChar32 getCodePoint() const;
+
+    /**
+     * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
+     * example, if this string segment is "aab", and the other string is "aac", this method returns 2,
+     * since the first 2 characters are the same.
+     */
+    int32_t getCommonPrefixLength(const UnicodeString& other);
+
+  private:
+    const UnicodeString fStr;
+    int32_t fStart;
+    int32_t fEnd;
+};
+
+
+/**
+ * The core interface implemented by all matchers used for number parsing.
+ *
+ * Given a string, there should NOT be more than one way to consume the string with the same matcher
+ * applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an
+ * exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given
+ * the string "AAAA", there are 2^(N-1) = 8 ways to apply the A Matcher to this string: you could have the
+ * A Matcher apply 4 times, once per character; you could have it apply just once to all the characters;
+ * you could have it apply to the first 2 characters and the last 2 characters; and so on. A better version
+ * of the "A Matcher" would be for it to accept exactly one A, and allow the algorithm to run it
+ * repeatedly to consume a string of multiple As. The A Matcher can override the isFlexible() method
+ * below to signal that it can be applied multiple times in a row.
+ *
+ * @author sffc
+ */
+class NumberParseMatcher {
+  public:
+    virtual ~NumberParseMatcher() = default;
+
+    /**
+     * Matchers can override this method to return true to indicate that they are optional and can be run
+     * repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
+     */
+    virtual bool isFlexible() const {
+        return false;
+    }
+
+    /**
+     * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds
+     * something interesting in the StringSegment, it should update the offset of the StringSegment
+     * corresponding to how many chars were matched.
+     *
+     * @param segment
+     *            The StringSegment to match against. Matches always start at the beginning of the
+     *            segment. The segment is guaranteed to contain at least one char.
+     * @param result
+     *            The data structure to store results if the match succeeds.
+     * @return Whether this matcher thinks there may be more interesting chars beyond the end of the
+     *         string segment.
+     */
+    virtual bool match(StringSegment& segment, ParsedNumber& result) const = 0;
+
+    /**
+     * Should return a set representing all possible chars (UTF-16 code units) that could be the first
+     * char that this matcher can consume. This method is only called during construction phase, and its
+     * return value is used to skip this matcher unless a segment begins with a char in this set. To make
+     * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
+     */
+    virtual UnicodeSet getLeadCodePoints() const = 0;
+
+    /**
+     * Method called at the end of a parse, after all matchers have failed to consume any more chars.
+     * Allows a matcher to make final modifications to the result given the knowledge that no more
+     * matches are possible.
+     *
+     * @param result
+     *            The data structure to store results.
+     */
+    virtual void postProcess(ParsedNumber& result) const = 0;
+};
+
 
 } // namespace impl
 } // namespace numparse
diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp
index dcf28b2bc1a..14b5fe6d9d1 100644
--- a/icu4c/source/i18n/plurrule.cpp
+++ b/icu4c/source/i18n/plurrule.cpp
@@ -42,6 +42,8 @@
 
 U_NAMESPACE_BEGIN
 
+using namespace icu::pluralimpl;
+
 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
 static const UChar PK_IN[]={LOW_I,LOW_N,0};
diff --git a/icu4c/source/i18n/plurrule_impl.h b/icu4c/source/i18n/plurrule_impl.h
index b93fc501bac..152c33e862d 100644
--- a/icu4c/source/i18n/plurrule_impl.h
+++ b/icu4c/source/i18n/plurrule_impl.h
@@ -40,67 +40,71 @@ class DigitInterval;
 class PluralRules;
 class VisibleDigits;
 
-static const UChar DOT             = ((UChar)0x002E);
-static const UChar SINGLE_QUOTE    = ((UChar)0x0027);
-static const UChar SLASH           = ((UChar)0x002F);
-static const UChar BACKSLASH       = ((UChar)0x005C);
-static const UChar SPACE           = ((UChar)0x0020);
-static const UChar EXCLAMATION     = ((UChar)0x0021);
-static const UChar QUOTATION_MARK  = ((UChar)0x0022);
-static const UChar NUMBER_SIGN     = ((UChar)0x0023);
-static const UChar PERCENT_SIGN    = ((UChar)0x0025);
-static const UChar ASTERISK        = ((UChar)0x002A);
-static const UChar COMMA           = ((UChar)0x002C);
-static const UChar HYPHEN          = ((UChar)0x002D);
-static const UChar U_ZERO          = ((UChar)0x0030);
-static const UChar U_ONE           = ((UChar)0x0031);
-static const UChar U_TWO           = ((UChar)0x0032);
-static const UChar U_THREE         = ((UChar)0x0033);
-static const UChar U_FOUR          = ((UChar)0x0034);
-static const UChar U_FIVE          = ((UChar)0x0035);
-static const UChar U_SIX           = ((UChar)0x0036);
-static const UChar U_SEVEN         = ((UChar)0x0037);
-static const UChar U_EIGHT         = ((UChar)0x0038);
-static const UChar U_NINE          = ((UChar)0x0039);
-static const UChar COLON           = ((UChar)0x003A);
-static const UChar SEMI_COLON      = ((UChar)0x003B);
-static const UChar EQUALS          = ((UChar)0x003D);
-static const UChar AT              = ((UChar)0x0040);
-static const UChar CAP_A           = ((UChar)0x0041);
-static const UChar CAP_B           = ((UChar)0x0042);
-static const UChar CAP_R           = ((UChar)0x0052);
-static const UChar CAP_Z           = ((UChar)0x005A);
-static const UChar LOWLINE         = ((UChar)0x005F);
-static const UChar LEFTBRACE       = ((UChar)0x007B);
-static const UChar RIGHTBRACE      = ((UChar)0x007D);
-static const UChar TILDE           = ((UChar)0x007E);
-static const UChar ELLIPSIS        = ((UChar)0x2026);
-
-static const UChar LOW_A           = ((UChar)0x0061);
-static const UChar LOW_B           = ((UChar)0x0062);
-static const UChar LOW_C           = ((UChar)0x0063);
-static const UChar LOW_D           = ((UChar)0x0064);
-static const UChar LOW_E           = ((UChar)0x0065);
-static const UChar LOW_F           = ((UChar)0x0066);
-static const UChar LOW_G           = ((UChar)0x0067);
-static const UChar LOW_H           = ((UChar)0x0068);
-static const UChar LOW_I           = ((UChar)0x0069);
-static const UChar LOW_J           = ((UChar)0x006a);
-static const UChar LOW_K           = ((UChar)0x006B);
-static const UChar LOW_L           = ((UChar)0x006C);
-static const UChar LOW_M           = ((UChar)0x006D);
-static const UChar LOW_N           = ((UChar)0x006E);
-static const UChar LOW_O           = ((UChar)0x006F);
-static const UChar LOW_P           = ((UChar)0x0070);
-static const UChar LOW_Q           = ((UChar)0x0071);
-static const UChar LOW_R           = ((UChar)0x0072);
-static const UChar LOW_S           = ((UChar)0x0073);
-static const UChar LOW_T           = ((UChar)0x0074);
-static const UChar LOW_U           = ((UChar)0x0075);
-static const UChar LOW_V           = ((UChar)0x0076);
-static const UChar LOW_W           = ((UChar)0x0077);
-static const UChar LOW_Y           = ((UChar)0x0079);
-static const UChar LOW_Z           = ((UChar)0x007A);
+namespace pluralimpl {
+// UTF-16 code unit constants, each named after the character it encodes.
+static const UChar DOT = ((UChar) 0x002E);
+static const UChar SINGLE_QUOTE = ((UChar) 0x0027);
+static const UChar SLASH = ((UChar) 0x002F);
+static const UChar BACKSLASH = ((UChar) 0x005C);
+static const UChar SPACE = ((UChar) 0x0020);
+static const UChar EXCLAMATION = ((UChar) 0x0021);
+static const UChar QUOTATION_MARK = ((UChar) 0x0022);
+static const UChar NUMBER_SIGN = ((UChar) 0x0023);
+static const UChar PERCENT_SIGN = ((UChar) 0x0025);
+static const UChar ASTERISK = ((UChar) 0x002A);
+static const UChar COMMA = ((UChar) 0x002C);
+static const UChar HYPHEN = ((UChar) 0x002D);
+static const UChar U_ZERO = ((UChar) 0x0030);
+static const UChar U_ONE = ((UChar) 0x0031);
+static const UChar U_TWO = ((UChar) 0x0032);
+static const UChar U_THREE = ((UChar) 0x0033);
+static const UChar U_FOUR = ((UChar) 0x0034);
+static const UChar U_FIVE = ((UChar) 0x0035);
+static const UChar U_SIX = ((UChar) 0x0036);
+static const UChar U_SEVEN = ((UChar) 0x0037);
+static const UChar U_EIGHT = ((UChar) 0x0038);
+static const UChar U_NINE = ((UChar) 0x0039);
+static const UChar COLON = ((UChar) 0x003A);
+static const UChar SEMI_COLON = ((UChar) 0x003B);
+static const UChar EQUALS = ((UChar) 0x003D);
+static const UChar AT = ((UChar) 0x0040);
+static const UChar CAP_A = ((UChar) 0x0041);
+static const UChar CAP_B = ((UChar) 0x0042);
+static const UChar CAP_R = ((UChar) 0x0052);
+static const UChar CAP_Z = ((UChar) 0x005A);
+static const UChar LOWLINE = ((UChar) 0x005F);
+static const UChar LEFTBRACE = ((UChar) 0x007B);
+static const UChar RIGHTBRACE = ((UChar) 0x007D);
+static const UChar TILDE = ((UChar) 0x007E);
+static const UChar ELLIPSIS = ((UChar) 0x2026);
+
+static const UChar LOW_A = ((UChar) 0x0061);
+static const UChar LOW_B = ((UChar) 0x0062);
+static const UChar LOW_C = ((UChar) 0x0063);
+static const UChar LOW_D = ((UChar) 0x0064);
+static const UChar LOW_E = ((UChar) 0x0065);
+static const UChar LOW_F = ((UChar) 0x0066);
+static const UChar LOW_G = ((UChar) 0x0067);
+static const UChar LOW_H = ((UChar) 0x0068);
+static const UChar LOW_I = ((UChar) 0x0069);
+static const UChar LOW_J = ((UChar) 0x006a);
+static const UChar LOW_K = ((UChar) 0x006B);
+static const UChar LOW_L = ((UChar) 0x006C);
+static const UChar LOW_M = ((UChar) 0x006D);
+static const UChar LOW_N = ((UChar) 0x006E);
+static const UChar LOW_O = ((UChar) 0x006F);
+static const UChar LOW_P = ((UChar) 0x0070);
+static const UChar LOW_Q = ((UChar) 0x0071);
+static const UChar LOW_R = ((UChar) 0x0072);
+static const UChar LOW_S = ((UChar) 0x0073);
+static const UChar LOW_T = ((UChar) 0x0074);
+static const UChar LOW_U = ((UChar) 0x0075);
+static const UChar LOW_V = ((UChar) 0x0076);
+static const UChar LOW_W = ((UChar) 0x0077);
+static const UChar LOW_Y = ((UChar) 0x0079);
+static const UChar LOW_Z = ((UChar) 0x007A);
+
+} // namespace pluralimpl
 
 
 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
index 58bf69ac7d2..5104e29b9da 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
@@ -75,7 +75,7 @@ public class AffixMatcher implements NumberParseMatcher {
         return true;
     }
 
-    public static void newGenerate(
+    public static void createMatchers(
             AffixPatternProvider patternInfo,
             NumberParserImpl output,
             MatcherFactory factory,
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
index 1d576cee2cc..cd7b04ade65 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
@@ -5,6 +5,8 @@ package com.ibm.icu.impl.number.parse;
 import com.ibm.icu.text.UnicodeSet;
 
 /**
+ * The core interface implemented by all matchers used for number parsing.
+ *
  * Given a string, there should NOT be more than one way to consume the string with the same matcher
  * applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an
  * exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
index ff59ca052cc..55a046a43ad 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@@ -66,18 +66,10 @@ public class NumberParserImpl {
         STRICT,
     }
 
-    @Deprecated
-    public static NumberParserImpl createParserFromPattern(
+    public static NumberParserImpl createSimpleParser(
             ULocale locale,
             String pattern,
-            boolean strictGrouping) {
-        // Temporary frontend for testing.
-
-        int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
-                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
-        if (strictGrouping) {
-            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
-        }
+            int parseFlags) {
 
         NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
         DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
@@ -91,7 +83,7 @@ public class NumberParserImpl {
         factory.parseFlags = parseFlags;
 
         ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
-        AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
+        AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);
 
         Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);
 
@@ -209,7 +201,7 @@ public class NumberParserImpl {
         //////////////////////
 
         // Set up a pattern modifier with mostly defaults to generate AffixMatchers.
-        AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
+        AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);
 
         ////////////////////////
         /// CURRENCY MATCHER ///
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
index 27ce15df1f6..2bd45cc08be 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
@@ -8,11 +8,15 @@ import java.util.Comparator;
 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
 
 /**
- * @author sffc
+ * Struct-like class to hold the results of a parsing routine.
  *
+ * @author sffc
  */
 public class ParsedNumber {
 
+    /**
+     * The numerical value that was parsed.
+     */
     public DecimalQuantity_DualStorageBCD quantity;
 
     /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
index bf0593e1230..3839301cea5 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
@@ -72,10 +72,6 @@ public class UnicodeSetStaticCache {
         return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
     }
 
-    public static Key chooseFrom(String str, Key key1, Key key2, Key key3) {
-        return get(key1).contains(str) ? key1 : chooseFrom(str, key2, key3);
-    }
-
     private static UnicodeSet computeUnion(Key k1, Key k2) {
         return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).freeze();
     }
@@ -110,7 +106,6 @@ public class UnicodeSetStaticCache {
         unicodeSets.put(Key.MINUS_SIGN, new UnicodeSet("[-â»âââï¹£ï¼]").freeze());
         unicodeSets.put(Key.PLUS_SIGN, new UnicodeSet("[+âºââï¬©ï¹¢ï¼]").freeze());
 
-        // TODO: Fill in the next three sets.
         unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%Ùª]").freeze());
         unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[â°Ø]").freeze());
         unicodeSets.put(Key.INFINITY, new UnicodeSet("[â]").freeze());
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
index 4e69a762581..5bb123968b6 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
 import com.ibm.icu.impl.number.parse.MinusSignMatcher;
 import com.ibm.icu.impl.number.parse.NumberParserImpl;
 import com.ibm.icu.impl.number.parse.ParsedNumber;
+import com.ibm.icu.impl.number.parse.ParsingUtils;
 import com.ibm.icu.impl.number.parse.PercentMatcher;
 import com.ibm.icu.impl.number.parse.PlusSignMatcher;
 import com.ibm.icu.impl.number.parse.SeriesMatcher;
@@ -58,8 +59,8 @@ public class NumberParserTest {
                 { 3, "-ð±ð­ð°ð®ð¯-", "0", 11, -51423. },
                 { 3, "a51423US dollars", "a0Â¤Â¤Â¤", 16, 51423. },
                 { 3, "a 51423 US dollars", "a0Â¤Â¤Â¤", 18, 51423. },
-                { 3, "514.23 USD", "0", 10, 514.23 },
-                { 3, "514.23 GBP", "0", 10, 514.23 },
+                { 3, "514.23 USD", "Â¤0", 10, 514.23 },
+                { 3, "514.23 GBP", "Â¤0", 10, 514.23 },
                 { 3, "a ð±ð­ð°ð®ð¯ b", "a0b", 14, 51423. },
                 { 3, "-a ð±ð­ð°ð®ð¯ b", "a0b", 15, -51423. },
                 { 3, "a -ð±ð­ð°ð®ð¯ b", "a0b", 15, -51423. },
@@ -79,7 +80,7 @@ public class NumberParserTest {
                 { 3, "ð±.ð­ð°ð®Eð¯", "0", 12, 5142. },
                 { 3, "ð±.ð­ð°ð®E-ð¯", "0", 13, 0.005142 },
                 { 3, "ð±.ð­ð°ð®e-ð¯", "0", 13, 0.005142 },
-                { 7, "5,142.50 Canadian dollars", "#,##,##0", 25, 5142.5 },
+                { 7, "5,142.50 Canadian dollars", "#,##,##0 Â¤Â¤Â¤", 25, 5142.5 },
                 { 3, "a$ b5", "a Â¤ b0", 5, 5.0 },
                 { 3, "ðº1.23", "ðº0;ð»0", 6, 1.23 },
                 { 3, "ð»1.23", "ðº0;ð»0", 6, -1.23 },
@@ -87,6 +88,8 @@ public class NumberParserTest {
                 { 3, "                              0", "a0", 31, 0.0 }, // should not hang
                 { 3, "0", "0", 1, 0.0 } };
 
+        int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
+                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
         for (Object[] cas : cases) {
             int flags = (Integer) cas[0];
             String input = (String) cas[1];
@@ -94,7 +97,7 @@ public class NumberParserTest {
             int expectedCharsConsumed = (Integer) cas[3];
             double resultDouble = (Double) cas[4];
             NumberParserImpl parser = NumberParserImpl
-                    .createParserFromPattern(ULocale.ENGLISH, pattern, false);
+                    .createSimpleParser(ULocale.ENGLISH, pattern, parseFlags);
             String message = "Input <" + input + "> Parser " + parser;
 
             if (0 != (flags & 0x01)) {
@@ -127,7 +130,9 @@ public class NumberParserTest {
 
             if (0 != (flags & 0x04)) {
                 // Test with strict separators
-                parser = NumberParserImpl.createParserFromPattern(ULocale.ENGLISH, pattern, true);
+                parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH,
+                        pattern,
+                        parseFlags | ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
                 ParsedNumber resultObject = new ParsedNumber();
                 parser.parse(input, true, resultObject);
                 assertNotNull("Strict Parse failed: " + message, resultObject.quantity);
@@ -146,7 +151,7 @@ public class NumberParserTest {
     public void testLocaleFi() {
         // This case is interesting because locale fi has NaN starting with 'e', the same as scientific
         NumberParserImpl parser = NumberParserImpl
-                .createParserFromPattern(new ULocale("fi"), "0", false);
+                .createSimpleParser(new ULocale("fi"), "0", ParsingUtils.PARSE_FLAG_IGNORE_CASE);
 
         ParsedNumber resultObject = new ParsedNumber();
         parser.parse("epÃ¤luku", false, resultObject);
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java
index 97283a1400e..7aec4f77f17 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/UnicodeSetStaticCacheTest.java
@@ -75,7 +75,7 @@ public class UnicodeSetStaticCacheTest {
 
     static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
         // If this test case fails, add the specified code point to the corresponding set in
-        // UnicodeSetStaticCache.java
+        // UnicodeSetStaticCache.java and numparse_unisets.cpp
         assertTrue(
                 locale
                         + " U+"
-- 
2.40.0