number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
number_padding.o number_patternmodifier.o number_patternstring.o \
-number_rounding.o number_scientific.o number_stringbuilder.o
+number_rounding.o number_scientific.o number_stringbuilder.o \
+numparse_stringsegment.o numparse_unisets.o
## Header files to install
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_stringsegment.h"
+#include "putilimp.h"
+#include "unicode/utf16.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+/** Stores a copy of `str`; the segment initially spans the whole string. */
+StringSegment::StringSegment(const UnicodeString &str)
+        : fStr(str),
+          fStart(0),
+          fEnd(str.length()) {
+}
+
+/** Returns the index into the underlying string where the segment currently begins. */
+int32_t StringSegment::getOffset() const {
+    const int32_t currentStart = fStart;
+    return currentStart;
+}
+
+/** Moves the start of the segment to the absolute index `start`; the end is left untouched. */
+void StringSegment::setOffset(int32_t start) {
+    this->fStart = start;
+}
+
+/** Shifts the start of the segment by `delta` code units; the end is left untouched. */
+void StringSegment::adjustOffset(int32_t delta) {
+    fStart = fStart + delta;
+}
+
+/** Places the end of the segment `length` code units after the current start. */
+void StringSegment::setLength(int32_t length) {
+    const int32_t newEnd = fStart + length;
+    fEnd = newEnd;
+}
+
+/** Extends the segment so that it ends at the end of the underlying string again. */
+void StringSegment::resetLength() {
+    const int32_t fullLength = fStr.length();
+    fEnd = fullLength;
+}
+
+/** Returns the number of UTF-16 code units between the segment's start and end. */
+int32_t StringSegment::length() const {
+    const int32_t span = fEnd - fStart;
+    return span;
+}
+
+/** Returns the code unit at `index`, counted from the segment's start. */
+char16_t StringSegment::charAt(int32_t index) const {
+    const int32_t absoluteIndex = fStart + index;
+    return fStr.charAt(absoluteIndex);
+}
+
+/** Returns the code point at `index`, counted from the segment's start. */
+UChar32 StringSegment::codePointAt(int32_t index) const {
+    const int32_t absoluteIndex = fStart + index;
+    return fStr.char32At(absoluteIndex);
+}
+
+/** Returns a new string holding the code units currently covered by the segment. */
+UnicodeString StringSegment::toUnicodeString() const {
+    const int32_t unitCount = fEnd - fStart;
+    return UnicodeString(fStr, fStart, unitCount);
+}
+
+/**
+ * Returns the first code point of the segment.
+ *
+ * @return The code point starting at the segment's offset, or -1 when the segment is empty or
+ *         begins with an unpaired surrogate. A lead surrogate counts as unpaired if its trail
+ *         surrogate is missing or lies beyond the segment's end.
+ */
+UChar32 StringSegment::getCodePoint() const {
+    if (fStart >= fEnd) {
+        // Nothing inside the segment to read; do not peek at code units outside of it.
+        return -1;
+    }
+    const char16_t lead = fStr.charAt(fStart);
+    if (!U16_IS_SURROGATE(lead)) {
+        // BMP code point: the code unit is the code point.
+        return lead;
+    }
+    if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
+        const char16_t trail = fStr.charAt(fStart + 1);
+        // Only combine when the second unit really is a trail surrogate; otherwise the lead
+        // is unpaired and must be reported as invalid rather than returned as-is.
+        if (U16_IS_TRAIL(trail)) {
+            return U16_GET_SUPPLEMENTARY(lead, trail);
+        }
+    }
+    return -1;
+}
+
+/**
+ * Counts how many leading UTF-16 code units of this segment are identical to the leading
+ * code units of `other`. The comparison is code-unit based and case-sensitive.
+ */
+int32_t StringSegment::getCommonPrefixLength(const UnicodeString &other) {
+    // Never compare past the end of the shorter of the two sequences.
+    const int32_t limit = uprv_min(length(), other.length());
+    int32_t matched = 0;
+    while (matched < limit && charAt(matched) == other.charAt(matched)) {
+        ++matched;
+    }
+    return matched;
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_STRINGSEGMENT_H__
+#define __NUMPARSE_STRINGSEGMENT_H__
+
+#include "numparse_types.h"
+#include "number_types.h"
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+namespace numparse {
+namespace impl {
+
+/**
+ * A mutable window over a string, described by a variable start offset and end. The charAt,
+ * codePointAt, length, and toUnicodeString methods all operate relative to the current start
+ * offset into the underlying string.
+ *
+ * The segment holds its own copy of the string passed to the constructor.
+ *
+ * @author sffc
+ */
+class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
+ public:
+ /** Creates a segment that initially covers all of <code>str</code>. */
+ explicit StringSegment(const UnicodeString &str);
+
+ /** Returns the current start offset into the underlying string. */
+ int32_t getOffset() const;
+
+ /** Sets the start offset to the absolute index <code>start</code>; the end is unchanged. */
+ void setOffset(int32_t start);
+
+ /**
+ * Equivalent to <code>setOffset(getOffset()+delta)</code>.
+ *
+ * <p>
+ * This method is usually called by a Matcher to register that a char was consumed. If the char is
+ * strong (it usually is, except for things like whitespace), follow this with a call to
+ * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
+ */
+ void adjustOffset(int32_t delta);
+
+ /** Sets the end of the segment to the current start offset plus <code>length</code>. */
+ void setLength(int32_t length);
+
+ /** Resets the end of the segment to the end of the underlying string. */
+ void resetLength();
+
+ /** Returns the number of UTF-16 code units between the start offset and the end. */
+ int32_t length() const override;
+
+ /** Returns the code unit at <code>index</code>, relative to the start offset. */
+ char16_t charAt(int32_t index) const override;
+
+ /** Returns the code point at <code>index</code>, relative to the start offset. */
+ UChar32 codePointAt(int32_t index) const override;
+
+ /** Returns a copy of the part of the string currently covered by the segment. */
+ UnicodeString toUnicodeString() const override;
+
+ /**
+ * Returns the first code point in the string segment, or -1 if the string starts with an invalid
+ * code point.
+ */
+ UChar32 getCodePoint() const;
+
+ /**
+ * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
+ * example, if this string segment is "aab", and the other string is "aac", this method returns 2,
+ * since the first 2 characters are the same. The result is counted in UTF-16 code units.
+ */
+ int32_t getCommonPrefixLength(const UnicodeString &other);
+
+ private:
+ // Private copy of the string the segment was created from.
+ const UnicodeString fStr;
+ // Index into fStr at which the segment starts.
+ int32_t fStart;
+ // Index into fStr at which the segment ends (exclusive).
+ int32_t fEnd;
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_STRINGSEGMENT_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_TYPES_H__
+#define __NUMPARSE_TYPES_H__
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+namespace numparse {
+namespace impl {
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_TYPES_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_unisets.h"
+#include "numparse_types.h"
+#include "umutex.h"
+#include "ucln_in.h"
+#include "unicode/uniset.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+using namespace icu::numparse::impl::unisets;
+
+
+namespace {
+
+// Table of sets indexed by unisets::Key. All slots start out null; they are filled in by
+// initNumberParseUniSets() and deleted by cleanupNumberParseUnitSets().
+UnicodeSet* gUnicodeSets[COUNT] = {};
+
+/**
+ * Builds a new frozen set holding the union of the sets stored under `k1` and `k2`.
+ *
+ * @return A newly allocated set owned by the caller, or nullptr if either input slot is
+ *         still empty (for example after a failed allocation) or the result cannot be allocated.
+ */
+UnicodeSet* computeUnion(Key k1, Key k2) {
+    const UnicodeSet* first = gUnicodeSets[k1];
+    const UnicodeSet* second = gUnicodeSets[k2];
+    if (first == nullptr || second == nullptr) {
+        // An input set was never created; do not dereference it.
+        return nullptr;
+    }
+    UnicodeSet* result = new UnicodeSet();
+    if (result == nullptr) {
+        return nullptr;
+    }
+    result->addAll(*first);
+    result->addAll(*second);
+    result->freeze();
+    return result;
+}
+
+/**
+ * Builds a new frozen set holding the union of the sets stored under `k1`, `k2` and `k3`.
+ *
+ * @return A newly allocated set owned by the caller, or nullptr if any input slot is
+ *         still empty (for example after a failed allocation) or the result cannot be allocated.
+ */
+UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
+    const UnicodeSet* first = gUnicodeSets[k1];
+    const UnicodeSet* second = gUnicodeSets[k2];
+    const UnicodeSet* third = gUnicodeSets[k3];
+    if (first == nullptr || second == nullptr || third == nullptr) {
+        // An input set was never created; do not dereference it.
+        return nullptr;
+    }
+    UnicodeSet* result = new UnicodeSet();
+    if (result == nullptr) {
+        return nullptr;
+    }
+    result->addAll(*first);
+    result->addAll(*second);
+    result->addAll(*third);
+    result->freeze();
+    return result;
+}
+
+// Guards the one-time construction of gUnicodeSets.
+icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
+
+/**
+ * Library cleanup hook: deletes every set in gUnicodeSets and re-arms the init-once flag so
+ * that the sets are rebuilt if they are requested again after cleanup.
+ *
+ * @return Always TRUE.
+ */
+UBool U_CALLCONV cleanupNumberParseUnitSets() {
+    for (int32_t i = 0; i < COUNT; i++) {
+        UnicodeSet* set = gUnicodeSets[i];
+        if (set == nullptr) {
+            continue;
+        }
+        // Several keys may share one set object; clear every slot pointing at it before
+        // deleting so that the object is released exactly once.
+        for (int32_t j = i; j < COUNT; j++) {
+            if (gUnicodeSets[j] == set) {
+                gUnicodeSets[j] = nullptr;
+            }
+        }
+        delete set;
+    }
+    gNumberParseUniSetsInitOnce.reset();
+    return TRUE;
+}
+
+/**
+ * One-time initializer for gUnicodeSets: registers the cleanup hook, builds every set from
+ * its pattern (or as a union of previously built sets) and freezes them.
+ *
+ * @param status Receives pattern-parsing errors; set to U_MEMORY_ALLOCATION_ERROR if any slot
+ *               could not be allocated and no earlier failure was recorded.
+ */
+void U_CALLCONV initNumberParseUniSets(UErrorCode &status) {
+    ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
+#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
+
+    // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
+    gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
+
+    // This set was decided after discussion with icu-design@. See ticket #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    gUnicodeSets[WHITESPACE] = NEW_UNISET(u"[[:Zs:][\\u0009]]", status);
+
+    gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
+    // Same contents as BIDI, but a separate object: every slot owns its set, so the cleanup
+    // function can delete each slot without releasing the same object twice.
+    gUnicodeSets[STRICT_IGNORABLES] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
+
+    // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
+    gUnicodeSets[COMMA] = NEW_UNISET(u"[,،٫、︐︑﹐﹑,、]", status);
+    gUnicodeSets[STRICT_COMMA] = NEW_UNISET(u"[,٫︐﹐,]", status);
+    gUnicodeSets[PERIOD] = NEW_UNISET(u"[.․。︒﹒.。]", status);
+    gUnicodeSets[STRICT_PERIOD] = NEW_UNISET(u"[.․﹒.。]", status);
+    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = NEW_UNISET(
+            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
+    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
+    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
+            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
+
+    gUnicodeSets[MINUS_SIGN] = NEW_UNISET(u"[-⁻₋−➖﹣-]", status);
+    gUnicodeSets[PLUS_SIGN] = NEW_UNISET(u"[+⁺₊➕﬩﹢+]", status);
+
+    gUnicodeSets[PERCENT_SIGN] = NEW_UNISET(u"[%٪]", status);
+    gUnicodeSets[PERMILLE_SIGN] = NEW_UNISET(u"[‰؉]", status);
+    gUnicodeSets[INFINITY] = NEW_UNISET(u"[∞]", status);
+
+    gUnicodeSets[DIGITS] = NEW_UNISET(u"[:digit:]", status);
+    gUnicodeSets[NAN_LEAD] = NEW_UNISET(
+            u"[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]", status);
+    gUnicodeSets[SCIENTIFIC_LEAD] = NEW_UNISET(u"[Ee×·е\u0627]", status);
+    gUnicodeSets[CWCF] = NEW_UNISET(u"[:CWCF:]", status);
+
+    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
+    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
+
+    for (int32_t i = 0; i < COUNT; i++) {
+        if (gUnicodeSets[i] == nullptr) {
+            // Allocation failed for this slot: report it instead of dereferencing null,
+            // but keep an earlier, more specific failure code if there is one.
+            if (U_SUCCESS(status)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+            continue;
+        }
+        gUnicodeSets[i]->freeze();
+    }
+#undef NEW_UNISET
+}
+
+}
+
+/**
+ * Returns the set stored under `key`, building all sets on first use.
+ * Returns nullptr if the one-time initialization reported a failure.
+ */
+const UnicodeSet* unisets::get(Key key) {
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
+    // TODO: This returns non-null in Java, and callers assume that.
+    return U_SUCCESS(initStatus) ? gUnicodeSets[key] : nullptr;
+}
+
+/**
+ * Returns `key1` if the set stored under it contains `str`; otherwise returns COUNT, which
+ * stands for "no set". COUNT is also returned when the set is unavailable.
+ */
+Key unisets::chooseFrom(UnicodeString str, Key key1) {
+    const UnicodeSet* candidate = get(key1);
+    // get() returns nullptr when initialization failed; treat that as "not contained".
+    return (candidate != nullptr && candidate->contains(str)) ? key1 : COUNT;
+}
+
+/**
+ * Returns the first of `key1`, `key2` whose set contains `str`, or COUNT if neither does.
+ * A set that is unavailable is treated as not containing `str`.
+ */
+Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
+    const UnicodeSet* candidate = get(key1);
+    // get() returns nullptr when initialization failed; fall through to the next key then.
+    if (candidate != nullptr && candidate->contains(str)) {
+        return key1;
+    }
+    return chooseFrom(str, key2);
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_UNISETS_H__
+#define __NUMPARSE_UNISETS_H__
+
+#include "numparse_types.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+namespace unisets {
+
+// Identifies one of the shared sets returned by get(). The enumerators are used as array
+// indices, so COUNT must stay last.
+enum Key {
+ // Ignorables
+ BIDI,
+ WHITESPACE,
+ DEFAULT_IGNORABLES,
+ STRICT_IGNORABLES,
+
+ // Separators
+ // Notes:
+ // - COMMA is a superset of STRICT_COMMA
+ // - PERIOD is a superset of STRICT_PERIOD
+ // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
+ // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
+ //   OTHER_GROUPING_SEPARATORS
+ COMMA,
+ PERIOD,
+ STRICT_COMMA,
+ STRICT_PERIOD,
+ OTHER_GROUPING_SEPARATORS,
+ ALL_SEPARATORS,
+ STRICT_ALL_SEPARATORS,
+
+ // Symbols
+ // TODO: NaN?
+ MINUS_SIGN,
+ PLUS_SIGN,
+ PERCENT_SIGN,
+ PERMILLE_SIGN,
+ // NOTE(review): INFINITY is also the name of a <math.h> macro; confirm that no translation
+ // unit includes <math.h>/<cmath> ahead of this header.
+ INFINITY,
+
+ // Other
+ DIGITS,
+ NAN_LEAD,
+ SCIENTIFIC_LEAD,
+ CWCF,
+
+ // Combined Separators with Digits (for lead code points)
+ DIGITS_OR_ALL_SEPARATORS,
+ DIGITS_OR_STRICT_ALL_SEPARATORS,
+
+ // The number of elements in the enum. Also used to indicate null.
+ COUNT
+};
+
+// Returns the shared set for the given key. The sets are built on first use; the result is
+// nullptr if that initialization failed.
+const UnicodeSet* get(Key key);
+
+// Returns key1 if its set contains str; otherwise returns COUNT.
+Key chooseFrom(UnicodeString str, Key key1);
+
+// Returns the first of key1, key2 whose set contains str; otherwise returns COUNT.
+Key chooseFrom(UnicodeString str, Key key1, Key key2);
+
+} // namespace unisets
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_UNISETS_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
It's usually best to have child dependencies called first. */
typedef enum ECleanupI18NType {
UCLN_I18N_START = -1,
+ UCLN_I18N_NUMPARSE_UNISETS,
UCLN_I18N_CURRENCY_SPACING,
UCLN_I18N_SPOOF,
UCLN_I18N_SPOOFDATA,
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
-numbertest_stringbuilder.o
+numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o
DEPS = $(OBJECTS:.o=.d)
#include "number_stringbuilder.h"
#include "intltest.h"
#include "number_affixutils.h"
+#include "numparse_stringsegment.h"
+#include "unicode/locid.h"
using namespace icu::number;
using namespace icu::number::impl;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
////////////////////////////////////////////////////////////////////////////////////////
// INSTRUCTIONS: //
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
};
+// Unit tests for numparse::impl::StringSegment.
+class StringSegmentTest : public IntlTest {
+ public:
+ void testOffset();
+ void testLength();
+ void testCharAt();
+ void testGetCodePoint();
+ void testCommonPrefixLength();
+
+ // Test-framework entry point that dispatches to the test methods above.
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
+};
+
+// Unit tests for the shared sets in numparse::impl::unisets.
+class UniSetsTest : public IntlTest {
+ public:
+ void testSetCoverage();
+
+ // Test-framework entry point that dispatches to the test methods above.
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
+
+ private:
+ // Asserts that the single-code-point string str is in set; other strings are skipped.
+ void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
+ const UnicodeSet& set, const UnicodeString& str);
+ // Asserts that code point cp is in set; localeName and setName go into the failure message.
+ void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
+ const UnicodeSet& set, UChar32 cp);
+};
+
// NOTE: This macro is identical to the one in itformat.cpp
#define TESTCLASS(id, TestClass) \
TESTCLASS(4, PatternModifierTest);
TESTCLASS(5, PatternStringTest);
TESTCLASS(6, NumberStringBuilderTest);
+ TESTCLASS(7, StringSegmentTest);
+ TESTCLASS(8, UniSetsTest);
default: name = ""; break; // needed to end loop
}
}
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numbertest.h"
+#include "numparse_stringsegment.h"
+
+// 11 UTF-16 code units: a surrogate pair (2), a space, "radio" (5), a space, and another
+// surrogate pair (2). The expected offsets and lengths in the tests below follow from this.
+static const char16_t* SAMPLE_STRING = u"📻 radio 📻";
+
+// Test-framework entry point: logs the suite name when executing and registers each test
+// method through the TESTCASE_AUTO macros.
+void StringSegmentTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
+ if (exec) {
+ logln("TestSuite StringSegmentTest: ");
+ }
+ TESTCASE_AUTO_BEGIN;
+ TESTCASE_AUTO(testOffset);
+ TESTCASE_AUTO(testLength);
+ TESTCASE_AUTO(testCharAt);
+ TESTCASE_AUTO(testGetCodePoint);
+ TESTCASE_AUTO(testCommonPrefixLength);
+ TESTCASE_AUTO_END;
+}
+
+// Checks that adjustOffset() moves the offset relatively and setOffset() sets it absolutely.
+void StringSegmentTest::testOffset() {
+ StringSegment segment(SAMPLE_STRING);
+ assertEquals("Initial Offset", 0, segment.getOffset());
+ segment.adjustOffset(3);
+ assertEquals("Adjust A", 3, segment.getOffset());
+ // Adjustments accumulate: 3 + 2.
+ segment.adjustOffset(2);
+ assertEquals("Adjust B", 5, segment.getOffset());
+ // setOffset() replaces the offset rather than adding to it.
+ segment.setOffset(4);
+ assertEquals("Set Offset", 4, segment.getOffset());
+}
+
+// Checks how length() reacts to offset changes, setLength() and resetLength().
+void StringSegmentTest::testLength() {
+ StringSegment segment(SAMPLE_STRING);
+ assertEquals("Initial length", 11, segment.length());
+ // Moving the start forward by 3 shortens the segment: 11 - 3.
+ segment.adjustOffset(3);
+ assertEquals("Adjust", 8, segment.length());
+ // The end is now fixed at index 3 + 4 = 7.
+ segment.setLength(4);
+ assertEquals("Set Length", 4, segment.length());
+ // The end stays at 7 while the start moves to 5.
+ segment.setOffset(5);
+ assertEquals("After adjust offset", 2, segment.length());
+ // The end returns to the end of the string: 11 - 5.
+ segment.resetLength();
+ assertEquals("After reset length", 6, segment.length());
+}
+
+// Checks that toUnicodeString() reflects the current offset and length of the segment.
+void StringSegmentTest::testCharAt() {
+ StringSegment segment(SAMPLE_STRING);
+ assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
+ // Skip the leading surrogate pair and the space.
+ segment.adjustOffset(3);
+ assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
+ // Restrict the segment to the five code units of "radio".
+ segment.setLength(5);
+ assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toUnicodeString());
+}
+
+// Checks getCodePoint() for a full surrogate pair, for surrogates cut off by the segment
+// bounds, and for a plain BMP character.
+void StringSegmentTest::testGetCodePoint() {
+    StringSegment segment(SAMPLE_STRING);
+    assertEquals("Double-width code point", 0x1F4FB, segment.getCodePoint());
+    // Only the lead surrogate is inside the segment now.
+    segment.setLength(1);
+    assertEquals("Invalid A", -1, segment.getCodePoint());
+    // The segment now starts in the middle of the pair, at the trail surrogate.
+    segment.resetLength();
+    segment.adjustOffset(1);
+    assertEquals("Invalid B", -1, segment.getCodePoint());
+    // Past the pair: the next code unit is the space.
+    segment.adjustOffset(1);
+    assertEquals("Valid again", 0x20, segment.getCodePoint());
+}
+
+// Checks getCommonPrefixLength() against full, partial, mismatching and empty strings, at
+// several offsets and lengths. Results are counted in UTF-16 code units.
+void StringSegmentTest::testCommonPrefixLength() {
+ StringSegment segment(SAMPLE_STRING);
+ assertEquals("", 11, segment.getCommonPrefixLength(SAMPLE_STRING));
+ // The surrogate pair counts as two code units.
+ assertEquals("", 4, segment.getCommonPrefixLength(u"📻 r"));
+ assertEquals("", 3, segment.getCommonPrefixLength(u"📻 x"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u"x"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u""));
+ // Segment now starts at "radio".
+ segment.adjustOffset(3);
+ // The comparison is case-sensitive.
+ assertEquals("", 0, segment.getCommonPrefixLength(u"RADiO"));
+ assertEquals("", 5, segment.getCommonPrefixLength(u"radio"));
+ assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u""));
+ // Segment is now "rad"; the result is capped by the segment length.
+ segment.setLength(3);
+ assertEquals("", 3, segment.getCommonPrefixLength(u"radio"));
+ assertEquals("", 2, segment.getCommonPrefixLength(u"rafio"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u"fadio"));
+ assertEquals("", 0, segment.getCommonPrefixLength(u""));
+ segment.resetLength();
+ segment.setOffset(11); // end of string
+ assertEquals("", 0, segment.getCommonPrefixLength(u"foo"));
+}
+
+#endif
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numbertest.h"
+#include "numparse_unisets.h"
+#include "unicode/dcfmtsym.h"
+
+#include <iostream>
+#include <cstr.h>
+
+using icu::numparse::impl::unisets::get;
+
+// Test-framework entry point: logs the suite name when executing and registers each test
+// method through the TESTCASE_AUTO macros.
+void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
+ if (exec) {
+ logln("TestSuite UniSetsTest: ");
+ }
+ TESTCASE_AUTO_BEGIN;
+ TESTCASE_AUTO(testSetCoverage);
+ TESTCASE_AUTO_END;
+}
+
+// Verifies that the lenient separator sets contain the strict ones, and that the symbols
+// of every available locale's DecimalFormatSymbols are covered by the corresponding set.
+void UniSetsTest::testSetCoverage() {
+    UErrorCode status = U_ZERO_ERROR;
+
+    // Lenient comma/period should be supersets of strict comma/period;
+    // it also makes the coverage logic cheaper.
+    assertTrue(
+            "COMMA should be superset of STRICT_COMMA",
+            get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
+    assertTrue(
+            "PERIOD should be superset of STRICT_PERIOD",
+            get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));
+
+    // Decimal separators: strict commas and strict periods.
+    UnicodeSet decimals;
+    decimals.addAll(*get(unisets::STRICT_COMMA));
+    decimals.addAll(*get(unisets::STRICT_PERIOD));
+    decimals.freeze();
+    // Grouping separators: everything usable as a decimal separator plus the other
+    // grouping separators.
+    UnicodeSet grouping;
+    grouping.addAll(decimals);
+    grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
+    grouping.freeze();
+
+    const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
+    const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
+    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
+    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
+    const UnicodeSet &infinity = *get(unisets::INFINITY);
+    const UnicodeSet &nanLead = *get(unisets::NAN_LEAD);
+    const UnicodeSet &scientificLead = *get(unisets::SCIENTIFIC_LEAD);
+
+    int32_t localeCount;
+    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
+    for (int32_t i = 0; i < localeCount; i++) {
+        Locale locale = allAvailableLocales[i];
+        DecimalFormatSymbols dfs(locale, status);
+        UnicodeString localeName;
+        locale.getDisplayName(localeName);
+        assertSuccess(UnicodeString("Making DFS for ") + localeName, status);
+
+        // Passes the set's variable name as the set name shown in failure messages.
+#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
+        ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
+        ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
+        ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
+        ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
+        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
+        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
+        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
+        // For NaN and the exponent symbol only the first code point (the "lead") is checked.
+        ASSERT_IN_SET(nanLead, dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0));
+        ASSERT_IN_SET(nanLead,
+                u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol).char32At(0), 0));
+        ASSERT_IN_SET(scientificLead,
+                u_foldCase(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol).char32At(0), 0));
+    }
+}
+
+// Checks a locale symbol against `set`, but only when the symbol is exactly one code point.
+void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
+                              const UnicodeSet &set, const UnicodeString &str) {
+    const int32_t codePointCount = str.countChar32(0, str.length());
+    if (codePointCount == 1) {
+        assertInSet(localeName, setName, set, str.char32At(0));
+    }
+    // Locale strings that are not a single code point (usually because of a bidi mark)
+    // are ignored.
+}
+
+// Asserts that `cp` is a member of `set`, naming the locale, code point and set on failure.
+void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
+                              const UnicodeSet &set, UChar32 cp) {
+    // If this test case fails, add the specified code point to the corresponding set in
+    // UnicodeSetStaticCache.java and numparse_unisets.cpp
+    UnicodeString message(localeName);
+    message.append(UnicodeString(u" "));
+    message.append(UnicodeString(cp));
+    message.append(UnicodeString(u" is missing in "));
+    message.append(setName);
+    const UBool isMember = set.contains(cp);
+    assertTrue(message, isMember);
+}
+
+
+#endif