sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
pluralmap.o \
-numparse_unisets.o
+static_unicode_sets.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
- <ClCompile Include="numparse_unisets.cpp" />
+ <ClCompile Include="static_unicode_sets.cpp" />
<ClInclude Include="localsvc.h" />
<ClInclude Include="msvcres.h" />
<ClInclude Include="pluralmap.h" />
<ClInclude Include="uinvchar.h" />
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
- <ClInclude Include="numparse_unisets.h" />
+ <ClInclude Include="static_unicode_sets.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />
<ClCompile Include="ubiditransform.cpp">
<Filter>bidi</Filter>
</ClCompile>
- <ClCompile Include="numparse_unisets.cpp">
+ <ClCompile Include="static_unicode_sets.cpp">
<Filter>formatting</Filter>
</ClCompile>
</ItemGroup>
<ClInclude Include="unicode\ubiditransform.h">
<Filter>bidi</Filter>
</ClInclude>
- <ClInclude Include="numparse_unisets.h">
+ <ClInclude Include="static_unicode_sets.h">
<Filter>formatting</Filter>
</ClInclude>
</ItemGroup>
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
- <ClCompile Include="numparse_unisets.cpp" />
+ <ClCompile Include="static_unicode_sets.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="localsvc.h" />
<ClInclude Include="uinvchar.h" />
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
- <ClInclude Include="numparse_unisets.h" />
+ <ClInclude Include="static_unicode_sets.h" />
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
#include "umutex.h"
#include "ucln_cmn.h"
#include "unicode/uniset.h"
#include "uassert.h"
using namespace icu;
-using namespace icu::numparse;
-using namespace icu::numparse::impl;
-using namespace icu::numparse::impl::unisets;
+using namespace icu::unisets;
namespace {
-static UnicodeSet* gUnicodeSets[COUNT] = {};
+UnicodeSet* gUnicodeSets[COUNT] = {};
+
+// Save the empty instance in static memory to have well-defined behavior if a
+// regular UnicodeSet cannot be allocated. A UnicodeSet is constructed into this
+// buffer with placement new, so the storage must be aligned for UnicodeSet; a
+// plain char array is only guaranteed an alignment of 1.
+alignas(UnicodeSet)
+char gEmptyUnicodeSet[sizeof(UnicodeSet)];
+
+// Whether the gEmptyUnicodeSet is initialized and ready to use.
+UBool gEmptyUnicodeSetInitialized = FALSE;
+
+// Returns the cached set stored for `key`. Falls back to the statically
+// allocated empty set when the slot was never populated (nullptr) or when
+// `key` is not a valid index into gUnicodeSets (for example NONE, which is -1,
+// or COUNT), so the array is never indexed out of bounds and callers never
+// receive a null pointer.
+inline UnicodeSet* getImpl(Key key) {
+    UnicodeSet* candidate = (key >= 0 && key < COUNT) ? gUnicodeSets[key] : nullptr;
+    if (candidate == nullptr) {
+        return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+    }
+    return candidate;
+}
UnicodeSet* computeUnion(Key k1, Key k2) {
UnicodeSet* result = new UnicodeSet();
if (result == nullptr) {
return nullptr;
}
- result->addAll(*gUnicodeSets[k1]);
- result->addAll(*gUnicodeSets[k2]);
+ result->addAll(*getImpl(k1));
+ result->addAll(*getImpl(k2));
result->freeze();
return result;
}
if (result == nullptr) {
return nullptr;
}
- result->addAll(*gUnicodeSets[k1]);
- result->addAll(*gUnicodeSets[k2]);
- result->addAll(*gUnicodeSets[k3]);
+ result->addAll(*getImpl(k1));
+ result->addAll(*getImpl(k2));
+ result->addAll(*getImpl(k3));
result->freeze();
return result;
}
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
UBool U_CALLCONV cleanupNumberParseUniSets() {
+ if (gEmptyUnicodeSetInitialized) {
+ reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet();
+ gEmptyUnicodeSetInitialized = FALSE;
+ }
for (int32_t i = 0; i < COUNT; i++) {
delete gUnicodeSets[i];
gUnicodeSets[i] = nullptr;
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
- gUnicodeSets[EMPTY] = new UnicodeSet();
+ // Initialize the empty instance for well-defined fallback behavior
+ new(gEmptyUnicodeSet) UnicodeSet();
+ reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
+ gEmptyUnicodeSetInitialized = TRUE;
// These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
if (U_FAILURE(status)) { return; }
- // TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
+ // NOTE: It is OK for these assertions to fail if there was a no-data build.
U_ASSERT(gUnicodeSets[COMMA] != nullptr);
U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
- for (int32_t i = 0; i < COUNT; i++) {
- gUnicodeSets[i]->freeze();
+ for (auto* uniset : gUnicodeSets) {
+ if (uniset != nullptr) {
+ uniset->freeze();
+ }
}
}
UErrorCode localStatus = U_ZERO_ERROR;
umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
if (U_FAILURE(localStatus)) {
- // TODO: This returns non-null in Java, and callers assume that.
- return nullptr;
+ return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
}
- return gUnicodeSets[key];
+ return getImpl(key);
}
Key unisets::chooseFrom(UnicodeString str, Key key1) {
- return get(key1)->contains(str) ? key1 : COUNT;
+ return get(key1)->contains(str) ? key1 : NONE;
}
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
// } else if (get(YEN_SIGN)->contains(str)) {
// return YEN_SIGN;
// } else {
-// return COUNT;
+// return NONE;
// }
//}
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
-#ifndef __NUMPARSE_UNISETS_H__
-#define __NUMPARSE_UNISETS_H__
+#ifndef __STATIC_UNICODE_SETS_H__
+#define __STATIC_UNICODE_SETS_H__
#include "unicode/uniset.h"
#include "unicode/unistr.h"
-U_NAMESPACE_BEGIN namespace numparse {
-namespace impl {
+U_NAMESPACE_BEGIN
namespace unisets {
enum Key {
- EMPTY,
+ // NONE is used to indicate null in chooseFrom().
+ // EMPTY is used to get an empty UnicodeSet.
+ NONE = -1,
+ EMPTY = 0,
// Ignorables
DEFAULT_IGNORABLES,
DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_STRICT_ALL_SEPARATORS,
- // The number of elements in the enum. Also used to indicate null.
+ // The number of elements in the enum.
COUNT
};
-// Exported as U_COMMON_API for ucurr.cpp
+/**
+ * Gets the static-allocated UnicodeSet according to the provided key. The
+ * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
+ *
+ * Exported as U_COMMON_API for ucurr.cpp
+ *
+ * @param key The desired UnicodeSet according to the enum in this file.
+ * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
+ * may be empty if an error occurred during data loading.
+ */
U_COMMON_API const UnicodeSet* get(Key key);
-// Exported as U_COMMON_API for numparse_decimal.cpp
+/**
+ * Checks if the UnicodeSet given by key1 contains the given string.
+ *
+ * Exported as U_COMMON_API for numparse_decimal.cpp
+ *
+ * @param str The string to check.
+ * @param key1 The set to check.
+ * @return key1 if the set contains str, or NONE if not.
+ */
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
-// Exported as U_COMMON_API for numparse_decimal.cpp
+/**
+ * Checks if the UnicodeSet given by either key1 or key2 contains the string.
+ *
+ * Exported as U_COMMON_API for numparse_decimal.cpp
+ *
+ * @param str The string to check.
+ * @param key1 The first set to check.
+ * @param key2 The second set to check.
+ * @return key1 if that set contains str; key2 if that set contains str; or
+ * NONE if neither set contains str.
+ */
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
// Unused in C++:
};
} // namespace unisets
-} // namespace impl
-} // namespace numparse
U_NAMESPACE_END
-#endif //__NUMPARSE_UNISETS_H__
+#endif //__STATIC_UNICODE_SETS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
#include "uassert.h"
#include "umutex.h"
#include "ucln_cmn.h"
}
static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
- using namespace icu::numparse::impl;
if (U_FAILURE(status)) { return; }
for (auto& entry : unisets::kCurrencyEntries) {
UnicodeString exemplar(entry.exemplar);
#include "numparse_types.h"
#include "numparse_decimal.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
#include "numparse_utils.h"
#include "unicode/uchar.h"
#include "putilimp.h"
decimalSeparator,
strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
- if (decimalKey != unisets::COUNT) {
+ if (decimalKey >= 0) {
decimalUniSet = unisets::get(decimalKey);
} else {
auto* set = new UnicodeSet();
fLocalDecimalUniSet.adoptInstead(set);
}
- if (groupingKey != unisets::COUNT && decimalKey != unisets::COUNT) {
+ if (groupingKey >= 0 && decimalKey >= 0) {
// Everything is available in the static cache
separatorSet = groupingUniSet;
leadSet = unisets::get(
#include "unicode/numberformatter.h"
#include "cstr.h"
#include "number_mapper.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::number;
#include "numparse_types.h"
#include "numparse_scientific.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::numparse;
#include "numparse_types.h"
#include "unicode/uniset.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
#include "numparse_types.h"
#include "numparse_validators.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
using namespace icu;
using namespace icu::numparse;
#define __SOURCE_NUMPARSE_VALIDATORS_H__
#include "numparse_types.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
#include "unicode/utf16.h"
#include "unicode/uniset.h"
#include "unicode/decimfmt.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN
numberformattesttuple.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
-numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
-numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o
+numbertest_stringbuilder.o numbertest_stringsegment.o \
+numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
+static_unisets_test.o
DEPS = $(OBJECTS:.o=.d)
return condition;
}
-UBool IntlTest::assertFalse(const char* message, UBool condition, UBool quiet) {
+// Reports a failure when `condition` is true. With possibleDataError set, the
+// failure goes through dataerrln() instead of errln(). When the check passes
+// and `quiet` is false, a confirmation line is logged.
+UBool IntlTest::assertFalse(const char* message, UBool condition, UBool quiet, UBool possibleDataError) {
if (condition) {
- errln("FAIL: assertFalse() failed: %s", message);
+ if (possibleDataError) {
+ dataerrln("FAIL: assertFalse() failed: %s", message);
+ } else {
+ errln("FAIL: assertFalse() failed: %s", message);
+ }
} else if (!quiet) {
logln("Ok: %s", message);
}
return ASSERT_BUF;
}
-UBool IntlTest::assertTrue(const UnicodeString& message, UBool condition, UBool quiet) {
- return assertTrue(extractToAssertBuf(message), condition, quiet);
+UBool IntlTest::assertTrue(const UnicodeString& message, UBool condition, UBool quiet, UBool possibleDataError) {
+ return assertTrue(extractToAssertBuf(message), condition, quiet, possibleDataError);
}
-UBool IntlTest::assertFalse(const UnicodeString& message, UBool condition, UBool quiet) {
- return assertFalse(extractToAssertBuf(message), condition, quiet);
+UBool IntlTest::assertFalse(const UnicodeString& message, UBool condition, UBool quiet, UBool possibleDataError) {
+ return assertFalse(extractToAssertBuf(message), condition, quiet, possibleDataError);
}
UBool IntlTest::assertSuccess(const UnicodeString& message, UErrorCode ec) {
/* JUnit-like assertions. Each returns TRUE if it succeeds. */
UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE, const char *file=NULL, int line=0);
- UBool assertFalse(const char* message, UBool condition, UBool quiet=FALSE);
+ UBool assertFalse(const char* message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
/**
* @param possibleDataError - if TRUE, use dataerrln instead of errcheckln on failure
* @return TRUE on success, FALSE on failure.
UBool assertEquals(const UnicodeString& message, const Formattable& expected,
const Formattable& actual);
#endif
- UBool assertTrue(const UnicodeString& message, UBool condition, UBool quiet=FALSE);
- UBool assertFalse(const UnicodeString& message, UBool condition, UBool quiet=FALSE);
+ UBool assertTrue(const UnicodeString& message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
+ UBool assertFalse(const UnicodeString& message, UBool condition, UBool quiet=FALSE, UBool possibleDataError=FALSE);
UBool assertSuccess(const UnicodeString& message, UErrorCode ec);
UBool assertEquals(const UnicodeString& message, const UnicodeString& expected,
const UnicodeString& actual, UBool possibleDataError=FALSE);
<ClCompile Include="numbertest_patternstring.cpp" />
<ClCompile Include="numbertest_stringbuilder.cpp" />
<ClCompile Include="numbertest_stringsegment.cpp" />
- <ClCompile Include="numbertest_unisets.cpp" />
<ClCompile Include="numbertest_parse.cpp" />
<ClCompile Include="numbertest_doubleconversion.cpp" />
<ClCompile Include="numbertest_skeletons.cpp" />
<ClCompile Include="scientificnumberformattertest.cpp" />
<ClCompile Include="sdtfmtts.cpp" />
<ClCompile Include="selfmts.cpp" />
- <ClCompile Include="tchcfmt.cpp" />
<ClCompile Include="simpleformattertest.cpp" />
+ <ClCompile Include="static_unisets_test.cpp" />
+ <ClCompile Include="tchcfmt.cpp" />
<ClCompile Include="tfsmalls.cpp" />
<ClCompile Include="tmsgfmt.cpp" />
<ClCompile Include="tsdate.cpp" />
<ClCompile Include="numbertest_stringsegment.cpp">
<Filter>formatting</Filter>
</ClCompile>
- <ClCompile Include="numbertest_unisets.cpp">
- <Filter>formatting</Filter>
- </ClCompile>
<ClCompile Include="numbertest_parse.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="simpleformattertest.cpp">
<Filter>formatting</Filter>
</ClCompile>
+ <ClCompile Include="static_unisets_test.cpp">
+ <Filter>formatting</Filter>
+ </ClCompile>
<ClCompile Include="tchcfmt.cpp">
<Filter>formatting</Filter>
</ClCompile>
extern IntlTest *createSimpleFormatterTest();
extern IntlTest *createUnifiedCacheTest();
extern IntlTest *createQuantityFormatterTest();
-extern IntlTest *createPluralMapTest();
+extern IntlTest *createPluralMapTest();
+extern IntlTest *createStaticUnicodeSetsTest();
#define CASE(id, test) case id: \
callTest(*test, par);
}
break;
+ case 24:
+ name = "StaticUnicodeSetsTest";
+ if (exec) {
+ logln("TestSuite StaticUnicodeSetsTest---"); logln();
+ LocalPointer<IntlTest> test(createStaticUnicodeSetsTest());
+ callTest(*test, par);
+ }
+ break;
default: name = ""; break; //needed to end loop
}
}
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
-class UniSetsTest : public IntlTest {
- public:
- void testSetCoverage();
-
- void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
-
- private:
- void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
- const UnicodeSet& set, const UnicodeString& str);
- void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
- const UnicodeSet& set, UChar32 cp);
-};
-
class NumberParserTest : public IntlTest {
public:
void testBasic();
TESTCLASS(6, NumberStringBuilderTest);
TESTCLASS(7, DoubleConversionTest);
TESTCLASS(8, StringSegmentTest);
- TESTCLASS(9, UniSetsTest);
- TESTCLASS(10, NumberParserTest);
- TESTCLASS(11, NumberSkeletonTest);
+ TESTCLASS(9, NumberParserTest);
+ TESTCLASS(10, NumberSkeletonTest);
default: name = ""; break; // needed to end loop
}
}
#include "numbertest.h"
#include "numparse_impl.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
#include "unicode/dcfmtsym.h"
#include "unicode/testlog.h"
#include <cmath>
#include <numparse_affixes.h>
-using icu::numparse::impl::unisets::get;
+using icu::unisets::get;
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
if (exec) {
#if !UCONFIG_NO_FORMATTING
#include "numbertest.h"
-#include "numparse_unisets.h"
+#include "static_unicode_sets.h"
#include "unicode/dcfmtsym.h"
-using icu::numparse::impl::unisets::get;
+using icu::unisets::get;
-void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
+class StaticUnicodeSetsTest : public IntlTest {
+ public:
+ void testSetCoverage();
+ void testNonEmpty();
+
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
+
+ private:
+ void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
+ const UnicodeSet& set, const UnicodeString& str);
+ void assertInSet(const UnicodeString& localeName, const UnicodeString &setName,
+ const UnicodeSet& set, UChar32 cp);
+};
+
+extern IntlTest *createStaticUnicodeSetsTest() {
+ return new StaticUnicodeSetsTest();
+}
+
+void StaticUnicodeSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
if (exec) {
- logln("TestSuite UniSetsTest: ");
+ logln("TestSuite StaticUnicodeSetsTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSetCoverage);
+ TESTCASE_AUTO(testNonEmpty);
TESTCASE_AUTO_END;
}
-void UniSetsTest::testSetCoverage() {
+void StaticUnicodeSetsTest::testSetCoverage() {
UErrorCode status = U_ZERO_ERROR;
// Lenient comma/period should be supersets of strict comma/period;
}
}
-void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
+// Verifies that every static set other than EMPTY contains at least one
+// element. A failure is reported as a possible data error.
+void StaticUnicodeSetsTest::testNonEmpty() {
+    for (int32_t keyIndex = 0; keyIndex < unisets::COUNT; keyIndex++) {
+        // EMPTY is skipped; every other key is checked.
+        if (keyIndex != unisets::EMPTY) {
+            const UnicodeSet* cachedSet = get(static_cast<unisets::Key>(keyIndex));
+            const UBool setIsEmpty = cachedSet->isEmpty();
+            // Can fail if no data:
+            assertFalse(UnicodeString("Set should not be empty: ") + keyIndex, setIsEmpty, FALSE, TRUE);
+        }
+    }
+}
+
+void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, const UnicodeString &str) {
if (str.countChar32(0, str.length()) != 1) {
// Ignore locale strings with more than one code point (usually a bidi mark)
assertInSet(localeName, setName, set, str.char32At(0));
}
-void UniSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
+void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, UChar32 cp) {
// If this test case fails, add the specified code point to the corresponding set in
-// UnicodeSetStaticCache.java and numparse_unisets.cpp
+// StaticUnicodeSets.java and static_unicode_sets.cpp
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
+package com.ibm.icu.impl;
import java.util.EnumMap;
import java.util.Map;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.UResource.Value;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
/**
- * This class statically initializes UnicodeSets useful for number parsing. Microbenchmarks show this to
- * bring a very sizeable performance boost.
+ * This class statically initializes UnicodeSets, originally built for number parsing. Microbenchmarks
+ * show this to bring a very sizeable performance boost.
*
- * IMPORTANT ASSUMPTION: All of the sets contain code points (no strings) and they are all case-folded.
- * If this assumption were ever broken, logic in classes such as SymbolMatcher would need to be updated
- * in order to return well-formed sets upon calls to getLeadCodePoints().
+ * IMPORTANT ASSUMPTION FOR NUMBER PARSING: All of the sets contain code points (no strings) and they are
+ * all case-folded. If this assumption were ever broken, logic in classes such as SymbolMatcher would
+ * need to be updated in order to return well-formed sets upon calls to getLeadCodePoints().
*
* @author sffc
*/
-public class UnicodeSetStaticCache {
+public class StaticUnicodeSets {
public static enum Key {
// Ignorables
DEFAULT_IGNORABLES,
private static final Map<Key, UnicodeSet> unicodeSets = new EnumMap<Key, UnicodeSet>(Key.class);
+ /**
+ * Gets the static-allocated UnicodeSet according to the provided key.
+ *
+ * @param key
+ * The desired UnicodeSet according to the enum in this file.
+ * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but may be empty if an
+ * error occurred during data loading.
+ */
public static UnicodeSet get(Key key) {
- return unicodeSets.get(key);
+ UnicodeSet candidate = unicodeSets.get(key);
+ if (candidate == null) {
+ return UnicodeSet.EMPTY;
+ }
+ return candidate;
}
+ /**
+ * Checks if the UnicodeSet given by key1 contains the given string.
+ *
+ * @param str
+ * The string to check.
+ * @param key1
+ * The set to check.
+ * @return key1 if the set contains str, or null if not.
+ */
public static Key chooseFrom(String str, Key key1) {
return get(key1).contains(str) ? key1 : null;
}
+ /**
+ * Checks if the UnicodeSet given by either key1 or key2 contains the string.
+ *
+ * Exported as U_COMMON_API for numparse_decimal.cpp
+ *
+ * @param str
+ * The string to check.
+ * @param key1
+ * The first set to check.
+ * @param key2
+ * The second set to check.
+ * @return key1 if that set contains str; key2 if that set contains str; or null if neither set
+ * contains str.
+ */
public static Key chooseFrom(String str, Key key1, Key key2) {
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
}
+ /**
+ * Looks through all Currency-related sets for the given string, returning the first match or null if
+ * no match was found.
+ */
public static Key chooseCurrency(String str) {
if (get(Key.DOLLAR_SIGN).contains(str)) {
return Key.DOLLAR_SIGN;
.getBundleInstance(ICUData.ICU_BASE_NAME, ULocale.ROOT);
rb.getAllItemsWithFallback("parse", new ParseDataSink());
- // TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
+ // NOTE: It is OK for these assertions to fail if there was a no-data build.
assert unicodeSets.containsKey(Key.COMMA);
assert unicodeSets.containsKey(Key.STRICT_COMMA);
assert unicodeSets.containsKey(Key.PERIOD);
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
+import com.ibm.icu.impl.StaticUnicodeSets.Key;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
// Attempt to find separators in the static cache
- groupingUniSet = UnicodeSetStaticCache.get(groupingKey);
- Key decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator,
+ groupingUniSet = StaticUnicodeSets.get(groupingKey);
+ Key decimalKey = StaticUnicodeSets.chooseFrom(decimalSeparator,
strictSeparators ? Key.STRICT_COMMA : Key.COMMA,
strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
if (decimalKey != null) {
- decimalUniSet = UnicodeSetStaticCache.get(decimalKey);
+ decimalUniSet = StaticUnicodeSets.get(decimalKey);
} else {
decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
}
if (groupingKey != null && decimalKey != null) {
// Everything is available in the static cache
separatorSet = groupingUniSet;
- leadSet = UnicodeSetStaticCache.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS
+ leadSet = StaticUnicodeSets.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS
: Key.DIGITS_OR_STRICT_ALL_SEPARATORS);
} else {
separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze();
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.UnicodeSet;
/**
public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible {
public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
- UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES));
+ StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES));
public static final IgnorablesMatcher STRICT = new IgnorablesMatcher(
- UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.STRICT_IGNORABLES));
+ StaticUnicodeSets.get(StaticUnicodeSets.Key.STRICT_IGNORABLES));
public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
assert ignorables.isFrozen();
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
}
private InfinityMatcher() {
- super(UnicodeSetStaticCache.Key.INFINITY);
+ super(StaticUnicodeSets.Key.INFINITY);
}
@Override
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
}
private MinusSignMatcher(boolean allowTrailing) {
- super(UnicodeSetStaticCache.Key.MINUS_SIGN);
+ super(StaticUnicodeSets.Key.MINUS_SIGN);
this.allowTrailing = allowTrailing;
}
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
}
private PercentMatcher() {
- super(UnicodeSetStaticCache.Key.PERCENT_SIGN);
+ super(StaticUnicodeSets.Key.PERCENT_SIGN);
}
@Override
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
}
private PermilleMatcher() {
- super(UnicodeSetStaticCache.Key.PERMILLE_SIGN);
+ super(StaticUnicodeSets.Key.PERMILLE_SIGN);
}
@Override
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
}
private PlusSignMatcher(boolean allowTrailing) {
- super(UnicodeSetStaticCache.Key.PLUS_SIGN);
+ super(StaticUnicodeSets.Key.PLUS_SIGN);
this.allowTrailing = allowTrailing;
}
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.text.DecimalFormatSymbols;
}
private static UnicodeSet minusSignSet() {
- return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN);
+ return StaticUnicodeSets.get(StaticUnicodeSets.Key.MINUS_SIGN);
}
private static UnicodeSet plusSignSet() {
- return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN);
+ return StaticUnicodeSets.get(StaticUnicodeSets.Key.PLUS_SIGN);
}
@Override
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StringSegment;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.UnicodeSet;
/**
uniSet = symbolUniSet;
}
- protected SymbolMatcher(UnicodeSetStaticCache.Key key) {
+ protected SymbolMatcher(StaticUnicodeSets.Key key) {
string = "";
- uniSet = UnicodeSetStaticCache.get(key);
+ uniSet = StaticUnicodeSets.get(key);
}
public UnicodeSet getSet() {
import java.text.CharacterIterator;
import java.util.Map;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
int start = iterator.getRunStart(NumberFormat.Field.EXPONENT_SIGN);
int limit = iterator.getRunLimit(NumberFormat.Field.EXPONENT_SIGN);
int aChar = char32AtAndAdvance(iterator);
- if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(aChar)) {
+ if (StaticUnicodeSets.get(StaticUnicodeSets.Key.MINUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,
start,
result);
result.append(SUPERSCRIPT_MINUS_SIGN);
- } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(aChar)) {
+ } else if (StaticUnicodeSets.get(StaticUnicodeSets.Key.PLUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,
import com.ibm.icu.impl.SimpleCache;
import com.ibm.icu.impl.SoftCache;
import com.ibm.icu.impl.TextTrieMap;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
+import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.CurrencyDisplayNames;
import com.ibm.icu.text.CurrencyMetaInfo;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyDigits;
String isoCode = e.getValue();
// Register under not just symbol, but under every equivalent symbol as well
// e.g short width yen and long width yen.
- UnicodeSetStaticCache.Key key = UnicodeSetStaticCache.chooseCurrency(symbol);
+ StaticUnicodeSets.Key key = StaticUnicodeSets.chooseCurrency(symbol);
CurrencyStringInfo value = new CurrencyStringInfo(isoCode, symbol);
if (key != null) {
- UnicodeSet equivalents = UnicodeSetStaticCache.get(key);
+ UnicodeSet equivalents = StaticUnicodeSets.get(key);
// The symbol itself is included in the UnicodeSet
for (String equivalentSymbol : equivalents) {
symTrie.put(equivalentSymbol, value);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.number;
-import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
+import static com.ibm.icu.impl.StaticUnicodeSets.get;
import java.math.BigDecimal;
import java.util.Random;
import org.junit.Test;
import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.impl.StaticUnicodeSets.Key;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.number.Precision;
+++ /dev/null
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.dev.test.number;
-
-import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
-
-/**
- * This test class is thin; most of it was moved to ExhaustiveNumberTest.
- * @author sffc
- */
-public class UnicodeSetStaticCacheTest {
-
- @Test
- public void testFrozen() {
- for (Key key : Key.values()) {
- assertTrue(get(key).isFrozen());
- }
- }
-}
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.util;
+
+import static com.ibm.icu.impl.StaticUnicodeSets.get;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+import com.ibm.icu.impl.StaticUnicodeSets.Key;
+
+/**
+ * NOTE: The test testSetCoverage can be found in ExhaustiveNumberTest.java
+ *
+ * @author sffc
+ */
+public class StaticUnicodeSetsTest {
+
+ @Test
+ public void testFrozen() {
+ for (Key key : Key.values()) {
+ assertTrue(get(key).isFrozen());
+ }
+ }
+
+    /**
+     * Checks that every static set contains at least one element. Frozenness is covered
+     * separately by testFrozen().
+     */
+    @Test
+    public void testNonEmpty() {
+        for (Key key : Key.values()) {
+            // NOTE: No key EMPTY in Java, so no key is skipped here.
+            assertTrue("Set should not be empty: " + key, !get(key).isEmpty());
+        }
+    }
+}