ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
-pluralmap.o
+pluralmap.o \
+numparse_unisets.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_unisets.h"
+#include "umutex.h"
+#include "ucln_cmn.h"
+#include "unicode/uniset.h"
+#include "uresimp.h"
+#include "cstring.h"
+#include "uassert.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+using namespace icu::numparse::impl::unisets;
+
+
+namespace {
+
+static UnicodeSet* gUnicodeSets[COUNT] = {};
+
+// Adds the cached set stored under `key` to `target`.
+// A slot that was never populated (for example because its allocation failed
+// or the resource data did not supply it) is treated as the empty set rather
+// than being dereferenced.
+void addCachedSet(UnicodeSet& target, Key key) {
+    const UnicodeSet* source = gUnicodeSets[key];
+    if (source != nullptr) {
+        target.addAll(*source);
+    }
+}
+
+// Builds a new frozen set holding the union of the cached sets k1 and k2.
+// Returns nullptr if the result set itself cannot be allocated.
+// The caller takes ownership of the returned set.
+UnicodeSet* computeUnion(Key k1, Key k2) {
+    UnicodeSet* result = new UnicodeSet();
+    if (result == nullptr) {
+        return nullptr;
+    }
+    addCachedSet(*result, k1);
+    addCachedSet(*result, k2);
+    result->freeze();
+    return result;
+}
+
+// Three-way variant: union of the cached sets k1, k2 and k3, frozen.
+// Returns nullptr if the result set itself cannot be allocated.
+// The caller takes ownership of the returned set.
+UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
+    UnicodeSet* result = new UnicodeSet();
+    if (result == nullptr) {
+        return nullptr;
+    }
+    addCachedSet(*result, k1);
+    addCachedSet(*result, k2);
+    addCachedSet(*result, k3);
+    result->freeze();
+    return result;
+}
+
+
+// Parses `unicodeSetPattern` into a new UnicodeSet and stores it in the
+// gUnicodeSets slot for `key`.
+//
+// key:               slot to fill.
+// unicodeSetPattern: UnicodeSet pattern string.
+// status:            receives any pattern-parsing error from the UnicodeSet
+//                    constructor; set to U_MEMORY_ALLOCATION_ERROR if the set
+//                    cannot be allocated and no earlier error is recorded.
+void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
+    // The Java counterpart asserts unicodeSets.get(key) == null here.
+    UnicodeSet* replacement = new UnicodeSet(unicodeSetPattern, status);
+    if (replacement == nullptr) {
+        if (U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return;
+    }
+    // If two patterns were ever classified under the same key, release the
+    // earlier set instead of leaking it when the slot is overwritten.
+    delete gUnicodeSets[key];
+    gUnicodeSets[key] = replacement;
+}
+
+// Resource sink that walks a three-level resource (context table ->
+// strictness table -> array of UnicodeSet pattern strings) and stores each
+// recognized pattern into gUnicodeSets via saveSet().
+class ParseDataSink : public ResourceSink {
+ public:
+ // key/value: the resource handed to the sink; `value` must be a table.
+ //            Both are reused as cursors while descending into it.
+ // status:    set by the resource accessors or by saveSet() on failure;
+ //            processing stops at the first failure.
+ void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
+ ResourceTable contextsTable = value.getTable(status);
+ if (U_FAILURE(status)) { return; }
+ // Outer level: one entry per context; the "date" context is skipped.
+ for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
+ if (uprv_strcmp(key, "date") == 0) {
+ // ignore
+ } else {
+ ResourceTable strictnessTable = value.getTable(status);
+ if (U_FAILURE(status)) { return; }
+ // Middle level: one entry per strictness; only "lenient" is
+ // distinguished, every other key is handled as strict.
+ for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
+ bool isLenient = (uprv_strcmp(key, "lenient") == 0);
+ ResourceArray array = value.getArray(status);
+ if (U_FAILURE(status)) { return; }
+ // Inner level: each array element is one UnicodeSet pattern.
+ for (int k = 0; k < array.getSize(); k++) {
+ array.getValue(k, value);
+ UnicodeString str = value.getUnicodeString(status);
+ if (U_FAILURE(status)) { return; }
+ // The pattern is classified by the first exemplar character
+ // found in it, tested in the order below; patterns matching
+ // none of the exemplars are dropped.
+ // There is both lenient and strict data for comma/period,
+ // but not for any of the other symbols.
+ if (str.indexOf(u'.') != -1) {
+ saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
+ } else if (str.indexOf(u',') != -1) {
+ saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
+ } else if (str.indexOf(u'+') != -1) {
+ saveSet(PLUS_SIGN, str, status);
+ } else if (str.indexOf(u'‒') != -1) {
+ saveSet(MINUS_SIGN, str, status);
+ } else if (str.indexOf(u'$') != -1) {
+ saveSet(DOLLAR_SIGN, str, status);
+ } else if (str.indexOf(u'£') != -1) {
+ saveSet(POUND_SIGN, str, status);
+ } else if (str.indexOf(u'₨') != -1) {
+ saveSet(RUPEE_SIGN, str, status);
+ }
+ if (U_FAILURE(status)) { return; }
+ }
+ }
+ }
+ }
+ }
+};
+
+
+icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
+
+// Cleanup callback registered through ucln_common_registerCleanup().
+// Deletes every cached set and clears its slot, then re-arms the init-once
+// guard. Always returns TRUE.
+UBool U_CALLCONV cleanupNumberParseUniSets() {
+    for (int32_t i = 0; i < COUNT; i++) {
+        delete gUnicodeSets[i];
+        gUnicodeSets[i] = nullptr;
+    }
+    // Without this reset the guard would still read "initialized", so a later
+    // unisets::get() would skip initialization and return the nullptr entries
+    // left behind by the loop above.
+    gNumberParseUniSetsInitOnce.reset();
+    return TRUE;
+}
+
+// One-time initializer for gUnicodeSets.
+//
+// Registers the cleanup callback, builds the hard-coded sets, loads the
+// "parse" resource of the root bundle through ParseDataSink, derives the
+// union sets, and finally freezes every set that was created.
+//
+// status: receives the first error from pattern parsing or resource loading.
+//         On a resource failure the function returns early and leaves the
+//         remaining slots unset.
+void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
+    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
+
+    gUnicodeSets[EMPTY] = new UnicodeSet();
+
+    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
+            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
+    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
+
+    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
+    if (U_FAILURE(status)) { return; }
+    ParseDataSink sink;
+    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
+    if (U_FAILURE(status)) { return; }
+
+    // TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
+    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
+
+    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
+            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
+    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
+    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
+            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
+
+    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
+
+    gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
+    gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
+    gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);
+
+    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
+    gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status);
+
+    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
+
+    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
+    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
+
+    // Freeze whatever was built. A slot can still be nullptr at this point
+    // (an allocation failed, or the resource data did not provide the set and
+    // the U_ASSERT checks above did not stop execution), so it must not be
+    // dereferenced blindly.
+    for (int32_t i = 0; i < COUNT; i++) {
+        if (gUnicodeSets[i] != nullptr) {
+            gUnicodeSets[i]->freeze();
+        }
+    }
+}
+
+}
+
+// Returns the cached set for `key`, running the one-time initialization on
+// first use. Yields nullptr when that initialization reported a failure.
+const UnicodeSet* unisets::get(Key key) {
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
+    // TODO: This returns non-null in Java, and callers assume that.
+    return U_SUCCESS(initStatus) ? gUnicodeSets[key] : nullptr;
+}
+
+// Returns key1 if the set cached under key1 contains `str`, otherwise COUNT
+// (the enum's "null" value). A set that is unavailable because get() returned
+// nullptr is treated as not containing `str`.
+Key unisets::chooseFrom(UnicodeString str, Key key1) {
+    const UnicodeSet* candidates = get(key1);
+    return (candidates != nullptr && candidates->contains(str)) ? key1 : COUNT;
+}
+
+// Returns the first of key1, key2 whose cached set contains `str`, or COUNT
+// if neither does. Unavailable sets are skipped rather than dereferenced.
+Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
+    const UnicodeSet* candidates = get(key1);
+    if (candidates != nullptr && candidates->contains(str)) {
+        return key1;
+    }
+    return chooseFrom(str, key2);
+}
+
+//Key unisets::chooseCurrency(UnicodeString str) {
+// if (get(DOLLAR_SIGN)->contains(str)) {
+// return DOLLAR_SIGN;
+// } else if (get(POUND_SIGN)->contains(str)) {
+// return POUND_SIGN;
+// } else if (get(RUPEE_SIGN)->contains(str)) {
+// return RUPEE_SIGN;
+// } else if (get(YEN_SIGN)->contains(str)) {
+// return YEN_SIGN;
+// } else {
+// return COUNT;
+// }
+//}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT */
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
+// This file is in common instead of i18n because it is needed by ucurr.cpp.
+
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_UNISETS_H__
#define __NUMPARSE_UNISETS_H__
-#include "numparse_types.h"
#include "unicode/uniset.h"
+#include "unicode/unistr.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
EMPTY,
// Ignorables
- BIDI,
- WHITESPACE,
DEFAULT_IGNORABLES,
STRICT_IGNORABLES,
// - PERIOD is a superset of SCRICT_PERIOD
// - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
// - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS
- COMMA,
+ COMMA,
PERIOD,
STRICT_COMMA,
STRICT_PERIOD,
STRICT_ALL_SEPARATORS,
// Symbols
- // TODO: NaN?
- MINUS_SIGN,
+ MINUS_SIGN,
PLUS_SIGN,
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_KEY, // INFINITY is defined in cmath
+ // Currency Symbols
+ DOLLAR_SIGN,
+ POUND_SIGN,
+ RUPEE_SIGN,
+ YEN_SIGN, // not in CLDR data, but Currency.java wants it
+
// Other
- DIGITS,
- CWCF,
+ DIGITS,
// Combined Separators with Digits (for lead code points)
- DIGITS_OR_ALL_SEPARATORS,
+ DIGITS_OR_ALL_SEPARATORS,
DIGITS_OR_STRICT_ALL_SEPARATORS,
// The number of elements in the enum. Also used to indicate null.
- COUNT
+ COUNT
};
const UnicodeSet* get(Key key);
Key chooseFrom(UnicodeString str, Key key1, Key key2);
+// Unused in C++:
+// Key chooseCurrency(UnicodeString str);
+// Used instead:
+// Table pairing each currency-symbol Key with one exemplar code point for
+// that currency. Declared `static` in this header, so every translation unit
+// that includes it gets its own copy.
+static const struct {
+ Key key;           // which cached UnicodeSet the entry refers to
+ UChar32 exemplar;  // representative code point for that currency symbol
+} kCurrencyEntries[] = {
+ {DOLLAR_SIGN, u'$'},
+ {POUND_SIGN, u'£'},
+ {RUPEE_SIGN, u'₨'},
+ {YEN_SIGN, u'¥'},
+};
+
} // namespace unisets
} // namespace impl
} // namespace numparse
as the cleanup functions are suppose to be called. */
typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1,
+ UCLN_COMMON_NUMPARSE_UNISETS,
UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_RBBI,
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
+#include "numparse_unisets.h"
#include "uassert.h"
#include "umutex.h"
#include "ucln_cmn.h"
static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1;
-// Defines equivalent currency symbols.
-static const char *EQUIV_CURRENCY_SYMBOLS[][2] = {
- {"\\u00a5", "\\uffe5"},
- {"$", "\\ufe69"},
- {"$", "\\uff04"},
- {"\\u20a8", "\\u20b9"},
- {"\\u00a3", "\\u20a4"}};
-
#define ISO_CURRENCY_CODE_LENGTH 3
//------------------------------------------------------------
}
+// Fills `hash` with currency-symbol equivalences: for every entry of
+// unisets::kCurrencyEntries, each element of the entry's UnicodeSet other
+// than the exemplar itself is registered as equivalent to the exemplar via
+// makeEquivalent(). Does nothing if `status` already indicates failure, and
+// stops at the first failure reported by makeEquivalent().
 static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return;
- }
- int32_t length = UPRV_LENGTHOF(EQUIV_CURRENCY_SYMBOLS);
- for (int32_t i = 0; i < length; ++i) {
- icu::UnicodeString lhs(EQUIV_CURRENCY_SYMBOLS[i][0], -1, US_INV);
- icu::UnicodeString rhs(EQUIV_CURRENCY_SYMBOLS[i][1], -1, US_INV);
- makeEquivalent(lhs.unescape(), rhs.unescape(), hash, status);
- if (U_FAILURE(status)) {
- return;
+ using namespace icu::numparse::impl;
+ if (U_FAILURE(status)) { return; }
+ for (auto& entry : unisets::kCurrencyEntries) {
+ UnicodeString exemplar(entry.exemplar);
+ const UnicodeSet* set = unisets::get(entry.key);
+ // NOTE(review): a nullptr set ends the function without touching
+ // `status`, so the caller cannot tell that the table is incomplete.
+ if (set == nullptr) { return; }
+ UnicodeSetIterator it(*set);
+ while (it.next()) {
+ UnicodeString value = it.getString();
+ if (value == exemplar) {
+ // No need to mark the exemplar character as an equivalent
+ continue;
+ }
+ makeEquivalent(exemplar, value, hash, status);
+ if (U_FAILURE(status)) { return; }
 }
 }
 }
wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \
standardplural.o upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \
tmunit.o tmutamt.o tmutfmt.o currpinf.o \
-uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o decfmtst.o smpdtfst.o \
+uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o smpdtfst.o \
ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o ufieldpositer.o \
decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \
tzfmt.o compactdecimalformat.o gender.o region.o scriptset.o \
double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \
double-conversion-cached-powers.o double-conversion-diy-fp.o \
double-conversion-fast-dtoa.o double-conversion-strtod.o \
-numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o numparse_impl.o \
+numparse_stringsegment.o numparse_parsednumber.o numparse_impl.o \
numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o \
numparse_affixes.o numparse_compositions.o numparse_validators.o \
+++ /dev/null
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2009-2016, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* This file contains the class DecimalFormatStaticSets
-*
-* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
-* parsing of decimal and group separators.
-********************************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/unistr.h"
-#include "unicode/uniset.h"
-#include "unicode/uchar.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "uassert.h"
-#include "ucln_in.h"
-#include "umutex.h"
-
-#include "decfmtst.h"
-
-U_NAMESPACE_BEGIN
-
-
-//------------------------------------------------------------------------------
-//
-// Unicode Set pattern strings for all of the required constant sets.
-// Initialized with hex values for portability to EBCDIC based machines.
-// Really ugly, but there's no good way to avoid it.
-//
-//------------------------------------------------------------------------------
-
-static const UChar gDotEquivalentsPattern[] = {
- // [ . \u2024 \u3002 \uFE12 \uFE52 \uFF0E \uFF61 ]
- 0x005B, 0x002E, 0x2024, 0x3002, 0xFE12, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
-
-static const UChar gCommaEquivalentsPattern[] = {
- // [ , \u060C \u066B \u3001 \uFE10 \uFE11 \uFE50 \uFE51 \uFF0C \uFF64 ]
- 0x005B, 0x002C, 0x060C, 0x066B, 0x3001, 0xFE10, 0xFE11, 0xFE50, 0xFE51, 0xFF0C, 0xFF64, 0x005D, 0x0000};
-
-static const UChar gOtherGroupingSeparatorsPattern[] = {
- // [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
- 0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
-
-static const UChar gDashEquivalentsPattern[] = {
- // [ \ - HYPHEN F_DASH N_DASH MINUS ]
- 0x005B, 0x005C, 0x002D, 0x2010, 0x2012, 0x2013, 0x2212, 0x005D, 0x0000};
-
-static const UChar gStrictDotEquivalentsPattern[] = {
- // [ . \u2024 \uFE52 \uFF0E \uFF61 ]
- 0x005B, 0x002E, 0x2024, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
-
-static const UChar gStrictCommaEquivalentsPattern[] = {
- // [ , \u066B \uFE10 \uFE50 \uFF0C ]
- 0x005B, 0x002C, 0x066B, 0xFE10, 0xFE50, 0xFF0C, 0x005D, 0x0000};
-
-static const UChar gStrictOtherGroupingSeparatorsPattern[] = {
- // [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
- 0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
-
-static const UChar gStrictDashEquivalentsPattern[] = {
- // [ \ - MINUS ]
- 0x005B, 0x005C, 0x002D, 0x2212, 0x005D, 0x0000};
-
-static const UChar32 gMinusSigns[] = {
- 0x002D,
- 0x207B,
- 0x208B,
- 0x2212,
- 0x2796,
- 0xFE63,
- 0xFF0D};
-
-static const UChar32 gPlusSigns[] = {
- 0x002B,
- 0x207A,
- 0x208A,
- 0x2795,
- 0xfB29,
- 0xFE62,
- 0xFF0B};
-
-static void initUnicodeSet(const UChar32 *raw, int32_t len, UnicodeSet *s) {
- for (int32_t i = 0; i < len; ++i) {
- s->add(raw[i]);
- }
-}
-
-DecimalFormatStaticSets::DecimalFormatStaticSets(UErrorCode &status)
-: fDotEquivalents(NULL),
- fCommaEquivalents(NULL),
- fOtherGroupingSeparators(NULL),
- fDashEquivalents(NULL),
- fStrictDotEquivalents(NULL),
- fStrictCommaEquivalents(NULL),
- fStrictOtherGroupingSeparators(NULL),
- fStrictDashEquivalents(NULL),
- fDefaultGroupingSeparators(NULL),
- fStrictDefaultGroupingSeparators(NULL),
- fMinusSigns(NULL),
- fPlusSigns(NULL)
-{
- fDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gDotEquivalentsPattern, -1), status);
- fCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gCommaEquivalentsPattern, -1), status);
- fOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gOtherGroupingSeparatorsPattern, -1), status);
- fDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gDashEquivalentsPattern, -1), status);
-
- fStrictDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDotEquivalentsPattern, -1), status);
- fStrictCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictCommaEquivalentsPattern, -1), status);
- fStrictOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gStrictOtherGroupingSeparatorsPattern, -1), status);
- fStrictDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDashEquivalentsPattern, -1), status);
-
-
- fDefaultGroupingSeparators = new UnicodeSet(*fDotEquivalents);
- fDefaultGroupingSeparators->addAll(*fCommaEquivalents);
- fDefaultGroupingSeparators->addAll(*fOtherGroupingSeparators);
-
- fStrictDefaultGroupingSeparators = new UnicodeSet(*fStrictDotEquivalents);
- fStrictDefaultGroupingSeparators->addAll(*fStrictCommaEquivalents);
- fStrictDefaultGroupingSeparators->addAll(*fStrictOtherGroupingSeparators);
-
- fMinusSigns = new UnicodeSet();
- fPlusSigns = new UnicodeSet();
-
- // Check for null pointers
- if (fDotEquivalents == NULL || fCommaEquivalents == NULL || fOtherGroupingSeparators == NULL || fDashEquivalents == NULL ||
- fStrictDotEquivalents == NULL || fStrictCommaEquivalents == NULL || fStrictOtherGroupingSeparators == NULL || fStrictDashEquivalents == NULL ||
- fDefaultGroupingSeparators == NULL || fStrictOtherGroupingSeparators == NULL ||
- fMinusSigns == NULL || fPlusSigns == NULL) {
- cleanup();
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- initUnicodeSet(
- gMinusSigns,
- UPRV_LENGTHOF(gMinusSigns),
- fMinusSigns);
- initUnicodeSet(
- gPlusSigns,
- UPRV_LENGTHOF(gPlusSigns),
- fPlusSigns);
-
- // Freeze all the sets
- fDotEquivalents->freeze();
- fCommaEquivalents->freeze();
- fOtherGroupingSeparators->freeze();
- fDashEquivalents->freeze();
- fStrictDotEquivalents->freeze();
- fStrictCommaEquivalents->freeze();
- fStrictOtherGroupingSeparators->freeze();
- fStrictDashEquivalents->freeze();
- fDefaultGroupingSeparators->freeze();
- fStrictDefaultGroupingSeparators->freeze();
- fMinusSigns->freeze();
- fPlusSigns->freeze();
-}
-
-DecimalFormatStaticSets::~DecimalFormatStaticSets() {
- cleanup();
-}
-
-void DecimalFormatStaticSets::cleanup() { // Be sure to clean up newly added fields!
- delete fDotEquivalents; fDotEquivalents = NULL;
- delete fCommaEquivalents; fCommaEquivalents = NULL;
- delete fOtherGroupingSeparators; fOtherGroupingSeparators = NULL;
- delete fDashEquivalents; fDashEquivalents = NULL;
- delete fStrictDotEquivalents; fStrictDotEquivalents = NULL;
- delete fStrictCommaEquivalents; fStrictCommaEquivalents = NULL;
- delete fStrictOtherGroupingSeparators; fStrictOtherGroupingSeparators = NULL;
- delete fStrictDashEquivalents; fStrictDashEquivalents = NULL;
- delete fDefaultGroupingSeparators; fDefaultGroupingSeparators = NULL;
- delete fStrictDefaultGroupingSeparators; fStrictDefaultGroupingSeparators = NULL;
- delete fStrictOtherGroupingSeparators; fStrictOtherGroupingSeparators = NULL;
- delete fMinusSigns; fMinusSigns = NULL;
- delete fPlusSigns; fPlusSigns = NULL;
-}
-
-static DecimalFormatStaticSets *gStaticSets;
-static icu::UInitOnce gStaticSetsInitOnce = U_INITONCE_INITIALIZER;
-
-
-//------------------------------------------------------------------------------
-//
-// decfmt_cleanup Memory cleanup function, free/delete all
-// cached memory. Called by ICU's u_cleanup() function.
-//
-//------------------------------------------------------------------------------
-U_CDECL_BEGIN
-static UBool U_CALLCONV
-decimfmt_cleanup(void)
-{
- delete gStaticSets;
- gStaticSets = NULL;
- gStaticSetsInitOnce.reset();
- return TRUE;
-}
-
-static void U_CALLCONV initSets(UErrorCode &status) {
- U_ASSERT(gStaticSets == NULL);
- ucln_i18n_registerCleanup(UCLN_I18N_DECFMT, decimfmt_cleanup);
- gStaticSets = new DecimalFormatStaticSets(status);
- if (U_FAILURE(status)) {
- delete gStaticSets;
- gStaticSets = NULL;
- return;
- }
- if (gStaticSets == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
-}
-U_CDECL_END
-
-const DecimalFormatStaticSets *DecimalFormatStaticSets::getStaticSets(UErrorCode &status) {
- umtx_initOnce(gStaticSetsInitOnce, initSets, status);
- return gStaticSets;
-}
-
-
-const UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse)
-{
- UErrorCode status = U_ZERO_ERROR;
- umtx_initOnce(gStaticSetsInitOnce, initSets, status);
- if (U_FAILURE(status)) {
- return NULL;
- }
-
- if (gStaticSets->fDotEquivalents->contains(decimal)) {
- return strictParse ? gStaticSets->fStrictDotEquivalents : gStaticSets->fDotEquivalents;
- }
-
- if (gStaticSets->fCommaEquivalents->contains(decimal)) {
- return strictParse ? gStaticSets->fStrictCommaEquivalents : gStaticSets->fCommaEquivalents;
- }
-
- // if there is no match, return NULL
- return NULL;
-}
-
-
-U_NAMESPACE_END
-#endif // !UCONFIG_NO_FORMATTING
+++ /dev/null
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2009-2016, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* This file contains declarations for the class DecimalFormatStaticSets
-*
-* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
-* parsing of decimal and group separators.
-********************************************************************************
-*/
-
-#ifndef DECFMTST_H
-#define DECFMTST_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-
-class UnicodeSet;
-
-
-class DecimalFormatStaticSets : public UMemory
-{
-public:
- // Constructor and Destructor not for general use.
- // Public to permit access from plain C implementation functions.
- DecimalFormatStaticSets(UErrorCode &status);
- ~DecimalFormatStaticSets();
-
- /**
- * Return a pointer to a lazy-initialized singleton instance of this class.
- */
- static const DecimalFormatStaticSets *getStaticSets(UErrorCode &status);
-
- static const UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse);
-
- UnicodeSet *fDotEquivalents;
- UnicodeSet *fCommaEquivalents;
- UnicodeSet *fOtherGroupingSeparators;
- UnicodeSet *fDashEquivalents;
-
- UnicodeSet *fStrictDotEquivalents;
- UnicodeSet *fStrictCommaEquivalents;
- UnicodeSet *fStrictOtherGroupingSeparators;
- UnicodeSet *fStrictDashEquivalents;
-
- UnicodeSet *fDefaultGroupingSeparators;
- UnicodeSet *fStrictDefaultGroupingSeparators;
-
- UnicodeSet *fMinusSigns;
- UnicodeSet *fPlusSigns;
-private:
- void cleanup();
-
-};
-
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
-#endif // DECFMTST_H
+++ /dev/null
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
-
-// Allow implicit conversion from char16_t* to UnicodeString for this file:
-// Helpful in toString methods and elsewhere.
-#define UNISTR_FROM_STRING_EXPLICIT
-
-#include "numparse_unisets.h"
-#include "numparse_types.h"
-#include "umutex.h"
-#include "ucln_in.h"
-#include "unicode/uniset.h"
-
-using namespace icu;
-using namespace icu::numparse;
-using namespace icu::numparse::impl;
-using namespace icu::numparse::impl::unisets;
-
-
-namespace {
-
-static UnicodeSet* gUnicodeSets[COUNT] = {};
-
-UnicodeSet* computeUnion(Key k1, Key k2) {
- UnicodeSet* result = new UnicodeSet();
- if (result == nullptr) {
- return nullptr;
- }
- result->addAll(*gUnicodeSets[k1]);
- result->addAll(*gUnicodeSets[k2]);
- result->freeze();
- return result;
-}
-
-UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
- UnicodeSet* result = new UnicodeSet();
- if (result == nullptr) {
- return nullptr;
- }
- result->addAll(*gUnicodeSets[k1]);
- result->addAll(*gUnicodeSets[k2]);
- result->addAll(*gUnicodeSets[k3]);
- result->freeze();
- return result;
-}
-
-icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
-
-UBool U_CALLCONV cleanupNumberParseUniSets() {
- for (int32_t i = 0; i < COUNT; i++) {
- delete gUnicodeSets[i];
- gUnicodeSets[i] = nullptr;
- }
- return TRUE;
-}
-
-void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
- ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
-
- gUnicodeSets[EMPTY] = new UnicodeSet();
-
- // These characters are skipped over and ignored at any point in the string, even in strict mode.
- // See ticket #13084.
- gUnicodeSets[BIDI] = new UnicodeSet(u"[[:DI:]]", status);
-
- // This set was decided after discussion with icu-design@. See ticket #13309.
- // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
- gUnicodeSets[WHITESPACE] = new UnicodeSet(u"[[:Zs:][\\u0009]]", status);
-
- gUnicodeSets[DEFAULT_IGNORABLES] = computeUnion(BIDI, WHITESPACE);
- gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(*gUnicodeSets[BIDI]);
-
- // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
- gUnicodeSets[COMMA] = new UnicodeSet(u"[,،٫、︐︑﹐﹑,、]", status);
- gUnicodeSets[STRICT_COMMA] = new UnicodeSet(u"[,٫︐﹐,]", status);
- gUnicodeSets[PERIOD] = new UnicodeSet(u"[.․。︒﹒.。]", status);
- gUnicodeSets[STRICT_PERIOD] = new UnicodeSet(u"[.․﹒.。]", status);
- gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
- u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
- gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
- gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
- STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
-
- gUnicodeSets[MINUS_SIGN] = new UnicodeSet(u"[-⁻₋−➖﹣-]", status);
- gUnicodeSets[PLUS_SIGN] = new UnicodeSet(u"[+⁺₊➕﬩﹢+]", status);
-
- gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
- gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
- gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);
-
- gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
- gUnicodeSets[CWCF] = new UnicodeSet(u"[:CWCF:]", status);
-
- gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
- gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
-
- for (int32_t i = 0; i < COUNT; i++) {
- gUnicodeSets[i]->freeze();
- }
-}
-
-}
-
-const UnicodeSet* unisets::get(Key key) {
- UErrorCode localStatus = U_ZERO_ERROR;
- umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
- if (U_FAILURE(localStatus)) {
- // TODO: This returns non-null in Java, and callers assume that.
- return nullptr;
- }
- return gUnicodeSets[key];
-}
-
-Key unisets::chooseFrom(UnicodeString str, Key key1) {
- return get(key1)->contains(str) ? key1 : COUNT;
-}
-
-Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
- return get(key1)->contains(str) ? key1 : chooseFrom(str, key2);
-}
-
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
#include "unicode/fpositer.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"
-#include "decfmtst.h"
#include "unicode/decimfmt.h"
+#include "numparse_unisets.h"
U_NAMESPACE_BEGIN
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
- const DecimalFormatStaticSets &staticSets,
UnicodeString &appendTo,
UErrorCode &status) const {
if (U_FAILURE(status)) {
break;
case UNUM_EXPONENT_SIGN_FIELD:
{
+ using namespace icu::numparse::impl;
int32_t beginIndex = fp.getBeginIndex();
int32_t endIndex = fp.getEndIndex();
UChar32 aChar = original.char32At(beginIndex);
- if (staticSets.fMinusSigns->contains(aChar)) {
+ if (unisets::get(unisets::MINUS_SIGN)->contains(aChar)) {
appendTo.append(
original,
copyFromOffset,
beginIndex - copyFromOffset);
appendTo.append(kSuperscriptMinusSign);
- } else if (staticSets.fPlusSigns->contains(aChar)) {
+ } else if (unisets::get(unisets::PLUS_SIGN)->contains(aChar)) {
appendTo.append(
original,
copyFromOffset,
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
- const DecimalFormatStaticSets & /*unusedDecimalFormatSets*/,
UnicodeString &appendTo,
UErrorCode &status) const {
if (U_FAILURE(status)) {
DecimalFormat *fmtToAdopt, Style *styleToAdopt, UErrorCode &status)
: fPreExponent(),
fDecimalFormat(fmtToAdopt),
- fStyle(styleToAdopt),
- fStaticSets(NULL) {
+ fStyle(styleToAdopt) {
if (U_FAILURE(status)) {
return;
}
return;
}
getPreExponent(*sym, fPreExponent);
- fStaticSets = DecimalFormatStaticSets::getStaticSets(status);
}
ScientificNumberFormatter::ScientificNumberFormatter(
: UObject(other),
fPreExponent(other.fPreExponent),
fDecimalFormat(NULL),
- fStyle(NULL),
- fStaticSets(other.fStaticSets) {
+ fStyle(NULL) {
fDecimalFormat = static_cast<DecimalFormat *>(
other.fDecimalFormat->clone());
fStyle = other.fStyle->clone();
original,
fpi,
fPreExponent,
- *fStaticSets,
appendTo,
status);
}
typedef enum ECleanupI18NType {
UCLN_I18N_START = -1,
UCLN_I18N_NUMBER_SKELETONS,
- UCLN_I18N_NUMPARSE_UNISETS,
UCLN_I18N_CURRENCY_SPACING,
UCLN_I18N_SPOOF,
UCLN_I18N_SPOOFDATA,
U_NAMESPACE_BEGIN
class FieldPositionIterator;
-class DecimalFormatStaticSets;
class DecimalFormatSymbols;
class DecimalFormat;
class Formattable;
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
- const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const = 0;
private:
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
- const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const;
};
const UnicodeString &original,
FieldPositionIterator &fpi,
const UnicodeString &preExponent,
- const DecimalFormatStaticSets &decimalFormatSets,
UnicodeString &appendTo,
UErrorCode &status) const;
private:
UnicodeString fPreExponent;
DecimalFormat *fDecimalFormat;
Style *fStyle;
- const DecimalFormatStaticSets *fStaticSets;
};
static const char *lenientMinusTestCases[] = {
"-5",
"\\u22125",
- "\\u20105"
+ "\\u27965"
};
static const char *lenientCurrencyTestCases[] = {
import java.util.EnumMap;
import java.util.Map;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.UResource.Value;
import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
/**
* This class statically initializes UnicodeSets useful for number parsing. Microbenchmarks show this to
public class UnicodeSetStaticCache {
public static enum Key {
// Ignorables
- BIDI,
- WHITESPACE,
DEFAULT_IGNORABLES,
STRICT_IGNORABLES,
PERMILLE_SIGN,
INFINITY,
+ // Currency Symbols
+ DOLLAR_SIGN,
+ POUND_SIGN,
+ RUPEE_SIGN,
+ YEN_SIGN, // not in CLDR data, but Currency.java wants it
+
// Other
DIGITS,
- CWCF, // TODO: Check if this is being used and remove it if not.
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
}
+    /**
+     * Finds which currency-symbol set contains the given string.
+     *
+     * @param str the string to look up
+     * @return the first of DOLLAR_SIGN, POUND_SIGN, RUPEE_SIGN, YEN_SIGN (checked in that order)
+     *         whose set contains {@code str}, or null if none does
+     */
+    public static Key chooseCurrency(String str) {
+        Key[] currencyKeys = { Key.DOLLAR_SIGN, Key.POUND_SIGN, Key.RUPEE_SIGN, Key.YEN_SIGN };
+        for (Key candidate : currencyKeys) {
+            if (get(candidate).contains(str)) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
private static UnicodeSet computeUnion(Key k1, Key k2) {
return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).freeze();
}
return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).addAll(get(k3)).freeze();
}
- static {
- // These characters are skipped over and ignored at any point in the string, even in strict mode.
- // See ticket #13084.
- unicodeSets.put(Key.BIDI, new UnicodeSet("[[:DI:]]").freeze());
+ /**
+  * Builds a frozen UnicodeSet from the pattern and stores it in the cache under the given key.
+  * Asserts that nothing has been stored for that key yet.
+  *
+  * @param key the cache slot to fill
+  * @param unicodeSetPattern the UnicodeSet pattern to parse
+  */
+ private static void saveSet(Key key, String unicodeSetPattern) {
+     assert unicodeSets.get(key) == null;
+     UnicodeSet frozenSet = new UnicodeSet(unicodeSetPattern).freeze();
+     unicodeSets.put(key, frozenSet);
+ }
- // This set was decided after discussion with icu-design@. See ticket #13309.
+ /*
+ parse{
+ date{
+ lenient{
+ "[\\--/]",
+ "[\\:∶]",
+ }
+ }
+ general{
+ lenient{
+ "[.․。︒﹒.。]",
+ "[\$﹩$$]",
+ "[£₤]",
+ "[₨₹{Rp}{Rs}]",
+ }
+ }
+ number{
+ lenient{
+ "[\\-‒⁻₋−➖﹣-]",
+ "[,،٫、︐︑﹐﹑,、]",
+ "[+⁺₊➕﬩﹢+]",
+ }
+ stricter{
+ "[,٫︐﹐,]",
+ "[.․﹒.。]",
+ }
+ }
+ }
+ */
+ /**
+  * Resource sink that walks the "parse" table (context, then strictness, then an array of
+  * UnicodeSet patterns) and registers every recognized pattern through saveSet().
+  * The "date" context is skipped; any other context is asserted to be "general" or "number".
+  */
+ static class ParseDataSink extends UResource.Sink {
+     @Override
+     public void put(com.ibm.icu.impl.UResource.Key key, Value value, boolean noFallback) {
+         UResource.Table contexts = value.getTable();
+         for (int c = 0; contexts.getKeyAndValue(c, key, value); c++) {
+             if (key.contentEquals("date")) {
+                 // Date data is deliberately ignored here.
+                 continue;
+             }
+             assert key.contentEquals("general") || key.contentEquals("number");
+             UResource.Table strictnessLevels = value.getTable();
+             for (int s = 0; strictnessLevels.getKeyAndValue(s, key, value); s++) {
+                 boolean lenient = key.contentEquals("lenient");
+                 UResource.Array patterns = value.getArray();
+                 for (int p = 0; p < patterns.getSize(); p++) {
+                     patterns.getValue(p, value);
+                     String pattern = value.toString();
+                     // Identify the pattern by a marker character it contains. Only
+                     // comma and period come in both lenient and strict flavors; every
+                     // other symbol has a single set.
+                     Key target = null;
+                     if (pattern.indexOf('.') != -1) {
+                         target = lenient ? Key.PERIOD : Key.STRICT_PERIOD;
+                     } else if (pattern.indexOf(',') != -1) {
+                         target = lenient ? Key.COMMA : Key.STRICT_COMMA;
+                     } else if (pattern.indexOf('+') != -1) {
+                         target = Key.PLUS_SIGN;
+                     } else if (pattern.indexOf('‒') != -1) {
+                         target = Key.MINUS_SIGN;
+                     } else if (pattern.indexOf('$') != -1) {
+                         target = Key.DOLLAR_SIGN;
+                     } else if (pattern.indexOf('£') != -1) {
+                         target = Key.POUND_SIGN;
+                     } else if (pattern.indexOf('₨') != -1) {
+                         target = Key.RUPEE_SIGN;
+                     }
+                     // Patterns matching none of the markers are left unregistered.
+                     if (target != null) {
+                         saveSet(target, pattern);
+                     }
+                 }
+             }
+         }
+     }
+ }
+
+ static {
+ // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
- unicodeSets.put(Key.WHITESPACE, new UnicodeSet("[[:Zs:][\\u0009]]").freeze());
+ unicodeSets.put(Key.DEFAULT_IGNORABLES,
+ new UnicodeSet("[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]").freeze());
+ unicodeSets.put(Key.STRICT_IGNORABLES, new UnicodeSet("[[:Bidi_Control:]]").freeze());
- unicodeSets.put(Key.DEFAULT_IGNORABLES, computeUnion(Key.BIDI, Key.WHITESPACE));
- unicodeSets.put(Key.STRICT_IGNORABLES, get(Key.BIDI));
+ // CLDR provides data for comma, period, minus sign, and plus sign.
+ ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle
+ .getBundleInstance(ICUData.ICU_BASE_NAME, ULocale.ROOT);
+ rb.getAllItemsWithFallback("parse", new ParseDataSink());
+
+ // TODO: Should there be fallback behavior if for some reason these sets didn't get populated?
+ assert unicodeSets.containsKey(Key.COMMA);
+ assert unicodeSets.containsKey(Key.STRICT_COMMA);
+ assert unicodeSets.containsKey(Key.PERIOD);
+ assert unicodeSets.containsKey(Key.STRICT_PERIOD);
- // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
- unicodeSets.put(Key.COMMA, new UnicodeSet("[,،٫、︐︑﹐﹑,、]").freeze());
- unicodeSets.put(Key.STRICT_COMMA, new UnicodeSet("[,٫︐﹐,]").freeze());
- unicodeSets.put(Key.PERIOD, new UnicodeSet("[.․。︒﹒.。]").freeze());
- unicodeSets.put(Key.STRICT_PERIOD, new UnicodeSet("[.․﹒.。]").freeze());
unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS,
new UnicodeSet("['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]").freeze());
unicodeSets.put(Key.ALL_SEPARATORS,
unicodeSets.put(Key.STRICT_ALL_SEPARATORS,
computeUnion(Key.STRICT_COMMA, Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS));
- unicodeSets.put(Key.MINUS_SIGN, new UnicodeSet("[-⁻₋−➖﹣-]").freeze());
- unicodeSets.put(Key.PLUS_SIGN, new UnicodeSet("[+⁺₊➕﬩﹢+]").freeze());
+ assert unicodeSets.containsKey(Key.MINUS_SIGN);
+ assert unicodeSets.containsKey(Key.PLUS_SIGN);
unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%٪]").freeze());
unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[‰؉]").freeze());
unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
+ assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
+ assert unicodeSets.containsKey(Key.POUND_SIGN);
+ assert unicodeSets.containsKey(Key.RUPEE_SIGN);
+ unicodeSets.put(Key.YEN_SIGN, new UnicodeSet("[¥\\uffe5]").freeze());
+
unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
- unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze());
unicodeSets.put(Key.DIGITS_OR_ALL_SEPARATORS, computeUnion(Key.DIGITS, Key.ALL_SEPARATORS));
unicodeSets.put(Key.DIGITS_OR_STRICT_ALL_SEPARATORS,
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import com.ibm.icu.impl.SimpleCache;
import com.ibm.icu.impl.SoftCache;
import com.ibm.icu.impl.TextTrieMap;
+import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.text.CurrencyDisplayNames;
import com.ibm.icu.text.CurrencyMetaInfo;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyDigits;
import com.ibm.icu.text.CurrencyMetaInfo.CurrencyFilter;
+import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale.Category;
/**
*/
public static final int NARROW_SYMBOL_NAME = 3;
- private static final EquivalenceRelation<String> EQUIVALENT_CURRENCY_SYMBOLS =
- new EquivalenceRelation<String>()
- .add("\u00a5", "\uffe5")
- .add("$", "\ufe69", "\uff04")
- .add("\u20a8", "\u20b9")
- .add("\u00a3", "\u20a4");
-
/**
* Currency Usage used for Decimal Format
* @stable ICU 54
String isoCode = e.getValue();
// Register under not just symbol, but under every equivalent symbol as well
// e.g short width yen and long width yen.
- for (String equivalentSymbol : EQUIVALENT_CURRENCY_SYMBOLS.get(symbol)) {
- symTrie.put(equivalentSymbol, new CurrencyStringInfo(isoCode, symbol));
+ UnicodeSetStaticCache.Key key = UnicodeSetStaticCache.chooseCurrency(symbol);
+ CurrencyStringInfo value = new CurrencyStringInfo(isoCode, symbol);
+ if (key != null) {
+ UnicodeSet equivalents = UnicodeSetStaticCache.get(key);
+ // The symbol itself is included in the UnicodeSet
+ for (String equivalentSymbol : equivalents) {
+ symTrie.put(equivalentSymbol, value);
+ }
+ } else {
+ symTrie.put(symbol, value);
}
}
for (Map.Entry<String, String> e : names.nameMap().entrySet()) {
return info.currencies(filter.withTender());
}
- private static final class EquivalenceRelation<T> {
-
- private Map<T, Set<T>> data = new HashMap<T, Set<T>>();
-
- @SuppressWarnings("unchecked") // See ticket #11395, this is safe.
- public EquivalenceRelation<T> add(T... items) {
- Set<T> group = new HashSet<T>();
- for (T item : items) {
- if (data.containsKey(item)) {
- throw new IllegalArgumentException("All groups passed to add must be disjoint.");
- }
- group.add(item);
- }
- for (T item : items) {
- data.put(item, group);
- }
- return this;
- }
-
- public Set<T> get(T item) {
- Set<T> result = data.get(item);
- if (result == null) {
- return Collections.singleton(item);
- }
- return Collections.unmodifiableSet(result);
- }
- }
-
private Object writeReplace() throws ObjectStreamException {
return new MeasureUnitProxy(type, subType);
}
}
// Test default ignorable characters. These should work in both lenient and strict.
- UnicodeSet defaultIgnorables = new UnicodeSet("[[:Default_Ignorable_Code_Point:]]").freeze();
+ UnicodeSet defaultIgnorables = new UnicodeSet("[[:Bidi_Control:]]").freeze();
fmt.setParseStrict(false);
for (String ignorable : defaultIgnorables) {
String str = "a b " + ignorable + "1234c ";