From bae575e95af88e115e5ddb58a890f96035a7c6ad Mon Sep 17 00:00:00 2001 From: John Emmons Date: Fri, 17 Feb 2012 23:01:16 +0000 Subject: [PATCH] ICU-7501 Use allowsParsing attribute from CLDR to detect unparseable RBNF rulesets X-SVN-Rev: 31416 --- icu4c/source/data/rbnf/ar.txt | 8 ++++---- icu4c/source/data/rbnf/ga.txt | 10 ++++----- icu4c/source/data/rbnf/he.txt | 8 ++++---- icu4c/source/data/rbnf/mt.txt | 8 ++++---- icu4c/source/i18n/nfrs.cpp | 13 +++++++++++- icu4c/source/i18n/nfrs.h | 11 +++------- icu4c/source/i18n/rbnf.cpp | 35 -------------------------------- icu4c/source/i18n/unicode/rbnf.h | 4 ---- 8 files changed, 32 insertions(+), 65 deletions(-) diff --git a/icu4c/source/data/rbnf/ar.txt b/icu4c/source/data/rbnf/ar.txt index ec6f470ff54..ef7ade7d17f 100644 --- a/icu4c/source/data/rbnf/ar.txt +++ b/icu4c/source/data/rbnf/ar.txt @@ -61,13 +61,13 @@ ar{ "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];", "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-feminine-prefixpart:", + "%spellout-cardinal-feminine-prefixpart@noparse:", "-x: \u0646\u0627\u0642\u0635 >>;", "x.x: <%spellout-numbering< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-feminine> ;", "0: \u0635\u0641\u0631 ;", "1: ;", "3: =%%spellout-cardinal-feminine-prefx= ;", - "%spellout-cardinal-feminine-postfixpart:", + "%spellout-cardinal-feminine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", @@ -155,13 +155,13 @@ ar{ "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];", "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-masculine-prefixpart:", + "%spellout-cardinal-masculine-prefixpart@noparse:", "-x: \u0646\u0627\u0642\u0635 >>;", "x.x: <%%spellout-numbering-m< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-masculine> ;", "0: \u0635\u0641\u0631 ;", "1: ;", "3: =%%spellout-cardinal-masculine-prefx= ;", - "%spellout-cardinal-masculine-postfixpart:", + "%spellout-cardinal-masculine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", diff --git a/icu4c/source/data/rbnf/ga.txt b/icu4c/source/data/rbnf/ga.txt index 08000ab7c77..8c525cda707 100644 --- a/icu4c/source/data/rbnf/ga.txt +++ b/icu4c/source/data/rbnf/ga.txt @@ -64,7 +64,7 @@ ga{ "10: =%%spellout-numbering-no-a=;", "%%lenient-parse:", "& ' ' , ',' ;", - "%spellout-numbering-year:", + "%spellout-numbering-year@noparse:", "-x: m\u00EDneas >>;", "x.x: =#,##0.#=;", "0: =%spellout-numbering=;", @@ -86,7 +86,7 @@ ga{ "12: >>=%spellout-cardinal-postfixparth=;", "13: >>=%spellout-cardinal-postfixpart=;", "20: =%spellout-numbering=;", - "%spellout-numbering:", + "%spellout-numbering@noparse:", "-x: m\u00EDneas >>;", "x.x: << pointe >>;", "0: a n\u00E1id;", @@ -126,7 +126,7 @@ ga{ "0: =%spellout-cardinal-prefixpart=;", "12: d\u00F3=%spellout-cardinal-postfixparth=;", "13: =%spellout-cardinal-prefixpart==%spellout-cardinal-postfixpart=;", - "%spellout-cardinal-prefixpart:", + "%spellout-cardinal-prefixpart@noparse:", "-x: m\u00EDneas >>;", "x.x: <%%numberp< pointe >>;", "0: n\u00E1id;", @@ -156,7 +156,7 @@ ga{ "1000000000000: <%%trillions<[, >%%numberp>];", "1000000000000000: <%%quadrillions<[, >%%numberp>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-postfixpart:", + "%spellout-cardinal-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", @@ -164,7 +164,7 @@ ga{ "2: ;", "11: ' d\u00E9ag;", "20: ;", - "%spellout-cardinal-postfixparth:", + "%spellout-cardinal-postfixparth@noparse:", "-x: >>;", "x.x: ;", "0: ;", diff --git a/icu4c/source/data/rbnf/he.txt b/icu4c/source/data/rbnf/he.txt index 08aa516a766..1d0ae96dc93 100644 --- a/icu4c/source/data/rbnf/he.txt +++ b/icu4c/source/data/rbnf/he.txt @@ -60,13 +60,13 @@ he{ "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];", "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-feminine-prefixpart:", + "%spellout-cardinal-feminine-prefixpart@noparse:", "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;", "x.x: <%spellout-numbering< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-feminine> ;", "0: \u05D0\u05E4\u05E1 ;", "1: ;", "2: =%%spellout-cardinal-feminine-prefx= ;", - "%spellout-cardinal-feminine-postfixpart:", + "%spellout-cardinal-feminine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", @@ -211,13 +211,13 @@ he{ "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];", "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-masculine-prefixpart:", + "%spellout-cardinal-masculine-prefixpart@noparse:", "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;", "x.x: <%%spellout-numbering-m< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-masculine> ;", "0: \u05D0\u05E4\u05E1 ;", "1: ;", "2: =%%spellout-cardinal-masculine-prefx= ;", - "%spellout-cardinal-masculine-postfixpart:", + "%spellout-cardinal-masculine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", diff --git a/icu4c/source/data/rbnf/mt.txt b/icu4c/source/data/rbnf/mt.txt index e26bff34bce..1dd75159b2e 100644 --- a/icu4c/source/data/rbnf/mt.txt +++ b/icu4c/source/data/rbnf/mt.txt @@ -200,7 +200,7 @@ mt{ "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-masculine>];", "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-masculine>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-masculine-prefixpart:", + "%spellout-cardinal-masculine-prefixpart@noparse:", "-x: minus >>;", "x.x: <%%spellout-cardinal-masculine< punt >%%zz-fraction-masculine> ;", "0: \u017Cero ;", @@ -208,7 +208,7 @@ mt{ "2: =%%spellout-cardinal-masculine-prefx= ;", "%%lenient-parse:", "&[last primary ignorable ] << ' ' << ',' << '-' << '\u00AD';", - "%spellout-cardinal-masculine-postfixpart:", + "%spellout-cardinal-masculine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", @@ -321,13 +321,13 @@ mt{ "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-feminine>];", "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-feminine>];", "1000000000000000000: =#,##0=;", - "%spellout-cardinal-feminine-prefixpart:", + "%spellout-cardinal-feminine-prefixpart@noparse:", "-x: minus >>;", "x.x: <%%spellout-cardinal-feminine< punt >%%zz-fraction-feminine> ;", "0: \u017Cero ;", "1: ;", "2: =%%spellout-cardinal-feminine-prefx= ;", - "%spellout-cardinal-feminine-postfixpart:", + "%spellout-cardinal-feminine-postfixpart@noparse:", "-x: >>;", "x.x: ;", "0: ;", diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp index d5682c31922..70f18a37c9e 100644 --- a/icu4c/source/i18n/nfrs.cpp +++ b/icu4c/source/i18n/nfrs.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2011, International Business Machines +* Copyright (C) 1997-2012, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrs.cpp @@ -113,12 +113,18 @@ static const UChar gPercentPercent[] = 0x25, 0x25, 0 }; /* "%%" */ +static const UChar gNoparse[] = +{ + 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 +}; /* "@noparse" */ + NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) : name() , rules(0) , negativeNumberRule(NULL) , fIsFractionRuleSet(FALSE) , fIsPublic(FALSE) + , fIsParseable(TRUE) , fRecursionCount(0) { for (int i = 0; i < 3; ++i) { @@ -163,6 +169,11 @@ NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; + if ( name.endsWith(gNoparse,8) ) { + fIsParseable = FALSE; + name.truncate(name.length()-8); // remove the @noparse from the name + } + // all of the other members of NFRuleSet are initialized // by parseRules() } diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h index 93a29b08ee7..9bac043f76f 100644 --- a/icu4c/source/i18n/nfrs.h +++ b/icu4c/source/i18n/nfrs.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2009, International Business Machines +* Copyright (C) 1997-2012, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrs.h @@ -41,13 +41,7 @@ class NFRuleSet : public UMemory { UBool isPublic() const { return fIsPublic; } - UBool isParseable() const { - UnicodeString prefixpart = UNICODE_STRING_SIMPLE("-prefixpart"); - UnicodeString postfix = UNICODE_STRING_SIMPLE("-postfix"); - UnicodeString postfx = UNICODE_STRING_SIMPLE("-postfx"); - - return ( name.indexOf(prefixpart) == -1 && name.indexOf(postfix) == -1 && name.indexOf(postfx) == -1 ); - } + UBool isParseable() const { return fIsParseable; } UBool isFractionRuleSet() const { return fIsFractionRuleSet; } @@ -73,6 +67,7 @@ class NFRuleSet : public UMemory { NFRule *fractionRules[3]; UBool fIsFractionRuleSet; UBool fIsPublic; + UBool fIsParseable; int32_t fRecursionCount; NFRuleSet(const NFRuleSet &other); // forbid copying of this class diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index d11736a0a34..816009de9d0 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -58,10 +58,6 @@ static const UChar gSemiPercent[] = #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) -// Temporary workaround - when noParse is true, do noting in parse. -// TODO: We need a real fix - see #6895/#6896 -static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL }; - U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) @@ -660,7 +656,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) - , noParse(FALSE) //TODO: to be removed after #6895 { LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); init(description, locinfo, perror, status); @@ -677,7 +672,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) - , noParse(FALSE) //TODO: to be removed after #6895 { LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); init(description, locinfo, perror, status); @@ -694,7 +688,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) - , noParse(FALSE) //TODO: to be removed after #6895 { init(description, info, perror, status); } @@ -710,7 +703,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) - , noParse(FALSE) //TODO: to be removed after #6895 { init(description, NULL, perror, status); } @@ -727,7 +719,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) - , noParse(FALSE) //TODO: to be removed after #6895 { init(description, NULL, perror, status); } @@ -783,19 +774,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& init (desc, locinfo, perror, status); - //TODO: we need a real fix - see #6895 / #6896 - noParse = FALSE; - if (tag == URBNF_SPELLOUT) { - const char *lang = alocale.getLanguage(); - for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) { - if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) { - noParse = TRUE; - break; - } - } - } - //TODO: end - ures_close(ruleSets); ures_close(rbnfRules); } @@ -830,9 +808,6 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) UParseError perror; init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); - //TODO: remove below when we fix the parse bug - See #6895 / #6896 - noParse = rhs.noParse; - return *this; } @@ -859,9 +834,6 @@ RuleBasedNumberFormat::clone(void) const result = 0; } else { result->lenient = lenient; - - //TODO: remove below when we fix the parse bug - See #6895 / #6896 - result->noParse = noParse; } return result; } @@ -1160,13 +1132,6 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { - //TODO: We need a real fix. See #6895 / #6896 - if (noParse) { - // skip parsing - parsePosition.setErrorIndex(0); - return; - } - if (!ruleSets) { parsePosition.setErrorIndex(0); return; diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index d493abd61f6..c595fd61ad0 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -1018,10 +1018,6 @@ private: UBool lenient; UnicodeString* lenientParseRules; LocalizationInfo* localizations; - - // Temporary workaround - when noParse is true, do noting in parse. - // TODO: We need a real fix - see #6895/#6896 - UBool noParse; }; // --------------- -- 2.40.0