ICU-7501 Use allowsParsing attribute from CLDR to detect unparseable RBNF rulesets

author John Emmons <emmo@us.ibm.com>

Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)

committer John Emmons <emmo@us.ibm.com>

Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)
author John Emmons <emmo@us.ibm.com>
Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)
committer John Emmons <emmo@us.ibm.com>
Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)
diff --git a/icu4c/source/data/rbnf/ar.txt b/icu4c/source/data/rbnf/ar.txt

index ec6f470ff54082cf5f449e5fd2d5f917a38d55cf..ef7ade7d17fe8e37c3d0b3a526b8de934dd6ca95 100644 (file)
--- a/icu4c/source/data/rbnf/ar.txt
+++ b/icu4c/source/data/rbnf/ar.txt
@@ -61,13 +61,13 @@ ar{
              "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];",
              "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
              "-x: \u0646\u0627\u0642\u0635 >>;",
              "x.x: <%spellout-numbering< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-feminine> ;",
              "0: \u0635\u0641\u0631 ;",
              "1: ;",
              "3: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
@@ -155,13 +155,13 @@ ar{
              "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];",
              "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
              "-x: \u0646\u0627\u0642\u0635 >>;",
              "x.x: <%%spellout-numbering-m< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-masculine> ;",
              "0: \u0635\u0641\u0631 ;",
              "1: ;",
              "3: =%%spellout-cardinal-masculine-prefx= ;",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
diff --git a/icu4c/source/data/rbnf/ga.txt b/icu4c/source/data/rbnf/ga.txt

index 08000ab7c777734e063a0ffe223da3cb19586f56..8c525cda707af3ed56f1cb37564948948904db72 100644 (file)
--- a/icu4c/source/data/rbnf/ga.txt
+++ b/icu4c/source/data/rbnf/ga.txt
@@ -64,7 +64,7 @@ ga{
              "10: =%%spellout-numbering-no-a=;",
              "%%lenient-parse:",
              "& ' ' , ',' ;",
-            "%spellout-numbering-year:",
+            "%spellout-numbering-year@noparse:",
              "-x: m\u00EDneas >>;",
              "x.x: =#,##0.#=;",
              "0: =%spellout-numbering=;",
@@ -86,7 +86,7 @@ ga{
              "12: >>=%spellout-cardinal-postfixparth=;",
              "13: >>=%spellout-cardinal-postfixpart=;",
              "20: =%spellout-numbering=;",
-            "%spellout-numbering:",
+            "%spellout-numbering@noparse:",
              "-x: m\u00EDneas >>;",
              "x.x: << pointe >>;",
              "0: a n\u00E1id;",
@@ -126,7 +126,7 @@ ga{
              "0: =%spellout-cardinal-prefixpart=;",
              "12: d\u00F3=%spellout-cardinal-postfixparth=;",
              "13: =%spellout-cardinal-prefixpart==%spellout-cardinal-postfixpart=;",
-            "%spellout-cardinal-prefixpart:",
+            "%spellout-cardinal-prefixpart@noparse:",
              "-x: m\u00EDneas >>;",
              "x.x: <%%numberp< pointe >>;",
              "0: n\u00E1id;",
@@ -156,7 +156,7 @@ ga{
              "1000000000000: <%%trillions<[, >%%numberp>];",
              "1000000000000000: <%%quadrillions<[, >%%numberp>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-postfixpart:",
+            "%spellout-cardinal-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
@@ -164,7 +164,7 @@ ga{
              "2: ;",
              "11: ' d\u00E9ag;",
              "20: ;",
-            "%spellout-cardinal-postfixparth:",
+            "%spellout-cardinal-postfixparth@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
diff --git a/icu4c/source/data/rbnf/he.txt b/icu4c/source/data/rbnf/he.txt

index 08aa516a7665bbc1371c55987e36fb66756a555d..1d0ae96dc9360bf824c61645be30a043cdd4bba7 100644 (file)
--- a/icu4c/source/data/rbnf/he.txt
+++ b/icu4c/source/data/rbnf/he.txt
@@ -60,13 +60,13 @@ he{
              "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];",
              "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
              "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;",
              "x.x: <%spellout-numbering< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-feminine> ;",
              "0: \u05D0\u05E4\u05E1 ;",
              "1: ;",
              "2: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
@@ -211,13 +211,13 @@ he{
              "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];",
              "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
              "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;",
              "x.x: <%%spellout-numbering-m< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-masculine> ;",
              "0: \u05D0\u05E4\u05E1 ;",
              "1: ;",
              "2: =%%spellout-cardinal-masculine-prefx= ;",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
diff --git a/icu4c/source/data/rbnf/mt.txt b/icu4c/source/data/rbnf/mt.txt

index e26bff34bce4eca3fa367dad9eb79cff41c43988..1dd75159b2e30cd839dd1e2f994714831d89f8c9 100644 (file)
--- a/icu4c/source/data/rbnf/mt.txt
+++ b/icu4c/source/data/rbnf/mt.txt
@@ -200,7 +200,7 @@ mt{
              "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-masculine>];",
              "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-masculine>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
              "-x: minus >>;",
              "x.x: <%%spellout-cardinal-masculine< punt >%%zz-fraction-masculine> ;",
              "0: \u017Cero ;",
@@ -208,7 +208,7 @@ mt{
              "2: =%%spellout-cardinal-masculine-prefx= ;",
              "%%lenient-parse:",
              "&[last primary ignorable ] << ' ' << ',' << '-' << '\u00AD';",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
@@ -321,13 +321,13 @@ mt{
              "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-feminine>];",
              "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-feminine>];",
              "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
              "-x: minus >>;",
              "x.x: <%%spellout-cardinal-feminine< punt >%%zz-fraction-feminine> ;",
              "0: \u017Cero ;",
              "1: ;",
              "2: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
              "-x: >>;",
              "x.x: ;",
              "0: ;",
diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp

index d5682c31922594e9c95eb1fd4e8f51f0be5f9811..70f18a37c9ea0a20ad3600a957f6a0cc1f8fd041 100644 (file)
--- a/icu4c/source/i18n/nfrs.cpp
+++ b/icu4c/source/i18n/nfrs.cpp
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2011, International Business Machines
+*   Copyright (C) 1997-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   file name:  nfrs.cpp
@@ -113,12 +113,18 @@ static const UChar gPercentPercent[] =
      0x25, 0x25, 0
  }; /* "%%" */
  
+static const UChar gNoparse[] =
+{
+    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
+}; /* "@noparse" */
+
  NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
    : name()
    , rules(0)
    , negativeNumberRule(NULL)
    , fIsFractionRuleSet(FALSE)
    , fIsPublic(FALSE)
+  , fIsParseable(TRUE)
    , fRecursionCount(0)
  {
      for (int i = 0; i < 3; ++i) {
@@ -163,6 +169,11 @@ NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta
  
      fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
  
+    if ( name.endsWith(gNoparse,8) ) {
+        fIsParseable = FALSE;
+        name.truncate(name.length()-8); // remove the @noparse from the name
+    }
+
      // all of the other members of NFRuleSet are initialized
      // by parseRules()
  }
diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h

index 93a29b08ee7e5374b20e41f0056fe85067dea03f..9bac043f76fefd6aea18e6267bd1669c061ff5dd 100644 (file)
--- a/icu4c/source/i18n/nfrs.h
+++ b/icu4c/source/i18n/nfrs.h
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2009, International Business Machines
+*   Copyright (C) 1997-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   file name:  nfrs.h
@@ -41,13 +41,7 @@ class NFRuleSet : public UMemory {
  
    UBool isPublic() const { return fIsPublic; }
  
-  UBool isParseable() const { 
-      UnicodeString prefixpart = UNICODE_STRING_SIMPLE("-prefixpart");
-      UnicodeString postfix = UNICODE_STRING_SIMPLE("-postfix");
-      UnicodeString postfx = UNICODE_STRING_SIMPLE("-postfx");
-
-      return ( name.indexOf(prefixpart) == -1 && name.indexOf(postfix) == -1 && name.indexOf(postfx) == -1 );
-  }
+  UBool isParseable() const { return fIsParseable; }
  
    UBool isFractionRuleSet() const { return fIsFractionRuleSet; }
  
@@ -73,6 +67,7 @@ class NFRuleSet : public UMemory {
    NFRule *fractionRules[3];
    UBool fIsFractionRuleSet;
    UBool fIsPublic;
+  UBool fIsParseable;
    int32_t fRecursionCount;
  
    NFRuleSet(const NFRuleSet &other); // forbid copying of this class
diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp

index d11736a0a3404bb0f7829ae59dc4e7ec23aa280f..816009de9d04c06d054118534c49d2c8c1bec4cb 100644 (file)
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@@ -58,10 +58,6 @@ static const UChar gSemiPercent[] =
  #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
  #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
  
-// Temporary workaround - when noParse is true, do noting in parse.
-// TODO: We need a real fix - see #6895/#6896
-static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
-
  U_NAMESPACE_BEGIN
  
  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
@@ -660,7 +656,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    , lenient(FALSE)
    , lenientParseRules(NULL)
    , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
  {
    LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    init(description, locinfo, perror, status);
@@ -677,7 +672,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    , lenient(FALSE)
    , lenientParseRules(NULL)
    , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
  {
    LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    init(description, locinfo, perror, status);
@@ -694,7 +688,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    , lenient(FALSE)
    , lenientParseRules(NULL)
    , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
  {
    init(description, info, perror, status);
  }
@@ -710,7 +703,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    , lenient(FALSE)
    , lenientParseRules(NULL)
    , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
  {
      init(description, NULL, perror, status);
  }
@@ -727,7 +719,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    , lenient(FALSE)
    , lenientParseRules(NULL)
    , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
  {
      init(description, NULL, perror, status);
  }
@@ -783,19 +774,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale&
  
          init (desc, locinfo, perror, status);
  
-        //TODO: we need a real fix - see #6895 / #6896
-        noParse = FALSE;
-        if (tag == URBNF_SPELLOUT) {
-            const char *lang = alocale.getLanguage();
-            for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
-                if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
-                    noParse = TRUE;
-                    break;
-                }
-            }
-        }
-        //TODO: end
-
          ures_close(ruleSets);
          ures_close(rbnfRules);
      }
@@ -830,9 +808,6 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
      UParseError perror;
      init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
  
-    //TODO: remove below when we fix the parse bug - See #6895 / #6896
-    noParse = rhs.noParse;
-
      return *this;
  }
  
@@ -859,9 +834,6 @@ RuleBasedNumberFormat::clone(void) const
          result = 0;
      } else {
          result->lenient = lenient;
-
-        //TODO: remove below when we fix the parse bug - See #6895 / #6896
-        result->noParse = noParse;
      }
      return result;
  }
@@ -1160,13 +1132,6 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
                               Formattable& result,
                               ParsePosition& parsePosition) const
  {
-    //TODO: We need a real fix.  See #6895 / #6896
-    if (noParse) {
-        // skip parsing
-        parsePosition.setErrorIndex(0);
-        return;
-    }
-
      if (!ruleSets) {
          parsePosition.setErrorIndex(0);
          return;
diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h

index d493abd61f6d75300cd6d4b869b4635aa802950d..c595fd61ad03739e677b27f7669bfb1417ccf433 100644 (file)
--- a/icu4c/source/i18n/unicode/rbnf.h
+++ b/icu4c/source/i18n/unicode/rbnf.h
@@ -1018,10 +1018,6 @@ private:
      UBool lenient;
      UnicodeString* lenientParseRules;
      LocalizationInfo* localizations;
-
-    // Temporary workaround - when noParse is true, do noting in parse.
-    // TODO: We need a real fix - see #6895/#6896
-    UBool noParse;
  };
  
  // ---------------
author	John Emmons <emmo@us.ibm.com>
	Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)
committer	John Emmons <emmo@us.ibm.com>
	Fri, 17 Feb 2012 23:01:16 +0000 (23:01 +0000)
icu4c/source/data/rbnf/ar.txt		patch \| blob \| history
icu4c/source/data/rbnf/ga.txt		patch \| blob \| history
icu4c/source/data/rbnf/he.txt		patch \| blob \| history
icu4c/source/data/rbnf/mt.txt		patch \| blob \| history
icu4c/source/i18n/nfrs.cpp		patch \| blob \| history
icu4c/source/i18n/nfrs.h		patch \| blob \| history
icu4c/source/i18n/rbnf.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/rbnf.h		patch \| blob \| history