ICU-9013 deprecate API for Unicode 1.0 character names & ISO comments, remove support...

author Markus Scherer <markus.icu@gmail.com>

Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)
author Markus Scherer <markus.icu@gmail.com>
Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)
diff --git a/icu4c/source/common/unames.cpp b/icu4c/source/common/unames.cpp

index 989a87d5a0b2f938070bf7c639e1a6fb8ec711d6..932d2024968c5603e00bc87c7e1c323a69a090d4 100644 (file)
--- a/icu4c/source/common/unames.cpp
+++ b/icu4c/source/common/unames.cpp
@@ -700,6 +700,18 @@ enumNames(UCharNames *names,
      /* find the group that contains start, or the highest before it */
      group=getGroup(names, start);
  
+    if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
+        /* enumerate synthetic names between start and the group start */
+        UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
+        if(extLimit>limit) {
+            extLimit=limit;
+        }
+        if(!enumExtNames(start, extLimit-1, fn, context)) {
+            return FALSE;
+        }
+        start=extLimit;
+    }
+
      if(startGroupMSB==endGroupMSB) {
          if(startGroupMSB==group[GROUP_MSB]) {
              /* if start and limit-1 are in the same group, then enumerate only in that one */
@@ -1508,11 +1520,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
  }
  
  U_CAPI int32_t U_EXPORT2
-u_getISOComment(UChar32 c,
+u_getISOComment(UChar32 /*c*/,
                  char *dest, int32_t destCapacity,
                  UErrorCode *pErrorCode) {
-    int32_t length;
-
      /* check the argument values */
      if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
          return 0;
@@ -1521,13 +1531,7 @@ u_getISOComment(UChar32 c,
          return 0;
      }
  
-    if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
-        return u_terminateChars(dest, destCapacity, 0, pErrorCode);
-    }
-
-    /* the ISO comment is stored like a normal character name */
-    length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
-    return u_terminateChars(dest, destCapacity, length, pErrorCode);
+    return u_terminateChars(dest, destCapacity, 0, pErrorCode);
  }
  
  U_CAPI UChar32 U_EXPORT2
diff --git a/icu4c/source/common/unicode/uchar.h b/icu4c/source/common/unicode/uchar.h

index 92906c0a9201eb3569cbf9436ae415e71053c8d9..f3d5420cb5e96161e31a9e1274d491c8bb939d9a 100644 (file)
--- a/icu4c/source/common/unicode/uchar.h
+++ b/icu4c/source/common/unicode/uchar.h
@@ -527,8 +527,8 @@ typedef enum UProperty {
      /** String property Case_Folding.
          Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
      UCHAR_CASE_FOLDING=0x4002,
-    /** String property ISO_Comment.
-        Corresponds to u_getISOComment. @stable ICU 2.4 */
+    /** Deprecated string property ISO_Comment.
+        Corresponds to u_getISOComment. @deprecated ICU 49 */
      UCHAR_ISO_COMMENT=0x4003,
      /** String property Lowercase_Mapping.
          Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
@@ -552,7 +552,9 @@ typedef enum UProperty {
          Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
      UCHAR_TITLECASE_MAPPING=0x400A,
      /** String property Unicode_1_Name.
-        Corresponds to u_charName. @stable ICU 2.4 */
+        This property is of little practical value.
+        Beginning with ICU 49, ICU APIs return an empty string for this property.
+        Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
      UCHAR_UNICODE_1_NAME=0x400B,
      /** String property Uppercase_Mapping.
          Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
@@ -1451,10 +1453,19 @@ typedef enum UEastAsianWidth {
   * @stable ICU 2.0
   */
  typedef enum UCharNameChoice {
+    /** Unicode character name (Name property). @stable ICU 2.0 */
      U_UNICODE_CHAR_NAME,
+    /**
+     * The Unicode_1_Name property value, which is of little practical value.
+     * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+     * @deprecated ICU 49
+     */
      U_UNICODE_10_CHAR_NAME,
+    /** Standard or synthetic character name. @stable ICU 2.0 */
      U_EXTENDED_CHAR_NAME,
-    U_CHAR_NAME_ALIAS,          /**< Corrected name from NameAliases.txt. @stable ICU 4.4 */
+    /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+    U_CHAR_NAME_ALIAS,
+    /** @stable ICU 2.0 */
      U_CHAR_NAME_CHOICE_COUNT
  } UCharNameChoice;
  
@@ -2584,12 +2595,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
             UErrorCode *pErrorCode);
  
  /**
- * Get the ISO 10646 comment for a character.
- * The ISO 10646 comment is an informative field in the Unicode Character
- * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
- *
- * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings
- * returned for all characters.
+ * Returns an empty string.
+ * Formerly returned the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
   *
   * @param c The character (code point) for which to get the ISO comment.
   *             It must be <code>0<=c<=0x10ffff</code>.
@@ -2600,13 +2608,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
   * @param pErrorCode Pointer to a UErrorCode variable;
   *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
   *        returns.
- * @return The length of the comment, or 0 if there is no comment for this character.
- *         If the destCapacity is less than or equal to the length, then the buffer
- *         contains the truncated name and the returned length indicates the full
- *         length of the name.
- *         The length does not include the zero-termination.
+ * @return 0
   *
- * @stable ICU 2.2
+ * @deprecated ICU 49
   */
  U_STABLE int32_t U_EXPORT2
  u_getISOComment(UChar32 c,
diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp

index 115b5e094264b860822c7ce8faf1d4342f173abb..00043a039fda2cd0953dc8a902ba1977e9244fbf 100644 (file)
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@@ -1040,15 +1040,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                  }
                  break;
              case UCHAR_NAME:
-            case UCHAR_UNICODE_1_NAME:
                  {
                      // Must munge name, since u_charFromName() does not do
                      // 'loose' matching.
                      char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
                      if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
-                    UCharNameChoice choice = (p == UCHAR_NAME) ?
-                        U_EXTENDED_CHAR_NAME : U_UNICODE_10_CHAR_NAME;
-                    UChar32 ch = u_charFromName(choice, buf, &ec);
+                    UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec);
                      if (U_SUCCESS(ec)) {
                          clear();
                          add(ch);
@@ -1058,6 +1055,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                      }
                  }
                  break;
+            case UCHAR_UNICODE_1_NAME:
+                // ICU 49 deprecates the Unicode_1_Name property APIs.
+                FAIL(ec);
              case UCHAR_AGE:
                  {
                      // Must munge name, since u_versionFromString() does not do
diff --git a/icu4c/source/data/in/unames.icu b/icu4c/source/data/in/unames.icu

index d144e85a4f729462dcb8bec2e9018c2ca8906dba..44ed175a54cda03617281a7c0c804504d2ebd34b 100644 (file)

Binary files a/icu4c/source/data/in/unames.icu and b/icu4c/source/data/in/unames.icu differ
diff --git a/icu4c/source/samples/ucnv/convsamp.cpp b/icu4c/source/samples/ucnv/convsamp.cpp

index 38b9c28e152a7b75dd627f521d1067b5a401839a..63922c773639c613f8b916adc23a9718316ce50a 100644 (file)
--- a/icu4c/source/samples/ucnv/convsamp.cpp
+++ b/icu4c/source/samples/ucnv/convsamp.cpp
@@ -58,20 +58,13 @@ void prettyPrintUChar(UChar c)
      char buf[1000];
      UErrorCode status = U_ZERO_ERROR;
      int32_t o;
-    
-    o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
+
+    o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status);
      if(U_SUCCESS(status) && (o>0) ) {
        buf[6] = 0;
        printf("%7s", buf);
      } else {
-      o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
-      if(U_SUCCESS(status) && (o>0)) {
-        buf[5] = 0;
-        printf("~%6s", buf);
-      }
-      else {
-        printf(" ??????");
-      }
+      printf(" ??????");
      }
    } else {
      switch((char)(c & 0x007F)) {
diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c

index a59c7388d93c315edca905abd7ff87e6ac9f844b..a04e495b9e143fcb3686baa97f13657317fceb14 100644 (file)
--- a/icu4c/source/test/cintltst/cucdtst.c
+++ b/icu4c/source/test/cintltst/cucdtst.c
@@ -1581,12 +1581,10 @@ static const struct {
      const char *name, *oldName, *extName, *alias;
  } names[]={
      {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
-    {0x01a2, "LATIN CAPITAL LETTER OI",
-             "LATIN CAPITAL LETTER O I",
+    {0x01a2, "LATIN CAPITAL LETTER OI", "",
               "LATIN CAPITAL LETTER OI",
               "LATIN CAPITAL LETTER GHA"},
-    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
-             "LATIN SMALL LETTER DOTLESS J BAR HOOK",
+    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
               "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
      {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
               "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
@@ -1597,7 +1595,7 @@ static const struct {
      {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
      {0xd800, "", "", "<lead surrogate-D800>" },
      {0xdc00, "", "", "<trail surrogate-DC00>" },
-    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
+    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
      {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
      {0xffff, "", "", "<noncharacter-FFFF>" },
      {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp

index 8ea0f1515819c4ece464e24c7a481baafd0d3b4f..508da51d597e52ed3238e097b3c755f08f7ccc37 100644 (file)
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@@ -1277,8 +1277,8 @@ void TransliteratorTest::TestNameMap(void) {
  
      // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
      expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
-           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
-    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
+           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
+    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
             CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
  
      delete uni2name;
author	Markus Scherer <markus.icu@gmail.com>
	Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Thu, 15 Dec 2011 21:43:35 +0000 (21:43 +0000)
icu4c/source/common/unames.cpp		patch \| blob \| history
icu4c/source/common/unicode/uchar.h		patch \| blob \| history
icu4c/source/common/uniset_props.cpp		patch \| blob \| history
icu4c/source/data/in/unames.icu		patch \| blob \| history
icu4c/source/samples/ucnv/convsamp.cpp		patch \| blob \| history
icu4c/source/test/cintltst/cucdtst.c		patch \| blob \| history
icu4c/source/test/intltest/transtst.cpp		patch \| blob \| history