From: Markus Scherer Date: Thu, 15 Dec 2011 21:43:35 +0000 (+0000) Subject: ICU-9013 deprecate API for Unicode 1.0 character names & ISO comments, remove support... X-Git-Tag: milestone-59-0-1~4233 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2f70694e7fc7841ca893f8935e4513fe5995d1e0;p=icu ICU-9013 deprecate API for Unicode 1.0 character names & ISO comments, remove support & data X-SVN-Rev: 31125 --- diff --git a/icu4c/source/common/unames.cpp b/icu4c/source/common/unames.cpp index 989a87d5a0b..932d2024968 100644 --- a/icu4c/source/common/unames.cpp +++ b/icu4c/source/common/unames.cpp @@ -700,6 +700,18 @@ enumNames(UCharNames *names, /* find the group that contains start, or the highest before it */ group=getGroup(names, start); + if(startGroupMSBlimit) { + extLimit=limit; + } + if(!enumExtNames(start, extLimit-1, fn, context)) { + return FALSE; + } + start=extLimit; + } + if(startGroupMSB==endGroupMSB) { if(startGroupMSB==group[GROUP_MSB]) { /* if start and limit-1 are in the same group, then enumerate only in that one */ @@ -1508,11 +1520,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice, } U_CAPI int32_t U_EXPORT2 -u_getISOComment(UChar32 c, +u_getISOComment(UChar32 /*c*/, char *dest, int32_t destCapacity, UErrorCode *pErrorCode) { - int32_t length; - /* check the argument values */ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; @@ -1521,13 +1531,7 @@ u_getISOComment(UChar32 c, return 0; } - if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { - return u_terminateChars(dest, destCapacity, 0, pErrorCode); - } - - /* the ISO comment is stored like a normal character name */ - length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity); - return u_terminateChars(dest, destCapacity, length, pErrorCode); + return u_terminateChars(dest, destCapacity, 0, pErrorCode); } U_CAPI UChar32 U_EXPORT2 diff --git a/icu4c/source/common/unicode/uchar.h b/icu4c/source/common/unicode/uchar.h index 92906c0a920..f3d5420cb5e 100644 --- a/icu4c/source/common/unicode/uchar.h +++ b/icu4c/source/common/unicode/uchar.h @@ -527,8 +527,8 @@ typedef enum UProperty { /** String property Case_Folding. Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ UCHAR_CASE_FOLDING=0x4002, - /** String property ISO_Comment. - Corresponds to u_getISOComment. @stable ICU 2.4 */ + /** Deprecated string property ISO_Comment. + Corresponds to u_getISOComment. @deprecated ICU 49 */ UCHAR_ISO_COMMENT=0x4003, /** String property Lowercase_Mapping. Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ @@ -552,7 +552,9 @@ typedef enum UProperty { Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ UCHAR_TITLECASE_MAPPING=0x400A, /** String property Unicode_1_Name. - Corresponds to u_charName. @stable ICU 2.4 */ + This property is of little practical value. + Beginning with ICU 49, ICU APIs return an empty string for this property. + Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ UCHAR_UNICODE_1_NAME=0x400B, /** String property Uppercase_Mapping. Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ @@ -1451,10 +1453,19 @@ typedef enum UEastAsianWidth { * @stable ICU 2.0 */ typedef enum UCharNameChoice { + /** Unicode character name (Name property). @stable ICU 2.0 */ U_UNICODE_CHAR_NAME, + /** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ U_UNICODE_10_CHAR_NAME, + /** Standard or synthetic character name. @stable ICU 2.0 */ U_EXTENDED_CHAR_NAME, - U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @stable ICU 4.4 */ + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ + U_CHAR_NAME_ALIAS, + /** @stable ICU 2.0 */ U_CHAR_NAME_CHOICE_COUNT } UCharNameChoice; @@ -2584,12 +2595,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice, UErrorCode *pErrorCode); /** - * Get the ISO 10646 comment for a character. - * The ISO 10646 comment is an informative field in the Unicode Character - * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list. - * - * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings - * returned for all characters. + * Returns an empty string. + * Used to return the ISO 10646 comment for a character. + * The Unicode ISO_Comment property is deprecated and has no values. * * @param c The character (code point) for which to get the ISO comment. * It must be 0<=c<=0x10ffff. @@ -2600,13 +2608,9 @@ u_charName(UChar32 code, UCharNameChoice nameChoice, * @param pErrorCode Pointer to a UErrorCode variable; * check for U_SUCCESS() after u_getISOComment() * returns. - * @return The length of the comment, or 0 if there is no comment for this character. - * If the destCapacity is less than or equal to the length, then the buffer - * contains the truncated name and the returned length indicates the full - * length of the name. - * The length does not include the zero-termination. + * @return 0 * - * @stable ICU 2.2 + * @deprecated ICU 49 */ U_STABLE int32_t U_EXPORT2 u_getISOComment(UChar32 c, diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index 115b5e09426..00043a039fd 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -1040,15 +1040,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, } break; case UCHAR_NAME: - case UCHAR_UNICODE_1_NAME: { // Must munge name, since u_charFromName() does not do // 'loose' matching. char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); - UCharNameChoice choice = (p == UCHAR_NAME) ? - U_EXTENDED_CHAR_NAME : U_UNICODE_10_CHAR_NAME; - UChar32 ch = u_charFromName(choice, buf, &ec); + UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec); if (U_SUCCESS(ec)) { clear(); add(ch); @@ -1058,6 +1055,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, } } break; + case UCHAR_UNICODE_1_NAME: + // ICU 49 deprecates the Unicode_1_Name property APIs. + FAIL(ec); case UCHAR_AGE: { // Must munge name, since u_versionFromString() does not do diff --git a/icu4c/source/data/in/unames.icu b/icu4c/source/data/in/unames.icu index d144e85a4f7..44ed175a54c 100644 Binary files a/icu4c/source/data/in/unames.icu and b/icu4c/source/data/in/unames.icu differ diff --git a/icu4c/source/samples/ucnv/convsamp.cpp b/icu4c/source/samples/ucnv/convsamp.cpp index 38b9c28e152..63922c77363 100644 --- a/icu4c/source/samples/ucnv/convsamp.cpp +++ b/icu4c/source/samples/ucnv/convsamp.cpp @@ -58,20 +58,13 @@ void prettyPrintUChar(UChar c) char buf[1000]; UErrorCode status = U_ZERO_ERROR; int32_t o; - - o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status); + + o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status); if(U_SUCCESS(status) && (o>0) ) { buf[6] = 0; printf("%7s", buf); } else { - o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status); - if(U_SUCCESS(status) && (o>0)) { - buf[5] = 0; - printf("~%6s", buf); - } - else { - printf(" ??????"); - } + printf(" ??????"); } } else { switch((char)(c & 0x007F)) { diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index a59c7388d93..a04e495b9e1 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -1581,12 +1581,10 @@ static const struct { const char *name, *oldName, *extName, *alias; } names[]={ {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"}, - {0x01a2, "LATIN CAPITAL LETTER OI", - "LATIN CAPITAL LETTER O I", + {0x01a2, "LATIN CAPITAL LETTER OI", "", "LATIN CAPITAL LETTER OI", "LATIN CAPITAL LETTER GHA"}, - {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", - "LATIN SMALL LETTER DOTLESS J BAR HOOK", + {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" }, {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", @@ -1597,7 +1595,7 @@ static const struct { {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" }, {0xd800, "", "", "" }, {0xdc00, "", "", "" }, - {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" }, + {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" }, {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" }, {0xffff, "", "", "" }, {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 8ea0f151581..508da51d597 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -1277,8 +1277,8 @@ void TransliteratorTest::TestNameMap(void) { // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), - CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{}\\\\N{}")); - expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{}\\N{}\\N{}\\N{"), + CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{}\\\\N{}\\\\N{}\\\\N{}")); + expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{}\\N{}\\N{}\\N{}\\N{}\\N{"), CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); delete uni2name;