From: Peter Edberg Date: Mon, 2 Oct 2017 03:42:54 +0000 (+0000) Subject: ICU-13366 part 1, fix TextTrieMap::search supplemental handling; make tz displayName... X-Git-Tag: release-60-rc~77 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ccab14af17d7a6160692f2d30380ee6fd5ad55a;p=icu ICU-13366 part 1, fix TextTrieMap::search supplemental handling; make tz displayName buffer size bigger & logKnownIssue tests more specific X-SVN-Rev: 40522 --- diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 3c0670446b3..402672c0631 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -80,6 +80,9 @@ // class SimpleDateFormat // ***************************************************************************** +// Some zone display names involving supplementary characters can be over 50 chars, 100 UTF-16 code units, 200 UTF-8 bytes +#define ZONE_NAME_U16_MAX 128 + U_NAMESPACE_BEGIN /** @@ -1690,7 +1693,7 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, case UDAT_TIMEZONE_ISO_FIELD: // 'X' case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' { - UChar zsbuf[64]; + UChar zsbuf[ZONE_NAME_U16_MAX]; UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf)); const TimeZone& tz = cal.getTimeZone(); UDate date = cal.getTime(status); diff --git a/icu4c/source/i18n/tzfmt.cpp b/icu4c/source/i18n/tzfmt.cpp index a6590786797..f31db10d2ab 100644 --- a/icu4c/source/i18n/tzfmt.cpp +++ b/icu4c/source/i18n/tzfmt.cpp @@ -33,6 +33,9 @@ #include "tznames_impl.h" // TextTrieMap #include "patternprops.h" +// Some zone display names involving supplementary characters can be over 50 chars, 100 UTF-16 code units, 200 UTF-8 bytes +#define ZONE_NAME_U16_MAX 128 + U_NAMESPACE_BEGIN // Bit flags used by the parse method. @@ -790,7 +793,7 @@ TimeZoneFormat::format(const Formattable& obj, UnicodeString& appendTo, if (tz != NULL) { int32_t rawOffset, dstOffset; tz->getOffset(date, FALSE, rawOffset, dstOffset, status); - UChar buf[32]; + UChar buf[ZONE_NAME_U16_MAX]; UnicodeString result(buf, 0, UPRV_LENGTHOF(buf)); formatOffsetLocalizedGMT(rawOffset + dstOffset, result, status); if (U_SUCCESS(status)) { @@ -1416,7 +1419,7 @@ TimeZoneFormat::getTZDBTimeZoneNames(UErrorCode& status) const { UnicodeString& TimeZoneFormat::formatExemplarLocation(const TimeZone& tz, UnicodeString& name) const { - UChar locationBuf[64]; + UChar locationBuf[ZONE_NAME_U16_MAX]; UnicodeString location(locationBuf, 0, UPRV_LENGTHOF(locationBuf)); const UChar* canonicalID = ZoneMeta::getCanonicalCLDRID(tz); diff --git a/icu4c/source/i18n/tzgnames.cpp b/icu4c/source/i18n/tzgnames.cpp index 236ec760ed6..a8b036c4134 100644 --- a/icu4c/source/i18n/tzgnames.cpp +++ b/icu4c/source/i18n/tzgnames.cpp @@ -37,6 +37,8 @@ U_NAMESPACE_BEGIN #define ZID_KEY_MAX 128 +// Some zone display names involving supplementary characters can be over 50 chars, 100 UTF-16 code units, 200 UTF-8 bytes +#define ZONE_NAME_U16_MAX 128 static const char gZoneStrings[] = "zoneStrings"; @@ -615,7 +617,7 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT UErrorCode status = U_ZERO_ERROR; UBool useStandard = FALSE; int32_t raw, sav; - UChar tmpNameBuf[64]; + UChar tmpNameBuf[ZONE_NAME_U16_MAX]; tz.getOffset(date, FALSE, raw, sav, status); if (U_FAILURE(status)) { @@ -683,7 +685,7 @@ TZGNCore::formatGenericNonLocationName(const TimeZone& tz, UTimeZoneGenericNameT // for some meta zones in some locales. This looks like a data bugs. // For now, we check if the standard name is different from its generic // name below. - UChar genNameBuf[64]; + UChar genNameBuf[ZONE_NAME_U16_MAX]; UnicodeString mzGenericName(genNameBuf, 0, UPRV_LENGTHOF(genNameBuf)); fTimeZoneNames->getMetaZoneDisplayName(mzID, nameType, mzGenericName); if (stdName.caseCompare(mzGenericName, 0) == 0) { diff --git a/icu4c/source/i18n/tznames_impl.cpp b/icu4c/source/i18n/tznames_impl.cpp index 7127b176751..080a7763b2b 100644 --- a/icu4c/source/i18n/tznames_impl.cpp +++ b/icu4c/source/i18n/tznames_impl.cpp @@ -411,25 +411,31 @@ TextTrieMap::search(CharacterNode *node, const UnicodeString &text, int32_t star return; } } - UChar32 c = text.char32At(index); if (fIgnoreCase) { - // size of character may grow after fold operation - UnicodeString tmp(c); + // for folding we need to get a complete code point. + // size of character may grow after fold operation; + // then we need to get result as UTF16 code units. + UChar32 c32 = text.char32At(index++); + if (c32 >= 0x10000) { + index++; + } + UnicodeString tmp(c32); tmp.foldCase(); int32_t tmpidx = 0; while (tmpidx < tmp.length()) { - c = tmp.char32At(tmpidx); + UChar c = tmp.charAt(tmpidx++); node = getChildNode(node, c); if (node == NULL) { break; } - tmpidx = tmp.moveIndex32(tmpidx, 1); } } else { + // here we just get the next UTF16 code unit + UChar c = text.charAt(index++); node = getChildNode(node, c); } if (node != NULL) { - search(node, text, start, index+1, handler, status); + search(node, text, start, index, handler, status); } } diff --git a/icu4c/source/test/intltest/dtfmtrtts.cpp b/icu4c/source/test/intltest/dtfmtrtts.cpp index cf172150b91..9d5d10ec60e 100644 --- a/icu4c/source/test/intltest/dtfmtrtts.cpp +++ b/icu4c/source/test/intltest/dtfmtrtts.cpp @@ -236,9 +236,6 @@ void DateFormatRoundTripTest::test(const Locale& loc) int32_t style = 0; for(style = DateFormat::FULL; style <= DateFormat::SHORT; ++style) { if(TEST_TABLE[itable++]) { - if (uprv_strcmp(loc.getLanguage(),"ccp")==0 && style==DateFormat::MEDIUM && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { - continue; - } logln("Testing style " + UnicodeString(styleName((DateFormat::EStyle)style))); DateFormat *df = DateFormat::createDateInstance((DateFormat::EStyle)style, loc); if(df == NULL) { @@ -252,7 +249,7 @@ void DateFormatRoundTripTest::test(const Locale& loc) for(style = DateFormat::FULL; style <= DateFormat::SHORT; ++style) { if (TEST_TABLE[itable++]) { - if (uprv_strcmp(loc.getLanguage(),"ccp")==0 && style==DateFormat::FULL && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { + if (uprv_strcmp(loc.getLanguage(),"ccp")==0 && style==DateFormat::LONG && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { continue; } logln("Testing style " + UnicodeString(styleName((DateFormat::EStyle)style))); @@ -269,7 +266,7 @@ void DateFormatRoundTripTest::test(const Locale& loc) for(int32_t dstyle = DateFormat::FULL; dstyle <= DateFormat::SHORT; ++dstyle) { for(int32_t tstyle = DateFormat::FULL; tstyle <= DateFormat::SHORT; ++tstyle) { if(TEST_TABLE[itable++]) { - if (uprv_strcmp(loc.getLanguage(),"ccp")==0 && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { + if (uprv_strcmp(loc.getLanguage(),"ccp")==0 && tstyle==DateFormat::LONG && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { continue; } logln("Testing dstyle" + UnicodeString(styleName((DateFormat::EStyle)dstyle)) + ", tstyle" + UnicodeString(styleName((DateFormat::EStyle)tstyle)) ); diff --git a/icu4c/source/test/intltest/tsdate.cpp b/icu4c/source/test/intltest/tsdate.cpp index 68806b8c2e5..df9988fa57e 100644 --- a/icu4c/source/test/intltest/tsdate.cpp +++ b/icu4c/source/test/intltest/tsdate.cpp @@ -104,6 +104,9 @@ IntlTestDateFormat::testLocale(/*char* par, */const Locale& locale, const Unicod timeStyle < (DateFormat::EStyle)4; timeStyle = (DateFormat::EStyle) (timeStyle+1)) { + if (uprv_strcmp(locale.getLanguage(),"ccp")==0 && logKnownIssue("13366", "Skip handling ccp until DateFormat parsing is fixed")) { + continue; + } fTestName = (UnicodeString) "DateTime test " + (int32_t) dateStyle + "/" + (int32_t) timeStyle + " (" + localeName + ")"; fFormat = DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale); testFormat(/* par */); @@ -281,9 +284,6 @@ void IntlTestDateFormat::monsterTest(/*char *par*/) } for (int32_t i=0; i