From: Peter Edberg Date: Tue, 6 Dec 2011 20:46:32 +0000 (+0000) Subject: ICU-8958 Add support for parsing leapMonths (in addition to formatting per r31039... X-Git-Tag: milestone-59-0-1~4290 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=56d70749dfe951c3362beb16b93198a795268586;p=icu ICU-8958 Add support for parsing leapMonths (in addition to formatting per r31039, r31040) X-SVN-Rev: 31043 --- diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 43e2d961c7f..bbffc93a718 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -1903,6 +1903,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& UBool inQuote = FALSE; const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS); + MessageFormat * numericLeapMonthFormatter = NULL; Calendar* calClone = NULL; Calendar *workCal = &cal; @@ -1923,6 +1924,14 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& goto ExitParse; } } + + if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { + UErrorCode nlmfStatus = U_ZERO_ERROR; + numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, nlmfStatus); + if (U_FAILURE(nlmfStatus)) { + numericLeapMonthFormatter = NULL; + } + } for (int32_t i=0; igetTime(status), status); } + if (numericLeapMonthFormatter != NULL) { + delete numericLeapMonthFormatter; + } if (calClone != NULL) { delete calClone; } @@ -2461,6 +2473,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, UCalendarDateFields field, const UnicodeString* data, int32_t dataCount, + const UnicodeString* monthPattern, Calendar& cal) const { int32_t i = 0; @@ -2473,6 +2486,8 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, // We keep track of the longest match, and return that. 
Note that this // unfortunately requires us to test all array elements. int32_t bestMatchLength = 0, bestMatch = -1; + UnicodeString bestMatchName; + int32_t isLeapMonth = 0; // {sfb} kludge to support case-insensitive comparison // {markus 2002oct11} do not just use caseCompareBetween because we do not know @@ -2497,6 +2512,28 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, { bestMatch = i; bestMatchLength = length; + bestMatchName.setTo(data[i]); + isLeapMonth = 0; + } + + if (monthPattern != NULL) { + UErrorCode status = U_ZERO_ERROR; + UnicodeString leapMonthName; + Formattable monthName((const UnicodeString&)(data[i])); + MessageFormat::format(*monthPattern, &monthName, 1, leapMonthName, status); + if (U_SUCCESS(status)) { + lcase.fastCopyFrom(leapMonthName).foldCase(); + length = lcase.length(); + + if (length > bestMatchLength && + lcaseText.compareBetween(0, length, lcase, 0, length) == 0) + { + bestMatch = i; + bestMatchLength = length; + bestMatchName.setTo(leapMonthName); + isLeapMonth = 1; + } + } } } if (bestMatch >= 0) @@ -2508,11 +2545,14 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, else { cal.set(field, bestMatch); } + if (isLeapMonth) { + cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth); + } // Once we have a match, we have to determine the length of the // original source string. This will usually be == the length of // the case folded string, but it may differ (e.g. sharp s). - lcase.fastCopyFrom(data[bestMatch]).foldCase(); + lcase.fastCopyFrom(bestMatchName).foldCase(); // Most of the time, the length will be the same as the length // of the string from the locale data. Sometimes it will be @@ -2520,7 +2560,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, // adding a character at a time, until we have a match. We do // this all in one loop, where we try 'len' first (at index // i==0). 
- int32_t len = data[bestMatch].length(); // 99+% of the time + int32_t len = bestMatchName.length(); // 99+% of the time int32_t n = text.length() - start; for (i=0; i<=n; ++i) { int32_t j=i; @@ -2560,7 +2600,7 @@ SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status) */ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, - int32_t patLoc) const + int32_t patLoc, MessageFormat * numericLeapMonthFormatter) const { Formattable number; int32_t value = 0; @@ -2586,6 +2626,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC currentNumberFormat = getNumberFormatByIndex(patternCharIndex); UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; + if (numericLeapMonthFormatter != NULL) { + numericLeapMonthFormatter->setFormats((const Format **)&currentNumberFormat, 1); + } + // If there are any spaces here, skip over them. If we hit the end // of the string, then fail. for (;;) { @@ -2623,18 +2667,32 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // but that's going to be difficult. 
const UnicodeString* src; - if (obeyCount) { - if ((start+count) > text.length()) { - return -start; + UBool parsedNumericLeapMonth = FALSE; + if (numericLeapMonthFormatter != NULL && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) { + int32_t argCount; + Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount); + if (args != NULL && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) { + parsedNumericLeapMonth = TRUE; + number.setLong(args[0].getLong()); + } else { + pos.setIndex(parseStart); } - - text.extractBetween(0, start + count, temp); - src = &temp; - } else { - src = &text; } - parseInt(*src, number, pos, allowNegative,currentNumberFormat); + if (!parsedNumericLeapMonth) { + if (obeyCount) { + if ((start+count) > text.length()) { + return -start; + } + + text.extractBetween(0, start + count, temp); + src = &temp; + } else { + src = &text; + } + + parseInt(*src, number, pos, allowNegative,currentNumberFormat); + } int32_t txtLoc = pos.getIndex(); @@ -2698,11 +2756,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC switch (patternCharIndex) { case UDAT_ERA_FIELD: if (count == 5) { - ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal); + ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, NULL, cal); } else if (count == 4) { - ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal); + ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, NULL, cal); } else { - ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal); + ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, NULL, cal); } // check return position, if it equals -start, then matchString error @@ -2797,13 +2855,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, 
int32_t& start, UC // count >= 3 // i.e., MMM or MMMM // Want to be able to parse both short and long forms. // Try count == 4 first: + UnicodeString * wideMonthPat = NULL; + UnicodeString * shortMonthPat = NULL; + if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { + wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]; + shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]; + } int32_t newStart = 0; if ((newStart = matchString(text, start, UCAL_MONTH, // try MMMM - fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0) + fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal)) > 0) return newStart; else if ((newStart = matchString(text, start, UCAL_MONTH, // try MMM - fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal)) > 0) + fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal)) > 0) return newStart; else if (!lenient) // currently we do not try to parse MMMMM: #8860 return newStart; @@ -2823,13 +2887,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // count >= 3 // i.e., LLL or LLLL // Want to be able to parse both short and long forms. 
// Try count == 4 first: + UnicodeString * wideMonthPat = NULL; + UnicodeString * shortMonthPat = NULL; + if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { + wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]; + shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]; + } int32_t newStart = 0; if ((newStart = matchString(text, start, UCAL_MONTH, - fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0) + fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal)) > 0) return newStart; else if ((newStart = matchString(text, start, UCAL_MONTH, - fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal)) > 0) + fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal)) > 0) return newStart; else if (!lenient) // currently we do not try to parse LLLLL: #8860 return newStart; @@ -2882,15 +2952,15 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Try count == 4 (EEEE) first: int32_t newStart = 0; if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0) + fSymbols->fWeekdays, fSymbols->fWeekdaysCount, NULL, cal)) > 0) return newStart; // EEEE failed, now try EEE else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0) + fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, NULL, cal)) > 0) return newStart; // EEE failed, now try EEEEE else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal)) > 0) + fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0) return newStart; else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) return newStart; 
@@ -2910,10 +2980,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Try count == 4 (cccc) first: int32_t newStart = 0; if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0) + fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, NULL, cal)) > 0) return newStart; else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal)) > 0) + fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, NULL, cal)) > 0) return newStart; else if (!lenient) return newStart; @@ -2922,7 +2992,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC break; case UDAT_AM_PM_FIELD: - return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal); + return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, NULL, cal); case UDAT_HOUR1_FIELD: // [We computed 'value' above.] diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h index e9be0212ab3..8f0bd161228 100644 --- a/icu4c/source/i18n/unicode/smpdtfmt.h +++ b/icu4c/source/i18n/unicode/smpdtfmt.h @@ -889,13 +889,15 @@ private: * @param field the date field being parsed. * @param stringArray the string array to parsed. * @param stringArrayCount the size of the array. + * @param monthPattern pointer to leap month pattern, or NULL if none. * @param cal a Calendar set to the date and time to be formatted * into a date/time string. * @return the new start position if matching succeeded; a negative number * indicating matching failure, otherwise. 
*/ int32_t matchString(const UnicodeString& text, int32_t start, UCalendarDateFields field, - const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const; + const UnicodeString* stringArray, int32_t stringArrayCount, + const UnicodeString* monthPattern, Calendar& cal) const; /** * Private code-size reduction function used by subParse. @@ -942,12 +944,13 @@ private: * @param cal a Calendar set to the date and time to be formatted * into a date/time string. * @param patLoc + * @param numericLeapMonthFormatter If non-null, used to parse numeric leap months. * @return the new start position if matching succeeded; a negative number * indicating matching failure, otherwise. */ int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, - int32_t patLoc) const; + int32_t patLoc, MessageFormat * numericLeapMonthFormatter) const; void parseInt(const UnicodeString& text, Formattable& number, diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index 8ff4b54e279..8ac62289bd6 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -3757,14 +3757,24 @@ void DateFormatTest::TestMonthPatterns() rootChineseCalendar->set(datePtr->year, datePtr->month-1, datePtr->day); rootChineseCalendar->set(UCAL_IS_LEAP_MONTH, datePtr->isLeapMonth); UnicodeString result; - FieldPosition pos(0); - dmft->format(*rootChineseCalendar, result, pos); + FieldPosition fpos(0); + dmft->format(*rootChineseCalendar, result, fpos); if ( result.compare(itemPtr->dateString[idate]) != 0 ) { errln( UnicodeString("FAIL: Chinese calendar format for locale ") + UnicodeString(itemPtr->locale) + ", style " + itemPtr->style + ", expected \"" + itemPtr->dateString[idate] + "\", got \"" + result + "\""); } else { // formatted OK, try parse - // (to be supplied) + ParsePosition ppos(0); + 
dmft->parse(result, *rootChineseCalendar, ppos); + int32_t year = rootChineseCalendar->get(UCAL_YEAR, status); + int32_t month = rootChineseCalendar->get(UCAL_MONTH, status) + 1; + int32_t isLeapMonth = rootChineseCalendar->get(UCAL_IS_LEAP_MONTH, status); + int32_t day = rootChineseCalendar->get(UCAL_DATE, status); + if ( ppos.getIndex() < result.length() || year != datePtr->year || month != datePtr->month || isLeapMonth != datePtr->isLeapMonth || day != datePtr->day ) { + errln( UnicodeString("FAIL: Chinese calendar parse for locale ") + UnicodeString(itemPtr->locale) + ", style " + itemPtr->style + + ", string \"" + result + "\", expected " + datePtr->year +"-"+datePtr->month+"("+datePtr->isLeapMonth+")-"+datePtr->day + ", got pos " + + ppos.getIndex() + " " + year +"-"+month+"("+isLeapMonth+")-"+day); + } } } }