From 307efef92294be2913479a42b95e300deb08e7f6 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Fri, 30 Sep 2011 20:07:28 +0000 Subject: [PATCH] ICU-8820 Parse string fields in pattern as strings before trying digits (if lenient) X-SVN-Rev: 30766 --- icu4c/source/i18n/smpdtfmt.cpp | 191 +++++++++++++++--------- icu4c/source/test/intltest/dtfmttst.cpp | 116 +++++++++----- 2 files changed, 200 insertions(+), 107 deletions(-) diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 35604af8460..25f2c8d03eb 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -2571,19 +2571,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // a number value. We handle further, more generic cases below. We need // to handle some of them here because some fields require extra processing on // the parsed value. - if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || - patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || - patternCharIndex == UDAT_HOUR1_FIELD || - patternCharIndex == UDAT_HOUR0_FIELD || - patternCharIndex == UDAT_DOW_LOCAL_FIELD || - patternCharIndex == UDAT_STANDALONE_DAY_FIELD || - patternCharIndex == UDAT_MONTH_FIELD || - patternCharIndex == UDAT_STANDALONE_MONTH_FIELD || - patternCharIndex == UDAT_QUARTER_FIELD || - patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD || - patternCharIndex == UDAT_YEAR_FIELD || - patternCharIndex == UDAT_YEAR_WOY_FIELD || - patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) + if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k + patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H + patternCharIndex == UDAT_HOUR1_FIELD || // h + patternCharIndex == UDAT_HOUR0_FIELD || // K + (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e + (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c + (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M + (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L + 
(patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q + (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q + patternCharIndex == UDAT_YEAR_FIELD || // y + patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y + patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S { int32_t parseStart = pos.getIndex(); // It would be good to unify this with the obeyCount logic below, @@ -2657,26 +2657,6 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC break; - case UDAT_DOW_LOCAL_FIELD: - case UDAT_STANDALONE_DAY_FIELD: - case UDAT_MONTH_FIELD: - case UDAT_STANDALONE_MONTH_FIELD: - case UDAT_QUARTER_FIELD: - case UDAT_STANDALONE_QUARTER_FIELD: - // in strict mode, these can only - // be a number if count <= 2 - if (!lenient && gotNumber && count > 2) { - // We have a string pattern in strict mode - // but the input parsed as a number. Ignore - // the fact that the input parsed as a number - // and try to match it as a string. (Some - // locales have numbers for the month names.) - gotNumber = FALSE; - pos.setIndex(start); - } - - break; - default: // we check the rest of the fields below. 
break; @@ -2786,13 +2766,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Try count == 4 first: int32_t newStart = 0; - if ((newStart = matchString(text, start, UCAL_MONTH, + if ((newStart = matchString(text, start, UCAL_MONTH, // try MMMM fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0) return newStart; - else // count == 4 failed, now try count == 3 - return matchString(text, start, UCAL_MONTH, - fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal); + else if ((newStart = matchString(text, start, UCAL_MONTH, // try MMM + fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal)) > 0) + return newStart; + else if (!lenient) // currently we do not try to parse MMMMM: #8860 + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_STANDALONE_MONTH_FIELD: if (gotNumber) // i.e., L or LL. @@ -2811,10 +2795,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if ((newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0) return newStart; - else // count == 4 failed, now try count == 3 - return matchString(text, start, UCAL_MONTH, - fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal); + else if ((newStart = matchString(text, start, UCAL_MONTH, + fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal)) > 0) + return newStart; + else if (!lenient) // currently we do not try to parse LLLLL: #8860 + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_HOUR_OF_DAY1_FIELD: // [We computed 'value' above.] 
@@ -2868,10 +2856,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0) return newStart; // EEE failed, now try EEEEE - else - return matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal); + else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal)) > 0) + return newStart; + else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_STANDALONE_DAY_FIELD: { @@ -2887,10 +2879,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0) return newStart; - else // cccc failed, now try ccc - return matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal); + else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal)) > 0) + return newStart; + else if (!lenient) + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_AM_PM_FIELD: return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal); @@ -2923,10 +2919,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0) return newStart; - else // count == 4 failed, now try count == 3 - return matchQuarterString(text, start, UCAL_MONTH, - fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal); + else if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fShortQuarters, 
fSymbols->fShortQuartersCount, cal)) > 0) + return newStart; + else if (!lenient) + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_STANDALONE_QUARTER_FIELD: if (gotNumber) // i.e., q or qq. @@ -2945,10 +2945,14 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0) return newStart; - else // count == 4 failed, now try count == 3 - return matchQuarterString(text, start, UCAL_MONTH, - fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal); + else if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0) + return newStart; + else if (!lenient) + return newStart; + // else we allowing parsing as number, below } + break; case UDAT_TIMEZONE_FIELD: case UDAT_TIMEZONE_RFC_FIELD: @@ -3128,34 +3132,79 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC default: // Handle "generic" fields - int32_t parseStart = pos.getIndex(); - const UnicodeString* src; - if (obeyCount) { - if ((start+count) > text.length()) { + // this is now handled below, outside the switch block + break; + } + // Handle "generic" fields: + // switch default case now handled here (outside switch block) to allow + // parsing of some string fields as digits for lenient case + + int32_t parseStart = pos.getIndex(); + const UnicodeString* src; + if (obeyCount) { + if ((start+count) > text.length()) { + return -start; + } + text.extractBetween(0, start + count, temp); + src = &temp; + } else { + src = &text; + } + parseInt(*src, number, pos, allowNegative,currentNumberFormat); + if (pos.getIndex() != parseStart) { + int32_t value = number.getLong(); + + // Don't need suffix processing here (as in number processing at the beginning of the function); + // the new 
fields being handled as numeric values (month, weekdays, quarters) should not have suffixes. + + if (!lenient) { + // Check the range of the value + int32_t bias = gFieldRangeBias[patternCharIndex]; + if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { return -start; } - text.extractBetween(0, start + count, temp); - src = &temp; - } else { - src = &text; } - parseInt(*src, number, pos, allowNegative,currentNumberFormat); - if (pos.getIndex() != parseStart) { - int32_t value = number.getLong(); - if (!lenient) { - // Check the range of the value - int32_t bias = gFieldRangeBias[patternCharIndex]; - if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { - return -start; + // For the following, need to repeat some of the "if (gotNumber)" code above: + // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD, + // UDAT_[STANDALONE_]QUARTER_FIELD + switch (patternCharIndex) { + case UDAT_MONTH_FIELD: + // See notes under UDAT_MONTH_FIELD case above + if (!strcmp(cal.getType(),"hebrew")) { + HebrewCalendar *hc = (HebrewCalendar*)&cal; + if (cal.isSet(UCAL_YEAR)) { + UErrorCode status = U_ZERO_ERROR; + if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) { + cal.set(UCAL_MONTH, value); + } else { + cal.set(UCAL_MONTH, value - 1); + } + } else { + saveHebrewMonth = value; } + } else { + cal.set(UCAL_MONTH, value - 1); } - + break; + case UDAT_STANDALONE_MONTH_FIELD: + cal.set(UCAL_MONTH, value - 1); + break; + case UDAT_DOW_LOCAL_FIELD: + case UDAT_STANDALONE_DAY_FIELD: + cal.set(UCAL_DOW_LOCAL, value); + break; + case UDAT_QUARTER_FIELD: + case UDAT_STANDALONE_QUARTER_FIELD: + cal.set(UCAL_MONTH, (value - 1) * 3); + break; + default: cal.set(field, value); - return pos.getIndex(); + break; } - return -start; + return pos.getIndex(); } + return -start; } /** diff --git a/icu4c/source/test/intltest/dtfmttst.cpp 
b/icu4c/source/test/intltest/dtfmttst.cpp index f3316c336fa..4bc46ddcffa 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -828,10 +828,15 @@ DateFormatTest::TestBadInput135() if (U_SUCCESS(status)) { UnicodeString format; + UnicodeString pattern; + SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(df); + if (sdtfmt != NULL) { + sdtfmt->toPattern(pattern); + } full->format(when, format); logln(prefix + "OK: " + format); if (0!=format.compareBetween(0, expected.length(), expected, 0, expected.length())) - errln((UnicodeString)"FAIL: Expected " + expected + " got " + format); + errln((UnicodeString)"FAIL: Parse \"" + text + "\", pattern \"" + pattern + "\", expected " + expected + " got " + format); } //} //catch(ParseException e) { @@ -958,7 +963,7 @@ DateFormatTest::TestBadInput135a() if (expected == 0) errln((UnicodeString)"FAIL: Expected parse failure, got " + result); else if (!(result == expected)) - errln(UnicodeString("FAIL: Expected ") + expected + UnicodeString(", got ") + result); + errln(UnicodeString("FAIL: Parse \"") + s + UnicodeString("\", expected ") + expected + UnicodeString(", got ") + result); } } else if (expected != 0) { @@ -1256,7 +1261,7 @@ void DateFormatTest::TestSpaceParsing() { // pattern, input, expected parse or NULL if expect parse failure "MMMM d yy", " 04 05 06", "2006 04 05 00:00:00", NULL, "04 05 06", "2006 04 05 00:00:00", - + "MM d yy", " 04 05 06", "2006 04 05 00:00:00", NULL, "04 05 06", "2006 04 05 00:00:00", NULL, "04/05/06", "2006 04 05 00:00:00", @@ -1266,16 +1271,16 @@ void DateFormatTest::TestSpaceParsing() { NULL, "Apr / 05/ 06", "2006 04 05 00:00:00", NULL, "Apr-05-06", "2006 04 05 00:00:00", NULL, "Apr 05, 2006", "2006 04 05 00:00:00", - + "MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00", NULL, "Apr 05 06", "2006 04 05 00:00:00", - NULL, "Apr05 06", "2006 04 05 00:00:00", - - "hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56", - NULL, "12:34:56PM", "1970 01 01 12:34:56", + 
NULL, "Apr05 06", "2006 04 05 00:00:00", + + "hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56", + NULL, "12:34:56PM", "1970 01 01 12:34:56", NULL, "12.34.56PM", "1970 01 01 12:34:56", NULL, "12-34-56 PM", "1970 01 01 12:34:56", - NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56", + NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56", "MM d yy 'at' hh:mm:ss a", "04/05/06 12:34:56 PM", "2006 04 05 12:34:56", @@ -3482,35 +3487,73 @@ void DateFormatTest::Test6880() { delete fmt; } +typedef struct { + const char * localeStr; + UBool lenient; + UBool expectFail; + UnicodeString datePattern; + UnicodeString dateString; +} NumAsStringItem; + void DateFormatTest::TestNumberAsStringParsing() { - UErrorCode status = U_ZERO_ERROR; - UnicodeString dateString("2009 7 2 08:14:16"); - UnicodeString datePattern("y MMMM d HH:mm:ss"); - SimpleDateFormat *formatter = new SimpleDateFormat(datePattern, Locale(""), status); - UDate date1 = 0; - - if (formatter == NULL || U_FAILURE(status)) { - dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status)); - return; - } - - formatter->setLenient(FALSE); - date1 = formatter->parse(dateString, status); - - if (U_FAILURE(status)) { - errln("FAIL: Could not parse \"2009 7 2 08:14:16\" with pattern \"y MMMM d HH:mm:ss\""); - } else { - UnicodeString formatted; - - formatter->format(date1, formatted); - - if (formatted != dateString) { - errln("FAIL: parsed string did not match input."); + const NumAsStringItem items[] = { + // loc lenient fail? 
datePattern dateString + { "", FALSE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") }, + { "", TRUE, FALSE, UnicodeString("y MMMM d HH:mm:ss"), UnicodeString("2009 7 14 08:43:57") }, + { "en", FALSE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") }, + { "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("Jul 14, 2009") }, + { "en", FALSE, TRUE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") }, + { "en", TRUE, FALSE, UnicodeString("MMM d, y"), UnicodeString("7 14, 2009") }, + { "ja", FALSE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") }, + { "ja", TRUE, FALSE, UnicodeString("yyyy/MM/dd"), UnicodeString("2009/07/14") }, + //{ "ja", FALSE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") }, // #8860 covers test failure + { "ja", TRUE, FALSE, UnicodeString("yyyy/MMMMM/d"), UnicodeString("2009/7/14") }, + { "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, + { "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74M\\u6708d\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, + { "ja", FALSE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, + { "ja", TRUE, FALSE, CharsToUnicodeString("y\\u5E74MMMd\\u65E5"), CharsToUnicodeString("2009\\u5E747\\u670814\\u65E5") }, // #8820 fixes test failure + { "ko", FALSE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") }, + { "ko", TRUE, FALSE, UnicodeString("yyyy. M. d."), UnicodeString("2009. 7. 14.") }, + { "ko", FALSE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 7\\uC6D4 14.") }, + { "ko", TRUE, FALSE, UnicodeString("yyyy. MMMMM d."), CharsToUnicodeString("2009. 
7\\uC6D4 14.") }, // #8820 fixes test failure + { "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, + { "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 M\\uC6D4 d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, + { "ko", FALSE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, + { "ko", TRUE, FALSE, CharsToUnicodeString("y\\uB144 MMM d\\uC77C"), CharsToUnicodeString("2009\\uB144 7\\uC6D4 14\\uC77C") }, // #8820 fixes test failure + { NULL, FALSE, FALSE, UnicodeString(""), UnicodeString("") } + }; + const NumAsStringItem * itemPtr; + for (itemPtr = items; itemPtr->localeStr != NULL; itemPtr++ ) { + Locale locale = Locale::createFromName(itemPtr->localeStr); + UErrorCode status = U_ZERO_ERROR; + SimpleDateFormat *formatter = new SimpleDateFormat(itemPtr->datePattern, locale, status); + if (formatter == NULL || U_FAILURE(status)) { + dataerrln("Unable to create SimpleDateFormat - %s", u_errorName(status)); + return; } + + formatter->setLenient(itemPtr->lenient); + UDate date1 = formatter->parse(itemPtr->dateString, status); + if (U_FAILURE(status)) { + if (!itemPtr->expectFail) { + errln("FAIL, err when expected success: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient + + ": using pattern \"" + itemPtr->datePattern + "\", could not parse \"" + itemPtr->dateString + "\"; err: " + u_errorName(status) ); + } + } else if (itemPtr->expectFail) { + errln("FAIL, expected err but got none: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient + + ": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\"." 
); + } else if (!itemPtr->lenient) { + UnicodeString formatted; + formatter->format(date1, formatted); + if (formatted != itemPtr->dateString) { + errln("FAIL, mismatch formatting parsed date: Locale \"" + UnicodeString(itemPtr->localeStr) + "\", lenient " + itemPtr->lenient + + ": using pattern \"" + itemPtr->datePattern + "\", did parse \"" + itemPtr->dateString + "\", formatted result \"" + formatted + "\"."); + } + } + + delete formatter; } - - delete formatter; } void DateFormatTest::TestISOEra() { @@ -3556,7 +3599,7 @@ void DateFormatTest::TestISOEra() { } } - delete fmt1; + delete fmt1; } void DateFormatTest::TestFormalChineseDate() { @@ -3648,7 +3691,8 @@ void DateFormatTest::TestParsePosition() { input += TestData[i][3]; ParsePosition pos(startPos); - UDate d = sdf->parse(input, pos); + //UDate d = sdf->parse(input, pos); + (void)sdf->parse(input, pos); if (pos.getIndex() != resPos) { errln(UnicodeString("FAIL: Parsing [") + input + "] with pattern [" + TestData[i][0] + "] returns position - " -- 2.40.0