granicus.if.org Git - icu/commitdiff
ICU-8958 Add support for parsing leapMonths (in addition to formatting per r31039...
author Peter Edberg <pedberg@unicode.org>
Tue, 6 Dec 2011 20:46:32 +0000 (20:46 +0000)
committer Peter Edberg <pedberg@unicode.org>
Tue, 6 Dec 2011 20:46:32 +0000 (20:46 +0000)
X-SVN-Rev: 31043

icu4c/source/i18n/smpdtfmt.cpp
icu4c/source/i18n/unicode/smpdtfmt.h
icu4c/source/test/intltest/dtfmttst.cpp

index 43e2d961c7f3850b637e9b04f3e74a91d78d3fb0..bbffc93a7189708513ee867dafebad20da6939eb 100644 (file)
@@ -1903,6 +1903,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
     UBool inQuote = FALSE;
 
     const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
+    MessageFormat * numericLeapMonthFormatter = NULL;
 
     Calendar* calClone = NULL;
     Calendar *workCal = &cal;
@@ -1923,6 +1924,14 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
             goto ExitParse;
         }
     }
+    
+    if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
+        UErrorCode nlmfStatus = U_ZERO_ERROR;
+        numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, nlmfStatus);
+        if (U_FAILURE(nlmfStatus)) {
+            numericLeapMonthFormatter = NULL;
+        }
+    }
 
     for (int32_t i=0; i<fPattern.length(); ++i) {
         UChar ch = fPattern.charAt(i);
@@ -1995,7 +2004,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
                 }
 
                 pos = subParse(text, pos, ch, count,
-                               TRUE, FALSE, ambiguousYear, saveHebrewMonth, *workCal, i);
+                               TRUE, FALSE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter);
 
                 // If the parse fails anywhere in the run, back up to the
                 // start of the run and retry.
@@ -2010,7 +2019,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
             // fields.
             else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
                 int32_t s = subParse(text, pos, ch, count,
-                               FALSE, TRUE, ambiguousYear, saveHebrewMonth, *workCal, i);
+                               FALSE, TRUE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter);
 
                 if (s == -pos-1) {
                     // era not present, in special cases allow this to continue
@@ -2221,6 +2230,9 @@ ExitParse:
         cal.setTime(workCal->getTime(status), status);
     }
 
+    if (numericLeapMonthFormatter != NULL) {
+        delete numericLeapMonthFormatter;
+    }
     if (calClone != NULL) {
         delete calClone;
     }
@@ -2461,6 +2473,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
                               UCalendarDateFields field,
                               const UnicodeString* data,
                               int32_t dataCount,
+                              const UnicodeString* monthPattern,
                               Calendar& cal) const
 {
     int32_t i = 0;
@@ -2473,6 +2486,8 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
     // We keep track of the longest match, and return that.  Note that this
     // unfortunately requires us to test all array elements.
     int32_t bestMatchLength = 0, bestMatch = -1;
+    UnicodeString bestMatchName;
+    int32_t isLeapMonth = 0;
 
     // {sfb} kludge to support case-insensitive comparison
     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
@@ -2497,6 +2512,28 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
         {
             bestMatch = i;
             bestMatchLength = length;
+            bestMatchName.setTo(data[i]);
+            isLeapMonth = 0;
+        }
+
+        if (monthPattern != NULL) {
+            UErrorCode status = U_ZERO_ERROR;
+            UnicodeString leapMonthName;
+            Formattable monthName((const UnicodeString&)(data[i]));
+            MessageFormat::format(*monthPattern, &monthName, 1, leapMonthName, status);
+            if (U_SUCCESS(status)) {
+                lcase.fastCopyFrom(leapMonthName).foldCase();
+                length = lcase.length();
+
+                if (length > bestMatchLength &&
+                    lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
+                {
+                    bestMatch = i;
+                    bestMatchLength = length;
+                    bestMatchName.setTo(leapMonthName);
+                    isLeapMonth = 1;
+                }
+            }
         }
     }
     if (bestMatch >= 0)
@@ -2508,11 +2545,14 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
         else {
             cal.set(field, bestMatch);
         }
+        if (isLeapMonth) {
+            cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
+        }
 
         // Once we have a match, we have to determine the length of the
         // original source string.  This will usually be == the length of
         // the case folded string, but it may differ (e.g. sharp s).
-        lcase.fastCopyFrom(data[bestMatch]).foldCase();
+        lcase.fastCopyFrom(bestMatchName).foldCase();
 
         // Most of the time, the length will be the same as the length
         // of the string from the locale data.  Sometimes it will be
@@ -2520,7 +2560,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
         // adding a character at a time, until we have a match.  We do
         // this all in one loop, where we try 'len' first (at index
         // i==0).
-        int32_t len = data[bestMatch].length(); // 99+% of the time
+        int32_t len = bestMatchName.length(); // 99+% of the time
         int32_t n = text.length() - start;
         for (i=0; i<=n; ++i) {
             int32_t j=i;
@@ -2560,7 +2600,7 @@ SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
  */
 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
                            UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
-                           int32_t patLoc) const
+                           int32_t patLoc, MessageFormat * numericLeapMonthFormatter) const
 {
     Formattable number;
     int32_t value = 0;
@@ -2586,6 +2626,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
     currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
 
+    if (numericLeapMonthFormatter != NULL) {
+        numericLeapMonthFormatter->setFormats((const Format **)&currentNumberFormat, 1);
+    }
+
     // If there are any spaces here, skip over them.  If we hit the end
     // of the string, then fail.
     for (;;) {
@@ -2623,18 +2667,32 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
         // but that's going to be difficult.
         const UnicodeString* src;
 
-        if (obeyCount) {
-            if ((start+count) > text.length()) {
-                return -start;
+        UBool parsedNumericLeapMonth = FALSE;
+        if (numericLeapMonthFormatter != NULL && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
+            int32_t argCount;
+            Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
+            if (args != NULL && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
+                parsedNumericLeapMonth = TRUE;
+                number.setLong(args[0].getLong());
+            } else {
+                pos.setIndex(parseStart);
             }
-
-            text.extractBetween(0, start + count, temp);
-            src = &temp;
-        } else {
-            src = &text;
         }
 
-        parseInt(*src, number, pos, allowNegative,currentNumberFormat);
+        if (!parsedNumericLeapMonth) {
+            if (obeyCount) {
+                if ((start+count) > text.length()) {
+                    return -start;
+                }
+
+                text.extractBetween(0, start + count, temp);
+                src = &temp;
+            } else {
+                src = &text;
+            }
+
+            parseInt(*src, number, pos, allowNegative,currentNumberFormat);
+        }
 
         int32_t txtLoc = pos.getIndex();
 
@@ -2698,11 +2756,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
     switch (patternCharIndex) {
     case UDAT_ERA_FIELD:
         if (count == 5) {
-            ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal);
+            ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, NULL, cal);
         } else if (count == 4) {
-            ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal);
+            ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, NULL, cal);
         } else {
-            ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
+            ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, NULL, cal);
         }
 
         // check return position, if it equals -start, then matchString error
@@ -2797,13 +2855,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
             // count >= 3 // i.e., MMM or MMMM
             // Want to be able to parse both short and long forms.
             // Try count == 4 first:
+            UnicodeString * wideMonthPat = NULL;
+            UnicodeString * shortMonthPat = NULL;
+            if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
+                wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
+                shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
+            }
             int32_t newStart = 0;
 
             if ((newStart = matchString(text, start, UCAL_MONTH, // try MMMM
-                                      fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
+                                      fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal)) > 0)
                 return newStart;
             else if ((newStart = matchString(text, start, UCAL_MONTH, // try MMM
-                                          fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal)) > 0)
+                                          fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal)) > 0)
                 return newStart;
             else if (!lenient) // currently we do not try to parse MMMMM: #8860
                 return newStart;
@@ -2823,13 +2887,19 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
             // count >= 3 // i.e., LLL or LLLL
             // Want to be able to parse both short and long forms.
             // Try count == 4 first:
+            UnicodeString * wideMonthPat = NULL;
+            UnicodeString * shortMonthPat = NULL;
+            if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
+                wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
+                shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
+            }
             int32_t newStart = 0;
 
             if ((newStart = matchString(text, start, UCAL_MONTH,
-                                      fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
+                                      fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal)) > 0)
                 return newStart;
             else if ((newStart = matchString(text, start, UCAL_MONTH,
-                                          fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal)) > 0)
+                                          fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal)) > 0)
                 return newStart;
             else if (!lenient) // currently we do not try to parse LLLLL: #8860
                 return newStart;
@@ -2882,15 +2952,15 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
             // Try count == 4 (EEEE) first:
             int32_t newStart = 0;
             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
-                                      fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
+                                      fSymbols->fWeekdays, fSymbols->fWeekdaysCount, NULL, cal)) > 0)
                 return newStart;
             // EEEE failed, now try EEE
             else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
-                                   fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
+                                   fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, NULL, cal)) > 0)
                 return newStart;
             // EEE failed, now try EEEEE
             else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
-                                   fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal)) > 0)
+                                   fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0)
                 return newStart;
             else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
                 return newStart;
@@ -2910,10 +2980,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
             // Try count == 4 (cccc) first:
             int32_t newStart = 0;
             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
-                                      fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
+                                      fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, NULL, cal)) > 0)
                 return newStart;
             else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
-                                          fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal)) > 0)
+                                          fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, NULL, cal)) > 0)
                 return newStart;
             else if (!lenient)
                 return newStart;
@@ -2922,7 +2992,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
         break;
 
     case UDAT_AM_PM_FIELD:
-        return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
+        return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, NULL, cal);
 
     case UDAT_HOUR1_FIELD:
         // [We computed 'value' above.]
index e9be0212ab3478b1a62a7c69f2a5a6370848a53b..8f0bd1612282db077f492a78d188fe586e571d79 100644 (file)
@@ -889,13 +889,15 @@ private:
      * @param field the date field being parsed.
      * @param stringArray the string array to be parsed.
      * @param stringArrayCount the size of the array.
+     * @param monthPattern pointer to leap month pattern, or NULL if none.
      * @param cal a Calendar set to the date and time to be formatted
      *            into a date/time string.
      * @return the new start position if matching succeeded; a negative number
      * indicating matching failure, otherwise.
      */
     int32_t matchString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
-                        const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
+                        const UnicodeString* stringArray, int32_t stringArrayCount,
+                        const UnicodeString* monthPattern, Calendar& cal) const;
 
     /**
      * Private code-size reduction function used by subParse.
@@ -942,12 +944,13 @@ private:
      * @param cal a Calendar set to the date and time to be formatted
      *            into a date/time string.
      * @param patLoc
+     * @param numericLeapMonthFormatter If non-null, used to parse numeric leap months.
      * @return the new start position if matching succeeded; a negative number
      * indicating matching failure, otherwise.
      */
     int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
                      UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
-                     int32_t patLoc) const;
+                     int32_t patLoc, MessageFormat * numericLeapMonthFormatter) const;
 
     void parseInt(const UnicodeString& text,
                   Formattable& number,
index 8ff4b54e2795a1f4f3b642c012234277a3910bff..8ac62289bd67ad223c74c6fe7e3de4db11b30466 100644 (file)
@@ -3757,14 +3757,24 @@ void DateFormatTest::TestMonthPatterns()
                     rootChineseCalendar->set(datePtr->year, datePtr->month-1, datePtr->day);
                     rootChineseCalendar->set(UCAL_IS_LEAP_MONTH, datePtr->isLeapMonth);
                     UnicodeString result;
-                    FieldPosition pos(0);
-                    dmft->format(*rootChineseCalendar, result, pos);
+                    FieldPosition fpos(0);
+                    dmft->format(*rootChineseCalendar, result, fpos);
                     if ( result.compare(itemPtr->dateString[idate]) != 0 ) {
                         errln( UnicodeString("FAIL: Chinese calendar format for locale ") + UnicodeString(itemPtr->locale) + ", style " + itemPtr->style +
                                 ", expected \"" + itemPtr->dateString[idate] + "\", got \"" + result + "\"");
                     } else {
                         // formatted OK, try parse
-                        // (to be supplied)
+                        ParsePosition ppos(0);
+                        dmft->parse(result, *rootChineseCalendar, ppos);
+                        int32_t year = rootChineseCalendar->get(UCAL_YEAR, status);
+                        int32_t month = rootChineseCalendar->get(UCAL_MONTH, status) + 1;
+                        int32_t isLeapMonth = rootChineseCalendar->get(UCAL_IS_LEAP_MONTH, status);
+                        int32_t day = rootChineseCalendar->get(UCAL_DATE, status);
+                        if ( ppos.getIndex() < result.length() || year != datePtr->year || month != datePtr->month || isLeapMonth != datePtr->isLeapMonth || day != datePtr->day ) {
+                            errln( UnicodeString("FAIL: Chinese calendar parse for locale ") + UnicodeString(itemPtr->locale) + ", style " + itemPtr->style +
+                                ", string \"" + result + "\", expected " + datePtr->year +"-"+datePtr->month+"("+datePtr->isLeapMonth+")-"+datePtr->day + ", got pos " +
+                                ppos.getIndex() + " " + year +"-"+month+"("+isLeapMonth+")-"+day);
+                        }
                     }
                 }
             }