ICU-9789 lenient date/time parsing: accept missing or added trailing dot after non...

author Markus Scherer <markus.icu@gmail.com>

Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)
author Markus Scherer <markus.icu@gmail.com>
Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)
diff --git a/icu4c/source/i18n/dtfmtsym.cpp b/icu4c/source/i18n/dtfmtsym.cpp

index 77d8d0527f10d166bd434cecbfc89d348c4a5f9f..49fc310d20e448e41c9866302b3a44e0ca571b36 100644 (file)
--- a/icu4c/source/i18n/dtfmtsym.cpp
+++ b/icu4c/source/i18n/dtfmtsym.cpp
@@ -1152,6 +1152,48 @@ DateFormatSymbols::getPatternUChars(void)
      return gPatternChars;
  }
  
+UDateFormatField U_EXPORT2
+DateFormatSymbols::getPatternCharIndex(UChar c) {
+    // A pattern character's field index is its position within gPatternChars.
+    const UChar *match = u_strchr(gPatternChars, c);
+    if (match != NULL) {
+        return static_cast<UDateFormatField>(match - gPatternChars);
+    }
+    return UDAT_FIELD_COUNT;  // c is not a pattern character
+}
+
+static const uint32_t kNumericFields =                      // one bit per UDateFormatField value; tested by isNumericField()
+    ((uint32_t)1 << UDAT_YEAR_FIELD) |                      // y
+    ((uint32_t)1 << UDAT_MONTH_FIELD) |                     // M or MM
+    ((uint32_t)1 << UDAT_DATE_FIELD) |                      // d
+    ((uint32_t)1 << UDAT_HOUR_OF_DAY1_FIELD) |              // k
+    ((uint32_t)1 << UDAT_HOUR_OF_DAY0_FIELD) |              // H
+    ((uint32_t)1 << UDAT_MINUTE_FIELD) |                    // m
+    ((uint32_t)1 << UDAT_SECOND_FIELD) |                    // s
+    ((uint32_t)1 << UDAT_FRACTIONAL_SECOND_FIELD) |         // S
+    ((uint32_t)1 << UDAT_DAY_OF_YEAR_FIELD) |               // D
+    ((uint32_t)1 << UDAT_DAY_OF_WEEK_IN_MONTH_FIELD) |      // F
+    ((uint32_t)1 << UDAT_WEEK_OF_YEAR_FIELD) |              // w
+    ((uint32_t)1 << UDAT_WEEK_OF_MONTH_FIELD) |             // W
+    ((uint32_t)1 << UDAT_HOUR1_FIELD) |                     // h
+    ((uint32_t)1 << UDAT_HOUR0_FIELD) |                     // K
+    ((uint32_t)1 << UDAT_YEAR_WOY_FIELD) |                  // Y
+    ((uint32_t)1 << UDAT_DOW_LOCAL_FIELD) |                 // e
+    ((uint32_t)1 << UDAT_EXTENDED_YEAR_FIELD);              // u
+
+UBool U_EXPORT2
+DateFormatSymbols::isNumericField(UDateFormatField f, int32_t count) {
+    // kNumericFields has 32 bits; shifting by a negative or >= 32 index would be undefined,
+    // so such values (like the UDAT_FIELD_COUNT "no field" marker) are simply not numeric.
+    if (f == UDAT_FIELD_COUNT || static_cast<uint32_t>(f) >= 32) { return FALSE; }
+    return (kNumericFields & ((uint32_t)1 << f)) != 0 && (f != UDAT_MONTH_FIELD || count < 3);  // month: M/MM only
+}
+
+UBool U_EXPORT2
+DateFormatSymbols::isNumericPatternChar(UChar c, int32_t count) {
+    return isNumericField(getPatternCharIndex(c), count);  // an unknown c maps to UDAT_FIELD_COUNT, which isNumericField() rejects
+}
+
  //------------------------------------------------------
  
  UnicodeString&
diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp

index fa5dc42cfbc724cc5e2a576febcd754af207256e..622a2f311a676902a78a66d003b544d749b88c5e 100644 (file)
--- a/icu4c/source/i18n/smpdtfmt.cpp
+++ b/icu4c/source/i18n/smpdtfmt.cpp
@@ -1151,16 +1151,13 @@ SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeStrin
                  }
              }
          } else {
-           UChar ch = ovrField.charAt(0);
-           UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
-           UDateFormatField patternCharIndex;
-
             // if the pattern character is unrecognized, signal an error and bail out
-           if (patternCharPtr == NULL) {
+           UDateFormatField patternCharIndex =
+              DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
+           if (patternCharIndex == UDAT_FIELD_COUNT) {
                 status = U_INVALID_FORMAT_ERROR;
                 return;
             }
-           patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
  
             // Set the number formatter in the table
             fNumberFormatters[patternCharIndex] = nf;
@@ -1169,6 +1166,7 @@ SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeStrin
          start = delimiterPosition + 1;
      }
  }
+
  //---------------------------------------------------------------------
  void
  SimpleDateFormat::subFormat(UnicodeString &appendTo,
@@ -1187,8 +1185,7 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo,
      // this function gets called by format() to produce the appropriate substitution
      // text for an individual pattern symbol (e.g., "HH" or "yyyy")
  
-    UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
-    UDateFormatField patternCharIndex;
+    UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
      const int32_t maxIntCount = 10;
      int32_t beginOffset = appendTo.length();
      NumberFormat *currentNumberFormat;
@@ -1198,7 +1195,7 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo,
      UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0);
  
      // if the pattern character is unrecognized, signal an error and dump out
-    if (patternCharPtr == NULL)
+    if (patternCharIndex == UDAT_FIELD_COUNT)
      {
          if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
              status = U_INVALID_FORMAT_ERROR;
@@ -1206,7 +1203,6 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo,
          return;
      }
  
-    patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
      UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
      int32_t value = cal.get(field, status);
      if (U_FAILURE(status)) {
@@ -1569,20 +1565,46 @@ SimpleDateFormat::zeroPaddingNumber(NumberFormat *currentNumberFormat,UnicodeStr
  
  //----------------------------------------------------------------------
  
-/**
- * Format characters that indicate numeric fields.  The character
- * at index 0 is treated specially.
- */
-static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */
-
  /**
   * Return true if the given format character, occuring count
   * times, represents a numeric field.
   */
  UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
-    UnicodeString s(NUMERIC_FORMAT_CHARS);
-    int32_t i = s.indexOf(formatChar);
-    return (i > 0 || (i == 0 && count < 3));
+    return DateFormatSymbols::isNumericPatternChar(formatChar, count);
+}
+
+UBool
+SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
+    const int32_t patternLength = pattern.length();
+    if (patternOffset >= patternLength) {
+        return FALSE;  // past the end of the pattern: not at any field
+    }
+    const UChar fieldChar = pattern.charAt(patternOffset);
+    const UDateFormatField field = DateFormatSymbols::getPatternCharIndex(fieldChar);
+    if (field == UDAT_FIELD_COUNT) {
+        return FALSE;  // not a pattern character, so not at any field
+    }
+    // The length of the run of identical letters matters, e.g. MM is numeric but MMM is not.
+    int32_t runEnd = patternOffset + 1;
+    while (runEnd < patternLength && pattern.charAt(runEnd) == fieldChar) { ++runEnd; }
+    return DateFormatSymbols::isNumericField(field, runEnd - patternOffset);
+}
+
+UBool
+SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
+    if (patternOffset <= 0) {
+        return FALSE;  // at the start of the pattern: not after any field
+    }
+    const int32_t last = patternOffset - 1;  // index of the character just before patternOffset
+    const UChar fieldChar = pattern.charAt(last);
+    const UDateFormatField field = DateFormatSymbols::getPatternCharIndex(fieldChar);
+    if (field == UDAT_FIELD_COUNT) {
+        return FALSE;  // preceded by a non-pattern character: not after any field
+    }
+    // Walk backwards over the run of identical letters to obtain the field's length.
+    int32_t before = last - 1;
+    while (before >= 0 && pattern.charAt(before) == fieldChar) { --before; }
+    return !DateFormatSymbols::isNumericField(field, last - before);
+}
  
  void
@@ -1611,7 +1633,6 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
      int32_t abutPass = 0;
      UBool inQuote = FALSE;
  
-    const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
      MessageFormat * numericLeapMonthFormatter = NULL;
  
      Calendar* calClone = NULL;
@@ -1661,33 +1682,12 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
  
              if (isNumeric(ch, count)) {
                  if (abutPat < 0) {
-                    // Determine if there is an abutting numeric field.  For
-                    // most fields we can just look at the next characters,
-                    // but the 'm' field is either numeric or text,
-                    // depending on the count, so we have to look ahead for
-                    // that field.
-                    if ((i+1)<fPattern.length()) {
-                        UBool abutting;
-                        UChar nextCh = fPattern.charAt(i+1);
-                        int32_t k = numericFormatChars.indexOf(nextCh);
-                        if (k == 0) {
-                            int32_t j = i+2;
-                            while (j<fPattern.length() &&
-                                   fPattern.charAt(j) == nextCh) {
-                                ++j;
-                            }
-                            abutting = (j-i) < 4; // nextCount < 3
-                        } else {
-                            abutting = k > 0;
-                        }
-
-                        // Record the start of a set of abutting numeric
-                        // fields.
-                        if (abutting) {
-                            abutPat = fieldPat;
-                            abutStart = pos;
-                            abutPass = 0;
-                        }
+                    // Determine if there is an abutting numeric field.
+                    // Record the start of a set of abutting numeric fields.
+                    if (isAtNumericField(fPattern, i + 1)) {
+                        abutPat = fieldPat;
+                        abutStart = pos;
+                        abutPass = 0;
                      }
                  }
              } else {
@@ -1774,6 +1774,14 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
          }
      }
  
+    // Special hack for trailing "." after non-numeric field.
+    if (text.charAt(pos) == 0x2e && lenient) {
+        // only do if the last field is not numeric
+        if (isAfterNonNumericField(fPattern, fPattern.length())) {
+            pos++; // skip the extra "."
+        }
+    }
+
      // At this point the fields of Calendar have been set.  Calendar
      // will fill in default values for missing fields when the time
      // is computed.
@@ -1975,6 +1983,12 @@ SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
  }
  //----------------------------------------------------------------------
  
+static UBool
+newBestMatchWithOptionalDot(const UnicodeString &lcaseText,
+                            const UnicodeString &data,
+                            UnicodeString &bestMatchName,
+                            int32_t &bestMatchLength);
+
  int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
                                int32_t start,
                                UCalendarDateFields field,
@@ -1990,6 +2004,7 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
      // We keep track of the longest match, and return that.  Note that this
      // unfortunately requires us to test all array elements.
      int32_t bestMatchLength = 0, bestMatch = -1;
+    UnicodeString bestMatchName;
  
      // {sfb} kludge to support case-insensitive comparison
      // {markus 2002oct11} do not just use caseCompareBetween because we do not know
@@ -1997,7 +2012,7 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
      // {alan 20040607} don't case change the whole string, since the length
      // can change
      // TODO we need a case-insensitive startsWith function
-    UnicodeString lcase, lcaseText;
+    UnicodeString lcaseText;
      text.extract(start, INT32_MAX, lcaseText);
      lcaseText.foldCase();
  
@@ -2006,14 +2021,8 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
          // Always compare if we have no match yet; otherwise only compare
          // against potentially better matches (longer strings).
  
-        lcase.fastCopyFrom(data[i]).foldCase();
-        int32_t length = lcase.length();
-
-        if (length > bestMatchLength &&
-            lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
-        {
+        if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) {
              bestMatch = i;
-            bestMatchLength = length;
          }
      }
      if (bestMatch >= 0)
@@ -2023,7 +2032,6 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
          // Once we have a match, we have to determine the length of the
          // original source string.  This will usually be == the length of
          // the case folded string, but it may differ (e.g. sharp s).
-        lcase.fastCopyFrom(data[bestMatch]).foldCase();
  
          // Most of the time, the length will be the same as the length
          // of the string from the locale data.  Sometimes it will be
@@ -2031,7 +2039,7 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
          // adding a character at a time, until we have a match.  We do
          // this all in one loop, where we try 'len' first (at index
          // i==0).
-        int32_t len = data[bestMatch].length(); // 99+% of the time
+        int32_t len = bestMatchName.length(); // 99+% of the time
          int32_t n = text.length() - start;
          for (i=0; i<=n; ++i) {
              int32_t j=i;
@@ -2042,7 +2050,7 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
              }
              text.extract(start, j, lcaseText);
              lcaseText.foldCase();
-            if (lcase == lcaseText) {
+            if (bestMatchName == lcaseText) {
                  return start + j;
              }
          }
@@ -2099,7 +2107,7 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
          }
      }
          
-    for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
+    for (p = 0; p < literal.length() && t < text.length();) {
          UBool needWhitespace = FALSE;
          
          while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
@@ -2140,11 +2148,20 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
              // Ran out of text, or found a non-matching character:
              // OK in lenient mode, an error in strict mode.
              if (lenient) {
+                if (t == textOffset && text.charAt(t) == 0x2e &&
+                        isAfterNonNumericField(pattern, patternOffset)) {
+                    // Lenient mode and the literal input text begins with a "." and
+                    // we are after a non-numeric field: We skip the "."
+                    ++t;
+                    continue;  // Do not update p.
+                }
                  break;
              }
              
              return FALSE;
          }
+        ++p;
+        ++t;
      }
      
      // At this point if we're in strict mode we have a complete match.
@@ -2154,11 +2171,8 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
          // no match. Pretend it matched a run of whitespace
          // and ignorables in the text.
          const  UnicodeSet *ignorables = NULL;
-        UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));
-        
-        if (patternCharPtr != NULL) {
-            UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());
-            
+        UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
+        if (patternCharIndex != UDAT_FIELD_COUNT) {
              ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
          }
          
@@ -2207,7 +2221,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
      // {alan 20040607} don't case change the whole string, since the length
      // can change
      // TODO we need a case-insensitive startsWith function
-    UnicodeString lcase, lcaseText;
+    UnicodeString lcaseText;
      text.extract(start, INT32_MAX, lcaseText);
      lcaseText.foldCase();
  
@@ -2216,15 +2230,8 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
          // Always compare if we have no match yet; otherwise only compare
          // against potentially better matches (longer strings).
  
-        lcase.fastCopyFrom(data[i]).foldCase();
-        int32_t length = lcase.length();
-
-        if (length > bestMatchLength &&
-            lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
-        {
+        if (newBestMatchWithOptionalDot(lcaseText, data[i], bestMatchName, bestMatchLength)) {
              bestMatch = i;
-            bestMatchLength = length;
-            bestMatchName.setTo(data[i]);
              isLeapMonth = 0;
          }
  
@@ -2234,15 +2241,8 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
              Formattable monthName((const UnicodeString&)(data[i]));
              MessageFormat::format(*monthPattern, &monthName, 1, leapMonthName, status);
              if (U_SUCCESS(status)) {
-                lcase.fastCopyFrom(leapMonthName).foldCase();
-                length = lcase.length();
-
-                if (length > bestMatchLength &&
-                    lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
-                {
+                if (newBestMatchWithOptionalDot(lcaseText, leapMonthName, bestMatchName, bestMatchLength)) {
                      bestMatch = i;
-                    bestMatchLength = length;
-                    bestMatchName.setTo(leapMonthName);
                      isLeapMonth = 1;
                  }
              }
@@ -2267,7 +2267,6 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
          // Once we have a match, we have to determine the length of the
          // original source string.  This will usually be == the length of
          // the case folded string, but it may differ (e.g. sharp s).
-        lcase.fastCopyFrom(bestMatchName).foldCase();
  
          // Most of the time, the length will be the same as the length
          // of the string from the locale data.  Sometimes it will be
@@ -2286,7 +2285,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
              }
              text.extract(start, j, lcaseText);
              lcaseText.foldCase();
-            if (lcase == lcaseText) {
+            if (bestMatchName == lcaseText) {
                  return start + j;
              }
          }
@@ -2295,6 +2294,37 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text,
      return -start;
  }
  
+static UBool
+newBestMatchWithOptionalDot(const UnicodeString &lcaseText,
+                            const UnicodeString &data,
+                            UnicodeString &bestMatchName,
+                            int32_t &bestMatchLength) {
+    // Tests whether data, case-folded, is a prefix of lcaseText (expected to be case-folded by
+    // the caller), with or without data's trailing '.'. Only a match strictly longer than
+    // bestMatchLength counts: then bestMatchName/bestMatchLength are updated and TRUE returned.
+    UnicodeString lcase;
+    lcase.fastCopyFrom(data).foldCase();
+    int32_t length = lcase.length();
+    if (length <= bestMatchLength) {
+        return FALSE;  // data cannot provide a better match.
+    }
+    if (lcaseText.compareBetween(0, length, lcase, 0, length) == 0) {
+        bestMatchName = lcase;  // normal match
+        bestMatchLength = length;
+        return TRUE;
+    }
+    // Dropping the dot shortens the candidate by one, so re-check it against bestMatchLength:
+    // otherwise an equally long earlier match is displaced, and data "." matches any text.
+    if (lcase.charAt(--length) == 0x2e && length > bestMatchLength &&
+            lcaseText.compareBetween(0, length, lcase, 0, length) == 0) {
+        bestMatchName = lcase;  // matches except for data's trailing dot
+        bestMatchName.truncate(length);
+        bestMatchLength = length;
+        return TRUE;
+    }
+    return FALSE;
+}
+
  //----------------------------------------------------------------------
  
  void
@@ -2322,10 +2352,9 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
      int32_t i;
      int32_t ps = 0;
      ParsePosition pos(0);
-    UDateFormatField patternCharIndex;
+    UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
      NumberFormat *currentNumberFormat;
      UnicodeString temp;
-    UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
      UBool lenient = isLenient();
      UBool gotNumber = FALSE;
  
@@ -2333,11 +2362,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
      //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
  #endif
  
-    if (patternCharPtr == NULL) {
+    if (patternCharIndex == UDAT_FIELD_COUNT) {
          return -start;
      }
  
-    patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
      currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
      UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
      UnicodeString hebr("hebr", 4, US_INV);
diff --git a/icu4c/source/i18n/unicode/dtfmtsym.h b/icu4c/source/i18n/unicode/dtfmtsym.h

index 1da51eb464487bc9147a849a2bdab4013760d3be..f6ba1de3b398d52dfd08f898600937e7343d54e9 100644 (file)
--- a/icu4c/source/i18n/unicode/dtfmtsym.h
+++ b/icu4c/source/i18n/unicode/dtfmtsym.h
@@ -25,6 +25,7 @@
  #include "unicode/calendar.h"
  #include "unicode/uobject.h"
  #include "unicode/locid.h"
+#include "unicode/udat.h"
  #include "unicode/ures.h"
  
  /**
@@ -802,6 +803,22 @@ private:
       * Delete just the zone strings.
       */
      void disposeZoneStrings(void);
+
+    /**
+     * Returns the date format field index of the pattern character c,
+     * or UDAT_FIELD_COUNT if c is not a pattern character.
+     */
+    static UDateFormatField U_EXPORT2 getPatternCharIndex(UChar c);
+
+    /**
+     * Returns TRUE if f (with its pattern character repeated count times) is a numeric field.
+     */
+    static UBool U_EXPORT2 isNumericField(UDateFormatField f, int32_t count);
+
+    /**
+     * Returns TRUE if c (repeated count times) is the pattern character for a numeric field.
+     */
+    static UBool U_EXPORT2 isNumericPatternChar(UChar c, int32_t count);
  };
  
  U_NAMESPACE_END
diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h

index 475c17d3598e2a818b8f709decdcaf330fca9cea..299449d2f9d5689b96061dc59e1697917496b96a 100644 (file)
--- a/icu4c/source/i18n/unicode/smpdtfmt.h
+++ b/icu4c/source/i18n/unicode/smpdtfmt.h
@@ -945,6 +945,16 @@ private:
       */
      static UBool isNumeric(UChar formatChar, int32_t count);
  
+    /**
+     * Returns TRUE if the patternOffset is at the start of a numeric field.
+     */
+    static UBool isAtNumericField(const UnicodeString &pattern, int32_t patternOffset);
+
+    /**
+     * Returns TRUE if the patternOffset is right after a non-numeric field.
+     */
+    static UBool isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset);
+
      /**
       * initializes fCalendar from parameters.  Returns fCalendar as a convenience.
       * @param adoptZone  Zone to be adopted, or NULL for TimeZone::createDefault().
diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp

index afe018fd6c006939c73d4a40d929e2dbb9788934..f9d0319fff1b07c44e27103c8988f1ffbc7c290c 100644 (file)
--- a/icu4c/source/test/intltest/dtfmttst.cpp
+++ b/icu4c/source/test/intltest/dtfmttst.cpp
@@ -98,6 +98,7 @@ void DateFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &nam
      TESTCASE_AUTO(TestRelativeError);
      TESTCASE_AUTO(TestRelativeOther);
      */
+    TESTCASE_AUTO(TestDotAndAtLeniency);
      TESTCASE_AUTO_END;
  }
  
@@ -4093,6 +4094,66 @@ void DateFormatTest::TestNonGregoFmtParse()
      }
  }
  
+static const UDate TEST_DATE = 1326585600000.;  // 2012-jan-15
+
+void DateFormatTest::TestDotAndAtLeniency() {
+    // Regression test for date/time parsing with CLDR 22.1/ICU 50 pattern strings; see http://bugs.icu-project.org/trac/ticket/9789
+    // The style loops advance via static_cast because C++ defines no ++ operator for enumeration types.
+    static const char *locales[] = { "en", "fr" };
+    for (int32_t i = 0; i < LENGTHOF(locales); ++i) {
+        Locale locale(locales[i]);
+
+        for (DateFormat::EStyle dateStyle = DateFormat::FULL; dateStyle <= DateFormat::SHORT; dateStyle = static_cast<DateFormat::EStyle>(dateStyle + 1)) {
+            LocalPointer<DateFormat> dateFormat(DateFormat::createDateInstance(dateStyle, locale));
+
+            for (DateFormat::EStyle timeStyle = DateFormat::FULL; timeStyle <= DateFormat::SHORT; timeStyle = static_cast<DateFormat::EStyle>(timeStyle + 1)) {
+                LocalPointer<DateFormat> format(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale));
+                LocalPointer<DateFormat> timeFormat(DateFormat::createTimeInstance(timeStyle, locale));
+                UnicodeString formattedString;
+                format->format(TEST_DATE, formattedString);
+
+                if (!showParse(*format, formattedString)) {
+                    errln(UnicodeString("    with date-time: dateStyle=") + dateStyle + " timeStyle=" + timeStyle);
+                }
+
+                UnicodeString ds, ts;
+                formattedString = dateFormat->format(TEST_DATE, ds) + "  " + timeFormat->format(TEST_DATE, ts);
+                if (!showParse(*format, formattedString)) {
+                    errln(UnicodeString("    with date sp sp time: dateStyle=") + dateStyle + " timeStyle=" + timeStyle);
+                }
+                if (formattedString.indexOf("n ") >= 0) { // will add "." after the end of text ending in 'n', like Jan.
+                    UnicodeString plusDot(formattedString);
+                    plusDot.findAndReplace("n ", "n. ").append(".");
+                    if (!showParse(*format, plusDot)) {
+                        errln(UnicodeString("    with date plus-dot time: dateStyle=") + dateStyle + " timeStyle=" + timeStyle);
+                    }
+                }
+                if (formattedString.indexOf(". ") >= 0) { // will subtract "." at the end of strings.
+                    UnicodeString minusDot(formattedString);
+                    minusDot.findAndReplace(". ", " ");
+                    if (!showParse(*format, minusDot)) {
+                        errln(UnicodeString("    with date minus-dot time: dateStyle=") + dateStyle + " timeStyle=" + timeStyle);
+                    }
+                }
+            }
+        }
+    }
+}
+
+UBool DateFormatTest::showParse(DateFormat &format, const UnicodeString &formattedString) {
+    ParsePosition parsePosition;
+    const UDate parsed = format.parse(formattedString, parsePosition);
+    const UBool ok = TEST_DATE == parsed && parsePosition.getIndex() == formattedString.length();  // right date, all text consumed
+    UnicodeString pattern;
+    static_cast<SimpleDateFormat &>(format).toPattern(pattern);  // NOTE(review): assumes format is a SimpleDateFormat
+    if (!ok) {
+        errln(pattern + "  fails to parse: " + formattedString);
+    } else {
+        logln(pattern + "  parsed: " + formattedString);
+    }
+    return ok;
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
  
  //eof
diff --git a/icu4c/source/test/intltest/dtfmttst.h b/icu4c/source/test/intltest/dtfmttst.h

index b70c8af1380b8d281bd26f5c61af23c92534c299..fc25df3ce391bcd4630a5d13f3840addbf3e860f 100644 (file)
--- a/icu4c/source/test/intltest/dtfmttst.h
+++ b/icu4c/source/test/intltest/dtfmttst.h
@@ -225,6 +225,10 @@ public:
       void TestRelativeOther(void);
  */
  
+    void TestDotAndAtLeniency();
+private:
+    UBool showParse(DateFormat &format, const UnicodeString &formattedString);
+
  public:
      /**
       * Test parsing a number as a string
author	Markus Scherer <markus.icu@gmail.com>
	Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Fri, 14 Dec 2012 06:35:11 +0000 (06:35 +0000)
icu4c/source/i18n/dtfmtsym.cpp		patch \| blob \| history
icu4c/source/i18n/smpdtfmt.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/dtfmtsym.h		patch \| blob \| history
icu4c/source/i18n/unicode/smpdtfmt.h		patch \| blob \| history
icu4c/source/test/intltest/dtfmttst.cpp		patch \| blob \| history
icu4c/source/test/intltest/dtfmttst.h		patch \| blob \| history